├── .gitignore ├── LICENSE ├── README.md ├── book ├── Makefile ├── book.tex ├── figs │ ├── 200px-Binary_search_tree-svg.png │ ├── Binary_search_tree_1229.png │ ├── DOMinspector.png │ ├── DOMtree01.png │ ├── bst.odg │ ├── bst.pdf │ ├── dom_tree1.odg │ ├── dom_tree1.pdf │ ├── figure01small.png │ ├── figure02.png │ ├── figure02small.png │ ├── figure03.png │ ├── figure03small.png │ ├── figure04.png │ ├── figure04small.png │ ├── figure1.png │ ├── figure2.png │ ├── figure3.png │ ├── hashtable.png │ ├── index.odg │ ├── index.pdf │ ├── linked_list1.odg │ ├── linked_list1.pdf │ ├── linked_list_diagram1.png │ ├── merge_sort1.odg │ ├── merge_sort1.pdf │ ├── merge_sort2.odg │ ├── merge_sort2.pdf │ ├── profile1.png │ ├── profile2.png │ ├── profile3.png │ ├── profile4.png │ ├── radix_sort1.odg │ ├── radix_sort1.pdf │ ├── tower.odg │ ├── tower.pdf │ ├── towers.pdf │ ├── yuml1.pdf │ ├── yuml1.png │ └── yuml2.pdf ├── footer.html └── header.html ├── code ├── build.xml ├── lib │ ├── ant-junit.jar │ ├── commons-math3-3.6.jar │ ├── hamcrest-core-1.3.jar │ ├── hamcrest-library-1.3.jar │ ├── jcommon-1.0.23.jar │ ├── jedis-2.8.0.jar │ ├── jfreechart-1.0.19.jar │ ├── jsoup-1.8.3.jar │ ├── junit-4.12.jar │ └── servlet.jar └── src │ └── com │ └── allendowney │ └── thinkdast │ ├── Card.java │ ├── HelloJsoup.java │ ├── Index.java │ ├── IndexTest.java │ ├── JedisIndex.java │ ├── JedisIndexTest.java │ ├── JedisMaker.java │ ├── JedisTermCounter.java │ ├── JedisTermCounterTest.java │ ├── LinkedListExample.java │ ├── ListClientExample.java │ ├── ListClientExampleTest.java │ ├── ListLinks.java │ ├── ListNode.java │ ├── ListSorter.java │ ├── ListSorterTest.java │ ├── MyArrayList.java │ ├── MyArrayListTest.java │ ├── MyBetterMap.java │ ├── MyBetterMapTest.java │ ├── MyFixedHashMap.java │ ├── MyFixedHashMapTest.java │ ├── MyHashMap.java │ ├── MyHashMapTest.java │ ├── MyLinearMap.java │ ├── MyLinearMapTest.java │ ├── MyLinkedList.java │ ├── MyLinkedListTest.java │ ├── MyTreeMap.java │ ├── 
MyTreeMapExample.java │ ├── MyTreeMapTest.java │ ├── Page.java │ ├── ProfileListAdd.java │ ├── ProfileMapPut.java │ ├── Profiler.java │ ├── SelectionSort.java │ ├── SillyArray.java │ ├── SillyString.java │ ├── TermCounter.java │ ├── TermCounterTest.java │ ├── WikiCrawler.java │ ├── WikiCrawlerTest.java │ ├── WikiFetcher.java │ ├── WikiNodeExample.java │ ├── WikiNodeIterable.java │ ├── WikiParser.java │ ├── WikiParserTest.java │ ├── WikiPhilosophy.java │ ├── WikiPhilosophyTest.java │ ├── WikiSearch.java │ └── WikiSearchTest.java └── solutions ├── build.xml ├── lib ├── ant-junit.jar ├── commons-math3-3.6.jar ├── hamcrest-core-1.3.jar ├── hamcrest-library-1.3.jar ├── jcommon-1.0.23.jar ├── jedis-2.8.0.jar ├── jfreechart-1.0.19.jar ├── jsoup-1.8.3.jar ├── junit-4.12.jar └── servlet.jar └── src ├── com └── allendowney │ └── thinkdast │ ├── Card.java │ ├── HelloJsoup.java │ ├── Index.java │ ├── IndexTest.java │ ├── JedisIndex.java │ ├── JedisIndexTest.java │ ├── JedisMaker.java │ ├── JedisTermCounter.java │ ├── JedisTermCounterTest.java │ ├── LinkedListExample.java │ ├── ListClientExample.java │ ├── ListClientExampleTest.java │ ├── ListLinks.java │ ├── ListNode.java │ ├── ListSorter.java │ ├── ListSorterTest.java │ ├── MyArrayList.java │ ├── MyArrayListTest.java │ ├── MyBetterMap.java │ ├── MyBetterMapTest.java │ ├── MyFixedHashMap.java │ ├── MyFixedHashMapTest.java │ ├── MyHashMap.java │ ├── MyHashMapTest.java │ ├── MyLinearMap.java │ ├── MyLinearMapTest.java │ ├── MyLinkedList.java │ ├── MyLinkedListTest.java │ ├── MyTreeMap.java │ ├── MyTreeMapExample.java │ ├── MyTreeMapTest.java │ ├── Page.java │ ├── ProfileListAdd.java │ ├── ProfileMapPut.java │ ├── Profiler.java │ ├── SelectionSort.java │ ├── SillyArray.java │ ├── SillyString.java │ ├── TermCounter.java │ ├── TermCounterTest.java │ ├── WikiCrawler.java │ ├── WikiCrawlerTest.java │ ├── WikiFetcher.java │ ├── WikiNodeExample.java │ ├── WikiNodeIterable.java │ ├── WikiParser.java │ ├── WikiParserTest.java │ ├── 
WikiPhilosophy.java │ ├── WikiPhilosophyTest.java │ ├── WikiSearch.java │ └── WikiSearchTest.java └── resources └── en.wikipedia.org └── wiki ├── Awareness ├── Computer_science ├── Concurrent_computing ├── Consciousness ├── Java_(Programming_Language) ├── Java_(programming_language) ├── Knowledge ├── Mathematics ├── Modern_philosophy ├── Philosophy ├── Programming_language ├── Property_(philosophy) ├── Quality_(philosophy) └── Science /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Allen Downey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ThinkDataStructures 2 | 3 | LaTeX source and supporting code for *Think Data Structures: Algorithms and Information Retrieval in Java* 4 | 5 | 6 | Data structures and algorithms are among the most important inventions 7 | of the last 50 years, and they are fundamental tools 8 | software engineers need to know. But in my opinion, most of the books 9 | on these topics are too theoretical, too big, and too bottom-up: 10 | 11 | * Too theoretical: Mathematical analysis of algorithms is based 12 | on simplifying assumptions that limit its usefulness in practice. 13 | Many presentations of this topic gloss over the simplifications and 14 | focus on the math. In this book I present the most practical subset 15 | of this material and eliminate the rest. 16 | 17 | * Too big: Most books on these topics are at least 500 pages, 18 | and some are more than 1000. By focusing on the topics I think are 19 | most useful for software engineers, I kept this book under 20 | 250 pages. 21 | 22 | * Too bottom-up: Many data structures books focus on how data 23 | structures work (the implementations), with less about how to use 24 | them (the interfaces). In this book, I go "top down", starting 25 | with the interfaces. Readers learn to use the structures in the 26 | Java Collections Framework before getting into the details of how 27 | they work. 28 | 29 | Finally, many present this material out of context and without 30 | motivation: it's just one damn data structure after another! 
31 | 32 | I try to alleviate the boredom by organizing the topics around an 33 | application -- web search -- that uses data structures extensively, 34 | and is an interesting and important topic in its own right. 35 | 36 | This application also motivates some topics that are not usually 37 | covered in an introductory data structures class, including persistent 38 | data structures, with Redis, and streaming algorithms. 39 | 40 | I have made difficult decisions about what to leave out, but 41 | I have made some compromises. I include a few topics 42 | that most readers will never use, but that they might be expected to 43 | know, possibly in a technical interview. For these topics, I 44 | present both the conventional wisdom and my reasons to be 45 | skeptical. 46 | 47 | This book also presents basic aspects of software engineering practice, 48 | including version control and unit testing. Each chapter ends with 49 | an exercise that allows readers to apply what they have learned. 50 | Each exercise includes automated tests that check the solution. 51 | And for most exercises, I present my solution at the beginning of 52 | the next chapter. 53 | 54 | This book is intended for college students in computer science and related 55 | fields, as well as professional software engineers, people training in 56 | software engineering, and people preparing for technical interviews. 57 | 58 | I assume that the reader knows Java at an intermediate level, 59 | but I explain some Java features along the way, and provide pointers 60 | to supplementary material. 61 | 62 | People who have read *Think Java* or *Head First 63 | Java* are prepared for this book. 
64 | -------------------------------------------------------------------------------- /book/Makefile: -------------------------------------------------------------------------------- 1 | F = thinkdast 2 | 3 | all: 4 | pdflatex book 5 | makeindex book.idx # shouldn't need .idx here, but we do 6 | pdflatex book 7 | evince book.pdf 8 | 9 | hevea: book.tex header.html footer.html 10 | cp book.tex $(F).tex 11 | rm -rf html 12 | mkdir html 13 | hevea -O -exec xxdate.exe -e latexonly htmlonly $(F).tex 14 | imagen -png -pdf $(F) 15 | hacha $(F).html 16 | cp up.png next.png back.png html 17 | # TODO: might have to bring back $(F).png 18 | mv -i index.html $(F).css $(F)*.html html 19 | rm *motif.gif 20 | 21 | plastex: 22 | # Before running plastex, we need the current directory in PYTHONPATH 23 | # export PYTHONPATH=$PYTHONPATH:. 24 | python preprocess.py book.tex > $(F).plastex 25 | plastex --renderer=DocBook --theme=book --image-resolution=300 --filename=$(F).xml $(F).plastex 26 | cd $(F); python ../postprocess.py $(F).xml > temp; mv temp $(F).xml 27 | 28 | xxe: 29 | xmlcopyeditor ~/ThinkDataStructures/$(F)/$(F).xml & 30 | 31 | lint: 32 | xmllint -noout $(F)/$(F).xml 33 | 34 | oreilly: 35 | rsync -a $(F)/$(F).xml atlas 36 | rsync -a figs/*.pdf atlas/figs/ 37 | rsync -a figs/*.png atlas/figs/ 38 | cd atlas; git add $(F).xml figs/* 39 | cd atlas; git commit -m "Automated check in." 
40 | cd atlas; git push 41 | 42 | DEST = /home/downey/public_html/greent/$(F) 43 | 44 | distrib: 45 | rm -rf dist 46 | mkdir dist 47 | cp book.pdf $(F).pdf 48 | rsync -a $(F).pdf html dist 49 | rsync -a dist/* $(DEST) 50 | chmod -R o+r $(DEST)/* 51 | cd $(DEST)/..; sh back 52 | -------------------------------------------------------------------------------- /book/figs/200px-Binary_search_tree-svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/200px-Binary_search_tree-svg.png -------------------------------------------------------------------------------- /book/figs/Binary_search_tree_1229.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/Binary_search_tree_1229.png -------------------------------------------------------------------------------- /book/figs/DOMinspector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/DOMinspector.png -------------------------------------------------------------------------------- /book/figs/DOMtree01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/DOMtree01.png -------------------------------------------------------------------------------- /book/figs/bst.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/bst.odg 
-------------------------------------------------------------------------------- /book/figs/bst.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/bst.pdf -------------------------------------------------------------------------------- /book/figs/dom_tree1.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/dom_tree1.odg -------------------------------------------------------------------------------- /book/figs/dom_tree1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/dom_tree1.pdf -------------------------------------------------------------------------------- /book/figs/figure01small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure01small.png -------------------------------------------------------------------------------- /book/figs/figure02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure02.png -------------------------------------------------------------------------------- /book/figs/figure02small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure02small.png -------------------------------------------------------------------------------- 
/book/figs/figure03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure03.png -------------------------------------------------------------------------------- /book/figs/figure03small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure03small.png -------------------------------------------------------------------------------- /book/figs/figure04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure04.png -------------------------------------------------------------------------------- /book/figs/figure04small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure04small.png -------------------------------------------------------------------------------- /book/figs/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure1.png -------------------------------------------------------------------------------- /book/figs/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure2.png -------------------------------------------------------------------------------- /book/figs/figure3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/figure3.png -------------------------------------------------------------------------------- /book/figs/hashtable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/hashtable.png -------------------------------------------------------------------------------- /book/figs/index.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/index.odg -------------------------------------------------------------------------------- /book/figs/index.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/index.pdf -------------------------------------------------------------------------------- /book/figs/linked_list1.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/linked_list1.odg -------------------------------------------------------------------------------- /book/figs/linked_list1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/linked_list1.pdf -------------------------------------------------------------------------------- /book/figs/linked_list_diagram1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/linked_list_diagram1.png -------------------------------------------------------------------------------- /book/figs/merge_sort1.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/merge_sort1.odg -------------------------------------------------------------------------------- /book/figs/merge_sort1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/merge_sort1.pdf -------------------------------------------------------------------------------- /book/figs/merge_sort2.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/merge_sort2.odg -------------------------------------------------------------------------------- /book/figs/merge_sort2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/merge_sort2.pdf -------------------------------------------------------------------------------- /book/figs/profile1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/profile1.png -------------------------------------------------------------------------------- /book/figs/profile2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/profile2.png -------------------------------------------------------------------------------- /book/figs/profile3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/profile3.png -------------------------------------------------------------------------------- /book/figs/profile4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/profile4.png -------------------------------------------------------------------------------- /book/figs/radix_sort1.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/radix_sort1.odg -------------------------------------------------------------------------------- /book/figs/radix_sort1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/radix_sort1.pdf -------------------------------------------------------------------------------- /book/figs/tower.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/tower.odg -------------------------------------------------------------------------------- /book/figs/tower.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/tower.pdf -------------------------------------------------------------------------------- /book/figs/towers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/towers.pdf -------------------------------------------------------------------------------- /book/figs/yuml1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/yuml1.pdf -------------------------------------------------------------------------------- /book/figs/yuml1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/yuml1.png -------------------------------------------------------------------------------- /book/figs/yuml2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/book/figs/yuml2.pdf -------------------------------------------------------------------------------- /book/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 |
6 | 9 | 10 |

This HTML version of Think Data Structures is provided for convenience, but it is not the best format of the book. 11 | In particular, some of the symbols are not rendered correctly.

12 | 13 |

You might prefer to read the PDF version. 14 | 15 |

Or you can buy this book on Amazon.com. 16 | -------------------------------------------------------------------------------- /code/lib/ant-junit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/ant-junit.jar -------------------------------------------------------------------------------- /code/lib/commons-math3-3.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/commons-math3-3.6.jar -------------------------------------------------------------------------------- /code/lib/hamcrest-core-1.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/hamcrest-core-1.3.jar -------------------------------------------------------------------------------- /code/lib/hamcrest-library-1.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/hamcrest-library-1.3.jar -------------------------------------------------------------------------------- /code/lib/jcommon-1.0.23.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/jcommon-1.0.23.jar -------------------------------------------------------------------------------- /code/lib/jedis-2.8.0.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/jedis-2.8.0.jar -------------------------------------------------------------------------------- /code/lib/jfreechart-1.0.19.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/jfreechart-1.0.19.jar -------------------------------------------------------------------------------- /code/lib/jsoup-1.8.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/jsoup-1.8.3.jar -------------------------------------------------------------------------------- /code/lib/junit-4.12.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/junit-4.12.jar -------------------------------------------------------------------------------- /code/lib/servlet.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/code/lib/servlet.jar -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/Card.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.List; 7 | 8 | 9 | /** 10 | * Represents a playing card. 
11 | * 12 | */ 13 | public class Card implements Comparable { 14 | 15 | // string representations of ranks 16 | public static final String[] RANKS = { 17 | null, "Ace", "2", "3", "4", "5", "6", "7", 18 | "8", "9", "10", "Jack", "Queen", "King"}; 19 | 20 | // string representations of suits 21 | public static final String[] SUITS = { 22 | "Clubs", "Diamonds", "Hearts", "Spades"}; 23 | 24 | // rank and suit are instance variables 25 | private final int rank; 26 | private final int suit; 27 | 28 | /** 29 | * Constructs a card of the given rank and suit. 30 | */ 31 | public Card(int rank, int suit) { 32 | this.rank = rank; 33 | this.suit = suit; 34 | } 35 | 36 | /** 37 | * Gets the card's rank. 38 | */ 39 | public int getRank() { 40 | return this.rank; 41 | } 42 | 43 | /** 44 | * Gets the card's suit. 45 | */ 46 | public int getSuit() { 47 | return this.suit; 48 | } 49 | 50 | /** 51 | * Returns a string representation of the card. 52 | */ 53 | public String toString() { 54 | return RANKS[this.rank] + " of " + SUITS[this.suit]; 55 | } 56 | 57 | /** 58 | * Returns a negative integer if this card comes before 59 | * the given card, zero if the two cards are equal, or 60 | * a positive integer if this card comes after the card. 61 | */ 62 | public int compareTo(Card that) { 63 | if (this.suit < that.suit) { 64 | return -1; 65 | } 66 | if (this.suit > that.suit) { 67 | return 1; 68 | } 69 | if (this.rank < that.rank) { 70 | return -1; 71 | } 72 | if (this.rank > that.rank) { 73 | return 1; 74 | } 75 | return 0; 76 | } 77 | 78 | /** 79 | * Returns true if the given card has the same 80 | * rank AND same suit; otherwise returns false. 81 | */ 82 | public boolean equals(Card that) { 83 | return this.rank == that.rank 84 | && this.suit == that.suit; 85 | } 86 | 87 | /** 88 | * Make a List of 52 cards. 
89 | */ 90 | public static List makeDeck() { 91 | List cards = new ArrayList(); 92 | for (int suit = 0; suit <= 3; suit++) { 93 | for (int rank = 1; rank <= 13; rank++) { 94 | Card card = new Card(rank, suit); 95 | cards.add(card); 96 | } 97 | } 98 | return cards; 99 | } 100 | 101 | /** 102 | * Demonstrates how to call the search methods. 103 | */ 104 | public static void main(String[] args) { 105 | 106 | // sort the cards using the natural ordering 107 | List cards = makeDeck(); 108 | Collections.sort(cards); 109 | System.out.println(cards.get(0)); 110 | System.out.println(cards.get(51)); 111 | 112 | Comparator comparator = new Comparator() { 113 | @Override 114 | public int compare(Card card1, Card card2) { 115 | if (card1.getSuit() < card2.getSuit()) { 116 | return -1; 117 | } 118 | if (card1.getSuit() > card2.getSuit()) { 119 | return 1; 120 | } 121 | int rank1 = getRankAceHigh(card1); 122 | int rank2 = getRankAceHigh(card2); 123 | 124 | if (rank1 < rank2) { 125 | return -1; 126 | } 127 | if (rank1 > rank2) { 128 | return 1; 129 | } 130 | return 0; 131 | } 132 | 133 | private int getRankAceHigh(Card card) { 134 | int rank = card.getRank(); 135 | if (rank == 1) { 136 | return 14; 137 | } else { 138 | return rank; 139 | } 140 | } 141 | }; 142 | 143 | // sort the cards using an external comparator 144 | Collections.sort(cards, comparator); 145 | System.out.println(cards.get(0)); 146 | System.out.println(cards.get(51)); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/HelloJsoup.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | /** 12 | * Example program to list links from a URL. 
13 | * 14 | * From: http://jsoup.org/cookbook/extracting-data/example-list-links 15 | */ 16 | public class HelloJsoup { 17 | 18 | public static void main(String[] args) throws IOException { 19 | 20 | String dirname = System.getProperty("user.dir"); 21 | String filename = "src/resources/en.wikipedia.org/wiki/Computer_science"; 22 | String baseURI = dirname + "/" + filename; 23 | 24 | File input = new File(baseURI); 25 | Document doc = Jsoup.parse(input, "UTF-8", baseURI); 26 | 27 | //print("Fetching %s...", url); 28 | //Document doc = Jsoup.connect(url).get(); 29 | 30 | Element content = doc.getElementById("mw-content-text"); 31 | Elements paragraphs = content.getElementsByTag("p"); 32 | 33 | 34 | for (Element p : paragraphs) { 35 | Elements links = p.select("a[href]"); 36 | for (Element link : links) { 37 | print(" * a: <%s> (%s)", link.attr("href"), trim(link.text(), 35)); 38 | } 39 | break; 40 | } 41 | 42 | 43 | // Elements media = doc.select("[src]"); 44 | // Elements imports = doc.select("link[href]"); 45 | // 46 | // print("\nMedia: (%d)", media.size()); 47 | // for (Element src : media) { 48 | // if (src.tagName().equals("img")) { 49 | // print(" * %s: <%s> %sx%s (%s)", 50 | // src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"), 51 | // trim(src.attr("alt"), 20)); 52 | // } else { 53 | // print(" * %s: <%s>", src.tagName(), src.attr("abs:src")); 54 | // } 55 | // } 56 | // 57 | // print("\nImports: (%d)", imports.size()); 58 | // for (Element link : imports) { 59 | // print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel")); 60 | // } 61 | // 62 | // print("\nLinks: (%d)", links.size()); 63 | // for (Element link : links) { 64 | // print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35)); 65 | // break; 66 | // } 67 | } 68 | 69 | private static void print(String msg, Object... 
args) { 70 | System.out.println(String.format(msg, args)); 71 | } 72 | 73 | private static String trim(String s, int width) { 74 | if (s.length() > width) 75 | return s.substring(0, width-1) + "."; 76 | else 77 | return s; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/Index.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.Set; 7 | import java.util.HashSet; 8 | 9 | import org.jsoup.select.Elements; 10 | 11 | /** 12 | * Encapsulates a map from search term to set of TermCounter. 13 | * 14 | * @author downey 15 | * 16 | */ 17 | public class Index { 18 | 19 | private Map> index = new HashMap>(); 20 | 21 | /** 22 | * Adds a TermCounter to the set associated with `term`. 23 | * 24 | * @param term 25 | * @param tc 26 | */ 27 | public void add(String term, TermCounter tc) { 28 | Set set = get(term); 29 | 30 | // if we're seeing a term for the first time, make a new Set 31 | if (set == null) { 32 | set = new HashSet(); 33 | index.put(term, set); 34 | } 35 | // otherwise we can modify an existing Set 36 | set.add(tc); 37 | } 38 | 39 | /** 40 | * Looks up a search term and returns a set of TermCounters. 41 | * 42 | * @param term 43 | * @return 44 | */ 45 | public Set get(String term) { 46 | return index.get(term); 47 | } 48 | 49 | /** 50 | * Prints the contents of the index. 
51 | */ 52 | public void printIndex() { 53 | // loop through the search terms 54 | for (String term: keySet()) { 55 | System.out.println(term); 56 | 57 | // for each term, print the pages where it appears 58 | Set tcs = get(term); 59 | for (TermCounter tc: tcs) { 60 | Integer count = tc.get(term); 61 | System.out.println(" " + tc.getLabel() + " " + count); 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Returns the set of terms that have been indexed. 68 | * 69 | * @return 70 | */ 71 | public Set keySet() { 72 | return index.keySet(); 73 | } 74 | 75 | /** 76 | * Add a page to the index. 77 | * 78 | * @param url URL of the page. 79 | * @param paragraphs Collection of elements that should be indexed. 80 | */ 81 | public void indexPage(String url, Elements paragraphs) { 82 | // TODO: Your code here 83 | 84 | // make a TermCounter and count the terms in the paragraphs 85 | 86 | // for each term in the TermCounter, add the TermCounter to the index 87 | } 88 | 89 | /** 90 | * @param args 91 | * @throws IOException 92 | */ 93 | public static void main(String[] args) throws IOException { 94 | 95 | WikiFetcher wf = new WikiFetcher(); 96 | Index indexer = new Index(); 97 | 98 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 99 | Elements paragraphs = wf.fetchWikipedia(url); 100 | indexer.indexPage(url, paragraphs); 101 | 102 | url = "https://en.wikipedia.org/wiki/Programming_language"; 103 | paragraphs = wf.fetchWikipedia(url); 104 | indexer.indexPage(url, paragraphs); 105 | 106 | indexer.printIndex(); 107 | } 108 | } -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/IndexTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.*; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.io.IOException; 7 | import java.util.Set; 8 | 9 | import 
org.jsoup.select.Elements; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | public class IndexTest { 14 | 15 | private Index index; 16 | private WikiFetcher wf; 17 | 18 | @Before 19 | public void setUp() { 20 | wf = new WikiFetcher(); 21 | index = new Index(); 22 | } 23 | 24 | @Test 25 | public void testIndexPage() throws IOException { 26 | // add two pages to the index 27 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 28 | Elements paragraphs = wf.readWikipedia(url); 29 | index.indexPage(url, paragraphs); 30 | 31 | url = "https://en.wikipedia.org/wiki/Programming_language"; 32 | paragraphs = wf.readWikipedia(url); 33 | index.indexPage(url, paragraphs); 34 | 35 | // check the results: the word "occur" only appears on one page, twice 36 | Set set = index.get("occur"); 37 | assertThat(set.size(), is(1)); 38 | 39 | for (TermCounter tc: set) { 40 | // this loop only happens once 41 | assertThat(tc.size(), is(4798)); 42 | assertThat(tc.get("occur"), is(2)); 43 | assertThat(tc.get("not there"), is(0)); 44 | } 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/JedisIndexTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.*; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.io.IOException; 7 | import java.util.Map; 8 | 9 | import org.jsoup.select.Elements; 10 | import org.junit.After; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | import redis.clients.jedis.Jedis; 15 | 16 | /** 17 | * @author downey 18 | * 19 | */ 20 | public class JedisIndexTest { 21 | 22 | private static String url1, url2; 23 | private Jedis jedis; 24 | private JedisIndex index; 25 | 26 | /** 27 | * @throws java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | jedis = 
JedisMaker.make(); 32 | index = new JedisIndex(jedis); 33 | 34 | loadIndex(index); 35 | } 36 | 37 | /** 38 | * Loads the index with two pages read from files. 39 | * 40 | * @return 41 | * @throws IOException 42 | */ 43 | private static void loadIndex(JedisIndex index) throws IOException { 44 | WikiFetcher wf = new WikiFetcher(); 45 | 46 | url1 = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 47 | Elements paragraphs = wf.readWikipedia(url1); 48 | index.indexPage(url1, paragraphs); 49 | 50 | url2 = "https://en.wikipedia.org/wiki/Programming_language"; 51 | paragraphs = wf.readWikipedia(url2); 52 | index.indexPage(url2, paragraphs); 53 | } 54 | 55 | /** 56 | * @throws java.lang.Exception 57 | */ 58 | @After 59 | public void tearDown() throws Exception { 60 | jedis.close(); 61 | } 62 | 63 | /** 64 | * Test method for {@link JedisIndex#getCounts(java.lang.String)}. 65 | */ 66 | @Test 67 | public void testGetCounts() { 68 | Map map = index.getCounts("the"); 69 | assertThat(map.get(url1), is(339)); 70 | assertThat(map.get(url2), is(264)); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/JedisMaker.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import java.net.URL; 11 | import java.net.URLDecoder; 12 | 13 | import redis.clients.jedis.Jedis; 14 | 15 | 16 | public class JedisMaker { 17 | 18 | /** 19 | * Make a Jedis object and authenticate it. 
20 | * 21 | * @return 22 | * @throws IOException 23 | */ 24 | public static Jedis make() throws IOException { 25 | 26 | // assemble the directory name 27 | String slash = File.separator; 28 | String filename = "resources" + slash + "redis_url.txt"; 29 | URL fileURL = JedisMaker.class.getClassLoader().getResource(filename); 30 | String filepath = URLDecoder.decode(fileURL.getFile(), "UTF-8"); 31 | 32 | // open the file 33 | StringBuilder sb = new StringBuilder(); 34 | BufferedReader br; 35 | try { 36 | br = new BufferedReader(new FileReader(filepath)); 37 | } catch (FileNotFoundException e1) { 38 | System.out.println("File not found: " + filename); 39 | printInstructions(); 40 | return null; 41 | } 42 | 43 | // read the file 44 | while (true) { 45 | String line = br.readLine(); 46 | if (line == null) break; 47 | sb.append(line); 48 | } 49 | br.close(); 50 | 51 | // parse the URL 52 | URI uri; 53 | try { 54 | uri = new URI(sb.toString()); 55 | } catch (URISyntaxException e) { 56 | System.out.println("Reading file: " + filename); 57 | System.out.println("It looks like this file does not contain a valid URI."); 58 | printInstructions(); 59 | return null; 60 | } 61 | String host = uri.getHost(); 62 | int port = uri.getPort(); 63 | 64 | String[] array = uri.getAuthority().split("[:@]"); 65 | String auth = array[1]; 66 | 67 | // connect to the server 68 | Jedis jedis = new Jedis(host, port); 69 | 70 | try { 71 | jedis.auth(auth); 72 | } catch (Exception e) { 73 | System.out.println("Trying to connect to " + host); 74 | System.out.println("on port " + port); 75 | System.out.println("with authcode " + auth); 76 | System.out.println("Got exception " + e); 77 | printInstructions(); 78 | return null; 79 | } 80 | return jedis; 81 | } 82 | 83 | 84 | /** 85 | * 86 | */ 87 | private static void printInstructions() { 88 | System.out.println(""); 89 | System.out.println("To connect to RedisToGo, you have to provide a file called"); 90 | System.out.println("redis_url.txt that 
contains the URL of your Redis server."); 91 | System.out.println("If you select an instance on the RedisToGo web page,"); 92 | System.out.println("you should see a URL that contains the information you need:"); 93 | System.out.println("redis://redistogo:AUTH@HOST:PORT"); 94 | System.out.println("Create a file called redis_url.txt in the src/resources"); 95 | System.out.println("directory, and paste in the URL."); 96 | } 97 | 98 | 99 | /** 100 | * @param args 101 | * @throws IOException 102 | */ 103 | public static void main(String[] args) throws IOException { 104 | 105 | Jedis jedis = make(); 106 | 107 | // String 108 | jedis.set("mykey", "myvalue"); 109 | String value = jedis.get("mykey"); 110 | System.out.println("Got value: " + value); 111 | 112 | // Set 113 | jedis.sadd("myset", "element1", "element2", "element3"); 114 | System.out.println("element2 is member: " + jedis.sismember("myset", "element2")); 115 | 116 | // List 117 | jedis.rpush("mylist", "element1", "element2", "element3"); 118 | System.out.println("element at index 1: " + jedis.lindex("mylist", 1)); 119 | 120 | // Hash 121 | jedis.hset("myhash", "word1", Integer.toString(2)); 122 | jedis.hincrBy("myhash", "word2", 1); 123 | System.out.println("frequency of word1: " + jedis.hget("myhash", "word1")); 124 | System.out.println("frequency of word2: " + jedis.hget("myhash", "word2")); 125 | 126 | jedis.close(); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/JedisTermCounter.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | import java.util.Map; 6 | import org.jsoup.select.Elements; 7 | 8 | import redis.clients.jedis.Jedis; 9 | import redis.clients.jedis.Transaction; 10 | 11 | 12 | /** 13 | * Encapsulates a map from search term to frequency (count). 
14 | * 15 | * @author downey 16 | * 17 | */ 18 | public class JedisTermCounter extends TermCounter { 19 | 20 | public JedisTermCounter(String label) { 21 | super(label); 22 | } 23 | 24 | /** 25 | * 26 | * @return 27 | */ 28 | public List pushToRedis(Jedis jedis) { 29 | Transaction t = jedis.multi(); 30 | 31 | String hashname = hashName(); 32 | t.del(hashname); 33 | 34 | for (String key: keySet()) { 35 | Integer count = get(key); 36 | t.hset(hashname, key, count.toString()); 37 | } 38 | List res = t.exec(); 39 | return res; 40 | } 41 | 42 | /** 43 | * Returns the Redis key for this TermCounter. 44 | * 45 | * @return 46 | */ 47 | private String hashName() { 48 | return "TermCounter:" + getLabel(); 49 | } 50 | 51 | public Map pullFromRedis(Jedis jedis) { 52 | Map result = jedis.hgetAll(hashName()); 53 | return result; 54 | } 55 | 56 | /** 57 | * @param args 58 | * @throws IOException 59 | */ 60 | public static void main(String[] args) throws IOException { 61 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 62 | 63 | WikiFetcher wf = new WikiFetcher(); 64 | Elements paragraphs = wf.fetchWikipedia(url); 65 | JedisTermCounter counter = new JedisTermCounter(url.toString()); 66 | counter.processElements(paragraphs); 67 | 68 | Jedis jedis = JedisMaker.make(); 69 | 70 | counter.pushToRedis(jedis); 71 | System.out.println("Done pushing."); 72 | 73 | Map map = counter.pullFromRedis(jedis); 74 | for (Map.Entry entry: map.entrySet()) { 75 | System.out.println(entry.getKey() + ", " + entry.getValue()); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/JedisTermCounterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.util.Map; 10 | 11 | import 
org.jsoup.select.Elements; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import redis.clients.jedis.Jedis; 16 | 17 | /** 18 | * @author downey 19 | * 20 | */ 21 | public class JedisTermCounterTest { 22 | 23 | private Jedis jedis; 24 | private JedisTermCounter counter; 25 | 26 | /** 27 | * @throws java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 32 | 33 | WikiFetcher wf = new WikiFetcher(); 34 | Elements paragraphs = wf.readWikipedia(url); 35 | 36 | jedis = JedisMaker.make(); 37 | 38 | counter = new JedisTermCounter(url.toString()); 39 | counter.processElements(paragraphs); 40 | } 41 | 42 | /** 43 | * Test method for {@link JedisTermCounter#pushToRedis(redis.clients.jedis.Jedis)}. 44 | * @throws 45 | */ 46 | @Test 47 | public void testPushToRedis() { 48 | counter.pushToRedis(jedis); 49 | assertThat(counter.size(), is(4798)); 50 | 51 | Map map = counter.pullFromRedis(jedis); 52 | 53 | assertThat(map.size(), is(1184)); 54 | assertThat(map.get("the"), is("339")); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/LinkedListExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | /** 4 | * @author downey 5 | * 6 | */ 7 | public class LinkedListExample { 8 | 9 | /** 10 | * @param args 11 | */ 12 | public static void main(String[] args) { 13 | ListNode node1 = new ListNode(1); 14 | ListNode node2 = new ListNode(2); 15 | ListNode node3 = new ListNode(3); 16 | 17 | node1.next = node2; 18 | node2.next = node3; 19 | node3.next = null; 20 | 21 | ListNode node0 = new ListNode(0, node1); 22 | System.out.println(node0); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ListClientExample.java: 
-------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | public class ListClientExample { 7 | @SuppressWarnings("rawtypes") 8 | private List list; 9 | 10 | @SuppressWarnings("rawtypes") 11 | public ListClientExample() { 12 | list = new LinkedList(); 13 | } 14 | 15 | @SuppressWarnings("rawtypes") 16 | public List getList() { 17 | return list; 18 | } 19 | 20 | public static void main(String[] args) { 21 | ListClientExample lce = new ListClientExample(); 22 | @SuppressWarnings("rawtypes") 23 | List list = lce.getList(); 24 | System.out.println(list); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ListClientExampleTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.assertThat; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | import org.junit.Test; 10 | 11 | /** 12 | * @author downey 13 | * 14 | */ 15 | public class ListClientExampleTest { 16 | 17 | /** 18 | * Test method for {@link ListClientExample}. 
19 | */ 20 | @Test 21 | public void testListClientExample() { 22 | ListClientExample lce = new ListClientExample(); 23 | @SuppressWarnings("rawtypes") 24 | List list = lce.getList(); 25 | assertThat(list, instanceOf(ArrayList.class) ); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ListLinks.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | /** 12 | * Example program to list links from a URL. 13 | * 14 | * From: http://jsoup.org/cookbook/extracting-data/example-list-links 15 | */ 16 | public class ListLinks { 17 | 18 | public static void main(String[] args) throws IOException { 19 | 20 | String dirname = System.getProperty("user.dir"); 21 | String filename = "src/resources/en.wikipedia.org/wiki/Computer_science"; 22 | String baseURI = dirname + "/" + filename; 23 | 24 | File input = new File(baseURI); 25 | Document doc = Jsoup.parse(input, "UTF-8", baseURI); 26 | 27 | //print("Fetching %s...", url); 28 | //Document doc = Jsoup.connect(url).get(); 29 | 30 | Element content = doc.getElementById("mw-content-text"); 31 | Elements paragraphs = content.getElementsByTag("p"); 32 | 33 | 34 | for (Element p : paragraphs) { 35 | Elements links = p.select("a[href]"); 36 | for (Element link : links) { 37 | print(" * a: <%s> (%s)", link.attr("href"), trim(link.text(), 35)); 38 | } 39 | break; 40 | } 41 | 42 | 43 | // Elements media = doc.select("[src]"); 44 | // Elements imports = doc.select("link[href]"); 45 | // 46 | // print("\nMedia: (%d)", media.size()); 47 | // for (Element src : media) { 48 | // if (src.tagName().equals("img")) { 49 | // print(" * %s: <%s> %sx%s (%s)", 50 | // src.tagName(), 
src.attr("abs:src"), src.attr("width"), src.attr("height"), 51 | // trim(src.attr("alt"), 20)); 52 | // } else { 53 | // print(" * %s: <%s>", src.tagName(), src.attr("abs:src")); 54 | // } 55 | // } 56 | // 57 | // print("\nImports: (%d)", imports.size()); 58 | // for (Element link : imports) { 59 | // print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel")); 60 | // } 61 | // 62 | // print("\nLinks: (%d)", links.size()); 63 | // for (Element link : links) { 64 | // print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35)); 65 | // break; 66 | // } 67 | } 68 | 69 | private static void print(String msg, Object... args) { 70 | System.out.println(String.format(msg, args)); 71 | } 72 | 73 | private static String trim(String s, int width) { 74 | if (s.length() > width) 75 | return s.substring(0, width-1) + "."; 76 | else 77 | return s; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ListNode.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | /** 4 | * @author downey 5 | * 6 | */ 7 | public class ListNode { 8 | 9 | public Object data; 10 | public ListNode next; 11 | 12 | public ListNode() { 13 | this.data = null; 14 | this.next = null; 15 | } 16 | 17 | public ListNode(Object data) { 18 | this.data = data; 19 | this.next = null; 20 | } 21 | 22 | public ListNode(Object data, ListNode next) { 23 | this.data = data; 24 | this.next = next; 25 | } 26 | 27 | public String toString() { 28 | return "ListNode(" + data.toString() + ")"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ListSorterTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.*; 4 | import static 
org.hamcrest.CoreMatchers.*; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Arrays; 8 | import java.util.Comparator; 9 | import java.util.List; 10 | 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | /** 15 | * @author downey 16 | * 17 | */ 18 | public class ListSorterTest { 19 | 20 | private ListSorter sorter; 21 | private Comparator comparator; 22 | 23 | /** 24 | * @throws java.lang.Exception 25 | */ 26 | @Before 27 | public void setUp() throws Exception { 28 | 29 | comparator = new Comparator() { 30 | @Override 31 | public int compare(Integer elt1, Integer elt2) { 32 | return elt1.compareTo(elt2); 33 | } 34 | }; 35 | 36 | sorter = new ListSorter(); 37 | } 38 | 39 | /** 40 | * Test method for {@link ListSorter#insertionSort(java.util.List, java.util.Comparator)}. 41 | */ 42 | @Test 43 | public void testInsertionSort() { 44 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 45 | sorter.insertionSort(list, comparator); 46 | isSorted(list); 47 | } 48 | 49 | /** 50 | * @param list 51 | * 52 | */ 53 | private void isSorted(List list) { 54 | assertThat(list.size(), is(5)); 55 | assertThat(list.get(0), is(1)); 56 | assertThat(list.get(1), is(2)); 57 | assertThat(list.get(2), is(3)); 58 | assertThat(list.get(3), is(4)); 59 | assertThat(list.get(4), is(5)); 60 | } 61 | 62 | /** 63 | * Test method for {@link mergeSortInPlace(java.util.List, java.util.Comparator)}. 64 | */ 65 | @Test 66 | public void testMergeSortInPlace() { 67 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 68 | sorter.mergeSortInPlace(list, comparator); 69 | isSorted(list); 70 | } 71 | 72 | /** 73 | * Test method for {@link mergeSort(java.util.List, java.util.Comparator)}. 
74 | */ 75 | @Test 76 | public void testMergeSort() { 77 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 78 | List sorted = sorter.mergeSort(list, comparator); 79 | isSorted(sorted); 80 | } 81 | 82 | /** 83 | * Test method for {@link heapSort(java.util.List, java.util.Comparator)}. 84 | */ 85 | @Test 86 | public void testHeapSort() { 87 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 88 | sorter.heapSort(list, comparator); 89 | isSorted(list); 90 | } 91 | 92 | /** 93 | * Test method for {@link topK(int, java.util.List, java.util.Comparator)}. 94 | */ 95 | @Test 96 | public void testTopK() { 97 | List list = new ArrayList(Arrays.asList(6, 3, 5, 8, 1, 4, 2, 7)); 98 | 99 | List res = sorter.topK(4, list, comparator); 100 | assertThat(res.size(), is(4)); 101 | assertThat(res.get(0), is(5)); 102 | assertThat(res.get(1), is(6)); 103 | assertThat(res.get(2), is(7)); 104 | assertThat(res.get(3), is(8)); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyBetterMap.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashSet; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * Implementation of a Map using a collection of MyLinearMap, and 12 | * using `hashCode` to determine which map each key should go in. 13 | * 14 | * @author downey 15 | * @param 16 | * @param 17 | * 18 | */ 19 | public class MyBetterMap implements Map { 20 | 21 | // MyBetterMap uses a collection of MyLinearMap 22 | protected List> maps; 23 | 24 | /** 25 | * Initialize the map with 2 sub-maps. 
26 | * 27 | */ 28 | public MyBetterMap() { 29 | makeMaps(2); 30 | } 31 | 32 | /** 33 | * Makes a collection of `k` MyLinearMap 34 | * 35 | * @param k 36 | */ 37 | protected void makeMaps(int k) { 38 | maps = new ArrayList>(k); 39 | for (int i=0; i()); 41 | } 42 | } 43 | 44 | @Override 45 | public void clear() { 46 | // clear the sub-maps 47 | for (int i=0; i chooseMap(Object key) { 59 | int index = key==null ? 0 : Math.abs(key.hashCode()) % maps.size(); 60 | return maps.get(index); 61 | } 62 | 63 | @Override 64 | public boolean containsKey(Object target) { 65 | // to find a key, we only have to search one map 66 | // TODO: FILL THIS IN! 67 | return false; 68 | } 69 | 70 | @Override 71 | public boolean containsValue(Object target) { 72 | // to find a value, we have to search all map 73 | // TODO: FILL THIS IN! 74 | return false; 75 | } 76 | 77 | @Override 78 | public Set> entrySet() { 79 | throw new UnsupportedOperationException(); 80 | } 81 | 82 | @Override 83 | public V get(Object key) { 84 | MyLinearMap map = chooseMap(key); 85 | return map.get(key); 86 | } 87 | 88 | @Override 89 | public boolean isEmpty() { 90 | return size() == 0; 91 | } 92 | 93 | @Override 94 | public Set keySet() { 95 | // add up the keySets from the sub-maps 96 | Set set = new HashSet(); 97 | for (MyLinearMap map: maps) { 98 | set.addAll(map.keySet()); 99 | } 100 | return set; 101 | } 102 | 103 | @Override 104 | public V put(K key, V value) { 105 | MyLinearMap map = chooseMap(key); 106 | return map.put(key, value); 107 | } 108 | 109 | @Override 110 | public void putAll(Map map) { 111 | for (Map.Entry entry: map.entrySet()) { 112 | put(entry.getKey(), entry.getValue()); 113 | } 114 | } 115 | 116 | @Override 117 | public V remove(Object key) { 118 | MyLinearMap map = chooseMap(key); 119 | return map.remove(key); 120 | } 121 | 122 | @Override 123 | public int size() { 124 | // add up the sizes of the sub-maps 125 | int total = 0; 126 | for (MyLinearMap map: maps) { 127 | total += map.size(); 
128 | } 129 | return total; 130 | } 131 | 132 | @Override 133 | public Collection values() { 134 | // add up the valueSets from the sub-maps 135 | Set set = new HashSet(); 136 | for (MyLinearMap map: maps) { 137 | set.addAll(map.values()); 138 | } 139 | return set; 140 | } 141 | 142 | /** 143 | * @param args 144 | */ 145 | public static void main(String[] args) { 146 | Map map = new MyBetterMap(); 147 | map.put("Word1", 1); 148 | map.put("Word2", 2); 149 | Integer value = map.get("Word1"); 150 | System.out.println(value); 151 | 152 | for (String key: map.keySet()) { 153 | System.out.println(key + ", " + map.get(key)); 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyBetterMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyBetterMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyBetterMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyFixedHashMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Map; 7 | 8 | /** 9 | * Implementation of a HashMap using a collection of MyLinearMap and 10 | * resizing when there are too many entries. 
11 | * 12 | * @author downey 13 | * @param 14 | * @param 15 | * 16 | */ 17 | public class MyFixedHashMap extends MyHashMap implements Map { 18 | 19 | private int size = 0; 20 | 21 | @Override 22 | public void clear() { 23 | super.clear(); 24 | size = 0; 25 | } 26 | 27 | @Override 28 | public V put(K key, V value) { 29 | MyLinearMap map = chooseMap(key); 30 | size -= map.size(); 31 | V oldValue = map.put(key, value); 32 | size += map.size(); 33 | 34 | if (size() > maps.size() * FACTOR) { 35 | size = 0; 36 | rehash(); 37 | } 38 | return oldValue; 39 | } 40 | 41 | @Override 42 | public V remove(Object key) { 43 | MyLinearMap map = chooseMap(key); 44 | size -= map.size(); 45 | V oldValue = map.remove(key); 46 | size += map.size(); 47 | return oldValue; 48 | } 49 | 50 | @Override 51 | public int size() { 52 | return size; 53 | } 54 | 55 | /** 56 | * @param args 57 | */ 58 | public static void main(String[] args) { 59 | Map map = new MyFixedHashMap(); 60 | for (int i=0; i<10; i++) { 61 | map.put(new Integer(i).toString(), i); 62 | } 63 | Integer value = map.get("3"); 64 | System.out.println(value); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyFixedHashMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyFixedHashMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyFixedHashMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyHashMap.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | /** 10 | * Implementation of a HashMap using a collection of MyLinearMap and 11 | * resizing when there are too many entries. 12 | * 13 | * @author downey 14 | * @param 15 | * @param 16 | * 17 | */ 18 | public class MyHashMap extends MyBetterMap implements Map { 19 | 20 | // average number of entries per map before we rehash 21 | protected static final double FACTOR = 1.0; 22 | 23 | @Override 24 | public V put(K key, V value) { 25 | V oldValue = super.put(key, value); 26 | 27 | //System.out.println("Put " + key + " in " + map + " size now " + map.size()); 28 | 29 | // check if the number of elements per map exceeds the threshold 30 | if (size() > maps.size() * FACTOR) { 31 | rehash(); 32 | } 33 | return oldValue; 34 | } 35 | 36 | /** 37 | * Doubles the number of maps and rehashes the existing entries. 38 | */ 39 | /** 40 | * 41 | */ 42 | protected void rehash() { 43 | // TODO: FILL THIS IN! 
44 | } 45 | 46 | /** 47 | * @param args 48 | */ 49 | public static void main(String[] args) { 50 | Map map = new MyHashMap(); 51 | for (int i=0; i<10; i++) { 52 | map.put(new Integer(i).toString(), i); 53 | } 54 | Integer value = map.get("3"); 55 | System.out.println(value); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyHashMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyHashMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyHashMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyLinearMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Collection; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Set; 12 | 13 | /** 14 | * Implementation of a Map using a List of entries, so most 15 | * operations are linear time. 
16 | * 17 | * @author downey 18 | * @param 19 | * @param 20 | * 21 | */ 22 | public class MyLinearMap implements Map { 23 | 24 | private List entries = new ArrayList(); 25 | 26 | public class Entry implements Map.Entry { 27 | private K key; 28 | private V value; 29 | 30 | public Entry(K key, V value) { 31 | this.key = key; 32 | this.value = value; 33 | } 34 | 35 | @Override 36 | public K getKey() { 37 | return key; 38 | } 39 | @Override 40 | public V getValue() { 41 | return value; 42 | } 43 | @Override 44 | public V setValue(V newValue) { 45 | V oldValue = value; 46 | value = newValue; 47 | return oldValue; // Map.Entry contract: return the value that was replaced 48 | } 49 | } 50 | @Override 51 | public void clear() { 52 | entries.clear(); 53 | } 54 | 55 | @Override 56 | public boolean containsKey(Object target) { 57 | return findEntry(target) != null; 58 | } 59 | 60 | /** 61 | * Returns the entry that contains the target key, or null if there is none. 62 | * 63 | * @param target 64 | */ 65 | private Entry findEntry(Object target) { 66 | // TODO: FILL THIS IN! 67 | return null; 68 | } 69 | 70 | /** 71 | * Compares two keys or two values, handling null correctly. 72 | * 73 | * @param target 74 | * @param obj 75 | * @return 76 | */ 77 | private boolean equals(Object target, Object obj) { 78 | if (target == null) { 79 | return obj == null; 80 | } 81 | return target.equals(obj); 82 | } 83 | 84 | @Override 85 | public boolean containsValue(Object target) { 86 | for (Map.Entry entry: entries) { 87 | if (equals(target, entry.getValue())) { 88 | return true; 89 | } 90 | } 91 | return false; 92 | } 93 | 94 | @Override 95 | public Set> entrySet() { 96 | throw new UnsupportedOperationException(); 97 | } 98 | 99 | @Override 100 | public V get(Object key) { 101 | // TODO: FILL THIS IN! 
102 | return null; 103 | } 104 | 105 | @Override 106 | public boolean isEmpty() { 107 | return entries.isEmpty(); 108 | } 109 | 110 | @Override 111 | public Set keySet() { 112 | Set set = new HashSet(); 113 | for (Entry entry: entries) { 114 | set.add(entry.getKey()); 115 | } 116 | return set; 117 | } 118 | 119 | @Override 120 | public V put(K key, V value) { 121 | // TODO: FILL THIS IN! 122 | return null; 123 | } 124 | 125 | @Override 126 | public void putAll(Map map) { 127 | for (Map.Entry entry: map.entrySet()) { 128 | put(entry.getKey(), entry.getValue()); 129 | } 130 | } 131 | 132 | @Override 133 | public V remove(Object key) { 134 | // TODO: FILL THIS IN! 135 | return null; 136 | } 137 | 138 | @Override 139 | public int size() { 140 | return entries.size(); 141 | } 142 | 143 | @Override 144 | public Collection values() { 145 | List<V> list = new ArrayList<V>(); // a List, not a Set: equal values stored under different keys must all be reported 146 | for (Entry entry: entries) { 147 | list.add(entry.getValue()); 148 | } 149 | return list; 150 | } 151 | 152 | /** 153 | * @param args 154 | */ 155 | public static void main(String[] args) { 156 | Map map = new MyLinearMap(); 157 | map.put("Word1", 1); 158 | map.put("Word2", 2); 159 | Integer value = map.get("Word1"); 160 | System.out.println(value); 161 | 162 | for (String key: map.keySet()) { 163 | System.out.println(key + ", " + map.get(key)); 164 | } 165 | } 166 | 167 | /** 168 | * Returns a reference to `entries`. 169 | * 170 | * This is not part of the Map interface; it is here to provide the functionality 171 | * of `entrySet` in a way that is substantially simpler than the "right" way. 
172 | * 173 | * @return 174 | */ 175 | protected Collection> getEntries() { 176 | return entries; 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyLinearMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.assertThat; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.util.Collection; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | import java.util.Set; 13 | 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * @author downey 19 | * 20 | */ 21 | public class MyLinearMapTest { 22 | 23 | protected Map map; 24 | 25 | /** 26 | * @throws java.lang.Exception 27 | */ 28 | @Before 29 | public void setUp() throws Exception { 30 | map = new MyLinearMap(); 31 | map.put("One", 1); 32 | map.put("Two", 2); 33 | map.put("Three", 3); 34 | map.put(null, 0); 35 | } 36 | 37 | /** 38 | * Test method for {@link MyLinearMap#clear()}. 39 | */ 40 | @Test 41 | public void testClear() { 42 | map.clear(); 43 | assertThat(map.size(), is(0)); 44 | } 45 | 46 | /** 47 | * Test method for {@link MyLinearMap#containsKey(java.lang.Object)}. 48 | */ 49 | @Test 50 | public void testContainsKey() { 51 | assertThat(map.containsKey("Three"), is(true)); 52 | assertThat(map.containsKey(null), is(true)); 53 | assertThat(map.containsKey("Four"), is(false)); 54 | } 55 | 56 | /** 57 | * Test method for {@link MyLinearMap#containsValue(java.lang.Object)}. 58 | */ 59 | @Test 60 | public void testContainsValue() { 61 | assertThat(map.containsValue(3), is(true)); 62 | assertThat(map.containsValue(0), is(true)); 63 | assertThat(map.containsValue(4), is(false)); 64 | } 65 | 66 | /** 67 | * Test method for {@link MyLinearMap#get(java.lang.Object)}. 
68 | */ 69 | @Test 70 | public void testGet() { 71 | assertThat(map.get("Three"), is(3)); 72 | assertThat(map.get(null), is(0)); 73 | assertThat(map.get("Four"), nullValue()); 74 | } 75 | 76 | /** 77 | * Test method for {@link MyLinearMap#isEmpty()}. 78 | */ 79 | @Test 80 | public void testIsEmpty() { 81 | assertThat(map.isEmpty(), is(false)); 82 | map.clear(); 83 | assertThat(map.isEmpty(), is(true)); 84 | } 85 | 86 | /** 87 | * Test method for {@link MyLinearMap#keySet()}. 88 | */ 89 | @Test 90 | public void testKeySet() { 91 | Set keySet = map.keySet(); 92 | assertThat(keySet.size(), is(4)); 93 | assertThat(keySet.contains("Three"), is(true)); 94 | assertThat(keySet.contains(null), is(true)); 95 | assertThat(keySet.contains("Four"), is(false)); 96 | } 97 | 98 | /** 99 | * Test method for {@link MyLinearMap#put(java.lang.Object, java.lang.Object)}. 100 | */ 101 | @Test 102 | public void testPut() { 103 | map.put("One", 11); 104 | assertThat(map.size(), is(4)); 105 | assertThat(map.get("One"), is(11)); 106 | 107 | map.put("Five", 5); 108 | assertThat(map.size(), is(5)); 109 | assertThat(map.get("Five"), is(5)); 110 | } 111 | 112 | /** 113 | * Test method for {@link MyLinearMap#putAll(java.util.Map)}. 114 | */ 115 | @Test 116 | public void testPutAll() { 117 | Map m = new HashMap(); 118 | m.put("Six", 6); 119 | m.put("Seven", 7); 120 | m.put("Eight", 8); 121 | map.putAll(m); 122 | assertThat(map.size(), is(7)); 123 | } 124 | 125 | /** 126 | * Test method for {@link MyLinearMap#remove(java.lang.Object)}. 127 | */ 128 | @Test 129 | public void testRemove() { 130 | map.remove("One"); 131 | assertThat(map.size(), is(3)); 132 | assertThat(map.get("One"), nullValue()); 133 | } 134 | 135 | /** 136 | * Test method for {@link MyLinearMap#size()}. 137 | */ 138 | @Test 139 | public void testSize() { 140 | assertThat(map.size(), is(4)); 141 | } 142 | 143 | /** 144 | * Test method for {@link MyLinearMap#values()}. 
145 | */ 146 | @Test 147 | public void testValues() { 148 | Collection keySet = map.values(); 149 | assertThat(keySet.size(), is(4)); 150 | assertThat(keySet.contains(3), is(true)); 151 | assertThat(keySet.contains(0), is(true)); 152 | assertThat(keySet.contains(4), is(false)); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyLinkedListTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayList; 7 | 8 | import org.junit.Before; 9 | 10 | 11 | /** 12 | * @author downey 13 | * 14 | */ 15 | public class MyLinkedListTest extends MyArrayListTest { 16 | 17 | /** 18 | * @throws java.lang.Exception 19 | */ 20 | @Before 21 | public void setUp() throws Exception { 22 | list = new ArrayList(); 23 | list.add(1); 24 | list.add(2); 25 | list.add(3); 26 | 27 | mylist = new MyLinkedList(); 28 | mylist.addAll(list); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/MyTreeMapExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | import java.util.TreeMap; 10 | import java.util.UUID; 11 | 12 | public class MyTreeMapExample { 13 | 14 | public static void main(String[] args) { 15 | int n = 16384; 16 | System.out.println("\nTesting MyTreeMap with random strings"); 17 | putRandomStrings(n); 18 | 19 | System.out.println("\nTesting MyTreeMap with timestamps"); 20 | putTimestamps(n); 21 | 22 | } 23 | 24 | /** 25 | * @param map 26 | * @param n 27 | */ 28 | private static void putRandomStrings(int n) { 29 | // MyTreeMap 
map = new MyTreeMap(); 30 | TreeMap map = new TreeMap(); 31 | 32 | final long startTime = System.currentTimeMillis(); 33 | for (int i=0; i map = new MyTreeMap(); 48 | TreeMap map = new TreeMap(); 49 | 50 | final long startTime = System.currentTimeMillis(); 51 | for (int i=0; i map, final long elapsed, int height) { 65 | System.out.println(" Time in milliseconds = " + (elapsed)); 66 | System.out.println(" Final size of MyTreeMap = " + map.size()); 67 | System.out.println(" log base 2 of size of MyTreeMap = " + Math.log(map.size()) / Math.log(2)); 68 | System.out.println(" Final height of MyTreeMap = " + height); 69 | } 70 | /** 71 | * @param map 72 | * @param n 73 | */ 74 | @SuppressWarnings("unused") 75 | private static void putWordList(int n) { 76 | // assemble the file name 77 | String slash = File.separator; 78 | String filename = System.getProperty("user.dir") + slash + 79 | "src" + slash + "resources" + slash + "words.txt"; 80 | 81 | MyTreeMap map = new MyTreeMap(); 82 | 83 | final long startTime = System.currentTimeMillis(); 84 | try (BufferedReader br = new BufferedReader(new FileReader(filename))) { 85 | String line; 86 | int i = 0; 87 | while ((line = br.readLine()) != null) { 88 | map.put(line, 0); 89 | 90 | i++; 91 | if (i >= n) { 92 | break; 93 | } 94 | } 95 | } catch (FileNotFoundException e) { 96 | e.printStackTrace(); 97 | } catch (IOException e) { 98 | e.printStackTrace(); 99 | } 100 | final long elapsed = System.currentTimeMillis() - startTime; 101 | printResults(map, elapsed, map.height()); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/Page.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.io.IOException; 7 | import java.net.URL; 8 | import java.nio.CharBuffer; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * 
@author downey 14 | * 15 | */ 16 | public class Page implements Readable { 17 | public URL url; 18 | public List content; 19 | 20 | public Page(URL url) { 21 | this.url = url; 22 | this.content = new ArrayList(); 23 | } 24 | 25 | public void addLine(String line) { 26 | content.add(line); 27 | } 28 | 29 | @Override 30 | public int read(CharBuffer arg0) throws IOException { 31 | // TODO Auto-generated method stub 32 | return 0; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/ProfileListAdd.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | 7 | import org.jfree.data.xy.XYSeries; 8 | 9 | import com.allendowney.thinkdast.Profiler.Timeable; 10 | 11 | public class ProfileListAdd { 12 | 13 | /** 14 | * @param args 15 | */ 16 | public static void main(String[] args) { 17 | profileArrayListAddEnd(); 18 | //profileArrayListAddBeginning(); 19 | //profileLinkedListAddBeginning(); 20 | //profileLinkedListAddEnd(); 21 | } 22 | 23 | /** 24 | * Characterize the run time of adding to the end of an ArrayList 25 | */ 26 | public static void profileArrayListAddEnd() { 27 | Timeable timeable = new Timeable() { 28 | List list; 29 | 30 | public void setup(int n) { 31 | list = new ArrayList(); 32 | } 33 | 34 | public void timeMe(int n) { 35 | for (int i=0; i map; 27 | 28 | public void setup(int n) { 29 | map = new HashMap(); 30 | } 31 | 32 | public void timeMe(int n) { 33 | for (int i=0; i map; 49 | 50 | public void setup(int n) { 51 | map = new MyHashMap(); 52 | } 53 | 54 | public void timeMe(int n) { 55 | for (int i=0; i map; 72 | 73 | public void setup(int n) { 74 | map = new MyFixedHashMap(); 75 | } 76 | 77 | public void timeMe(int n) { 78 | for (int i=0; i 4) { 82 | series.add(n, total); 83 | } 84 | 85 | // stop when the 
runtime exceeds the end threshold 86 | if (total > endMillis) { 87 | break; 88 | } 89 | // otherwise double the size and continue 90 | n *= 2; 91 | } 92 | return series; 93 | } 94 | 95 | /** 96 | * Invokes setup and timeMe on the embedded Timeable. 97 | * 98 | * @param n 99 | * @return 100 | */ 101 | public long timeIt(int n) { 102 | timeable.setup(n); 103 | final long startTime = System.currentTimeMillis(); 104 | timeable.timeMe(n); 105 | final long endTime = System.currentTimeMillis(); 106 | return endTime - startTime; 107 | } 108 | 109 | /** 110 | * Plots the results. 111 | * 112 | * @param series 113 | */ 114 | public void plotResults(XYSeries series) { 115 | double slope = estimateSlope(series); 116 | System.out.println("Estimated slope= " + slope); 117 | 118 | final XYSeriesCollection dataset = new XYSeriesCollection(); 119 | dataset.addSeries(series); 120 | 121 | final JFreeChart chart = ChartFactory.createXYLineChart( 122 | "", // chart title 123 | "", // domain axis label 124 | "", // range axis label 125 | dataset, // data 126 | PlotOrientation.VERTICAL, 127 | false, // include legend 128 | true, 129 | false 130 | ); 131 | 132 | final XYPlot plot = chart.getXYPlot(); 133 | final NumberAxis domainAxis = new LogarithmicAxis("Problem size (n)"); 134 | final NumberAxis rangeAxis = new LogarithmicAxis("Runtime (ms)"); 135 | plot.setDomainAxis(domainAxis); 136 | plot.setRangeAxis(rangeAxis); 137 | chart.setBackgroundPaint(Color.white); 138 | plot.setOutlinePaint(Color.black); 139 | final ChartPanel chartPanel = new ChartPanel(chart); 140 | chartPanel.setPreferredSize(new java.awt.Dimension(1000, 600)); 141 | setContentPane(chartPanel); 142 | pack(); 143 | RefineryUtilities.centerFrameOnScreen(this); 144 | setVisible(true); 145 | } 146 | 147 | /** 148 | * Uses simple regression to estimate the slope of the series. 
149 | * 150 | * @param series 151 | * @return 152 | */ 153 | public double estimateSlope(XYSeries series) { 154 | SimpleRegression regression = new SimpleRegression(); 155 | 156 | for (Object item: series.getItems()) { 157 | XYDataItem xy = (XYDataItem) item; 158 | regression.addData(Math.log(xy.getXValue()), Math.log(xy.getYValue())); 159 | } 160 | return regression.getSlope(); 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/SelectionSort.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Arrays; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class SelectionSort { 13 | 14 | /** 15 | * Swaps the elements at indexes i and j. 16 | */ 17 | public static void swapElements(int[] array, int i, int j) { 18 | int temp = array[i]; 19 | array[i] = array[j]; 20 | array[j] = temp; 21 | } 22 | 23 | /** 24 | * Finds the index of the lowest value 25 | * at or after index start (through the end of the array). 26 | */ 27 | public static int indexLowest(int[] array, int start) { 28 | int lowIndex = start; 29 | for (int i = start; i < array.length; i++) { 30 | if (array[i] < array[lowIndex]) { 31 | lowIndex = i; 32 | } 33 | } 34 | return lowIndex; 35 | } 36 | 37 | /** 38 | * Sorts the array (in place) using selection sort. 
39 | */ 40 | public static void selectionSort(int[] array) { 41 | for (int i = 0; i < array.length; i++) { 42 | int j = indexLowest(array, i); 43 | swapElements(array, i, j); 44 | } 45 | } 46 | 47 | /** 48 | * @param args 49 | */ 50 | public static void main(String[] args) { 51 | int[] array = {2, 5, 6, 1, 3}; 52 | selectionSort(array); 53 | System.out.println(Arrays.toString(array)); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/SillyArray.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Arrays; 7 | import java.util.Map; 8 | 9 | /** 10 | * @author downey 11 | * 12 | */ 13 | public class SillyArray { 14 | private final char[] array; 15 | 16 | public SillyArray(char[] array) { 17 | this.array = array; 18 | } 19 | 20 | public String toString() { 21 | return Arrays.toString(array); 22 | } 23 | 24 | public void setChar(int i, char c) { 25 | this.array[i] = c; 26 | } 27 | 28 | @Override 29 | public boolean equals(Object other) { 30 | return other instanceof SillyArray && Arrays.equals(array, ((SillyArray) other).array); // false, not an exception, for null or a foreign type 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | int total = 0; 36 | for (int i=0; i map = new MyBetterMap(); 48 | 49 | SillyArray array1 = new SillyArray("Word1".toCharArray()); 50 | map.put(array1, 1); 51 | 52 | // what happens if we mutate a key while it's in the Map? 
53 | array1.setChar(0, 'C'); 54 | 55 | Integer value = map.get(array1); 56 | System.out.println(value); 57 | 58 | for (SillyArray key: map.keySet()) { 59 | System.out.println(key + ", " + map.get(key)); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/SillyString.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Map; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class SillyString { 13 | private final String innerString; 14 | 15 | public SillyString(String innerString) { 16 | this.innerString = innerString; 17 | } 18 | 19 | public String toString() { 20 | return innerString; 21 | } 22 | 23 | @Override 24 | public boolean equals(Object other) { 25 | return other instanceof SillyString && innerString.equals(((SillyString) other).innerString); // false, not an exception, for null or a foreign type 26 | } 27 | 28 | @Override 29 | public int hashCode() { 30 | int total = 0; 31 | for (int i=0; i map = new MyBetterMap(); 43 | 44 | map.put(new SillyString("Word1"), 1); 45 | map.put(new SillyString("Word2"), 2); 46 | Integer value = map.get(new SillyString("Word1")); 47 | System.out.println(value); 48 | 49 | for (SillyString key: map.keySet()) { 50 | System.out.println(key + ", " + map.get(key)); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/TermCounter.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | import org.jsoup.nodes.Node; 9 | import org.jsoup.nodes.TextNode; 10 | import org.jsoup.select.Elements; 11 | 12 | 13 | /** 14 | * Encapsulates a map from search term to frequency (count). 
15 | * 16 | * @author downey 17 | * 18 | */ 19 | public class TermCounter { 20 | 21 | private Map map; 22 | private String label; 23 | 24 | public TermCounter(String label) { 25 | this.label = label; 26 | this.map = new HashMap(); 27 | } 28 | 29 | public String getLabel() { 30 | return label; 31 | } 32 | 33 | /** 34 | * Returns the total of all counts. 35 | * 36 | * @return 37 | */ 38 | public int size() { 39 | // TODO: FILL THIS IN! 40 | return 0; 41 | } 42 | 43 | /** 44 | * Takes a collection of Elements and counts their words. 45 | * 46 | * @param paragraphs 47 | */ 48 | public void processElements(Elements paragraphs) { 49 | for (Node node: paragraphs) { 50 | processTree(node); 51 | } 52 | } 53 | 54 | /** 55 | * Finds TextNodes in a DOM tree and counts their words. 56 | * 57 | * @param root 58 | */ 59 | public void processTree(Node root) { 60 | // NOTE: we could use select to find the TextNodes, but since 61 | // we already have a tree iterator, let's use it. 62 | for (Node node: new WikiNodeIterable(root)) { 63 | if (node instanceof TextNode) { 64 | processText(((TextNode) node).text()); 65 | } 66 | } 67 | } 68 | 69 | /** 70 | * Splits `text` into words and counts them. 71 | * 72 | * @param text The text to process. 73 | */ 74 | public void processText(String text) { 75 | // replace punctuation with spaces, convert to lower case, and split on whitespace 76 | String[] array = text.replaceAll("\\pP", " "). 77 | toLowerCase(). 78 | split("\\s+"); 79 | 80 | for (int i=0; i keySet() { 123 | return map.keySet(); 124 | } 125 | 126 | /** 127 | * Print the terms and their counts in arbitrary order. 
128 | */ 129 | public void printCounts() { 130 | for (String key: keySet()) { 131 | Integer count = get(key); 132 | System.out.println(key + ", " + count); 133 | } 134 | System.out.println("Total of all counts = " + size()); 135 | } 136 | 137 | /** 138 | * @param args 139 | * @throws IOException 140 | */ 141 | public static void main(String[] args) throws IOException { 142 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 143 | 144 | WikiFetcher wf = new WikiFetcher(); 145 | Elements paragraphs = wf.fetchWikipedia(url); 146 | 147 | TermCounter counter = new TermCounter(url.toString()); 148 | counter.processElements(paragraphs); 149 | counter.printCounts(); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/TermCounterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import org.jsoup.select.Elements; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | /** 14 | * @author downey 15 | * 16 | */ 17 | public class TermCounterTest { 18 | 19 | private TermCounter counter; 20 | 21 | /** 22 | * @throws java.lang.Exception 23 | */ 24 | @Before 25 | public void setUp() throws Exception { 26 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 27 | 28 | WikiFetcher wf = new WikiFetcher(); 29 | Elements paragraphs = wf.readWikipedia(url); 30 | 31 | counter = new TermCounter(url.toString()); 32 | counter.processElements(paragraphs); 33 | } 34 | 35 | @Test 36 | public void testSize() { 37 | assertThat(counter.size(), is(4798)); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiCrawler.java: 
-------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.LinkedList; 5 | import java.util.Map; 6 | import java.util.Map.Entry; 7 | import java.util.Queue; 8 | 9 | import org.jsoup.nodes.Element; 10 | import org.jsoup.select.Elements; 11 | 12 | import redis.clients.jedis.Jedis; 13 | 14 | 15 | public class WikiCrawler { 16 | // keeps track of where we started 17 | @SuppressWarnings("unused") 18 | private final String source; 19 | 20 | // the index where the results go 21 | private JedisIndex index; 22 | 23 | // queue of URLs to be indexed 24 | private Queue queue = new LinkedList(); 25 | 26 | // fetcher used to get pages from Wikipedia 27 | final static WikiFetcher wf = new WikiFetcher(); 28 | 29 | /** 30 | * Constructor. 31 | * 32 | * @param source 33 | * @param index 34 | */ 35 | public WikiCrawler(String source, JedisIndex index) { 36 | this.source = source; 37 | this.index = index; 38 | queue.offer(source); 39 | } 40 | 41 | /** 42 | * Returns the number of URLs in the queue. 43 | * 44 | * @return 45 | */ 46 | public int queueSize() { 47 | return queue.size(); 48 | } 49 | 50 | /** 51 | * Gets a URL from the queue and indexes it. 52 | * @param testing 53 | * 54 | * @return URL of page indexed. 55 | * @throws IOException 56 | */ 57 | public String crawl(boolean testing) throws IOException { 58 | // TODO: FILL THIS IN! 59 | return null; 60 | } 61 | 62 | /** 63 | * Parses paragraphs and adds internal links to the queue. 64 | * 65 | * @param paragraphs 66 | */ 67 | // NOTE: absence of access level modifier means package-level 68 | void queueInternalLinks(Elements paragraphs) { 69 | // TODO: FILL THIS IN! 
70 | } 71 | 72 | public static void main(String[] args) throws IOException { 73 | // make a WikiCrawler 74 | Jedis jedis = JedisMaker.make(); 75 | JedisIndex index = new JedisIndex(jedis); 76 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 77 | WikiCrawler wc = new WikiCrawler(source, index); 78 | 79 | // for testing purposes, load up the queue 80 | Elements paragraphs = wf.fetchWikipedia(source); 81 | wc.queueInternalLinks(paragraphs); 82 | 83 | // loop until we index a new page 84 | String res; 85 | do { 86 | res = wc.crawl(false); 87 | 88 | // REMOVE THIS BREAK STATEMENT WHEN crawl() IS WORKING 89 | break; 90 | } while (res == null); 91 | 92 | Map map = index.getCounts("the"); 93 | for (Entry entry: map.entrySet()) { 94 | System.out.println(entry); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiCrawlerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.assertThat; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.io.IOException; 10 | import java.util.Map; 11 | 12 | import org.jsoup.select.Elements; 13 | import org.junit.After; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import redis.clients.jedis.Jedis; 18 | 19 | /** 20 | * @author downey 21 | * 22 | */ 23 | public class WikiCrawlerTest { 24 | 25 | private Jedis jedis; 26 | private WikiCrawler wc; 27 | private JedisIndex index; 28 | 29 | /** 30 | * @throws java.lang.Exception 31 | */ 32 | @Before 33 | public void setUp() throws Exception { 34 | // make a WikiCrawler 35 | jedis = JedisMaker.make(); 36 | index = new JedisIndex(jedis); 37 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 38 | wc = new WikiCrawler(source, index); 39 | 40 | // for testing purposes, load up the queue 
41 | WikiFetcher wf = new WikiFetcher(); 42 | Elements paragraphs = wf.readWikipedia(source); 43 | wc.queueInternalLinks(paragraphs); 44 | } 45 | 46 | /** 47 | * @throws java.lang.Exception 48 | */ 49 | @After 50 | public void tearDown() throws Exception { 51 | jedis.close(); 52 | } 53 | 54 | /** 55 | * Test method for {@link WikiCrawler#crawl()}. 56 | * @throws IOException 57 | */ 58 | @Test 59 | public void testCrawl() throws IOException { 60 | String url1 = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 61 | String url2 = "https://en.wikipedia.org/wiki/Programming_language"; 62 | String url3 = "https://en.wikipedia.org/wiki/Concurrent_computing"; 63 | 64 | String res = wc.crawl(true); 65 | assertThat(url1.equals(res), is(true)); 66 | assertThat(wc.queueSize(), is(396)); 67 | 68 | res = wc.crawl(true); 69 | assertThat(url2.equals(res), is(true)); 70 | assertThat(wc.queueSize(), is(653)); 71 | 72 | res = wc.crawl(true); 73 | assertThat(url3.equals(res), is(true)); 74 | assertThat(wc.queueSize(), is(704)); 75 | 76 | Map map = index.getCounts("the"); 77 | 78 | int count = map.get(url1); 79 | assertThat(count, is(339)); 80 | 81 | count = map.get(url2); 82 | assertThat(count, is(264)); 83 | 84 | count = map.get(url3); 85 | assertThat(count, is(53)); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiFetcher.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.net.URL; 7 | 8 | import org.jsoup.Connection; 9 | import org.jsoup.Jsoup; 10 | import org.jsoup.nodes.Document; 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | 14 | 15 | public class WikiFetcher { 16 | private long lastRequestTime = -1; 17 | private long minInterval = 1000; 18 | 19 | /** 20 | * 
Fetches and parses a URL string, returning a list of paragraph elements. 21 | * 22 | * @param url 23 | * @return 24 | * @throws IOException 25 | */ 26 | public Elements fetchWikipedia(String url) throws IOException { 27 | sleepIfNeeded(); 28 | 29 | // download and parse the document 30 | Connection conn = Jsoup.connect(url); 31 | Document doc = conn.get(); 32 | 33 | // select the content text and pull out the paragraphs. 34 | Element content = doc.getElementById("mw-content-text"); 35 | 36 | // TODO: avoid selecting paragraphs from sidebars and boxouts 37 | Elements paras = content.select("p"); 38 | return paras; 39 | } 40 | 41 | /** 42 | * Reads the contents of a Wikipedia page from src/resources. 43 | * 44 | * @param url URL of the page; its host and path select the saved copy under resources/ 45 | * @return the paragraph elements inside the page's "mw-content-text" element 46 | * @throws IOException if the saved copy is missing or cannot be read 47 | */ 48 | public Elements readWikipedia(String url) throws IOException { 49 | URL realURL = new URL(url); 50 | 51 | // assemble the resource name; class-loader resource names always use '/', never File.separator 52 | String filename = "resources/" + realURL.getHost() + realURL.getPath(); 53 | 54 | // read and parse the file, closing the stream afterwards 55 | Document doc; 56 | try (InputStream stream = WikiFetcher.class.getClassLoader().getResourceAsStream(filename)) { 57 | if (stream == null) { 58 | throw new IOException("Resource not found: " + filename); 59 | } 60 | doc = Jsoup.parse(stream, "UTF-8", filename); 61 | } 62 | return doc.getElementById("mw-content-text").select("p"); 63 | } 64 | 65 | /** 66 | * Rate limits by waiting at least the minimum interval between requests. 
67 | */ 68 | private void sleepIfNeeded() { 69 | if (lastRequestTime != -1) { 70 | long currentTime = System.currentTimeMillis(); 71 | long nextRequestTime = lastRequestTime + minInterval; 72 | if (currentTime < nextRequestTime) { 73 | try { 74 | Thread.sleep(nextRequestTime - currentTime); 75 | } catch (InterruptedException e) { 76 | System.err.println("Warning: sleep interrupted in fetchWikipedia."); 77 | Thread.currentThread().interrupt(); // restore the flag so callers can still observe the interruption 78 | } 79 | } 80 | } 81 | lastRequestTime = System.currentTimeMillis(); 82 | } 83 | 84 | /** 85 | * @param args 86 | * @throws IOException 87 | */ 88 | public static void main(String[] args) throws IOException { 89 | WikiFetcher wf = new WikiFetcher(); 90 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 91 | Elements paragraphs = wf.readWikipedia(url); 92 | 93 | for (Element paragraph: paragraphs) { 94 | System.out.println(paragraph); 95 | } 96 | } 97 | } -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiNodeExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayDeque; 5 | import java.util.ArrayList; 6 | import java.util.Collections; 7 | import java.util.Deque; 8 | import java.util.List; 9 | 10 | import org.jsoup.Connection; 11 | import org.jsoup.Jsoup; 12 | import org.jsoup.nodes.Document; 13 | import org.jsoup.nodes.Element; 14 | import org.jsoup.nodes.Node; 15 | import org.jsoup.nodes.TextNode; 16 | import org.jsoup.select.Elements; 17 | 18 | public class WikiNodeExample { 19 | 20 | public static void main(String[] args) throws IOException { 21 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 22 | 23 | // download and parse the document 24 | Connection conn = Jsoup.connect(url); 25 | Document doc = conn.get(); 26 | 27 | // select the 
content text and pull out the paragraphs. 28 | Element content = doc.getElementById("mw-content-text"); 29 | 30 | // TODO: avoid selecting paragraphs from sidebars and boxouts 31 | Elements paras = content.select("p"); 32 | Element firstPara = paras.get(0); 33 | 34 | recursiveDFS(firstPara); 35 | System.out.println(); 36 | 37 | iterativeDFS(firstPara); 38 | System.out.println(); 39 | 40 | Iterable iter = new WikiNodeIterable(firstPara); 41 | for (Node node: iter) { 42 | if (node instanceof TextNode) { 43 | System.out.print(node); 44 | } 45 | } 46 | } 47 | 48 | private static void iterativeDFS(Node root) { 49 | Deque stack = new ArrayDeque(); 50 | stack.push(root); 51 | 52 | // if the stack is empty, we're done 53 | while (!stack.isEmpty()) { 54 | 55 | // otherwise pop the next Node off the stack 56 | Node node = stack.pop(); 57 | if (node instanceof TextNode) { 58 | System.out.print(node); 59 | } 60 | 61 | // push the children onto the stack in reverse order 62 | List nodes = new ArrayList(node.childNodes()); 63 | Collections.reverse(nodes); 64 | 65 | for (Node child: nodes) { 66 | stack.push(child); 67 | } 68 | } 69 | } 70 | 71 | private static void recursiveDFS(Node node) { 72 | if (node instanceof TextNode) { 73 | System.out.print(node); 74 | } 75 | for (Node child: node.childNodes()) { 76 | recursiveDFS(child); 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiNodeIterable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayDeque; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.Deque; 10 | import java.util.Iterator; 11 | import java.util.List; 12 | import java.util.NoSuchElementException; 13 | 14 | import org.jsoup.nodes.Node; 15 | 16 | 17 | /** 18 | * Performs a depth-first traversal of a jsoup Node. 
*
 * @author downey
 *
 */
public class WikiNodeIterable implements Iterable<Node> {

    private final Node root;

    /**
     * Creates an iterable starting with the given Node.
     *
     * @param root node at which every traversal starts
     */
    public WikiNodeIterable(Node root) {
        this.root = root;
    }

    /**
     * Returns a new iterator that starts at the root.
     */
    @Override
    public Iterator<Node> iterator() {
        return new WikiNodeIterator(root);
    }

    /**
     * Nested class that implements the Iterator.
     *
     * It is static because it only needs the node passed to its constructor.
     *
     * @author downey
     *
     */
    private static class WikiNodeIterator implements Iterator<Node> {

        // this stack keeps track of the Nodes waiting to be visited
        private final Deque<Node> stack;

        /**
         * Initializes the Iterator with the given Node on the stack.
         *
         * @param node node at which the traversal starts
         */
        public WikiNodeIterator(Node node) {
            stack = new ArrayDeque<Node>();
            stack.push(node);
        }

        @Override
        public boolean hasNext() {
            return !stack.isEmpty();
        }

        /**
         * Pops the next Node and schedules its children for later visits.
         *
         * @throws NoSuchElementException if no Nodes are left
         */
        @Override
        public Node next() {
            // if the stack is empty, we're done
            if (stack.isEmpty()) {
                throw new NoSuchElementException();
            }

            // otherwise pop the next Node off the stack
            Node node = stack.pop();

            // push the children in reverse order so the first child is popped first
            List<Node> nodes = new ArrayList<Node>(node.childNodes());
            Collections.reverse(nodes);
            for (Node child: nodes) {
                stack.push(child);
            }
            return node;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}
--------------------------------------------------------------------------------
/code/src/com/allendowney/thinkdast/WikiParser.java:
--------------------------------------------------------------------------------
package com.allendowney.thinkdast;

import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import
org.jsoup.select.Elements; 7 | import java.util.ArrayDeque; 8 | import java.util.Deque; 9 | import java.util.StringTokenizer; 10 | 11 | /** 12 | * 13 | */ 14 | /** 15 | * @author downey 16 | * 17 | */ 18 | public class WikiParser { 19 | 20 | // the list of paragraphs we should search 21 | private Elements paragraphs; 22 | 23 | // the stack of open delimiters 24 | // TODO: consider simplifying this by counting parentheses 25 | private Deque parenthesisStack; 26 | 27 | 28 | /** 29 | * Initializes a WikiParser with a list of Elements. 30 | * 31 | * @param paragraphs 32 | */ 33 | public WikiParser(Elements paragraphs) { 34 | this.paragraphs = paragraphs; 35 | this.parenthesisStack = new ArrayDeque(); 36 | } 37 | 38 | /** 39 | * Searches the paragraphs for a valid link. 40 | * 41 | * Warns if a paragraph ends with unbalanced parentheses. 42 | * 43 | * @return 44 | */ 45 | public Element findFirstLink() { 46 | for (Element paragraph: paragraphs) { 47 | Element firstLink = findFirstLinkPara(paragraph); 48 | if (firstLink != null) { 49 | return firstLink; 50 | } 51 | if (!parenthesisStack.isEmpty()) { 52 | System.err.println("Warning: unbalanced parentheses."); 53 | } 54 | } 55 | return null; 56 | } 57 | 58 | /** 59 | * Returns the first valid link in a paragraph, or null. 60 | * 61 | * @param root 62 | */ 63 | private Element findFirstLinkPara(Node root) { 64 | // create an Iterable that traverses the tree 65 | Iterable nt = new WikiNodeIterable(root); 66 | 67 | // loop through the nodes 68 | for (Node node: nt) { 69 | // process TextNodes to get parentheses 70 | if (node instanceof TextNode) { 71 | processTextNode((TextNode) node); 72 | } 73 | // process elements to get find links 74 | if (node instanceof Element) { 75 | Element firstLink = processElement((Element) node); 76 | if (firstLink != null) { 77 | return firstLink; 78 | } 79 | } 80 | } 81 | return null; 82 | } 83 | 84 | /** 85 | * Returns the element if it is a valid link, null otherwise. 
86 | * 87 | * 88 | * 89 | * @param elt 90 | */ 91 | private Element processElement(Element elt) { 92 | //System.out.println(elt.tagName()); 93 | if (validLink(elt)) { 94 | return elt; 95 | } 96 | return null; 97 | } 98 | 99 | /** 100 | * Checks whether a link is value. 101 | * 102 | * @param elt 103 | * @return 104 | */ 105 | private boolean validLink(Element elt) { 106 | // it's no good if it's 107 | // not a link 108 | if (!elt.tagName().equals("a")) { 109 | return false; 110 | } 111 | // in italics 112 | if (isItalic(elt)) { 113 | return false; 114 | } 115 | // in parenthesis 116 | if (isInParens(elt)) { 117 | return false; 118 | } 119 | // a bookmark 120 | if (startsWith(elt, "#")) { 121 | return false; 122 | } 123 | // a Wikipedia help page 124 | if (startsWith(elt, "/wiki/Help:")) { 125 | return false; 126 | } 127 | // TODO: there are a couple of other "rules" we haven't handled 128 | return true; 129 | } 130 | 131 | /** 132 | * Checks whether a link starts with a given String. 133 | * 134 | * @param elt 135 | * @param s 136 | * @return 137 | */ 138 | private boolean startsWith(Element elt, String s) { 139 | //System.out.println(elt.attr("href")); 140 | return (elt.attr("href").startsWith(s)); 141 | } 142 | 143 | /** 144 | * Checks whether the element is in parentheses (possibly nested). 145 | * 146 | * @param elt 147 | * @return 148 | */ 149 | private boolean isInParens(Element elt) { 150 | // check whether there are any parentheses on the stack 151 | return !parenthesisStack.isEmpty(); 152 | } 153 | 154 | /** 155 | * Checks whether the element is in italics. 
156 | * 157 | * (Either a "i" or "em" tag) 158 | * 159 | * @param start 160 | * @return 161 | */ 162 | private boolean isItalic(Element start) { 163 | // follow the parent chain until we get to null 164 | for (Element elt=start; elt != null; elt = elt.parent()) { 165 | if (elt.tagName().equals("i") || elt.tagName().equals("em")) { 166 | return true; 167 | } 168 | } 169 | return false; 170 | } 171 | 172 | /** 173 | * Processes a text node, splitting it up and checking parentheses. 174 | * 175 | * @param node 176 | */ 177 | private void processTextNode(TextNode node) { 178 | StringTokenizer st = new StringTokenizer(node.text(), " ()", true); 179 | while (st.hasMoreTokens()) { 180 | String token = st.nextToken(); 181 | // System.out.print(token); 182 | if (token.equals("(")) { 183 | parenthesisStack.push(token); 184 | } 185 | if (token.equals(")")) { 186 | if (parenthesisStack.isEmpty()) { 187 | System.err.println("Warning: unbalanced parentheses."); 188 | } 189 | parenthesisStack.pop(); 190 | } 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiParserTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.assertThat; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.io.IOException; 10 | 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | import org.junit.Test; 14 | 15 | /** 16 | * @author downey 17 | * 18 | */ 19 | public class WikiParserTest { 20 | 21 | final static WikiFetcher wf = new WikiFetcher(); 22 | 23 | /** 24 | * Test method for {@link WikiParser#findFirstLink()}. 
25 | * @throws IOException 26 | */ 27 | @Test 28 | public void testFindFirstLink1() throws IOException { 29 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 30 | String href = findFirstLink(url); 31 | assertThat(href, is("/wiki/Programming_language")); 32 | } 33 | 34 | /** 35 | * Test method for {@link WikiParser#findFirstLink()}. 36 | * @throws IOException 37 | */ 38 | @Test 39 | public void testFindFirstLink2() throws IOException { 40 | String url = "https://en.wikipedia.org/wiki/Mathematics"; 41 | String href = findFirstLink(url); 42 | assertThat(href, is("/wiki/Quantity")); 43 | } 44 | 45 | /** 46 | * Uses WikiParser to find the first link in the given URL. 47 | * 48 | * @param url 49 | * @return 50 | * @throws IOException 51 | */ 52 | private String findFirstLink(String url) throws IOException { 53 | Elements paragraphs = wf.readWikipedia(url); 54 | WikiParser wp = new WikiParser(paragraphs); 55 | Element elt = wp.findFirstLink(); 56 | String href = elt.attr("href"); 57 | return href; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiPhilosophy.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | 10 | public class WikiPhilosophy { 11 | 12 | final static List visited = new ArrayList(); 13 | final static WikiFetcher wf = new WikiFetcher(); 14 | 15 | /** 16 | * Tests a conjecture about Wikipedia and Philosophy. 17 | * 18 | * https://en.wikipedia.org/wiki/Wikipedia:Getting_to_Philosophy 19 | * 20 | * 1. Clicking on the first non-parenthesized, non-italicized link 21 | * 2. Ignoring external links, links to the current page, or red links 22 | * 3. 
Stopping when reaching "Philosophy", a page with no links or a page 23 | * that does not exist, or when a loop occurs 24 | * 25 | * @param args 26 | * @throws IOException 27 | */ 28 | public static void main(String[] args) throws IOException { 29 | String destination = "https://en.wikipedia.org/wiki/Philosophy"; 30 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 31 | 32 | testConjecture(destination, source, 10); 33 | } 34 | 35 | /** 36 | * Starts from given URL and follows first link until it finds the destination or exceeds the limit. 37 | * 38 | * @param destination 39 | * @param source 40 | * @throws IOException 41 | */ 42 | public static void testConjecture(String destination, String source, int limit) throws IOException { 43 | // TODO: FILL THIS IN! 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiPhilosophyTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | 8 | import java.io.IOException; 9 | 10 | import org.junit.Test; 11 | 12 | /** 13 | * @author downey 14 | * 15 | */ 16 | public class WikiPhilosophyTest { 17 | 18 | 19 | /** 20 | * Test method for {@link WikiPhilosophy#main(java.lang.String[])}. 21 | */ 22 | @Test 23 | public void testMain() { 24 | // Because this lab is more open-ended than others, we can't provide unit 25 | // tests. Instead, we just check that you've modified WikiPhilosophy.java 26 | // so it doesn't throw an exception. 
27 | String[] args = {}; 28 | try { 29 | WikiPhilosophy.main(args); 30 | } catch (IOException e) { 31 | e.printStackTrace(); 32 | fail(); 33 | } 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiSearch.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.HashMap; 7 | import java.util.LinkedList; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.Map.Entry; 11 | 12 | import redis.clients.jedis.Jedis; 13 | 14 | 15 | /** 16 | * Represents the results of a search query. 17 | * 18 | */ 19 | public class WikiSearch { 20 | 21 | // map from URLs that contain the term(s) to relevance score 22 | private Map map; 23 | 24 | /** 25 | * Constructor. 26 | * 27 | * @param map 28 | */ 29 | public WikiSearch(Map map) { 30 | this.map = map; 31 | } 32 | 33 | /** 34 | * Looks up the relevance of a given URL. 35 | * 36 | * @param url 37 | * @return 38 | */ 39 | public Integer getRelevance(String url) { 40 | Integer relevance = map.get(url); 41 | return relevance==null ? 0: relevance; 42 | } 43 | 44 | /** 45 | * Prints the contents in order of term frequency. 46 | * 47 | * @param 48 | */ 49 | private void print() { 50 | List> entries = sort(); 51 | for (Entry entry: entries) { 52 | System.out.println(entry); 53 | } 54 | } 55 | 56 | /** 57 | * Computes the union of two search results. 58 | * 59 | * @param that 60 | * @return New WikiSearch object. 61 | */ 62 | public WikiSearch or(WikiSearch that) { 63 | // TODO: FILL THIS IN! 64 | return null; 65 | } 66 | 67 | /** 68 | * Computes the intersection of two search results. 69 | * 70 | * @param that 71 | * @return New WikiSearch object. 72 | */ 73 | public WikiSearch and(WikiSearch that) { 74 | // TODO: FILL THIS IN! 
return null;
    }

    /**
     * Computes the difference of two search results: the entries of this
     * search whose URL does not appear in that.
     *
     * @param that
     * @return New WikiSearch object.
     */
    public WikiSearch minus(WikiSearch that) {
        // TODO: FILL THIS IN!
        return null;
    }

    /**
     * Computes the relevance of a search with multiple terms.
     *
     * @param rel1: relevance score for the first search
     * @param rel2: relevance score for the second search
     * @return the sum of the two scores
     */
    protected int totalRelevance(Integer rel1, Integer rel2) {
        // simple starting place: relevance is the sum of the term frequencies.
        return rel1 + rel2;
    }

    /**
     * Sort the results by relevance.
     *
     * @return List of entries with URL and relevance.
     */
    public List<Entry<String, Integer>> sort() {
        // TODO: FILL THIS IN!
        return null;
    }


    /**
     * Performs a search and makes a WikiSearch object.
     *
     * @param term search term to look up
     * @param index index that supplies the counts for the term
     * @return a WikiSearch wrapping the map returned by index.getCounts(term)
     */
    public static WikiSearch search(String term, JedisIndex index) {
        Map<String, Integer> map = index.getCounts(term);
        return new WikiSearch(map);
    }

    /**
     * Runs two single-term searches and their intersection, printing each result.
     *
     * @param args not used
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws IOException {

        // make a JedisIndex
        Jedis jedis = JedisMaker.make();
        JedisIndex index = new JedisIndex(jedis);

        // search for the first term
        String term1 = "java";
        System.out.println("Query: " + term1);
        WikiSearch search1 = search(term1, index);
        search1.print();

        // search for the second term
        String term2 = "programming";
        System.out.println("Query: " + term2);
        WikiSearch search2 = search(term2, index);
        search2.print();

        // compute the intersection of the searches
        System.out.println("Query: " + term1 + " AND " + term2);
        WikiSearch intersection = search1.and(search2);
        intersection.print();
    }
}
-------------------------------------------------------------------------------- /code/src/com/allendowney/thinkdast/WikiSearchTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.util.HashMap; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.Map.Entry; 13 | 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * @author downey 19 | * 20 | */ 21 | public class WikiSearchTest { 22 | 23 | private WikiSearch search1; 24 | private WikiSearch search2; 25 | 26 | /** 27 | * @throws java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | Map map1 = new HashMap(); 32 | map1.put("Page1", 1); 33 | map1.put("Page2", 2); 34 | map1.put("Page3", 3); 35 | search1 = new WikiSearch(map1); 36 | 37 | Map map2 = new HashMap(); 38 | map2.put("Page2", 4); 39 | map2.put("Page3", 5); 40 | map2.put("Page4", 7); 41 | search2 = new WikiSearch(map2); 42 | } 43 | 44 | /** 45 | * Test method for {@link WikiSearch#or(WikiSearch)}. 46 | */ 47 | @Test 48 | public void testOr() { 49 | WikiSearch search = search1.or(search2); 50 | assertThat(search.getRelevance("Page1"), is(1)); 51 | assertThat(search.getRelevance("Page2"), is(6)); 52 | assertThat(search.getRelevance("Page3"), is(8)); 53 | assertThat(search.getRelevance("Page4"), is(7)); 54 | assertThat(search.getRelevance("Page5"), is(0)); 55 | } 56 | 57 | /** 58 | * Test method for {@link WikiSearch#and(WikiSearch)}. 
59 | */ 60 | @Test 61 | public void testAnd() { 62 | WikiSearch search = search1.and(search2); 63 | assertThat(search.getRelevance("Page1"), is(0)); 64 | assertThat(search.getRelevance("Page2"), is(6)); 65 | assertThat(search.getRelevance("Page3"), is(8)); 66 | assertThat(search.getRelevance("Page4"), is(0)); 67 | assertThat(search.getRelevance("Page5"), is(0)); 68 | } 69 | 70 | /** 71 | * Test method for {@link WikiSearch#minus(WikiSearch)}. 72 | */ 73 | @Test 74 | public void testMinus() { 75 | WikiSearch search = search1.minus(search2); 76 | assertThat(search.getRelevance("Page1"), is(1)); 77 | assertThat(search.getRelevance("Page2"), is(0)); 78 | assertThat(search.getRelevance("Page3"), is(0)); 79 | assertThat(search.getRelevance("Page4"), is(0)); 80 | assertThat(search.getRelevance("Page5"), is(0)); 81 | } 82 | 83 | /** 84 | * Test method for {@link WikiSearch#sort()}. 85 | */ 86 | @Test 87 | public void testSort() { 88 | List> list = search2.sort(); 89 | assertThat(list.get(0).getValue(), is(4)); 90 | assertThat(list.get(1).getValue(), is(5)); 91 | assertThat(list.get(2).getValue(), is(7)); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /solutions/lib/ant-junit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/ant-junit.jar -------------------------------------------------------------------------------- /solutions/lib/commons-math3-3.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/commons-math3-3.6.jar -------------------------------------------------------------------------------- /solutions/lib/hamcrest-core-1.3.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/hamcrest-core-1.3.jar -------------------------------------------------------------------------------- /solutions/lib/hamcrest-library-1.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/hamcrest-library-1.3.jar -------------------------------------------------------------------------------- /solutions/lib/jcommon-1.0.23.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/jcommon-1.0.23.jar -------------------------------------------------------------------------------- /solutions/lib/jedis-2.8.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/jedis-2.8.0.jar -------------------------------------------------------------------------------- /solutions/lib/jfreechart-1.0.19.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/jfreechart-1.0.19.jar -------------------------------------------------------------------------------- /solutions/lib/jsoup-1.8.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/jsoup-1.8.3.jar -------------------------------------------------------------------------------- 
/solutions/lib/junit-4.12.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/junit-4.12.jar -------------------------------------------------------------------------------- /solutions/lib/servlet.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenDowney/ThinkDataStructures/cc10971a1904eda5f5b818a3d70ee1a836fd8799/solutions/lib/servlet.jar -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/Card.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.List; 7 | 8 | 9 | /** 10 | * Represents a playing card. 11 | * 12 | */ 13 | public class Card implements Comparable { 14 | 15 | // string representations of ranks 16 | public static final String[] RANKS = { 17 | null, "Ace", "2", "3", "4", "5", "6", "7", 18 | "8", "9", "10", "Jack", "Queen", "King"}; 19 | 20 | // string representations of suits 21 | public static final String[] SUITS = { 22 | "Clubs", "Diamonds", "Hearts", "Spades"}; 23 | 24 | // rank and suit are instance variables 25 | private final int rank; 26 | private final int suit; 27 | 28 | /** 29 | * Constructs a card of the given rank and suit. 30 | */ 31 | public Card(int rank, int suit) { 32 | this.rank = rank; 33 | this.suit = suit; 34 | } 35 | 36 | /** 37 | * Gets the card's rank. 38 | */ 39 | public int getRank() { 40 | return this.rank; 41 | } 42 | 43 | /** 44 | * Gets the card's suit. 45 | */ 46 | public int getSuit() { 47 | return this.suit; 48 | } 49 | 50 | /** 51 | * Returns a string representation of the card. 
52 | */ 53 | public String toString() { 54 | return RANKS[this.rank] + " of " + SUITS[this.suit]; 55 | } 56 | 57 | /** 58 | * Returns a negative integer if this card comes before 59 | * the given card, zero if the two cards are equal, or 60 | * a positive integer if this card comes after the card. 61 | */ 62 | public int compareTo(Card that) { 63 | if (this.suit < that.suit) { 64 | return -1; 65 | } 66 | if (this.suit > that.suit) { 67 | return 1; 68 | } 69 | if (this.rank < that.rank) { 70 | return -1; 71 | } 72 | if (this.rank > that.rank) { 73 | return 1; 74 | } 75 | return 0; 76 | } 77 | 78 | /** 79 | * Returns true if the given card has the same 80 | * rank AND same suit; otherwise returns false. 81 | */ 82 | public boolean equals(Card that) { 83 | return this.rank == that.rank 84 | && this.suit == that.suit; 85 | } 86 | 87 | /** 88 | * Make a List of 52 cards. 89 | */ 90 | public static List makeDeck() { 91 | List cards = new ArrayList(); 92 | for (int suit = 0; suit <= 3; suit++) { 93 | for (int rank = 1; rank <= 13; rank++) { 94 | Card card = new Card(rank, suit); 95 | cards.add(card); 96 | } 97 | } 98 | return cards; 99 | } 100 | 101 | /** 102 | * Demonstrates how to call the search methods. 
103 | */ 104 | public static void main(String[] args) { 105 | 106 | // sort the cards using the natural ordering 107 | List cards = makeDeck(); 108 | Collections.sort(cards); 109 | System.out.println(cards.get(0)); 110 | System.out.println(cards.get(51)); 111 | 112 | Comparator comparator = new Comparator() { 113 | @Override 114 | public int compare(Card card1, Card card2) { 115 | if (card1.getSuit() < card2.getSuit()) { 116 | return -1; 117 | } 118 | if (card1.getSuit() > card2.getSuit()) { 119 | return 1; 120 | } 121 | int rank1 = getRankAceHigh(card1); 122 | int rank2 = getRankAceHigh(card2); 123 | 124 | if (rank1 < rank2) { 125 | return -1; 126 | } 127 | if (rank1 > rank2) { 128 | return 1; 129 | } 130 | return 0; 131 | } 132 | 133 | private int getRankAceHigh(Card card) { 134 | int rank = card.getRank(); 135 | if (rank == 1) { 136 | return 14; 137 | } else { 138 | return rank; 139 | } 140 | } 141 | }; 142 | 143 | // sort the cards using an external comparator 144 | Collections.sort(cards, comparator); 145 | System.out.println(cards.get(0)); 146 | System.out.println(cards.get(51)); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/HelloJsoup.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | /** 12 | * Example program to list links from a URL. 
13 | * 14 | * From: http://jsoup.org/cookbook/extracting-data/example-list-links 15 | */ 16 | public class HelloJsoup { 17 | 18 | public static void main(String[] args) throws IOException { 19 | 20 | String dirname = System.getProperty("user.dir"); 21 | String filename = "src/resources/en.wikipedia.org/wiki/Computer_science"; 22 | String baseURI = dirname + "/" + filename; 23 | 24 | File input = new File(baseURI); 25 | Document doc = Jsoup.parse(input, "UTF-8", baseURI); 26 | 27 | //print("Fetching %s...", url); 28 | //Document doc = Jsoup.connect(url).get(); 29 | 30 | Element content = doc.getElementById("mw-content-text"); 31 | Elements paragraphs = content.getElementsByTag("p"); 32 | 33 | 34 | for (Element p : paragraphs) { 35 | Elements links = p.select("a[href]"); 36 | for (Element link : links) { 37 | print(" * a: <%s> (%s)", link.attr("href"), trim(link.text(), 35)); 38 | } 39 | break; 40 | } 41 | 42 | 43 | // Elements media = doc.select("[src]"); 44 | // Elements imports = doc.select("link[href]"); 45 | // 46 | // print("\nMedia: (%d)", media.size()); 47 | // for (Element src : media) { 48 | // if (src.tagName().equals("img")) { 49 | // print(" * %s: <%s> %sx%s (%s)", 50 | // src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"), 51 | // trim(src.attr("alt"), 20)); 52 | // } else { 53 | // print(" * %s: <%s>", src.tagName(), src.attr("abs:src")); 54 | // } 55 | // } 56 | // 57 | // print("\nImports: (%d)", imports.size()); 58 | // for (Element link : imports) { 59 | // print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel")); 60 | // } 61 | // 62 | // print("\nLinks: (%d)", links.size()); 63 | // for (Element link : links) { 64 | // print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35)); 65 | // break; 66 | // } 67 | } 68 | 69 | private static void print(String msg, Object... 
args) { 70 | System.out.println(String.format(msg, args)); 71 | } 72 | 73 | private static String trim(String s, int width) { 74 | if (s.length() > width) 75 | return s.substring(0, width-1) + "."; 76 | else 77 | return s; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/Index.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.Set; 7 | import java.util.HashSet; 8 | 9 | import org.jsoup.select.Elements; 10 | 11 | /** 12 | * Encapsulates a map from search term to set of TermCounter. 13 | * 14 | * @author downey 15 | * 16 | */ 17 | public class Index { 18 | 19 | private Map> index = new HashMap>(); 20 | 21 | /** 22 | * Adds a TermCounter to the set associated with `term`. 23 | * 24 | * @param term 25 | * @param tc 26 | */ 27 | public void add(String term, TermCounter tc) { 28 | Set set = get(term); 29 | 30 | // if we're seeing a term for the first time, make a new Set 31 | if (set == null) { 32 | set = new HashSet(); 33 | index.put(term, set); 34 | } 35 | // otherwise we can modify an existing Set 36 | set.add(tc); 37 | } 38 | 39 | /** 40 | * Looks up a search term and returns a set of TermCounters. 41 | * 42 | * @param term 43 | * @return 44 | */ 45 | public Set get(String term) { 46 | return index.get(term); 47 | } 48 | 49 | /** 50 | * Prints the contents of the index. 
*/
    public void printIndex() {
        // loop through the search terms
        for (String term: keySet()) {
            System.out.println(term);

            // for each term, print the pages where it appears
            Set<TermCounter> tcs = get(term);
            for (TermCounter tc: tcs) {
                Integer count = tc.get(term);
                System.out.println("    " + tc.getLabel() + " " + count);
            }
        }
    }

    /**
     * Returns the set of terms that have been indexed.
     *
     * @return the key set of the underlying map
     */
    public Set<String> keySet() {
        return index.keySet();
    }

    /**
     * Add a page to the index.
     *
     * @param url         URL of the page.
     * @param paragraphs  Collection of elements that should be indexed.
     */
    public void indexPage(String url, Elements paragraphs) {
        // make a TermCounter and count the terms in the paragraphs
        TermCounter tc = new TermCounter(url);
        tc.processElements(paragraphs);

        // for each term in the TermCounter, add the TermCounter to the index
        for (String term: tc.keySet()) {
            add(term, tc);
        }
    }

    /**
     * Downloads two Wikipedia pages, indexes them, and prints the index.
     *
     * @param args not used
     * @throws IOException if a page cannot be fetched
     */
    public static void main(String[] args) throws IOException {

        WikiFetcher wf = new WikiFetcher();
        Index indexer = new Index();

        String url = "https://en.wikipedia.org/wiki/Java_(programming_language)";
        Elements paragraphs = wf.fetchWikipedia(url);
        indexer.indexPage(url, paragraphs);

        url = "https://en.wikipedia.org/wiki/Programming_language";
        paragraphs = wf.fetchWikipedia(url);
        indexer.indexPage(url, paragraphs);

        indexer.printIndex();
    }
}
--------------------------------------------------------------------------------
/solutions/src/com/allendowney/thinkdast/IndexTest.java:
--------------------------------------------------------------------------------
package com.allendowney.thinkdast;

import static org.junit.Assert.*;
import static
org.hamcrest.CoreMatchers.*; 5 | 6 | import java.io.IOException; 7 | import java.util.Set; 8 | 9 | import org.jsoup.select.Elements; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | public class IndexTest { 14 | 15 | private Index index; 16 | private WikiFetcher wf; 17 | 18 | @Before 19 | public void setUp() { 20 | wf = new WikiFetcher(); 21 | index = new Index(); 22 | } 23 | 24 | @Test 25 | public void testIndexPage() throws IOException { 26 | // add two pages to the index 27 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 28 | Elements paragraphs = wf.readWikipedia(url); 29 | index.indexPage(url, paragraphs); 30 | 31 | url = "https://en.wikipedia.org/wiki/Programming_language"; 32 | paragraphs = wf.readWikipedia(url); 33 | index.indexPage(url, paragraphs); 34 | 35 | // check the results: the word "occur" only appears on one page, twice 36 | Set set = index.get("occur"); 37 | assertThat(set.size(), is(1)); 38 | 39 | for (TermCounter tc: set) { 40 | // this loop only happens once 41 | assertThat(tc.size(), is(4798)); 42 | assertThat(tc.get("occur"), is(2)); 43 | assertThat(tc.get("not there"), is(0)); 44 | } 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/JedisIndexTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.*; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.io.IOException; 7 | import java.util.Map; 8 | 9 | import org.jsoup.select.Elements; 10 | import org.junit.After; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | import redis.clients.jedis.Jedis; 15 | 16 | /** 17 | * @author downey 18 | * 19 | */ 20 | public class JedisIndexTest { 21 | 22 | private static String url1, url2; 23 | private Jedis jedis; 24 | private JedisIndex index; 25 | 26 | /** 27 | * @throws 
java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | jedis = JedisMaker.make(); 32 | index = new JedisIndex(jedis); 33 | 34 | loadIndex(index); 35 | } 36 | 37 | /** 38 | * Loads the index with two pages read from files. 39 | * 40 | * @return 41 | * @throws IOException 42 | */ 43 | private static void loadIndex(JedisIndex index) throws IOException { 44 | WikiFetcher wf = new WikiFetcher(); 45 | 46 | url1 = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 47 | Elements paragraphs = wf.readWikipedia(url1); 48 | index.indexPage(url1, paragraphs); 49 | 50 | url2 = "https://en.wikipedia.org/wiki/Programming_language"; 51 | paragraphs = wf.readWikipedia(url2); 52 | index.indexPage(url2, paragraphs); 53 | } 54 | 55 | /** 56 | * @throws java.lang.Exception 57 | */ 58 | @After 59 | public void tearDown() throws Exception { 60 | jedis.close(); 61 | } 62 | 63 | /** 64 | * Test method for {@link JedisIndex#getCounts(java.lang.String)}. 65 | */ 66 | @Test 67 | public void testGetCounts() { 68 | Map map = index.getCounts("the"); 69 | assertThat(map.get(url1), is(339)); 70 | assertThat(map.get(url2), is(264)); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/JedisMaker.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import java.net.URL; 11 | import java.net.URLDecoder; 12 | 13 | import redis.clients.jedis.Jedis; 14 | 15 | 16 | public class JedisMaker { 17 | 18 | /** 19 | * Make a Jedis object and authenticate it. 
20 | * 21 | * @return 22 | * @throws IOException 23 | */ 24 | public static Jedis make() throws IOException { 25 | 26 | // assemble the directory name 27 | String slash = File.separator; 28 | String filename = "resources" + slash + "redis_url.txt"; 29 | URL fileURL = JedisMaker.class.getClassLoader().getResource(filename); 30 | String filepath = URLDecoder.decode(fileURL.getFile(), "UTF-8"); 31 | 32 | // open the file 33 | StringBuilder sb = new StringBuilder(); 34 | BufferedReader br; 35 | try { 36 | br = new BufferedReader(new FileReader(filepath)); 37 | } catch (FileNotFoundException e1) { 38 | System.out.println("File not found: " + filename); 39 | printInstructions(); 40 | return null; 41 | } 42 | 43 | // read the file 44 | while (true) { 45 | String line = br.readLine(); 46 | if (line == null) break; 47 | sb.append(line); 48 | } 49 | br.close(); 50 | 51 | // parse the URL 52 | URI uri; 53 | try { 54 | uri = new URI(sb.toString()); 55 | } catch (URISyntaxException e) { 56 | System.out.println("Reading file: " + filename); 57 | System.out.println("It looks like this file does not contain a valid URI."); 58 | printInstructions(); 59 | return null; 60 | } 61 | String host = uri.getHost(); 62 | int port = uri.getPort(); 63 | 64 | String[] array = uri.getAuthority().split("[:@]"); 65 | String auth = array[1]; 66 | 67 | // connect to the server 68 | Jedis jedis = new Jedis(host, port); 69 | 70 | try { 71 | jedis.auth(auth); 72 | } catch (Exception e) { 73 | System.out.println("Trying to connect to " + host); 74 | System.out.println("on port " + port); 75 | System.out.println("with authcode " + auth); 76 | System.out.println("Got exception " + e); 77 | printInstructions(); 78 | return null; 79 | } 80 | return jedis; 81 | } 82 | 83 | 84 | /** 85 | * 86 | */ 87 | private static void printInstructions() { 88 | System.out.println(""); 89 | System.out.println("To connect to RedisToGo, you have to provide a file called"); 90 | System.out.println("redis_url.txt that 
contains the URL of your Redis server."); 91 | System.out.println("If you select an instance on the RedisToGo web page,"); 92 | System.out.println("you should see a URL that contains the information you need:"); 93 | System.out.println("redis://redistogo:AUTH@HOST:PORT"); 94 | System.out.println("Create a file called redis_url.txt in the src/resources"); 95 | System.out.println("directory, and paste in the URL."); 96 | } 97 | 98 | 99 | /** 100 | * @param args 101 | * @throws IOException 102 | */ 103 | public static void main(String[] args) throws IOException { 104 | 105 | Jedis jedis = make(); 106 | 107 | // String 108 | jedis.set("mykey", "myvalue"); 109 | String value = jedis.get("mykey"); 110 | System.out.println("Got value: " + value); 111 | 112 | // Set 113 | jedis.sadd("myset", "element1", "element2", "element3"); 114 | System.out.println("element2 is member: " + jedis.sismember("myset", "element2")); 115 | 116 | // List 117 | jedis.rpush("mylist", "element1", "element2", "element3"); 118 | System.out.println("element at index 1: " + jedis.lindex("mylist", 1)); 119 | 120 | // Hash 121 | jedis.hset("myhash", "word1", Integer.toString(2)); 122 | jedis.hincrBy("myhash", "word2", 1); 123 | System.out.println("frequency of word1: " + jedis.hget("myhash", "word1")); 124 | System.out.println("frequency of word2: " + jedis.hget("myhash", "word2")); 125 | 126 | jedis.close(); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/JedisTermCounter.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | import java.util.Map; 6 | import org.jsoup.select.Elements; 7 | 8 | import redis.clients.jedis.Jedis; 9 | import redis.clients.jedis.Transaction; 10 | 11 | 12 | /** 13 | * Encapsulates a map from search term to frequency (count). 
14 | * 15 | * @author downey 16 | * 17 | */ 18 | public class JedisTermCounter extends TermCounter { 19 | 20 | public JedisTermCounter(String label) { 21 | super(label); 22 | } 23 | 24 | /** 25 | * 26 | * @return 27 | */ 28 | public List pushToRedis(Jedis jedis) { 29 | Transaction t = jedis.multi(); 30 | 31 | String hashname = hashName(); 32 | t.del(hashname); 33 | 34 | for (String key: keySet()) { 35 | Integer count = get(key); 36 | t.hset(hashname, key, count.toString()); 37 | } 38 | List res = t.exec(); 39 | return res; 40 | } 41 | 42 | /** 43 | * Returns the Redis key for this TermCounter. 44 | * 45 | * @return 46 | */ 47 | private String hashName() { 48 | return "TermCounter:" + getLabel(); 49 | } 50 | 51 | public Map pullFromRedis(Jedis jedis) { 52 | Map result = jedis.hgetAll(hashName()); 53 | return result; 54 | } 55 | 56 | /** 57 | * @param args 58 | * @throws IOException 59 | */ 60 | public static void main(String[] args) throws IOException { 61 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 62 | 63 | WikiFetcher wf = new WikiFetcher(); 64 | Elements paragraphs = wf.fetchWikipedia(url); 65 | JedisTermCounter counter = new JedisTermCounter(url.toString()); 66 | counter.processElements(paragraphs); 67 | 68 | Jedis jedis = JedisMaker.make(); 69 | 70 | counter.pushToRedis(jedis); 71 | System.out.println("Done pushing."); 72 | 73 | Map map = counter.pullFromRedis(jedis); 74 | for (Map.Entry entry: map.entrySet()) { 75 | System.out.println(entry.getKey() + ", " + entry.getValue()); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/JedisTermCounterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.util.Map; 10 | 11 | import 
org.jsoup.select.Elements; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import redis.clients.jedis.Jedis; 16 | 17 | /** 18 | * @author downey 19 | * 20 | */ 21 | public class JedisTermCounterTest { 22 | 23 | private Jedis jedis; 24 | private JedisTermCounter counter; 25 | 26 | /** 27 | * @throws java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 32 | 33 | WikiFetcher wf = new WikiFetcher(); 34 | Elements paragraphs = wf.readWikipedia(url); 35 | 36 | jedis = JedisMaker.make(); 37 | 38 | counter = new JedisTermCounter(url.toString()); 39 | counter.processElements(paragraphs); 40 | } 41 | 42 | /** 43 | * Test method for {@link JedisTermCounter#pushToRedis(redis.clients.jedis.Jedis)}. 44 | * @throws 45 | */ 46 | @Test 47 | public void testPushToRedis() { 48 | counter.pushToRedis(jedis); 49 | assertThat(counter.size(), is(4798)); 50 | 51 | Map map = counter.pullFromRedis(jedis); 52 | 53 | assertThat(map.size(), is(1184)); 54 | assertThat(map.get("the"), is("339")); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/LinkedListExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | /** 4 | * @author downey 5 | * 6 | */ 7 | public class LinkedListExample { 8 | 9 | /** 10 | * @param args 11 | */ 12 | public static void main(String[] args) { 13 | ListNode node1 = new ListNode(1); 14 | ListNode node2 = new ListNode(2); 15 | ListNode node3 = new ListNode(3); 16 | 17 | node1.next = node2; 18 | node2.next = node3; 19 | node3.next = null; 20 | 21 | ListNode node0 = new ListNode(0, node1); 22 | System.out.println(node0); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- 
/solutions/src/com/allendowney/thinkdast/ListClientExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class ListClientExample { 7 | @SuppressWarnings("rawtypes") 8 | private List list; 9 | 10 | @SuppressWarnings("rawtypes") 11 | public ListClientExample() { 12 | list = new ArrayList(); 13 | } 14 | 15 | @SuppressWarnings("rawtypes") 16 | public List getList() { 17 | return list; 18 | } 19 | 20 | public static void main(String[] args) { 21 | ListClientExample lce = new ListClientExample(); 22 | @SuppressWarnings("rawtypes") 23 | List list = lce.getList(); 24 | System.out.println(list); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/ListClientExampleTest.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import static org.junit.Assert.assertThat; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | import org.junit.Test; 10 | 11 | /** 12 | * @author downey 13 | * 14 | */ 15 | public class ListClientExampleTest { 16 | 17 | /** 18 | * Test method for {@link ListClientExample}. 
19 | */ 20 | @Test 21 | public void testListClientExample() { 22 | ListClientExample lce = new ListClientExample(); 23 | @SuppressWarnings("rawtypes") 24 | List list = lce.getList(); 25 | assertThat(list, instanceOf(ArrayList.class) ); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/ListLinks.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | 11 | /** 12 | * Example program to list links from a URL. 13 | * 14 | * From: http://jsoup.org/cookbook/extracting-data/example-list-links 15 | */ 16 | public class ListLinks { 17 | 18 | public static void main(String[] args) throws IOException { 19 | 20 | String dirname = System.getProperty("user.dir"); 21 | String filename = "src/resources/en.wikipedia.org/wiki/Computer_science"; 22 | String baseURI = dirname + "/" + filename; 23 | 24 | File input = new File(baseURI); 25 | Document doc = Jsoup.parse(input, "UTF-8", baseURI); 26 | 27 | //print("Fetching %s...", url); 28 | //Document doc = Jsoup.connect(url).get(); 29 | 30 | Element content = doc.getElementById("mw-content-text"); 31 | Elements paragraphs = content.getElementsByTag("p"); 32 | 33 | 34 | for (Element p : paragraphs) { 35 | Elements links = p.select("a[href]"); 36 | for (Element link : links) { 37 | print(" * a: <%s> (%s)", link.attr("href"), trim(link.text(), 35)); 38 | } 39 | break; 40 | } 41 | 42 | 43 | // Elements media = doc.select("[src]"); 44 | // Elements imports = doc.select("link[href]"); 45 | // 46 | // print("\nMedia: (%d)", media.size()); 47 | // for (Element src : media) { 48 | // if (src.tagName().equals("img")) { 49 | // print(" * %s: <%s> %sx%s (%s)", 50 | // 
/**
 * A node of a singly linked list: a cargo object plus a reference to the
 * next node.
 *
 * @author downey
 *
 */
public class ListNode {

    public Object cargo;
    public ListNode next;

    /** Makes a node with null cargo and no successor. */
    public ListNode() {
        this.cargo = null;
        this.next = null;
    }

    /**
     * Makes a node with the given cargo and no successor.
     *
     * @param cargo object stored in the node (may be null)
     */
    public ListNode(Object cargo) {
        this.cargo = cargo;
        this.next = null;
    }

    /**
     * Makes a node with the given cargo and successor.
     *
     * @param cargo object stored in the node (may be null)
     * @param next  node that follows this one, or null
     */
    public ListNode(Object cargo, ListNode next) {
        this.cargo = cargo;
        this.next = next;
    }

    /**
     * Returns "ListNode(" + cargo + ")".
     *
     * String concatenation is used rather than cargo.toString() so that a
     * node with null cargo prints as "ListNode(null)" instead of throwing.
     */
    @Override
    public String toString() {
        return "ListNode(" + cargo + ")";
    }
}
org.junit.Assert.*; 4 | import static org.hamcrest.CoreMatchers.*; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Arrays; 8 | import java.util.Comparator; 9 | import java.util.List; 10 | 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | /** 15 | * @author downey 16 | * 17 | */ 18 | public class ListSorterTest { 19 | 20 | private ListSorter sorter; 21 | private Comparator comparator; 22 | 23 | /** 24 | * @throws java.lang.Exception 25 | */ 26 | @Before 27 | public void setUp() throws Exception { 28 | 29 | comparator = new Comparator() { 30 | @Override 31 | public int compare(Integer elt1, Integer elt2) { 32 | return elt1.compareTo(elt2); 33 | } 34 | }; 35 | 36 | sorter = new ListSorter(); 37 | } 38 | 39 | /** 40 | * Test method for {@link ListSorter#insertionSort(java.util.List, java.util.Comparator)}. 41 | */ 42 | @Test 43 | public void testInsertionSort() { 44 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 45 | sorter.insertionSort(list, comparator); 46 | isSorted(list); 47 | } 48 | 49 | /** 50 | * @param list 51 | * 52 | */ 53 | private void isSorted(List list) { 54 | assertThat(list.size(), is(5)); 55 | assertThat(list.get(0), is(1)); 56 | assertThat(list.get(1), is(2)); 57 | assertThat(list.get(2), is(3)); 58 | assertThat(list.get(3), is(4)); 59 | assertThat(list.get(4), is(5)); 60 | } 61 | 62 | /** 63 | * Test method for {@link mergeSortInPlace(java.util.List, java.util.Comparator)}. 64 | */ 65 | @Test 66 | public void testMergeSortInPlace() { 67 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 68 | sorter.mergeSortInPlace(list, comparator); 69 | isSorted(list); 70 | } 71 | 72 | /** 73 | * Test method for {@link mergeSort(java.util.List, java.util.Comparator)}. 
74 | */ 75 | @Test 76 | public void testMergeSort() { 77 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 78 | List sorted = sorter.mergeSort(list, comparator); 79 | isSorted(sorted); 80 | } 81 | 82 | /** 83 | * Test method for {@link heapSort(java.util.List, java.util.Comparator)}. 84 | */ 85 | @Test 86 | public void testHeapSort() { 87 | List list = new ArrayList(Arrays.asList(3, 5, 1, 4, 2)); 88 | sorter.heapSort(list, comparator); 89 | isSorted(list); 90 | } 91 | 92 | /** 93 | * Test method for {@link topK(int, java.util.List, java.util.Comparator)}. 94 | */ 95 | @Test 96 | public void testTopK() { 97 | List list = new ArrayList(Arrays.asList(6, 3, 5, 8, 1, 4, 2, 7)); 98 | 99 | List res = sorter.topK(4, list, comparator); 100 | assertThat(res.size(), is(4)); 101 | assertThat(res.get(0), is(5)); 102 | assertThat(res.get(1), is(6)); 103 | assertThat(res.get(2), is(7)); 104 | assertThat(res.get(3), is(8)); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyBetterMap.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashSet; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * Implementation of a Map using a collection of MyLinearMap, and 12 | * using `hashCode` to determine which map each key should go in. 13 | * 14 | * @author downey 15 | * @param 16 | * @param 17 | * 18 | */ 19 | public class MyBetterMap implements Map { 20 | 21 | // MyBetterMap uses a collection of MyLinearMap 22 | protected List> maps; 23 | 24 | /** 25 | * Initialize the map with 2 sub-maps. 
26 | * 27 | */ 28 | public MyBetterMap() { 29 | makeMaps(2); 30 | } 31 | 32 | /** 33 | * Makes a collection of `k` MyLinearMap 34 | * 35 | * @param k 36 | */ 37 | protected void makeMaps(int k) { 38 | maps = new ArrayList>(k); 39 | for (int i=0; i()); 41 | } 42 | } 43 | 44 | @Override 45 | public void clear() { 46 | // clear the sub-maps 47 | for (int i=0; i chooseMap(Object key) { 59 | int index = key==null ? 0 : Math.abs(key.hashCode()) % maps.size(); 60 | return maps.get(index); 61 | } 62 | 63 | @Override 64 | public boolean containsKey(Object target) { 65 | // to find a key, we only have to search one map 66 | MyLinearMap map = chooseMap(target); 67 | return map.containsKey(target); 68 | } 69 | 70 | @Override 71 | public boolean containsValue(Object target) { 72 | // to find a value, we have to search all map 73 | for (MyLinearMap map: maps) { 74 | if (map.containsValue(target)) { 75 | return true; 76 | } 77 | } 78 | return false; 79 | } 80 | 81 | @Override 82 | public Set> entrySet() { 83 | throw new UnsupportedOperationException(); 84 | } 85 | 86 | @Override 87 | public V get(Object key) { 88 | MyLinearMap map = chooseMap(key); 89 | return map.get(key); 90 | } 91 | 92 | @Override 93 | public boolean isEmpty() { 94 | return size() == 0; 95 | } 96 | 97 | @Override 98 | public Set keySet() { 99 | // add up the keySets from the sub-maps 100 | Set set = new HashSet(); 101 | for (MyLinearMap map: maps) { 102 | set.addAll(map.keySet()); 103 | } 104 | return set; 105 | } 106 | 107 | @Override 108 | public V put(K key, V value) { 109 | MyLinearMap map = chooseMap(key); 110 | return map.put(key, value); 111 | } 112 | 113 | @Override 114 | public void putAll(Map map) { 115 | for (Map.Entry entry: map.entrySet()) { 116 | put(entry.getKey(), entry.getValue()); 117 | } 118 | } 119 | 120 | @Override 121 | public V remove(Object key) { 122 | MyLinearMap map = chooseMap(key); 123 | return map.remove(key); 124 | } 125 | 126 | @Override 127 | public int size() { 128 | // add up 
the sizes of the sub-maps 129 | int total = 0; 130 | for (MyLinearMap map: maps) { 131 | total += map.size(); 132 | } 133 | return total; 134 | } 135 | 136 | @Override 137 | public Collection values() { 138 | // add up the valueSets from the sub-maps 139 | Set set = new HashSet(); 140 | for (MyLinearMap map: maps) { 141 | set.addAll(map.values()); 142 | } 143 | return set; 144 | } 145 | 146 | /** 147 | * @param args 148 | */ 149 | public static void main(String[] args) { 150 | Map map = new MyBetterMap(); 151 | map.put("Word1", 1); 152 | map.put("Word2", 2); 153 | Integer value = map.get("Word1"); 154 | System.out.println(value); 155 | 156 | for (String key: map.keySet()) { 157 | System.out.println(key + ", " + map.get(key)); 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyBetterMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyBetterMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyBetterMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyFixedHashMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Map; 7 | 8 | /** 9 | * Implementation of a HashMap using a collection of MyLinearMap and 10 | * resizing when there are too many entries. 
11 | * 12 | * @author downey 13 | * @param 14 | * @param 15 | * 16 | */ 17 | public class MyFixedHashMap extends MyHashMap implements Map { 18 | 19 | private int size = 0; 20 | 21 | @Override 22 | public void clear() { 23 | super.clear(); 24 | size = 0; 25 | } 26 | 27 | @Override 28 | public V put(K key, V value) { 29 | MyLinearMap map = chooseMap(key); 30 | size -= map.size(); 31 | V oldValue = map.put(key, value); 32 | size += map.size(); 33 | 34 | if (size() > maps.size() * FACTOR) { 35 | size = 0; 36 | rehash(); 37 | } 38 | return oldValue; 39 | } 40 | 41 | @Override 42 | public V remove(Object key) { 43 | MyLinearMap map = chooseMap(key); 44 | size -= map.size(); 45 | V oldValue = map.remove(key); 46 | size += map.size(); 47 | return oldValue; 48 | } 49 | 50 | @Override 51 | public int size() { 52 | return size; 53 | } 54 | 55 | /** 56 | * @param args 57 | */ 58 | public static void main(String[] args) { 59 | Map map = new MyFixedHashMap(); 60 | for (int i=0; i<10; i++) { 61 | map.put(new Integer(i).toString(), i); 62 | } 63 | Integer value = map.get("3"); 64 | System.out.println(value); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyFixedHashMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyFixedHashMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyFixedHashMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyHashMap.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | /** 10 | * Implementation of a HashMap using a collection of MyLinearMap and 11 | * resizing when there are too many entries. 12 | * 13 | * @author downey 14 | * @param 15 | * @param 16 | * 17 | */ 18 | public class MyHashMap extends MyBetterMap implements Map { 19 | 20 | // average number of entries per map before we rehash 21 | protected static final double FACTOR = 1.0; 22 | 23 | @Override 24 | public V put(K key, V value) { 25 | V oldValue = super.put(key, value); 26 | 27 | //System.out.println("Put " + key + " in " + map + " size now " + map.size()); 28 | 29 | // check if the number of elements per map exceeds the threshold 30 | if (size() > maps.size() * FACTOR) { 31 | rehash(); 32 | } 33 | return oldValue; 34 | } 35 | 36 | /** 37 | * Doubles the number of maps and rehashes the existing entries. 
38 | */ 39 | /** 40 | * 41 | */ 42 | protected void rehash() { 43 | // save the existing entries 44 | List> oldMaps = maps; 45 | 46 | // make more maps 47 | int newK = maps.size() * 2; 48 | makeMaps(newK); 49 | 50 | //System.out.println("Rehashing, n is now " + newN); 51 | 52 | // put the entries into the new map 53 | for (MyLinearMap map: oldMaps) { 54 | for (Map.Entry entry: map.getEntries()) { 55 | put(entry.getKey(), entry.getValue()); 56 | } 57 | } 58 | } 59 | 60 | /** 61 | * @param args 62 | */ 63 | public static void main(String[] args) { 64 | Map map = new MyHashMap(); 65 | for (int i=0; i<10; i++) { 66 | map.put(new Integer(i).toString(), i); 67 | } 68 | Integer value = map.get("3"); 69 | System.out.println(value); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyHashMapTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import org.junit.Before; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class MyHashMapTest extends MyLinearMapTest { 13 | 14 | /** 15 | * @throws java.lang.Exception 16 | */ 17 | @Before 18 | public void setUp() throws Exception { 19 | map = new MyHashMap(); 20 | map.put("One", 1); 21 | map.put("Two", 2); 22 | map.put("Three", 3); 23 | map.put(null, 0); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyLinearMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Collection; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Set; 12 | 13 | /** 14 | * Implementation of a Map using a List of entries, so most 15 | * operations are 
/**
 * Implementation of a Map using a List of entries, so most
 * operations are linear time.
 *
 * @author downey
 * @param <K> type of the keys
 * @param <V> type of the values
 *
 */
public class MyLinearMap<K, V> implements Map<K, V> {

    private List<Entry> entries = new ArrayList<Entry>();

    /**
     * A key-value pair stored in the map.
     */
    public class Entry implements Map.Entry<K, V> {
        private K key;
        private V value;

        public Entry(K key, V value) {
            this.key = key;
            this.value = value;
        }

        @Override
        public K getKey() {
            return key;
        }
        @Override
        public V getValue() {
            return value;
        }
        /**
         * Replaces the value and, as Map.Entry requires, returns the value
         * that was stored before the call.
         */
        @Override
        public V setValue(V newValue) {
            V oldValue = value;
            value = newValue;
            return oldValue;
        }
    }

    @Override
    public void clear() {
        entries.clear();
    }

    @Override
    public boolean containsKey(Object target) {
        return findEntry(target) != null;
    }

    /**
     * Returns the entry that contains the target key, or null if there is none.
     *
     * @param target key to look for, may be null
     */
    private Entry findEntry(Object target) {
        for (Entry entry: entries) {
            if (equals(target, entry.getKey())) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Compares two keys or two values, handling null correctly.
     *
     * @param target first object, may be null
     * @param obj    second object, may be null
     * @return true if both are null or target.equals(obj)
     */
    private boolean equals(Object target, Object obj) {
        if (target == null) {
            return obj == null;
        }
        return target.equals(obj);
    }

    @Override
    public boolean containsValue(Object target) {
        for (Map.Entry<K, V> entry: entries) {
            if (equals(target, entry.getValue())) {
                return true;
            }
        }
        return false;
    }

    @Override
    public Set<Map.Entry<K, V>> entrySet() {
        throw new UnsupportedOperationException();
    }

    @Override
    public V get(Object key) {
        Entry entry = findEntry(key);
        if (entry == null) {
            return null;
        }
        return entry.getValue();
    }

    @Override
    public boolean isEmpty() {
        return entries.isEmpty();
    }

    @Override
    public Set<K> keySet() {
        Set<K> set = new HashSet<K>();
        for (Entry entry: entries) {
            set.add(entry.getKey());
        }
        return set;
    }

    @Override
    public V put(K key, V value) {
        Entry entry = findEntry(key);
        if (entry == null) {
            entries.add(new Entry(key, value));
            return null;
        } else {
            V oldValue = entry.getValue();
            entry.setValue(value);
            return oldValue;
        }
    }

    @Override
    public void putAll(Map<? extends K, ? extends V> map) {
        for (Map.Entry<? extends K, ? extends V> entry: map.entrySet()) {
            put(entry.getKey(), entry.getValue());
        }
    }

    @Override
    public V remove(Object key) {
        Entry entry = findEntry(key);
        if (entry == null) {
            return null;
        } else {
            V value = entry.getValue();
            entries.remove(entry);
            return value;
        }
    }

    @Override
    public int size() {
        return entries.size();
    }

    @Override
    public Collection<V> values() {
        // a list, not a set: two keys may map to equal values and both must appear
        List<V> list = new ArrayList<V>(entries.size());
        for (Entry entry: entries) {
            list.add(entry.getValue());
        }
        return list;
    }

    /**
     * Small demonstration: stores two entries and prints them.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<String, Integer> map = new MyLinearMap<String, Integer>();
        map.put("Word1", 1);
        map.put("Word2", 2);
        Integer value = map.get("Word1");
        System.out.println(value);

        for (String key: map.keySet()) {
            System.out.println(key + ", " + map.get(key));
        }
    }

    /**
     * Returns a reference to `entries`.
     *
     * This is not part of the Map interface; it is here to provide the functionality
     * of `entrySet` in a way that is substantially simpler than the "right" way.
     *
     * @return the internal list of entries (not a copy)
     */
    protected Collection<? extends Map.Entry<K, V>> getEntries() {
        return entries;
    }
}
48 | */ 49 | @Test 50 | public void testContainsKey() { 51 | assertThat(map.containsKey("Three"), is(true)); 52 | assertThat(map.containsKey(null), is(true)); 53 | assertThat(map.containsKey("Four"), is(false)); 54 | } 55 | 56 | /** 57 | * Test method for {@link MyLinearMap#containsValue(java.lang.Object)}. 58 | */ 59 | @Test 60 | public void testContainsValue() { 61 | assertThat(map.containsValue(3), is(true)); 62 | assertThat(map.containsValue(0), is(true)); 63 | assertThat(map.containsValue(4), is(false)); 64 | } 65 | 66 | /** 67 | * Test method for {@link MyLinearMap#get(java.lang.Object)}. 68 | */ 69 | @Test 70 | public void testGet() { 71 | assertThat(map.get("Three"), is(3)); 72 | assertThat(map.get(null), is(0)); 73 | assertThat(map.get("Four"), nullValue()); 74 | } 75 | 76 | /** 77 | * Test method for {@link MyLinearMap#isEmpty()}. 78 | */ 79 | @Test 80 | public void testIsEmpty() { 81 | assertThat(map.isEmpty(), is(false)); 82 | map.clear(); 83 | assertThat(map.isEmpty(), is(true)); 84 | } 85 | 86 | /** 87 | * Test method for {@link MyLinearMap#keySet()}. 88 | */ 89 | @Test 90 | public void testKeySet() { 91 | Set keySet = map.keySet(); 92 | assertThat(keySet.size(), is(4)); 93 | assertThat(keySet.contains("Three"), is(true)); 94 | assertThat(keySet.contains(null), is(true)); 95 | assertThat(keySet.contains("Four"), is(false)); 96 | } 97 | 98 | /** 99 | * Test method for {@link MyLinearMap#put(java.lang.Object, java.lang.Object)}. 100 | */ 101 | @Test 102 | public void testPut() { 103 | map.put("One", 11); 104 | assertThat(map.size(), is(4)); 105 | assertThat(map.get("One"), is(11)); 106 | 107 | map.put("Five", 5); 108 | assertThat(map.size(), is(5)); 109 | assertThat(map.get("Five"), is(5)); 110 | } 111 | 112 | /** 113 | * Test method for {@link MyLinearMap#putAll(java.util.Map)}. 
114 | */ 115 | @Test 116 | public void testPutAll() { 117 | Map m = new HashMap(); 118 | m.put("Six", 6); 119 | m.put("Seven", 7); 120 | m.put("Eight", 8); 121 | map.putAll(m); 122 | assertThat(map.size(), is(7)); 123 | } 124 | 125 | /** 126 | * Test method for {@link MyLinearMap#remove(java.lang.Object)}. 127 | */ 128 | @Test 129 | public void testRemove() { 130 | map.remove("One"); 131 | assertThat(map.size(), is(3)); 132 | assertThat(map.get("One"), nullValue()); 133 | } 134 | 135 | /** 136 | * Test method for {@link MyLinearMap#size()}. 137 | */ 138 | @Test 139 | public void testSize() { 140 | assertThat(map.size(), is(4)); 141 | } 142 | 143 | /** 144 | * Test method for {@link MyLinearMap#values()}. 145 | */ 146 | @Test 147 | public void testValues() { 148 | Collection keySet = map.values(); 149 | assertThat(keySet.size(), is(4)); 150 | assertThat(keySet.contains(3), is(true)); 151 | assertThat(keySet.contains(0), is(true)); 152 | assertThat(keySet.contains(4), is(false)); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyLinkedListTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayList; 7 | 8 | import org.junit.Before; 9 | 10 | 11 | /** 12 | * @author downey 13 | * 14 | */ 15 | public class MyLinkedListTest extends MyArrayListTest { 16 | 17 | /** 18 | * @throws java.lang.Exception 19 | */ 20 | @Before 21 | public void setUp() throws Exception { 22 | list = new ArrayList(); 23 | list.add(1); 24 | list.add(2); 25 | list.add(3); 26 | 27 | mylist = new MyLinkedList(); 28 | mylist.addAll(list); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/MyTreeMapExample.java: -------------------------------------------------------------------------------- 1 | 
package com.allendowney.thinkdast; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | import java.util.TreeMap; 10 | import java.util.UUID; 11 | 12 | public class MyTreeMapExample { 13 | 14 | public static void main(String[] args) { 15 | int n = 16384; 16 | System.out.println("\nTesting MyTreeMap with random strings"); 17 | putRandomStrings(n); 18 | 19 | System.out.println("\nTesting MyTreeMap with timestamps"); 20 | putTimestamps(n); 21 | 22 | } 23 | 24 | /** 25 | * @param map 26 | * @param n 27 | */ 28 | private static void putRandomStrings(int n) { 29 | // MyTreeMap map = new MyTreeMap(); 30 | TreeMap map = new TreeMap(); 31 | 32 | final long startTime = System.currentTimeMillis(); 33 | for (int i=0; i map = new MyTreeMap(); 48 | TreeMap map = new TreeMap(); 49 | 50 | final long startTime = System.currentTimeMillis(); 51 | for (int i=0; i map, final long elapsed, int height) { 65 | System.out.println(" Time in milliseconds = " + (elapsed)); 66 | System.out.println(" Final size of MyTreeMap = " + map.size()); 67 | System.out.println(" log base 2 of size of MyTreeMap = " + Math.log(map.size()) / Math.log(2)); 68 | System.out.println(" Final height of MyTreeMap = " + height); 69 | } 70 | /** 71 | * @param map 72 | * @param n 73 | */ 74 | @SuppressWarnings("unused") 75 | private static void putWordList(int n) { 76 | // assemble the file name 77 | String slash = File.separator; 78 | String filename = System.getProperty("user.dir") + slash + 79 | "src" + slash + "resources" + slash + "words.txt"; 80 | 81 | MyTreeMap map = new MyTreeMap(); 82 | 83 | final long startTime = System.currentTimeMillis(); 84 | try (BufferedReader br = new BufferedReader(new FileReader(filename))) { 85 | String line; 86 | int i = 0; 87 | while ((line = br.readLine()) != null) { 88 | map.put(line, 0); 89 | 90 | i++; 91 | if (i >= n) { 92 | break; 
/**
 * A page identified by a URL, holding the lines added through
 * {@link #addLine(String)}.
 *
 * @author downey
 *
 */
public class Page implements Readable {
    public URL url;
    public List<String> content;

    /**
     * Creates a page for the given URL with no content yet.
     *
     * @param url where the page comes from
     */
    public Page(URL url) {
        this.url = url;
        this.content = new ArrayList<String>();
    }

    /**
     * Appends one line to the content of this page.
     *
     * @param line the line to add
     */
    public void addLine(String line) {
        content.add(line);
    }

    /**
     * Not implemented yet: no characters are ever written to the buffer.
     *
     * @param arg0 the buffer to read into; left untouched
     * @return -1, the Readable signal for "no more input"
     * @throws IOException never thrown by this implementation
     */
    @Override
    public int read(CharBuffer arg0) throws IOException {
        // TODO: actually supply the page content.
        // Reporting 0 on every call would tell a consumer "nothing yet, try
        // again" indefinitely; -1 lets it stop.
        return -1;
    }
}
ArrayList 25 | */ 26 | public static void profileArrayListAddEnd() { 27 | Timeable timeable = new Timeable() { 28 | List list; 29 | 30 | public void setup(int n) { 31 | list = new ArrayList(); 32 | } 33 | 34 | public void timeMe(int n) { 35 | for (int i=0; i list; 51 | 52 | public void setup(int n) { 53 | list = new ArrayList(); 54 | } 55 | 56 | public void timeMe(int n) { 57 | for (int i=0; i list; 73 | 74 | public void setup(int n) { 75 | list = new LinkedList(); 76 | } 77 | 78 | public void timeMe(int n) { 79 | for (int i=0; i list; 95 | 96 | public void setup(int n) { 97 | list = new LinkedList(); 98 | } 99 | 100 | public void timeMe(int n) { 101 | for (int i=0; i map; 27 | 28 | public void setup(int n) { 29 | map = new HashMap(); 30 | } 31 | 32 | public void timeMe(int n) { 33 | for (int i=0; i map; 49 | 50 | public void setup(int n) { 51 | map = new MyHashMap(); 52 | } 53 | 54 | public void timeMe(int n) { 55 | for (int i=0; i map; 72 | 73 | public void setup(int n) { 74 | map = new MyFixedHashMap(); 75 | } 76 | 77 | public void timeMe(int n) { 78 | for (int i=0; i map = new MyBetterMap(); 48 | 49 | SillyArray array1 = new SillyArray("Word1".toCharArray()); 50 | map.put(array1, 1); 51 | 52 | // what happens if we mutate a key while it's in the Map? 
53 | array1.setChar(0, 'C'); 54 | 55 | Integer value = map.get(array1); 56 | System.out.println(value); 57 | 58 | for (SillyArray key: map.keySet()) { 59 | System.out.println(key + ", " + map.get(key)); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/SillyString.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.Map; 7 | 8 | /** 9 | * @author downey 10 | * 11 | */ 12 | public class SillyString { 13 | private final String innerString; 14 | 15 | public SillyString(String innerString) { 16 | this.innerString = innerString; 17 | } 18 | 19 | public String toString() { 20 | return innerString; 21 | } 22 | 23 | @Override 24 | public boolean equals(Object other) { 25 | return this.toString().equals(other.toString()); 26 | } 27 | 28 | @Override 29 | public int hashCode() { 30 | int total = 0; 31 | for (int i=0; i map = new MyBetterMap(); 43 | 44 | map.put(new SillyString("Word1"), 1); 45 | map.put(new SillyString("Word2"), 2); 46 | Integer value = map.get(new SillyString("Word1")); 47 | System.out.println(value); 48 | 49 | for (SillyString key: map.keySet()) { 50 | System.out.println(key + ", " + map.get(key)); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/TermCounter.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | import org.jsoup.nodes.Node; 9 | import org.jsoup.nodes.TextNode; 10 | import org.jsoup.select.Elements; 11 | 12 | 13 | /** 14 | * Encapsulates a map from search term to frequency (count). 
15 | * 16 | * @author downey 17 | * 18 | */ 19 | public class TermCounter { 20 | 21 | private Map map; 22 | private String label; 23 | 24 | public TermCounter(String label) { 25 | this.label = label; 26 | this.map = new HashMap(); 27 | } 28 | 29 | public String getLabel() { 30 | return label; 31 | } 32 | 33 | /** 34 | * Returns the total of all counts. 35 | * 36 | * @return 37 | */ 38 | public int size() { 39 | int total = 0; 40 | for (Integer value: map.values()) { 41 | total += value; 42 | } 43 | return total; 44 | } 45 | 46 | /** 47 | * Takes a collection of Elements and counts their words. 48 | * 49 | * @param paragraphs 50 | */ 51 | public void processElements(Elements paragraphs) { 52 | for (Node node: paragraphs) { 53 | processTree(node); 54 | } 55 | } 56 | 57 | /** 58 | * Finds TextNodes in a DOM tree and counts their words. 59 | * 60 | * @param root 61 | */ 62 | public void processTree(Node root) { 63 | // NOTE: we could use select to find the TextNodes, but since 64 | // we already have a tree iterator, let's use it. 65 | for (Node node: new WikiNodeIterable(root)) { 66 | if (node instanceof TextNode) { 67 | processText(((TextNode) node).text()); 68 | } 69 | } 70 | } 71 | 72 | /** 73 | * Splits `text` into words and counts them. 74 | * 75 | * @param text The text to process. 76 | */ 77 | public void processText(String text) { 78 | // replace punctuation with spaces, convert to lower case, and split on whitespace 79 | String[] array = text.replaceAll("\\pP", " "). 80 | toLowerCase(). 81 | split("\\s+"); 82 | 83 | for (int i=0; i keySet() { 126 | return map.keySet(); 127 | } 128 | 129 | /** 130 | * Print the terms and their counts in arbitrary order. 
131 | */ 132 | public void printCounts() { 133 | for (String key: keySet()) { 134 | Integer count = get(key); 135 | System.out.println(key + ", " + count); 136 | } 137 | System.out.println("Total of all counts = " + size()); 138 | } 139 | 140 | /** 141 | * @param args 142 | * @throws IOException 143 | */ 144 | public static void main(String[] args) throws IOException { 145 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 146 | 147 | WikiFetcher wf = new WikiFetcher(); 148 | Elements paragraphs = wf.fetchWikipedia(url); 149 | 150 | TermCounter counter = new TermCounter(url.toString()); 151 | counter.processElements(paragraphs); 152 | counter.printCounts(); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/TermCounterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import org.jsoup.select.Elements; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | /** 14 | * @author downey 15 | * 16 | */ 17 | public class TermCounterTest { 18 | 19 | private TermCounter counter; 20 | 21 | /** 22 | * @throws java.lang.Exception 23 | */ 24 | @Before 25 | public void setUp() throws Exception { 26 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 27 | 28 | WikiFetcher wf = new WikiFetcher(); 29 | Elements paragraphs = wf.readWikipedia(url); 30 | 31 | counter = new TermCounter(url.toString()); 32 | counter.processElements(paragraphs); 33 | } 34 | 35 | @Test 36 | public void testSize() { 37 | assertThat(counter.size(), is(4798)); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiCrawler.java: 
/**
 * Creates a crawler whose queue initially holds only the starting URL.
 *
 * @param source URL to start from; it is the first entry placed on the queue
 * @param index index that receives the crawled pages
 */
public WikiCrawler(String source, JedisIndex index) {
    this.index = index;
    this.source = source;
    // seed the queue so the first call to crawl() has something to do
    queue.offer(this.source);
}
55 | * @throws IOException 56 | */ 57 | public String crawl(boolean testing) throws IOException { 58 | if (queue.isEmpty()) { 59 | return null; 60 | } 61 | String url = queue.poll(); 62 | System.out.println("Crawling " + url); 63 | 64 | if (testing==false && index.isIndexed(url)) { 65 | System.out.println("Already indexed."); 66 | return null; 67 | } 68 | 69 | Elements paragraphs; 70 | if (testing) { 71 | paragraphs = wf.readWikipedia(url); 72 | } else { 73 | paragraphs = wf.fetchWikipedia(url); 74 | } 75 | index.indexPage(url, paragraphs); 76 | queueInternalLinks(paragraphs); 77 | return url; 78 | } 79 | 80 | /** 81 | * Parses paragraphs and adds internal links to the queue. 82 | * 83 | * @param paragraphs 84 | */ 85 | // NOTE: absence of access level modifier means package-level 86 | void queueInternalLinks(Elements paragraphs) { 87 | for (Element paragraph: paragraphs) { 88 | queueInternalLinks(paragraph); 89 | } 90 | } 91 | 92 | /** 93 | * Parses a paragraph and adds internal links to the queue. 
94 | * 95 | * @param paragraph 96 | */ 97 | private void queueInternalLinks(Element paragraph) { 98 | Elements elts = paragraph.select("a[href]"); 99 | for (Element elt: elts) { 100 | String relURL = elt.attr("href"); 101 | 102 | if (relURL.startsWith("/wiki/")) { 103 | String absURL = "https://en.wikipedia.org" + relURL; 104 | //System.out.println(absURL); 105 | queue.offer(absURL); 106 | } 107 | } 108 | } 109 | 110 | public static void main(String[] args) throws IOException { 111 | // make a WikiCrawler 112 | Jedis jedis = JedisMaker.make(); 113 | JedisIndex index = new JedisIndex(jedis); 114 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 115 | WikiCrawler wc = new WikiCrawler(source, index); 116 | 117 | // for testing purposes, load up the queue 118 | Elements paragraphs = wf.fetchWikipedia(source); 119 | wc.queueInternalLinks(paragraphs); 120 | 121 | // loop until we index a new page 122 | String res; 123 | do { 124 | res = wc.crawl(false); 125 | } while (res == null); 126 | 127 | Map map = index.getCounts("the"); 128 | for (Entry entry: map.entrySet()) { 129 | System.out.println(entry); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiCrawlerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.assertThat; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.io.IOException; 10 | import java.util.Map; 11 | 12 | import org.jsoup.select.Elements; 13 | import org.junit.After; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import redis.clients.jedis.Jedis; 18 | 19 | 20 | /** 21 | * @author downey 22 | * 23 | */ 24 | public class WikiCrawlerTest { 25 | 26 | private Jedis jedis; 27 | private WikiCrawler wc; 28 | private JedisIndex index; 29 | 30 | /** 31 | * 
@throws java.lang.Exception 32 | */ 33 | @Before 34 | public void setUp() throws Exception { 35 | // make a WikiCrawler 36 | jedis = JedisMaker.make(); 37 | index = new JedisIndex(jedis); 38 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 39 | wc = new WikiCrawler(source, index); 40 | 41 | // for testing purposes, load up the queue 42 | WikiFetcher wf = new WikiFetcher(); 43 | Elements paragraphs = wf.readWikipedia(source); 44 | wc.queueInternalLinks(paragraphs); 45 | } 46 | 47 | /** 48 | * @throws java.lang.Exception 49 | */ 50 | @After 51 | public void tearDown() throws Exception { 52 | jedis.close(); 53 | } 54 | 55 | /** 56 | * Test method for {@link WikiCrawler#crawl()}. 57 | * @throws IOException 58 | */ 59 | @Test 60 | public void testCrawl() throws IOException { 61 | String url1 = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 62 | String url2 = "https://en.wikipedia.org/wiki/Programming_language"; 63 | String url3 = "https://en.wikipedia.org/wiki/Concurrent_computing"; 64 | 65 | String res = wc.crawl(true); 66 | assertThat(res.equals(url1), is(true)); 67 | assertThat(wc.queueSize(), is(396)); 68 | 69 | res = wc.crawl(true); 70 | assertThat(res.equals(url2), is(true)); 71 | assertThat(wc.queueSize(), is(653)); 72 | 73 | res = wc.crawl(true); 74 | assertThat(res.equals(url3), is(true)); 75 | assertThat(wc.queueSize(), is(704)); 76 | 77 | Map map = index.getCounts("the"); 78 | 79 | int count = map.get(url1); 80 | assertThat(count, is(339)); 81 | 82 | count = map.get(url2); 83 | assertThat(count, is(264)); 84 | 85 | count = map.get(url3); 86 | assertThat(count, is(53)); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiFetcher.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import 
java.io.InputStream; 6 | import java.net.URL; 7 | 8 | import org.jsoup.Connection; 9 | import org.jsoup.Jsoup; 10 | import org.jsoup.nodes.Document; 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | 14 | 15 | public class WikiFetcher { 16 | private long lastRequestTime = -1; 17 | private long minInterval = 1000; 18 | 19 | /** 20 | * Fetches and parses a URL string, returning a list of paragraph elements. 21 | * 22 | * @param url 23 | * @return 24 | * @throws IOException 25 | */ 26 | public Elements fetchWikipedia(String url) throws IOException { 27 | sleepIfNeeded(); 28 | 29 | // download and parse the document 30 | Connection conn = Jsoup.connect(url); 31 | Document doc = conn.get(); 32 | 33 | // select the content text and pull out the paragraphs. 34 | Element content = doc.getElementById("mw-content-text"); 35 | 36 | // TODO: avoid selecting paragraphs from sidebars and boxouts 37 | Elements paras = content.select("p"); 38 | return paras; 39 | } 40 | 41 | /** 42 | * Reads the contents of a Wikipedia page from src/resources. 43 | * 44 | * @param url 45 | * @return 46 | * @throws IOException 47 | */ 48 | public Elements readWikipedia(String url) throws IOException { 49 | URL realURL = new URL(url); 50 | 51 | // assemble the file name 52 | String slash = File.separator; 53 | String filename = "resources" + slash + realURL.getHost() + realURL.getPath(); 54 | 55 | // read the file 56 | InputStream stream = WikiFetcher.class.getClassLoader().getResourceAsStream(filename); 57 | Document doc = Jsoup.parse(stream, "UTF-8", filename); 58 | 59 | // parse the contents of the file 60 | // TODO: factor out the following repeated code 61 | Element content = doc.getElementById("mw-content-text"); 62 | Elements paras = content.select("p"); 63 | return paras; 64 | } 65 | 66 | /** 67 | * Rate limits by waiting at least the minimum interval between requests. 
68 | */ 69 | private void sleepIfNeeded() { 70 | if (lastRequestTime != -1) { 71 | long currentTime = System.currentTimeMillis(); 72 | long nextRequestTime = lastRequestTime + minInterval; 73 | if (currentTime < nextRequestTime) { 74 | try { 75 | //System.out.println("Sleeping until " + nextRequestTime); 76 | Thread.sleep(nextRequestTime - currentTime); 77 | } catch (InterruptedException e) { 78 | System.err.println("Warning: sleep interrupted in fetchWikipedia."); 79 | } 80 | } 81 | } 82 | lastRequestTime = System.currentTimeMillis(); 83 | } 84 | 85 | /** 86 | * @param args 87 | * @throws IOException 88 | */ 89 | public static void main(String[] args) throws IOException { 90 | WikiFetcher wf = new WikiFetcher(); 91 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 92 | Elements paragraphs = wf.readWikipedia(url); 93 | 94 | for (Element paragraph: paragraphs) { 95 | System.out.println(paragraph); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiNodeExample.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayDeque; 5 | import java.util.ArrayList; 6 | import java.util.Collections; 7 | import java.util.Deque; 8 | import java.util.List; 9 | 10 | import org.jsoup.Connection; 11 | import org.jsoup.Jsoup; 12 | import org.jsoup.nodes.Document; 13 | import org.jsoup.nodes.Element; 14 | import org.jsoup.nodes.Node; 15 | import org.jsoup.nodes.TextNode; 16 | import org.jsoup.select.Elements; 17 | 18 | public class WikiNodeExample { 19 | 20 | public static void main(String[] args) throws IOException { 21 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 22 | 23 | // download and parse the document 24 | Connection conn = Jsoup.connect(url); 25 | Document doc = conn.get(); 26 | 27 | // select the 
content text and pull out the paragraphs. 28 | Element content = doc.getElementById("mw-content-text"); 29 | 30 | // TODO: avoid selecting paragraphs from sidebars and boxouts 31 | Elements paras = content.select("p"); 32 | Element firstPara = paras.get(0); 33 | 34 | recursiveDFS(firstPara); 35 | System.out.println(); 36 | 37 | iterativeDFS(firstPara); 38 | System.out.println(); 39 | 40 | Iterable iter = new WikiNodeIterable(firstPara); 41 | for (Node node: iter) { 42 | if (node instanceof TextNode) { 43 | System.out.print(node); 44 | } 45 | } 46 | } 47 | 48 | private static void iterativeDFS(Node root) { 49 | Deque stack = new ArrayDeque(); 50 | stack.push(root); 51 | 52 | // if the stack is empty, we're done 53 | while (!stack.isEmpty()) { 54 | 55 | // otherwise pop the next Node off the stack 56 | Node node = stack.pop(); 57 | if (node instanceof TextNode) { 58 | System.out.print(node); 59 | } 60 | 61 | // push the children onto the stack in reverse order 62 | List nodes = new ArrayList(node.childNodes()); 63 | Collections.reverse(nodes); 64 | 65 | for (Node child: nodes) { 66 | stack.push(child); 67 | } 68 | } 69 | } 70 | 71 | private static void recursiveDFS(Node node) { 72 | if (node instanceof TextNode) { 73 | System.out.print(node); 74 | } 75 | for (Node child: node.childNodes()) { 76 | recursiveDFS(child); 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiNodeIterable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import java.util.ArrayDeque; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.Deque; 10 | import java.util.Iterator; 11 | import java.util.List; 12 | import java.util.NoSuchElementException; 13 | 14 | import org.jsoup.nodes.Node; 15 | 16 | 17 | /** 18 | * Performs a depth-first traversal of a jsoup 
Node. 19 | * 20 | * @author downey 21 | * 22 | */ 23 | public class WikiNodeIterable implements Iterable { 24 | 25 | private Node root; 26 | 27 | /** 28 | * Creates an iterable starting with the given Node. 29 | * 30 | * @param root 31 | */ 32 | public WikiNodeIterable(Node root) { 33 | this.root = root; 34 | } 35 | 36 | @Override 37 | public Iterator iterator() { 38 | return new WikiNodeIterator(root); 39 | } 40 | 41 | /** 42 | * Inner class that implements the Iterator. 43 | * 44 | * @author downey 45 | * 46 | */ 47 | private class WikiNodeIterator implements Iterator { 48 | 49 | // this stack keeps track of the Nodes waiting to be visited 50 | Deque stack; 51 | 52 | /** 53 | * Initializes the Iterator with the root Node on the stack. 54 | * 55 | * @param node 56 | */ 57 | public WikiNodeIterator(Node node) { 58 | stack = new ArrayDeque(); 59 | stack.push(root); 60 | } 61 | 62 | @Override 63 | public boolean hasNext() { 64 | return !stack.isEmpty(); 65 | } 66 | 67 | @Override 68 | public Node next() { 69 | // if the stack is empty, we're done 70 | if (stack.isEmpty()) { 71 | throw new NoSuchElementException(); 72 | } 73 | 74 | // otherwise pop the next Node off the stack 75 | Node node = stack.pop(); 76 | //System.out.println(node); 77 | 78 | // push the children onto the stack in reverse order 79 | List nodes = new ArrayList(node.childNodes()); 80 | Collections.reverse(nodes); 81 | for (Node child: nodes) { 82 | stack.push(child); 83 | } 84 | return node; 85 | } 86 | 87 | @Override 88 | public void remove() { 89 | throw new UnsupportedOperationException(); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiParser.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import org.jsoup.nodes.Element; 4 | import org.jsoup.nodes.Node; 5 | import org.jsoup.nodes.TextNode; 6 | import 
org.jsoup.select.Elements; 7 | import java.util.ArrayDeque; 8 | import java.util.Deque; 9 | import java.util.StringTokenizer; 10 | 11 | /** 12 | * 13 | */ 14 | /** 15 | * @author downey 16 | * 17 | */ 18 | public class WikiParser { 19 | 20 | // the list of paragraphs we should search 21 | private Elements paragraphs; 22 | 23 | // the stack of open delimiters 24 | // TODO: consider simplifying this by counting parentheses 25 | private Deque parenthesisStack; 26 | 27 | 28 | /** 29 | * Initializes a WikiParser with a list of Elements. 30 | * 31 | * @param paragraphs 32 | */ 33 | public WikiParser(Elements paragraphs) { 34 | this.paragraphs = paragraphs; 35 | this.parenthesisStack = new ArrayDeque(); 36 | } 37 | 38 | /** 39 | * Searches the paragraphs for a valid link. 40 | * 41 | * Warns if a paragraph ends with unbalanced parentheses. 42 | * 43 | * @return 44 | */ 45 | public Element findFirstLink() { 46 | for (Element paragraph: paragraphs) { 47 | Element firstLink = findFirstLinkPara(paragraph); 48 | if (firstLink != null) { 49 | return firstLink; 50 | } 51 | if (!parenthesisStack.isEmpty()) { 52 | System.err.println("Warning: unbalanced parentheses."); 53 | } 54 | } 55 | return null; 56 | } 57 | 58 | /** 59 | * Returns the first valid link in a paragraph, or null. 60 | * 61 | * @param root 62 | */ 63 | private Element findFirstLinkPara(Node root) { 64 | // create an Iterable that traverses the tree 65 | Iterable nt = new WikiNodeIterable(root); 66 | 67 | // loop through the nodes 68 | for (Node node: nt) { 69 | // process TextNodes to get parentheses 70 | if (node instanceof TextNode) { 71 | processTextNode((TextNode) node); 72 | } 73 | // process elements to get find links 74 | if (node instanceof Element) { 75 | Element firstLink = processElement((Element) node); 76 | if (firstLink != null) { 77 | return firstLink; 78 | } 79 | } 80 | } 81 | return null; 82 | } 83 | 84 | /** 85 | * Returns the element if it is a valid link, null otherwise. 
86 | * 87 | * 88 | * 89 | * @param elt 90 | */ 91 | private Element processElement(Element elt) { 92 | //System.out.println(elt.tagName()); 93 | if (validLink(elt)) { 94 | return elt; 95 | } 96 | return null; 97 | } 98 | 99 | /** 100 | * Checks whether a link is value. 101 | * 102 | * @param elt 103 | * @return 104 | */ 105 | private boolean validLink(Element elt) { 106 | // it's no good if it's 107 | // not a link 108 | if (!elt.tagName().equals("a")) { 109 | return false; 110 | } 111 | // in italics 112 | if (isItalic(elt)) { 113 | return false; 114 | } 115 | // in parenthesis 116 | if (isInParens(elt)) { 117 | return false; 118 | } 119 | // a bookmark 120 | if (startsWith(elt, "#")) { 121 | return false; 122 | } 123 | // a Wikipedia help page 124 | if (startsWith(elt, "/wiki/Help:")) { 125 | return false; 126 | } 127 | // TODO: there are a couple of other "rules" we haven't handled 128 | return true; 129 | } 130 | 131 | /** 132 | * Checks whether a link starts with a given String. 133 | * 134 | * @param elt 135 | * @param s 136 | * @return 137 | */ 138 | private boolean startsWith(Element elt, String s) { 139 | //System.out.println(elt.attr("href")); 140 | return (elt.attr("href").startsWith(s)); 141 | } 142 | 143 | /** 144 | * Checks whether the element is in parentheses (possibly nested). 145 | * 146 | * @param elt 147 | * @return 148 | */ 149 | private boolean isInParens(Element elt) { 150 | // check whether there are any parentheses on the stack 151 | return !parenthesisStack.isEmpty(); 152 | } 153 | 154 | /** 155 | * Checks whether the element is in italics. 
156 | * 157 | * (Either a "i" or "em" tag) 158 | * 159 | * @param start 160 | * @return 161 | */ 162 | private boolean isItalic(Element start) { 163 | // follow the parent chain until we get to null 164 | for (Element elt=start; elt != null; elt = elt.parent()) { 165 | if (elt.tagName().equals("i") || elt.tagName().equals("em")) { 166 | return true; 167 | } 168 | } 169 | return false; 170 | } 171 | 172 | /** 173 | * Processes a text node, splitting it up and checking parentheses. 174 | * 175 | * @param node 176 | */ 177 | private void processTextNode(TextNode node) { 178 | StringTokenizer st = new StringTokenizer(node.text(), " ()", true); 179 | while (st.hasMoreTokens()) { 180 | String token = st.nextToken(); 181 | // System.out.print(token); 182 | if (token.equals("(")) { 183 | parenthesisStack.push(token); 184 | } 185 | if (token.equals(")")) { 186 | if (parenthesisStack.isEmpty()) { 187 | System.err.println("Warning: unbalanced parentheses."); 188 | } 189 | parenthesisStack.pop(); 190 | } 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiParserTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.assertThat; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.io.IOException; 10 | 11 | import org.jsoup.nodes.Element; 12 | import org.jsoup.select.Elements; 13 | import org.junit.Test; 14 | 15 | /** 16 | * @author downey 17 | * 18 | */ 19 | public class WikiParserTest { 20 | 21 | final static WikiFetcher wf = new WikiFetcher(); 22 | 23 | /** 24 | * Test method for {@link WikiParser#findFirstLink()}. 
25 | * @throws IOException 26 | */ 27 | @Test 28 | public void testFindFirstLink1() throws IOException { 29 | String url = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 30 | String href = findFirstLink(url); 31 | assertThat(href, is("/wiki/Programming_language")); 32 | } 33 | 34 | /** 35 | * Test method for {@link WikiParser#findFirstLink()}. 36 | * @throws IOException 37 | */ 38 | @Test 39 | public void testFindFirstLink2() throws IOException { 40 | String url = "https://en.wikipedia.org/wiki/Mathematics"; 41 | String href = findFirstLink(url); 42 | assertThat(href, is("/wiki/Quantity")); 43 | } 44 | 45 | /** 46 | * Uses WikiParser to find the first link in the given URL. 47 | * 48 | * @param url 49 | * @return 50 | * @throws IOException 51 | */ 52 | private String findFirstLink(String url) throws IOException { 53 | Elements paragraphs = wf.readWikipedia(url); 54 | WikiParser wp = new WikiParser(paragraphs); 55 | Element elt = wp.findFirstLink(); 56 | String href = elt.attr("href"); 57 | return href; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiPhilosophy.java: -------------------------------------------------------------------------------- 1 | package com.allendowney.thinkdast; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | 10 | public class WikiPhilosophy { 11 | 12 | final static List visited = new ArrayList(); 13 | final static WikiFetcher wf = new WikiFetcher(); 14 | 15 | /** 16 | * Tests a conjecture about Wikipedia and Philosophy. 17 | * 18 | * https://en.wikipedia.org/wiki/Wikipedia:Getting_to_Philosophy 19 | * 20 | * 1. Clicking on the first non-parenthesized, non-italicized link 21 | * 2. Ignoring external links, links to the current page, or red links 22 | * 3. 
Stopping when reaching "Philosophy", a page with no links or a page 23 | * that does not exist, or when a loop occurs 24 | * 25 | * @param args 26 | * @throws IOException 27 | */ 28 | public static void main(String[] args) throws IOException { 29 | 30 | String destination = "https://en.wikipedia.org/wiki/Philosophy"; 31 | String source = "https://en.wikipedia.org/wiki/Java_(programming_language)"; 32 | 33 | testConjecture(destination, source, 10); 34 | } 35 | 36 | /** 37 | * Starts from given URL and follows first link until it finds the destination or exceeds the limit. 38 | * 39 | * @param destination 40 | * @param source 41 | * @throws IOException 42 | */ 43 | public static void testConjecture(String destination, String source, int limit) throws IOException { 44 | String url = source; 45 | for (int i=0; i map; 23 | 24 | /** 25 | * Constructor. 26 | * 27 | * @param map 28 | */ 29 | public WikiSearch(Map map) { 30 | this.map = map; 31 | } 32 | 33 | /** 34 | * Looks up the relevance of a given URL. 35 | * 36 | * @param url 37 | * @return 38 | */ 39 | public Integer getRelevance(String url) { 40 | Integer relevance = map.get(url); 41 | return relevance==null ? 0: relevance; 42 | } 43 | 44 | /** 45 | * Prints the contents in order of term frequency. 46 | * 47 | * @param 48 | */ 49 | private void print() { 50 | List> entries = sort(); 51 | for (Entry entry: entries) { 52 | System.out.println(entry); 53 | } 54 | } 55 | 56 | /** 57 | * Computes the union of two search results. 58 | * 59 | * @param that 60 | * @return New WikiSearch object. 61 | */ 62 | public WikiSearch or(WikiSearch that) { 63 | Map union = new HashMap(map); 64 | for (String term: that.map.keySet()) { 65 | int relevance = totalRelevance(this.getRelevance(term), that.getRelevance(term)); 66 | union.put(term, relevance); 67 | } 68 | return new WikiSearch(union); 69 | } 70 | 71 | /** 72 | * Computes the intersection of two search results. 73 | * 74 | * @param that 75 | * @return New WikiSearch object. 
76 | */ 77 | public WikiSearch and(WikiSearch that) { 78 | Map intersection = new HashMap(); 79 | for (String term: map.keySet()) { 80 | if (that.map.containsKey(term)) { 81 | int relevance = totalRelevance(this.map.get(term), that.map.get(term)); 82 | intersection.put(term, relevance); 83 | } 84 | } 85 | return new WikiSearch(intersection); 86 | } 87 | 88 | /** 89 | * Computes the intersection of two search results. 90 | * 91 | * @param that 92 | * @return New WikiSearch object. 93 | */ 94 | public WikiSearch minus(WikiSearch that) { 95 | Map difference = new HashMap(map); 96 | for (String term: that.map.keySet()) { 97 | difference.remove(term); 98 | } 99 | return new WikiSearch(difference); 100 | } 101 | 102 | /** 103 | * Computes the relevance of a search with multiple terms. 104 | * 105 | * @param rel1: relevance score for the first search 106 | * @param rel2: relevance score for the second search 107 | * @return 108 | */ 109 | protected int totalRelevance(Integer rel1, Integer rel2) { 110 | // simple starting place: relevance is the sum of the term frequencies. 111 | return rel1 + rel2; 112 | } 113 | 114 | /** 115 | * Sort the results by relevance. 116 | * 117 | * @return List of entries with URL and relevance. 118 | */ 119 | public List> sort() { 120 | // NOTE: this can be done more concisely in Java 8. See 121 | // http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java 122 | 123 | // make a list of entries 124 | List> entries = 125 | new LinkedList>(map.entrySet()); 126 | 127 | // make a Comparator object for sorting 128 | Comparator> comparator = new Comparator>() { 129 | @Override 130 | public int compare(Entry e1, Entry e2) { 131 | return e1.getValue().compareTo(e2.getValue()); 132 | } 133 | }; 134 | 135 | // sort and return the entries 136 | Collections.sort(entries, comparator); 137 | return entries; 138 | } 139 | 140 | 141 | /** 142 | * Performs a search and makes a WikiSearch object. 
143 | * 144 | * @param term 145 | * @param index 146 | * @return 147 | */ 148 | public static WikiSearch search(String term, JedisIndex index) { 149 | Map map = index.getCounts(term); 150 | return new WikiSearch(map); 151 | } 152 | 153 | public static void main(String[] args) throws IOException { 154 | 155 | // make a JedisIndex 156 | Jedis jedis = JedisMaker.make(); 157 | JedisIndex index = new JedisIndex(jedis); 158 | 159 | // search for the first term 160 | String term1 = "java"; 161 | System.out.println("Query: " + term1); 162 | WikiSearch search1 = search(term1, index); 163 | search1.print(); 164 | 165 | // search for the second term 166 | String term2 = "programming"; 167 | System.out.println("Query: " + term2); 168 | WikiSearch search2 = search(term2, index); 169 | search2.print(); 170 | 171 | // compute the intersection of the searches 172 | System.out.println("Query: " + term1 + " AND " + term2); 173 | WikiSearch intersection = search1.and(search2); 174 | intersection.print(); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /solutions/src/com/allendowney/thinkdast/WikiSearchTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.allendowney.thinkdast; 5 | 6 | import static org.junit.Assert.*; 7 | import static org.hamcrest.CoreMatchers.*; 8 | 9 | import java.util.HashMap; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.Map.Entry; 13 | 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * @author downey 19 | * 20 | */ 21 | public class WikiSearchTest { 22 | 23 | private WikiSearch search1; 24 | private WikiSearch search2; 25 | 26 | /** 27 | * @throws java.lang.Exception 28 | */ 29 | @Before 30 | public void setUp() throws Exception { 31 | Map map1 = new HashMap(); 32 | map1.put("Page1", 1); 33 | map1.put("Page2", 2); 34 | map1.put("Page3", 3); 35 | search1 = new 
WikiSearch(map1); 36 | 37 | Map map2 = new HashMap(); 38 | map2.put("Page2", 4); 39 | map2.put("Page3", 5); 40 | map2.put("Page4", 7); 41 | search2 = new WikiSearch(map2); 42 | } 43 | 44 | /** 45 | * Test method for {@link WikiSearch#or(WikiSearch)}. 46 | */ 47 | @Test 48 | public void testOr() { 49 | WikiSearch search = search1.or(search2); 50 | assertThat(search.getRelevance("Page1"), is(1)); 51 | assertThat(search.getRelevance("Page2"), is(6)); 52 | assertThat(search.getRelevance("Page3"), is(8)); 53 | assertThat(search.getRelevance("Page4"), is(7)); 54 | assertThat(search.getRelevance("Page5"), is(0)); 55 | } 56 | 57 | /** 58 | * Test method for {@link WikiSearch#and(WikiSearch)}. 59 | */ 60 | @Test 61 | public void testAnd() { 62 | WikiSearch search = search1.and(search2); 63 | assertThat(search.getRelevance("Page1"), is(0)); 64 | assertThat(search.getRelevance("Page2"), is(6)); 65 | assertThat(search.getRelevance("Page3"), is(8)); 66 | assertThat(search.getRelevance("Page4"), is(0)); 67 | assertThat(search.getRelevance("Page5"), is(0)); 68 | } 69 | 70 | /** 71 | * Test method for {@link WikiSearch#minus(WikiSearch)}. 72 | */ 73 | @Test 74 | public void testMinus() { 75 | WikiSearch search = search1.minus(search2); 76 | assertThat(search.getRelevance("Page1"), is(1)); 77 | assertThat(search.getRelevance("Page2"), is(0)); 78 | assertThat(search.getRelevance("Page3"), is(0)); 79 | assertThat(search.getRelevance("Page4"), is(0)); 80 | assertThat(search.getRelevance("Page5"), is(0)); 81 | } 82 | 83 | /** 84 | * Test method for {@link WikiSearch#sort()}. 85 | */ 86 | @Test 87 | public void testSort() { 88 | List> list = search2.sort(); 89 | assertThat(list.get(0).getValue(), is(4)); 90 | assertThat(list.get(1).getValue(), is(5)); 91 | assertThat(list.get(2).getValue(), is(7)); 92 | } 93 | } 94 | --------------------------------------------------------------------------------