├── MIT-LICENSE.txt ├── README ├── arduino-mathematica-example ├── arduino-mathematica-example.nb └── arduino_mathematica_example.txt ├── basic-text-analysis ├── basic-text-analysis.cdf ├── basic-text-analysis.nb ├── bigram-network-small.jpg └── bigram-network.jpg └── simple-acoustic-daq ├── sample-data.mp3 ├── sample-data.wav ├── simple-acoustic-daq.nb └── v-to-f-circuit.png /MIT-LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2013 William J Turkel, http://williamjturkel.net 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/README -------------------------------------------------------------------------------- /arduino-mathematica-example/arduino-mathematica-example.nb: -------------------------------------------------------------------------------- 1 | (* Content-type: application/vnd.wolfram.mathematica *) 2 | 3 | (*** Wolfram Notebook File ***) 4 | (* http://www.wolfram.com/nb *) 5 | 6 | (* CreatedBy='Mathematica 8.0' *) 7 | 8 | (*CacheID: 234*) 9 | (* Internal cache information: 10 | NotebookFileLineBreakTest 11 | NotebookFileLineBreakTest 12 | NotebookDataPosition[ 157, 7] 13 | NotebookDataLength[ 9299, 291] 14 | NotebookOptionsPosition[ 7754, 234] 15 | NotebookOutlinePosition[ 8110, 250] 16 | CellTagsIndexPosition[ 8067, 247] 17 | WindowFrame->Normal*) 18 | 19 | (* Beginning of Notebook Content *) 20 | Notebook[{ 21 | 22 | Cell[CellGroupData[{ 23 | Cell[TextData[{ 24 | "Arduino ", 25 | StyleBox["Mathematica", 26 | FontSlant->"Italic"], 27 | " Example" 28 | }], "Title", 29 | CellChangeTimes->{{3.533802323351212*^9, 3.533802336036968*^9}}], 30 | 31 | Cell[CellGroupData[{ 32 | 33 | Cell["Setup", "Subsection", 34 | CellChangeTimes->{{3.533802340557988*^9, 3.533802341357321*^9}}], 35 | 36 | Cell["Install the SerialIO package", "Text", 37 | CellChangeTimes->{{3.533802344853786*^9, 3.533802360798071*^9}}], 38 | 39 | Cell[TextData[ButtonBox["http://library.wolfram.com/infocenter/Demos/5726/", 40 | BaseStyle->"Hyperlink", 41 | ButtonData->{ 42 | URL["http://library.wolfram.com/infocenter/Demos/5726/"], None}, 43 | ButtonNote->"http://library.wolfram.com/infocenter/Demos/5726/"]], "Text", 44 | CellChangeTimes->{{3.5337288624395514`*^9, 3.5337288805344954`*^9}}], 45 | 46 | Cell[TextData[{ 47 | "in the folder ", 48 | 
StyleBox["/Users/username/Library/Mathematica/Applications", 49 | FontWeight->"Bold"], 50 | " and make sure that is on your path. If the following does not evaluate to \ 51 | true" 52 | }], "Text", 53 | CellChangeTimes->{{3.533728885857113*^9, 3.533728930297147*^9}, { 54 | 3.5338023872170067`*^9, 3.533802412111549*^9}, {3.533802556324332*^9, 55 | 3.533802603701972*^9}}], 56 | 57 | Cell["\<\ 58 | MemberQ[$Path, \"/Users/username/Library/Mathematica/Applications\"]\ 59 | \>", "Program", 60 | CellChangeTimes->{{3.5337285634519367`*^9, 3.533728563453147*^9}, { 61 | 3.5337289070677557`*^9, 3.53372890730698*^9}, {3.5338024008671637`*^9, 62 | 3.5338024014815187`*^9}, 3.533802905315782*^9, {3.5338029751969833`*^9, 63 | 3.533802987708102*^9}}], 64 | 65 | Cell["Then run this command", "Text", 66 | CellChangeTimes->{{3.5337291102454233`*^9, 3.533729151574253*^9}, { 67 | 3.533802417440083*^9, 3.5338024318835163`*^9}}], 68 | 69 | Cell["\<\ 70 | AppendTo[$Path, \"/Users/username/Library/Mathematica/Applications\"]\ 71 | \>", "Program", 72 | CellChangeTimes->{{3.5337291102454233`*^9, 3.533729151574253*^9}, { 73 | 3.533802417440083*^9, 3.533802467194358*^9}, 3.533802917650968*^9}], 74 | 75 | Cell[TextData[{ 76 | "Next, edit the file ", 77 | StyleBox["/Users/username/Library/Mathematica/Applications/SerialIO/Kernal/\ 78 | init.m", 79 | FontWeight->"Bold"], 80 | " so the line" 81 | }], "Text", 82 | CellChangeTimes->{{3.533802473049708*^9, 3.533802530691044*^9}}], 83 | 84 | Cell["$Link = Install[\"SerialIO\"]", "Program", 85 | CellChangeTimes->{{3.533802627326419*^9, 3.533802633611517*^9}, 86 | 3.5338029251720753`*^9}], 87 | 88 | Cell["reads", "Text", 89 | CellChangeTimes->{{3.5338026373070307`*^9, 3.533802639726123*^9}}], 90 | 91 | Cell["\<\ 92 | $Link = 93 | Install[\"/Users/username/Library/Mathematica/Applications/SerialIO/MacOSX/\ 94 | SerialIO\", 95 | LinkProtocol -> \"Pipes\"]\ 96 | \>", "Program", 97 | CellChangeTimes->{{3.533802650927223*^9, 3.533802689778152*^9}, 98 | 
3.533802933115745*^9}], 99 | 100 | Cell[TextData[{ 101 | "Make sure that the Arduino is running the ", 102 | StyleBox["arduino_mathematica_example", 103 | FontWeight->"Bold"], 104 | " sketch and that the Arduino software is not running on the computer." 105 | }], "Text", 106 | CellChangeTimes->{{3.533802724874497*^9, 3.533802774737712*^9}}], 107 | 108 | Cell["\<\ 109 | If you need to find the port name for your Arduino, you can open a terminal \ 110 | and type\ 111 | \>", "Text", 112 | CellChangeTimes->{{3.53380299935249*^9, 3.533803099241909*^9}}], 113 | 114 | Cell["ls /dev/tty.*", "Program", 115 | CellChangeTimes->{{3.53380303012006*^9, 3.5338030332168083`*^9}}] 116 | }, Open ]], 117 | 118 | Cell[CellGroupData[{ 119 | 120 | Cell["Run the demo program", "Subsection", 121 | CellChangeTimes->{{3.533802790122932*^9, 3.533802793505701*^9}}], 122 | 123 | Cell[BoxData[ 124 | RowBox[{"<<", "SerialIO`"}]], "Input", 125 | CellChangeTimes->{{3.533802818881028*^9, 3.533802822348425*^9}}], 126 | 127 | Cell[CellGroupData[{ 128 | 129 | Cell[BoxData[ 130 | RowBox[{"myArduino", "=", 131 | RowBox[{ 132 | "SerialOpen", "[", "\"\\"", "]"}]}]], "Input", 133 | CellChangeTimes->{{3.5335600259139643`*^9, 3.533560051388588*^9}, 134 | 3.5337300949920273`*^9}], 135 | 136 | Cell[BoxData[ 137 | InterpretationBox["\<\"SerialPort[]\"\>", 138 | StringForm["SerialPort[<`1`>]", "/dev/tty.usbmodem3a21"], 139 | Editable->False]], "Output", 140 | CellChangeTimes->{3.533803295952017*^9}] 141 | }, Open ]], 142 | 143 | Cell[CellGroupData[{ 144 | 145 | Cell[BoxData[ 146 | RowBox[{"SerialSetOptions", "[", 147 | RowBox[{"myArduino", ",", 148 | RowBox[{"\"\\"", "\[Rule]", "9600"}]}], "]"}]], "Input", 149 | CellChangeTimes->{{3.533560642595771*^9, 3.533560687321941*^9}, 150 | 3.533560778784617*^9, {3.533730056888424*^9, 3.533730094995583*^9}}], 151 | 152 | Cell[BoxData[ 153 | RowBox[{"{", 154 | RowBox[{ 155 | RowBox[{"\<\"BaudRate\"\>", "\[Rule]", "9600"}], ",", 156 | RowBox[{"\<\"DataBits\"\>", "\[Rule]", 
"8"}], ",", 157 | RowBox[{"\<\"StopBits\"\>", "\[Rule]", "1"}], ",", 158 | RowBox[{"\<\"Parity\"\>", "\[Rule]", "0"}]}], "}"}]], "Output", 159 | CellChangeTimes->{3.533803295985132*^9}] 160 | }, Open ]], 161 | 162 | Cell[CellGroupData[{ 163 | 164 | Cell[BoxData[ 165 | RowBox[{"SerialReadyQ", "[", "myArduino", "]"}]], "Input", 166 | CellChangeTimes->{{3.533560097474472*^9, 3.533560107928924*^9}, 167 | 3.5337300949938507`*^9, {3.533732772101479*^9, 3.5337327721781693`*^9}, { 168 | 3.533734443172874*^9, 3.5337344434204397`*^9}}], 169 | 170 | Cell[BoxData["True"], "Output", 171 | CellChangeTimes->{3.53380329601838*^9}] 172 | }, Open ]], 173 | 174 | Cell["\<\ 175 | It doesn't matter what character we send the Arduino. Here we use an ASCII B. \ 176 | We take the first byte returned by the Arduino, use ToCharacterCode[] to \ 177 | convert it into an integer, and repeat 10 times per second. The slider \ 178 | should wiggle back and forth as you turn the potentiometer connected to the \ 179 | Arduino.\ 180 | \>", "Text", 181 | CellChangeTimes->{{3.533803152467449*^9, 3.533803190548169*^9}, { 182 | 3.5338032236450043`*^9, 3.5338032772542543`*^9}}], 183 | 184 | Cell[CellGroupData[{ 185 | 186 | Cell[BoxData[ 187 | RowBox[{"Slider", "[", " ", 188 | RowBox[{ 189 | RowBox[{"Dynamic", "[", 190 | RowBox[{"Refresh", "[", 191 | RowBox[{ 192 | RowBox[{ 193 | RowBox[{"SerialWrite", "[", 194 | RowBox[{"myArduino", ",", " ", "\"\\""}], "]"}], ";", 195 | RowBox[{"First", "[", 196 | RowBox[{ 197 | RowBox[{"SerialRead", "[", "myArduino", "]"}], " ", "//", " ", 198 | "ToCharacterCode"}], "]"}]}], ",", 199 | RowBox[{"UpdateInterval", "\[Rule]", "0.1"}]}], "]"}], "]"}], ",", 200 | RowBox[{"{", 201 | RowBox[{"0", ",", "255"}], "}"}]}], "]"}]], "Input", 202 | CellChangeTimes->{{3.533734605057171*^9, 3.533734648378785*^9}, { 203 | 3.533735120384595*^9, 3.533735124270073*^9}, 3.533735829817933*^9, { 204 | 3.533735966525117*^9, 3.533735974897017*^9}, {3.533736426333003*^9, 205 | 
3.533736468633232*^9}, {3.53373650765812*^9, 3.533736547304389*^9}, { 206 | 3.5337365896832857`*^9, 3.5337366065061827`*^9}, {3.533736652890985*^9, 207 | 3.533736654329612*^9}, {3.5337367138396997`*^9, 3.5337367217548037`*^9}, { 208 | 3.533736768583971*^9, 3.5337367776924067`*^9}, {3.533803146680799*^9, 209 | 3.5338031490786867`*^9}}], 210 | 211 | Cell[BoxData[ 212 | SliderBox[Dynamic[ 213 | Refresh[SerialIO`SerialWrite[$CellContext`myArduino, "B"]; First[ 214 | ToCharacterCode[ 215 | SerialIO`SerialRead[$CellContext`myArduino]]], UpdateInterval -> 216 | 0.1]], {0, 255}]], "Output", 217 | CellChangeTimes->{3.533803296052721*^9}] 218 | }, Open ]] 219 | }, Open ]], 220 | 221 | Cell[CellGroupData[{ 222 | 223 | Cell["\<\ 224 | Close the serial link to the Arduino when you are finished\ 225 | \>", "Subsection", 226 | CellChangeTimes->{{3.533802795865917*^9, 3.533802804114325*^9}}], 227 | 228 | Cell[BoxData[ 229 | RowBox[{"SerialClose", "[", "myArduino", "]"}]], "Input", 230 | CellChangeTimes->{{3.533560277343755*^9, 3.533560284692026*^9}, 231 | 3.533730094998847*^9}] 232 | }, Open ]] 233 | }, Open ]] 234 | }, 235 | WindowSize->{740, 867}, 236 | WindowMargins->{{Automatic, 78}, {Automatic, 102}}, 237 | FrontEndVersion->"8.0 for Mac OS X x86 (32-bit, 64-bit Kernel) (October 5, \ 238 | 2011)", 239 | StyleDefinitions->"Default.nb" 240 | ] 241 | (* End of Notebook Content *) 242 | 243 | (* Internal cache information *) 244 | (*CellTagsOutline 245 | CellTagsIndex->{} 246 | *) 247 | (*CellTagsIndex 248 | CellTagsIndex->{} 249 | *) 250 | (*NotebookFileOutline 251 | Notebook[{ 252 | Cell[CellGroupData[{ 253 | Cell[579, 22, 168, 6, 76, "Title"], 254 | Cell[CellGroupData[{ 255 | Cell[772, 32, 93, 1, 34, "Subsection"], 256 | Cell[868, 35, 110, 1, 26, "Text"], 257 | Cell[981, 38, 330, 5, 26, "Text"], 258 | Cell[1314, 45, 383, 9, 41, "Text"], 259 | Cell[1700, 56, 342, 6, 37, "Program"], 260 | Cell[2045, 64, 156, 2, 26, "Text"], 261 | Cell[2204, 68, 236, 4, 37, "Program"], 262 | 
Cell[2443, 74, 244, 7, 26, "Text"], 263 | Cell[2690, 83, 142, 2, 37, "Program"], 264 | Cell[2835, 87, 89, 1, 26, "Text"], 265 | Cell[2927, 90, 247, 7, 63, "Program"], 266 | Cell[3177, 99, 277, 6, 41, "Text"], 267 | Cell[3457, 107, 175, 4, 26, "Text"], 268 | Cell[3635, 113, 99, 1, 37, "Program"] 269 | }, Open ]], 270 | Cell[CellGroupData[{ 271 | Cell[3771, 119, 108, 1, 35, "Subsection"], 272 | Cell[3882, 122, 119, 2, 27, "Input"], 273 | Cell[CellGroupData[{ 274 | Cell[4026, 128, 221, 5, 27, "Input"], 275 | Cell[4250, 135, 212, 4, 27, "Output"] 276 | }, Open ]], 277 | Cell[CellGroupData[{ 278 | Cell[4499, 144, 286, 5, 27, "Input"], 279 | Cell[4788, 151, 316, 7, 27, "Output"] 280 | }, Open ]], 281 | Cell[CellGroupData[{ 282 | Cell[5141, 163, 267, 4, 27, "Input"], 283 | Cell[5411, 169, 72, 1, 27, "Output"] 284 | }, Open ]], 285 | Cell[5498, 173, 458, 8, 56, "Text"], 286 | Cell[CellGroupData[{ 287 | Cell[5981, 185, 1099, 23, 73, "Input"], 288 | Cell[7083, 210, 273, 6, 40, "Output"] 289 | }, Open ]] 290 | }, Open ]], 291 | Cell[CellGroupData[{ 292 | Cell[7405, 222, 154, 3, 35, "Subsection"], 293 | Cell[7562, 227, 164, 3, 27, "Input"] 294 | }, Open ]] 295 | }, Open ]] 296 | } 297 | ] 298 | *) 299 | 300 | (* End of internal cache information *) 301 | -------------------------------------------------------------------------------- /arduino-mathematica-example/arduino_mathematica_example.txt: -------------------------------------------------------------------------------- 1 | /* 2 | arduino_mathematica_example 3 | 4 | This code is adapted from 5 | http://arduino.cc/en/Tutorial/SerialCallResponse 6 | 7 | When started, the Arduino sends an ASCII A on the serial port until 8 | it receives a signal from the computer. It then reads Analog 1, 9 | sends a single byte on the serial port and waits for another signal 10 | from the computer. 11 | 12 | Test it with a potentiometer on A1. 
13 | 14 | */ 15 | /* Sketch state shared between calls to loop(). */ 16 | int sensor = 0; /* latest analogRead(A1) result divided by 4; this is the value written back over serial */ 17 | int inByte = 0; /* byte most recently taken from the serial port; stored but never inspected in this sketch */ 18 | /* setup(): open the serial port at 9600 baud, then wait in establishContact() until the computer has sent something. */ 19 | void setup() { 20 | Serial.begin(9600); 21 | establishContact(); 22 | } 23 | /* loop(): whenever at least one byte is waiting, consume one byte, sample A1, pause 15 ms, and answer with one value via Serial.write(). One reply is sent per byte consumed. */ 24 | void loop() { 25 | if (Serial.available() > 0) { 26 | inByte = Serial.read(); 27 | // divide sensor value by 4 to return a single byte 0-255 28 | sensor = analogRead(A1)/4; 29 | delay(15); 30 | Serial.write(sensor); 31 | } 32 | } 33 | /* establishContact(): print an ASCII 'A' every 100 ms for as long as no serial input is available. The incoming byte is not read here; it is left for loop() to consume. */ 34 | void establishContact() { 35 | while (Serial.available() <= 0) { 36 | Serial.print('A'); 37 | delay(100); 38 | } 39 | } -------------------------------------------------------------------------------- /basic-text-analysis/basic-text-analysis.nb: -------------------------------------------------------------------------------- 1 | (* Content-type: application/vnd.wolfram.mathematica *) 2 | 3 | (*** Wolfram Notebook File ***) 4 | (* http://www.wolfram.com/nb *) 5 | 6 | (* CreatedBy='Mathematica 8.0' *) 7 | 8 | (*CacheID: 234*) 9 | (* Internal cache information: 10 | NotebookFileLineBreakTest 11 | NotebookFileLineBreakTest 12 | NotebookDataPosition[ 157, 7] 13 | NotebookDataLength[ 99848, 2675] 14 | NotebookOptionsPosition[ 92605, 2429] 15 | NotebookOutlinePosition[ 93061, 2447] 16 | CellTagsIndexPosition[ 93018, 2444] 17 | WindowFrame->Normal*) 18 | 19 | (* Beginning of Notebook Content *) 20 | Notebook[{ 21 | 22 | Cell[CellGroupData[{ 23 | Cell[TextData[{ 24 | "Basic Text Analysis in ", 25 | StyleBox["Mathematica", 26 | FontSlant->"Italic"] 27 | }], "Title", 28 | CellChangeTimes->{{3.562315319001178*^9, 3.562315325939991*^9}}], 29 | 30 | Cell[TextData[{ 31 | "William J Turkel\n", 32 | ButtonBox["http://williamjturkel.net", 33 | BaseStyle->"Hyperlink", 34 | ButtonData->{ 35 | URL["http://williamjturkel.net"], None}, 36 | ButtonNote->"http://williamjturkel.net"] 37 | }], "Subtitle", 38 | CellChangeTimes->{{3.562315337323538*^9, 3.5623153530202017`*^9}, { 39 | 3.5623155691604643`*^9, 3.56231556916114*^9}}], 40 | 41 | Cell["November 2012", "Subtitle", 42 | 
CellChangeTimes->{{3.5623153573081703`*^9, 3.562315359363181*^9}}], 43 | 44 | Cell[CellGroupData[{ 45 | 46 | Cell["Introduction", "Subsection", 47 | CellChangeTimes->{{3.562315374646595*^9, 3.5623153763098516`*^9}}], 48 | 49 | Cell[TextData[{ 50 | "For a couple of years now I have been using ", 51 | StyleBox["Mathematica", 52 | FontSlant->"Italic"], 53 | " as my programming language of choice for my digital history work. For one \ 54 | thing, I love working with notebooks, which allow me to mix prose, citations, \ 55 | live data, executable code, manipulable simulations and other elements in a \ 56 | single document. I also love the generality of ", 57 | StyleBox["Mathematica", 58 | FontSlant->"Italic"], 59 | ". For any kind of technical work, there is usually a well-developed body of \ 60 | theory that is expressed in objects drawn from some branch of mathematics. \ 61 | Chances are, ", 62 | StyleBox["Mathematica", 63 | FontSlant->"Italic"], 64 | " already has a large number of high-level functions for working with those \ 65 | mathematical objects. The ", 66 | StyleBox["Mathematica", 67 | FontSlant->"Italic"], 68 | " documentation is excellent, if necessarily sprawling, since there are \ 69 | literally thousands of commands. The challenge is usually to find the \ 70 | commands that you need to solve a given problem. Since few ", 71 | StyleBox["Mathematica", 72 | FontSlant->"Italic"], 73 | " programmers seem to be working historians or humanists dealing with \ 74 | textual sources, it can be difficult to figure out where to begin." 
75 | }], "Text", 76 | CellChangeTimes->{{3.5623154449912024`*^9, 3.562315707885*^9}, { 77 | 3.562315749236423*^9, 3.5623158375244703`*^9}, {3.562315913486582*^9, 78 | 3.562315979602768*^9}, {3.562316050730322*^9, 3.562316058665263*^9}, { 79 | 3.56231611944285*^9, 3.562316238063311*^9}, {3.562316297964085*^9, 80 | 3.562316337009603*^9}}] 81 | }, Open ]], 82 | 83 | Cell[CellGroupData[{ 84 | 85 | Cell["Using a built-in text", "Subsection", 86 | CellChangeTimes->{{3.562316429391014*^9, 3.5623164424041758`*^9}}], 87 | 88 | Cell[TextData[{ 89 | "As a sample text, we will use Darwin\[CloseCurlyQuote]s ", 90 | StyleBox["Origin of Species", 91 | FontSlant->"Italic"], 92 | " from ", 93 | StyleBox["Mathematica", 94 | FontSlant->"Italic"], 95 | "\[CloseCurlyQuote]s built-in example database. ", 96 | "The ", 97 | StyleBox["Short", 98 | FontWeight->"Bold"], 99 | " command shows a small piece of something large. Here \ 100 | we\[CloseCurlyQuote]re asking to see the two lines at the beginning and end \ 101 | of this text." 102 | }], "Text", 103 | CellChangeTimes->{{3.5623165026688747`*^9, 3.562316528320557*^9}}], 104 | 105 | Cell[CellGroupData[{ 106 | 107 | Cell[BoxData[{ 108 | RowBox[{ 109 | RowBox[{"sample", "=", 110 | RowBox[{"ExampleData", "[", 111 | RowBox[{"{", 112 | RowBox[{"\"\\"", ",", "\"\\""}], "}"}], "]"}]}], 113 | ";"}], "\[IndentingNewLine]", 114 | RowBox[{"Short", "[", 115 | RowBox[{"sample", ",", "2"}], "]"}]}], "Input", 116 | CellChangeTimes->{{3.513508919942062*^9, 3.5135089448931932`*^9}, { 117 | 3.547013540831665*^9, 3.547013545549033*^9}, {3.5623165384259987`*^9, 118 | 3.562316547411353*^9}}], 119 | 120 | Cell[BoxData[ 121 | TagBox[ 122 | RowBox[{"\<\"INTRODUCTION. When on board H.M.S. 
\"\>", 123 | "\[Ellipsis]", "\<\" have been, and are being, evolved.\"\>"}], 124 | Short[#, 2]& ]], "Output", 125 | CellChangeTimes->{3.562316580525742*^9, 3.562318661125736*^9}] 126 | }, Open ]], 127 | 128 | Cell[TextData[{ 129 | "The ", 130 | StyleBox["Head", 131 | FontWeight->"Bold"], 132 | " command tells us what something is. Our text is currently a ", 133 | StyleBox["string", 134 | FontSlant->"Italic"], 135 | ", an ordered sequence of characters." 136 | }], "Text", 137 | CellChangeTimes->{{3.5623165637929688`*^9, 3.562316566903736*^9}}], 138 | 139 | Cell[CellGroupData[{ 140 | 141 | Cell[BoxData[ 142 | RowBox[{"Head", "[", "sample", "]"}]], "Input"], 143 | 144 | Cell[BoxData["String"], "Output", 145 | CellChangeTimes->{3.562316580560855*^9, 3.5623186611909113`*^9}] 146 | }, Open ]] 147 | }, Open ]], 148 | 149 | Cell[CellGroupData[{ 150 | 151 | Cell["Extracting part of a string", "Subsection", 152 | CellChangeTimes->{{3.5623166192436438`*^9, 3.562316623465918*^9}}], 153 | 154 | Cell[TextData[{ 155 | "Suppose we want to work with part of the text. We can extract the \ 156 | Introduction of ", 157 | StyleBox["Origin", 158 | FontSlant->"Italic"], 159 | " by pulling out everything between \[OpenCurlyDoubleQuote]INTRODUCTION\ 160 | \[CloseCurlyDoubleQuote] and \[OpenCurlyDoubleQuote]CHAPTER 1\ 161 | \[CloseCurlyDoubleQuote]. The command that we use to extract part of a string \ 162 | is called ", 163 | StyleBox["StringCases.", 164 | FontWeight->"Bold"], 165 | " Once we have extracted the Introduction, we want to check to make sure \ 166 | that the command worked the way we expected. Rather than look at the whole \ 167 | text right now, we can use the ", 168 | StyleBox["Short", 169 | FontWeight->"Bold"], 170 | " command to show us about five lines of the text. It returns a couple of \ 171 | phrases at the beginning and end, using ellipses to indicate the much larger \ 172 | portion which we are not seeing." 
173 | }], "Text", 174 | CellChangeTimes->{{3.562316634766395*^9, 3.562316664377317*^9}, { 175 | 3.56231671444033*^9, 3.56231673889585*^9}, {3.562316770676896*^9, 176 | 3.562316870803382*^9}}], 177 | 178 | Cell[CellGroupData[{ 179 | 180 | Cell[BoxData[{ 181 | RowBox[{ 182 | RowBox[{"intro", "=", 183 | RowBox[{ 184 | RowBox[{"StringCases", "[", 185 | RowBox[{"sample", ",", 186 | RowBox[{"Shortest", "[", 187 | RowBox[{"\"\\"", "~~", "__", "~~", "\"\\""}], 188 | "]"}]}], "]"}], "\[LeftDoubleBracket]", "1", 189 | "\[RightDoubleBracket]"}]}], ";"}], "\n", 190 | RowBox[{"Short", "[", 191 | RowBox[{"intro", ",", "5"}], "]"}]}], "Input", 192 | CellChangeTimes->{{3.547020660149399*^9, 3.547020661360504*^9}, { 193 | 3.5470207626733522`*^9, 3.547020772750098*^9}, {3.547020920187571*^9, 194 | 3.547020930500083*^9}, 3.547020991733007*^9, {3.547022257876546*^9, 195 | 3.547022260431181*^9}, 3.547033912735633*^9}], 196 | 197 | Cell[BoxData[ 198 | TagBox[ 199 | RowBox[{"\<\"INTRODUCTION. When on board H.M.S. 'Beagle,' as naturalist, I \ 200 | was much struck with certain fac\"\>", 201 | "\[Ellipsis]", "\<\"nced that Natural Selection has been the main but not \ 202 | exclusive means of modification. CHAPTER\"\>"}], 203 | Short[#, 5]& ]], "Output", 204 | CellChangeTimes->{3.562316682377681*^9, 3.562318661224654*^9}] 205 | }, Open ]], 206 | 207 | Cell[TextData[{ 208 | "Note the use of the ", 209 | StyleBox["Shortest", 210 | FontWeight->"Bold"], 211 | " command in the string matching expression above. Since there are probably \ 212 | multiple copies of the word \[OpenCurlyDoubleQuote]CHAPTER\ 213 | \[CloseCurlyDoubleQuote] in the text, we have to tell ", 214 | StyleBox["Mathematica", 215 | FontSlant->"Italic"], 216 | " how much of the text we want to match... do we want the portion between \ 217 | \[OpenCurlyDoubleQuote]INTRODUCTION\[CloseCurlyDoubleQuote] and the first \ 218 | instance of the word, the second, the last? 
Here are two examples to consider:" 219 | }], "Text", 220 | CellChangeTimes->{{3.5623167005861263`*^9, 3.5623167043928213`*^9}, { 221 | 3.562316886464653*^9, 3.562316948131661*^9}, {3.5623177572115307`*^9, 222 | 3.562317826666011*^9}}], 223 | 224 | Cell[CellGroupData[{ 225 | 226 | Cell[BoxData[ 227 | RowBox[{"StringCases", "[", 228 | RowBox[{"\"\\"", ",", 229 | RowBox[{"\"\\"", "~~", "__", "~~", "\"\\""}]}], "]"}]], "Input", 230 | CellChangeTimes->{{3.547021599096979*^9, 3.5470216326338797`*^9}, { 231 | 3.547021704529344*^9, 3.547021707601622*^9}}], 232 | 233 | Cell[BoxData[ 234 | RowBox[{"{", "\<\"bananarama\"\>", "}"}]], "Output", 235 | CellChangeTimes->{3.5623164024163513`*^9, 3.562317840946478*^9, 236 | 3.5623186612573442`*^9}] 237 | }, Open ]], 238 | 239 | Cell[CellGroupData[{ 240 | 241 | Cell[BoxData[ 242 | RowBox[{"StringCases", "[", 243 | RowBox[{"\"\\"", ",", 244 | RowBox[{"Shortest", "[", 245 | RowBox[{"\"\\"", "~~", "__", "~~", "\"\\""}], "]"}]}], 246 | "]"}]], "Input", 247 | CellChangeTimes->{{3.547021645909419*^9, 3.547021650067978*^9}, { 248 | 3.547021719045865*^9, 3.547021721915296*^9}, {3.547021755014896*^9, 249 | 3.547021759127054*^9}}], 250 | 251 | Cell[BoxData[ 252 | RowBox[{"{", "\<\"bana\"\>", "}"}]], "Output", 253 | CellChangeTimes->{3.5623164024553547`*^9, 3.562317844964492*^9, 254 | 3.562318661291707*^9}] 255 | }, Open ]] 256 | }, Open ]], 257 | 258 | Cell[CellGroupData[{ 259 | 260 | Cell["\<\ 261 | From a string to a list of words\ 262 | \>", "Subsection", 263 | CellChangeTimes->{{3.562317864698525*^9, 3.5623178693126802`*^9}}], 264 | 265 | Cell["\<\ 266 | It will be easier for us to analyze the text if we turn it into a list of \ 267 | words. In order to eliminate punctuation, I am going to get rid of everything \ 268 | that is not a word character. Note that doing things this way turns the \ 269 | abbreviation H.M.S. 
into three separate words.\ 270 | \>", "Text", 271 | CellChangeTimes->{{3.562317879441702*^9, 3.562317950618126*^9}}], 272 | 273 | Cell[CellGroupData[{ 274 | 275 | Cell[BoxData[{ 276 | RowBox[{ 277 | RowBox[{"introList", "=", 278 | RowBox[{"StringSplit", "[", 279 | RowBox[{"intro", ",", 280 | RowBox[{ 281 | RowBox[{"Except", "[", "WordCharacter", "]"}], ".."}]}], "]"}]}], 282 | ";"}], "\n", 283 | RowBox[{"Short", "[", 284 | RowBox[{"introList", ",", "4"}], "]"}]}], "Input", 285 | CellChangeTimes->{{3.547022048665861*^9, 3.54702206566894*^9}, { 286 | 3.547022219499251*^9, 3.54702222715679*^9}, 3.547033933965227*^9, 287 | 3.5623179149139853`*^9}], 288 | 289 | Cell[BoxData[ 290 | TagBox[ 291 | RowBox[{"{", 292 | RowBox[{"\<\"INTRODUCTION\"\>", ",", "\<\"When\"\>", ",", "\<\"on\"\>", 293 | ",", "\<\"board\"\>", ",", "\<\"H\"\>", ",", "\<\"M\"\>", 294 | ",", "\<\"S\"\>", ",", "\<\"Beagle\"\>", ",", "\<\"as\"\>", ",", 295 | RowBox[{"\[LeftSkeleton]", "1696", "\[RightSkeleton]"}], 296 | ",", "\<\"the\"\>", ",", "\<\"main\"\>", ",", "\<\"but\"\>", 297 | ",", "\<\"not\"\>", ",", "\<\"exclusive\"\>", ",", "\<\"means\"\>", 298 | ",", "\<\"of\"\>", ",", "\<\"modification\"\>", ",", "\<\"CHAPTER\"\>"}], 299 | "}"}], 300 | Short[#, 4]& ]], "Output", 301 | CellChangeTimes->{{3.562317908374763*^9, 3.5623179158390417`*^9}, 302 | 3.562318661324486*^9}] 303 | }, Open ]], 304 | 305 | Cell[TextData[{ 306 | StyleBox["Mathematica", 307 | FontSlant->"Italic"], 308 | " has a number of commands for selecting elements from lists. The ", 309 | StyleBox["Take", 310 | FontWeight->"Bold"], 311 | " command allows us to extract a given number of items from the beginning of \ 312 | a list." 
313 | }], "Text", 314 | CellChangeTimes->{{3.5623179705654716`*^9, 3.562318018323454*^9}, { 315 | 3.562318073635407*^9, 3.562318100089039*^9}}], 316 | 317 | Cell[CellGroupData[{ 318 | 319 | Cell[BoxData[ 320 | RowBox[{"Take", "[", 321 | RowBox[{"introList", ",", "40"}], "]"}]], "Input", 322 | CellChangeTimes->{{3.513509064468219*^9, 3.513509075690248*^9}, { 323 | 3.513509118212433*^9, 3.513509119263319*^9}, {3.547022439204197*^9, 324 | 3.547022479088327*^9}}], 325 | 326 | Cell[BoxData[ 327 | RowBox[{"{", 328 | RowBox[{"\<\"INTRODUCTION\"\>", ",", "\<\"When\"\>", ",", "\<\"on\"\>", 329 | ",", "\<\"board\"\>", ",", "\<\"H\"\>", ",", "\<\"M\"\>", ",", "\<\"S\"\>", 330 | ",", "\<\"Beagle\"\>", ",", "\<\"as\"\>", ",", "\<\"naturalist\"\>", 331 | ",", "\<\"I\"\>", ",", "\<\"was\"\>", ",", "\<\"much\"\>", 332 | ",", "\<\"struck\"\>", ",", "\<\"with\"\>", ",", "\<\"certain\"\>", 333 | ",", "\<\"facts\"\>", ",", "\<\"in\"\>", ",", "\<\"the\"\>", 334 | ",", "\<\"distribution\"\>", ",", "\<\"of\"\>", ",", "\<\"the\"\>", 335 | ",", "\<\"inhabitants\"\>", ",", "\<\"of\"\>", ",", "\<\"South\"\>", 336 | ",", "\<\"America\"\>", ",", "\<\"and\"\>", ",", "\<\"in\"\>", 337 | ",", "\<\"the\"\>", ",", "\<\"geological\"\>", ",", "\<\"relations\"\>", 338 | ",", "\<\"of\"\>", ",", "\<\"the\"\>", ",", "\<\"present\"\>", 339 | ",", "\<\"to\"\>", ",", "\<\"the\"\>", ",", "\<\"past\"\>", 340 | ",", "\<\"inhabitants\"\>", ",", "\<\"of\"\>", ",", "\<\"that\"\>"}], 341 | "}"}]], "Output", 342 | CellChangeTimes->{3.562318037573434*^9, 3.562318661358734*^9}] 343 | }, Open ]], 344 | 345 | Cell[TextData[{ 346 | "The ", 347 | StyleBox["First", 348 | FontWeight->"Bold"], 349 | " command returns the first item in a list, and the ", 350 | StyleBox["Rest", 351 | FontWeight->"Bold"], 352 | " command returns everything but the first element. The ", 353 | StyleBox["Last", 354 | FontWeight->"Bold"], 355 | " command returns the last item." 
356 | }], "Text", 357 | CellChangeTimes->{{3.562318103100919*^9, 3.56231815717583*^9}}], 358 | 359 | Cell[CellGroupData[{ 360 | 361 | Cell[BoxData[ 362 | RowBox[{"First", "[", "introList", "]"}]], "Input", 363 | CellChangeTimes->{{3.513509081403446*^9, 3.513509085434189*^9}, { 364 | 3.547022490910411*^9, 3.547022491363364*^9}}], 365 | 366 | Cell[BoxData["\<\"INTRODUCTION\"\>"], "Output", 367 | CellChangeTimes->{3.562318051158514*^9, 3.562318661391572*^9}] 368 | }, Open ]], 369 | 370 | Cell[CellGroupData[{ 371 | 372 | Cell[BoxData[ 373 | RowBox[{"Short", "[", 374 | RowBox[{"Rest", "[", "introList", "]"}], "]"}]], "Input", 375 | CellChangeTimes->{{3.513509089220837*^9, 3.51350909387435*^9}, { 376 | 3.547022496790865*^9, 3.5470225267779427`*^9}, {3.547022967097556*^9, 377 | 3.547022979718812*^9}}], 378 | 379 | Cell[BoxData[ 380 | TagBox[ 381 | RowBox[{"{", 382 | RowBox[{"\<\"When\"\>", ",", "\<\"on\"\>", ",", 383 | RowBox[{"\[LeftSkeleton]", "1709", "\[RightSkeleton]"}], 384 | ",", "\<\"modification\"\>", ",", "\<\"CHAPTER\"\>"}], "}"}], 385 | Short]], "Output", 386 | CellChangeTimes->{3.562318063577589*^9, 3.562318661424624*^9}] 387 | }, Open ]], 388 | 389 | Cell[CellGroupData[{ 390 | 391 | Cell[BoxData[ 392 | RowBox[{"Last", "[", "introList", "]"}]], "Input", 393 | CellChangeTimes->{{3.5470225361376257`*^9, 3.547022539570929*^9}}], 394 | 395 | Cell[BoxData["\<\"CHAPTER\"\>"], "Output", 396 | CellChangeTimes->{3.5623181673230143`*^9, 3.562318661457645*^9}] 397 | }, Open ]], 398 | 399 | Cell[TextData[{ 400 | "We can also use an ", 401 | StyleBox["index ", 402 | FontSlant->"Italic"], 403 | "to pull out list elements." 
404 | }], "Text", 405 | CellChangeTimes->{{3.5623183031147127`*^9, 3.5623183061435347`*^9}}], 406 | 407 | Cell[CellGroupData[{ 408 | 409 | Cell[BoxData[ 410 | RowBox[{"introList", "\[LeftDoubleBracket]", "8", 411 | "\[RightDoubleBracket]"}]], "Input", 412 | CellChangeTimes->{{3.513510431063981*^9, 3.513510438636302*^9}, { 413 | 3.547022554783963*^9, 3.5470225552187843`*^9}, {3.5623183263684597`*^9, 414 | 3.56231832684877*^9}}], 415 | 416 | Cell[BoxData["\<\"Beagle\"\>"], "Output", 417 | CellChangeTimes->{{3.562318315744556*^9, 3.562318327295183*^9}, 418 | 3.562318661556168*^9}] 419 | }, Open ]], 420 | 421 | Cell[TextData[{ 422 | "We can test whether or not a given item is a member of a list with the ", 423 | StyleBox["MemberQ", 424 | FontWeight->"Bold"], 425 | " command." 426 | }], "Text", 427 | CellChangeTimes->{{3.5623181725210037`*^9, 3.562318211497408*^9}}], 428 | 429 | Cell[CellGroupData[{ 430 | 431 | Cell[BoxData[ 432 | RowBox[{"MemberQ", "[", 433 | RowBox[{"introList", ",", "\"\\""}], "]"}]], "Input", 434 | CellChangeTimes->{{3.513532572094133*^9, 3.513532586760565*^9}, { 435 | 3.54702256314856*^9, 3.5470225636080236`*^9}, {3.5470225977055063`*^9, 436 | 3.547022598702157*^9}}], 437 | 438 | Cell[BoxData["True"], "Output", 439 | CellChangeTimes->{3.562318237050396*^9, 3.562318661492714*^9}] 440 | }, Open ]], 441 | 442 | Cell[CellGroupData[{ 443 | 444 | Cell[BoxData[ 445 | RowBox[{"MemberQ", "[", 446 | RowBox[{"introList", ",", "\"\\""}], "]"}]], "Input", 447 | CellChangeTimes->{{3.513532610212311*^9, 3.5135326186851273`*^9}, { 448 | 3.547022603736636*^9, 3.547022608281859*^9}}], 449 | 450 | Cell[BoxData["False"], "Output", 451 | CellChangeTimes->{3.562318245200338*^9, 3.56231866152454*^9}] 452 | }, Open ]] 453 | }, Open ]], 454 | 455 | Cell[CellGroupData[{ 456 | 457 | Cell["\<\ 458 | The Map command lets us process each element in our list\ 459 | \>", "Subsection", 460 | CellChangeTimes->{{3.562318342118967*^9, 3.562318352454557*^9}}], 461 | 462 | 
Cell[TextData[{ 463 | "If we want to apply some kind of function to every element in a list, the \ 464 | most natural way to accomplish this in ", 465 | StyleBox["Mathematica", 466 | FontSlant->"Italic"], 467 | " is with the ", 468 | StyleBox["Map", 469 | FontWeight->"Bold"], 470 | " command. Here we show three examples using the first 40 words of the \ 471 | Introduction. Note that ", 472 | StyleBox["Map", 473 | FontWeight->"Bold"], 474 | " returns a new list rather than altering the original one." 475 | }], "Text", 476 | CellChangeTimes->{{3.562318404803013*^9, 3.562318494686194*^9}, { 477 | 3.562319165292338*^9, 3.56231919412246*^9}}], 478 | 479 | Cell[CellGroupData[{ 480 | 481 | Cell[BoxData[ 482 | RowBox[{"Map", "[", 483 | RowBox[{"ToUpperCase", ",", 484 | RowBox[{"Take", "[", 485 | RowBox[{"introList", ",", "40"}], "]"}]}], "]"}]], "Input", 486 | CellChangeTimes->{{3.513509185691944*^9, 3.513509197798243*^9}, { 487 | 3.5470239320715923`*^9, 3.547023949915019*^9}}], 488 | 489 | Cell[BoxData[ 490 | RowBox[{"{", 491 | RowBox[{"\<\"INTRODUCTION\"\>", ",", "\<\"WHEN\"\>", ",", "\<\"ON\"\>", 492 | ",", "\<\"BOARD\"\>", ",", "\<\"H\"\>", ",", "\<\"M\"\>", ",", "\<\"S\"\>", 493 | ",", "\<\"BEAGLE\"\>", ",", "\<\"AS\"\>", ",", "\<\"NATURALIST\"\>", 494 | ",", "\<\"I\"\>", ",", "\<\"WAS\"\>", ",", "\<\"MUCH\"\>", 495 | ",", "\<\"STRUCK\"\>", ",", "\<\"WITH\"\>", ",", "\<\"CERTAIN\"\>", 496 | ",", "\<\"FACTS\"\>", ",", "\<\"IN\"\>", ",", "\<\"THE\"\>", 497 | ",", "\<\"DISTRIBUTION\"\>", ",", "\<\"OF\"\>", ",", "\<\"THE\"\>", 498 | ",", "\<\"INHABITANTS\"\>", ",", "\<\"OF\"\>", ",", "\<\"SOUTH\"\>", 499 | ",", "\<\"AMERICA\"\>", ",", "\<\"AND\"\>", ",", "\<\"IN\"\>", 500 | ",", "\<\"THE\"\>", ",", "\<\"GEOLOGICAL\"\>", ",", "\<\"RELATIONS\"\>", 501 | ",", "\<\"OF\"\>", ",", "\<\"THE\"\>", ",", "\<\"PRESENT\"\>", 502 | ",", "\<\"TO\"\>", ",", "\<\"THE\"\>", ",", "\<\"PAST\"\>", 503 | ",", "\<\"INHABITANTS\"\>", ",", "\<\"OF\"\>", ",", "\<\"THAT\"\>"}], 504 | "}"}]], 
"Output", 505 | CellChangeTimes->{3.5623191405567293`*^9}] 506 | }, Open ]], 507 | 508 | Cell[CellGroupData[{ 509 | 510 | Cell[BoxData[ 511 | RowBox[{"Map", "[", 512 | RowBox[{"ToLowerCase", ",", 513 | RowBox[{"Take", "[", 514 | RowBox[{"introList", ",", "40"}], "]"}]}], "]"}]], "Input", 515 | CellChangeTimes->{{3.513509185691944*^9, 3.513509197798243*^9}, { 516 | 3.513509235288643*^9, 3.513509236489555*^9}, 3.547023953315316*^9}], 517 | 518 | Cell[BoxData[ 519 | RowBox[{"{", 520 | RowBox[{"\<\"introduction\"\>", ",", "\<\"when\"\>", ",", "\<\"on\"\>", 521 | ",", "\<\"board\"\>", ",", "\<\"h\"\>", ",", "\<\"m\"\>", ",", "\<\"s\"\>", 522 | ",", "\<\"beagle\"\>", ",", "\<\"as\"\>", ",", "\<\"naturalist\"\>", 523 | ",", "\<\"i\"\>", ",", "\<\"was\"\>", ",", "\<\"much\"\>", 524 | ",", "\<\"struck\"\>", ",", "\<\"with\"\>", ",", "\<\"certain\"\>", 525 | ",", "\<\"facts\"\>", ",", "\<\"in\"\>", ",", "\<\"the\"\>", 526 | ",", "\<\"distribution\"\>", ",", "\<\"of\"\>", ",", "\<\"the\"\>", 527 | ",", "\<\"inhabitants\"\>", ",", "\<\"of\"\>", ",", "\<\"south\"\>", 528 | ",", "\<\"america\"\>", ",", "\<\"and\"\>", ",", "\<\"in\"\>", 529 | ",", "\<\"the\"\>", ",", "\<\"geological\"\>", ",", "\<\"relations\"\>", 530 | ",", "\<\"of\"\>", ",", "\<\"the\"\>", ",", "\<\"present\"\>", 531 | ",", "\<\"to\"\>", ",", "\<\"the\"\>", ",", "\<\"past\"\>", 532 | ",", "\<\"inhabitants\"\>", ",", "\<\"of\"\>", ",", "\<\"that\"\>"}], 533 | "}"}]], "Output", 534 | CellChangeTimes->{3.562319150838316*^9}] 535 | }, Open ]], 536 | 537 | Cell[CellGroupData[{ 538 | 539 | Cell[BoxData[ 540 | RowBox[{"Map", "[", 541 | RowBox[{"StringLength", ",", 542 | RowBox[{"Take", "[", 543 | RowBox[{"introList", ",", "40"}], "]"}]}], "]"}]], "Input", 544 | CellChangeTimes->{{3.5135095985148573`*^9, 3.513509607824321*^9}, 545 | 3.5470239562436457`*^9}], 546 | 547 | Cell[BoxData[ 548 | RowBox[{"{", 549 | RowBox[{ 550 | "12", ",", "4", ",", "2", ",", "5", ",", "1", ",", "1", ",", "1", ",", "6", 551 | ",", "2", ",", 
"10", ",", "1", ",", "3", ",", "4", ",", "6", ",", "4", ",", 552 | "7", ",", "5", ",", "2", ",", "3", ",", "12", ",", "2", ",", "3", ",", 553 | "11", ",", "2", ",", "5", ",", "7", ",", "3", ",", "2", ",", "3", ",", 554 | "10", ",", "9", ",", "2", ",", "3", ",", "7", ",", "2", ",", "3", ",", "4", 555 | ",", "11", ",", "2", ",", "4"}], "}"}]], "Output", 556 | CellChangeTimes->{3.5623191624591703`*^9}] 557 | }, Open ]] 558 | }, Open ]], 559 | 560 | Cell[CellGroupData[{ 561 | 562 | Cell["Computing word frequencies", "Subsection", 563 | CellChangeTimes->{{3.562319208211643*^9, 3.5623192124010487`*^9}}], 564 | 565 | Cell[TextData[{ 566 | "In order to compute word frequencies, we first convert all words to \ 567 | lowercase, then sort them and count how often each appears using the ", 568 | StyleBox["Tally ", 569 | FontWeight->"Bold"], 570 | "command. This gives us a list of lists, where each of the smaller lists \ 571 | contains a single word and its frequency. " 572 | }], "Text", 573 | CellChangeTimes->{{3.5623192452666683`*^9, 3.562319389960231*^9}, 574 | 3.5623194731518393`*^9}], 575 | 576 | Cell[CellGroupData[{ 577 | 578 | Cell[BoxData[{ 579 | RowBox[{ 580 | RowBox[{"lowerIntroList", "=", 581 | RowBox[{"Map", "[", 582 | RowBox[{"ToLowerCase", ",", "introList"}], "]"}]}], 583 | ";"}], "\[IndentingNewLine]", 584 | RowBox[{ 585 | RowBox[{"sortedIntroList", "=", 586 | RowBox[{"Sort", "[", "lowerIntroList", "]"}]}], ";"}], "\n", 587 | RowBox[{ 588 | RowBox[{"wordFreq", "=", 589 | RowBox[{"Tally", "[", "sortedIntroList", "]"}]}], 590 | ";"}], "\[IndentingNewLine]", 591 | RowBox[{"Short", "[", "wordFreq", "]"}]}], "Input", 592 | CellChangeTimes->{{3.5135100758019943`*^9, 3.5135101114136667`*^9}, { 593 | 3.547024091976222*^9, 3.5470240977101707`*^9}, {3.562319414795299*^9, 594 | 3.562319462046362*^9}, 3.562319493269878*^9}], 595 | 596 | Cell[BoxData[ 597 | TagBox[ 598 | RowBox[{"{", 599 | RowBox[{ 600 | RowBox[{"{", 601 | RowBox[{"\<\"1837\"\>", ",", "1"}], "}"}], ",", 602 | 
RowBox[{"{", 603 | RowBox[{"\<\"1844\"\>", ",", "2"}], "}"}], ",", 604 | RowBox[{"\[LeftSkeleton]", "624", "\[RightSkeleton]"}], ",", 605 | RowBox[{"{", 606 | RowBox[{"\<\"years\"\>", ",", "3"}], "}"}], ",", 607 | RowBox[{"{", 608 | RowBox[{"\<\"yet\"\>", ",", "2"}], "}"}]}], "}"}], 609 | Short]], "Output", 610 | CellChangeTimes->{3.562319496540655*^9}] 611 | }, Open ]], 612 | 613 | Cell[TextData[{ 614 | "Finally we can sort our tally list by the frequency of each item. This is \ 615 | traditionally done in descending order. In ", 616 | StyleBox["Mathematica", 617 | FontSlant->"Italic"], 618 | " we can change the sort order by passing the ", 619 | StyleBox["Sort", 620 | FontWeight->"Bold"], 621 | " command an anonymous function. (It isn\[CloseCurlyQuote]t crucial for this \ 622 | example to understand exactly how this works, but it is explained in the next \ 623 | section if you are curious. If not, just skip ahead.)" 624 | }], "Text", 625 | CellChangeTimes->{{3.562319475573374*^9, 3.5623195220915823`*^9}, { 626 | 3.562319618730534*^9, 3.5623196967556953`*^9}}], 627 | 628 | Cell[CellGroupData[{ 629 | 630 | Cell[BoxData[{ 631 | RowBox[{ 632 | RowBox[{"sortedFrequencyList", "=", 633 | RowBox[{"Sort", "[", 634 | RowBox[{"wordFreq", ",", 635 | RowBox[{ 636 | RowBox[{ 637 | RowBox[{"#1", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}], 638 | ">", 639 | RowBox[{"#2", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 640 | "&"}]}], "]"}]}], ";"}], "\n", 641 | RowBox[{"Short", "[", 642 | RowBox[{"sortedFrequencyList", ",", "8"}], "]"}]}], "Input", 643 | CellChangeTimes->{{3.51351174816334*^9, 3.5135117710310163`*^9}, 644 | 3.547033994291498*^9, {3.562319538940189*^9, 3.562319547109508*^9}}], 645 | 646 | Cell[BoxData[ 647 | TagBox[ 648 | RowBox[{"{", 649 | RowBox[{ 650 | RowBox[{"{", 651 | RowBox[{"\<\"the\"\>", ",", "100"}], "}"}], ",", 652 | RowBox[{"{", 653 | RowBox[{"\<\"of\"\>", ",", "91"}], "}"}], ",", 654 | RowBox[{"{", 655 | RowBox[{"\<\"to\"\>", ",", 
"54"}], "}"}], ",", 656 | RowBox[{"{", 657 | RowBox[{"\<\"and\"\>", ",", "52"}], "}"}], ",", 658 | RowBox[{"{", 659 | RowBox[{"\<\"i\"\>", ",", "44"}], "}"}], ",", 660 | RowBox[{"{", 661 | RowBox[{"\<\"in\"\>", ",", "37"}], "}"}], ",", 662 | RowBox[{"{", 663 | RowBox[{"\<\"that\"\>", ",", "27"}], "}"}], ",", 664 | RowBox[{"{", 665 | RowBox[{"\<\"a\"\>", ",", "24"}], "}"}], ",", 666 | RowBox[{"{", 667 | RowBox[{"\<\"this\"\>", ",", "20"}], "}"}], ",", 668 | RowBox[{"{", 669 | RowBox[{"\<\"it\"\>", ",", "20"}], "}"}], ",", 670 | RowBox[{"{", 671 | RowBox[{"\<\"be\"\>", ",", "20"}], "}"}], ",", 672 | RowBox[{"{", 673 | RowBox[{"\<\"which\"\>", ",", "18"}], "}"}], ",", 674 | RowBox[{"\[LeftSkeleton]", "605", "\[RightSkeleton]"}], ",", 675 | RowBox[{"{", 676 | RowBox[{"\<\"admiration\"\>", ",", "1"}], "}"}], ",", 677 | RowBox[{"{", 678 | RowBox[{"\<\"admirably\"\>", ",", "1"}], "}"}], ",", 679 | RowBox[{"{", 680 | RowBox[{"\<\"adduced\"\>", ",", "1"}], "}"}], ",", 681 | RowBox[{"{", 682 | RowBox[{"\<\"adapted\"\>", ",", "1"}], "}"}], ",", 683 | RowBox[{"{", 684 | RowBox[{"\<\"acquire\"\>", ",", "1"}], "}"}], ",", 685 | RowBox[{"{", 686 | RowBox[{"\<\"acknowledging\"\>", ",", "1"}], "}"}], ",", 687 | RowBox[{"{", 688 | RowBox[{"\<\"acknowledged\"\>", ",", "1"}], "}"}], ",", 689 | RowBox[{"{", 690 | RowBox[{"\<\"accuracy\"\>", ",", "1"}], "}"}], ",", 691 | RowBox[{"{", 692 | RowBox[{"\<\"account\"\>", ",", "1"}], "}"}], ",", 693 | RowBox[{"{", 694 | RowBox[{"\<\"absolutely\"\>", ",", "1"}], "}"}], ",", 695 | RowBox[{"{", 696 | RowBox[{"\<\"1837\"\>", ",", "1"}], "}"}]}], "}"}], 697 | Short[#, 8]& ]], "Output", 698 | CellChangeTimes->{{3.562319534074436*^9, 3.56231954829252*^9}}] 699 | }, Open ]], 700 | 701 | Cell["\<\ 702 | Here are the twenty most frequent words:\ 703 | \>", "Text", 704 | CellChangeTimes->{{3.562324096917938*^9, 3.562324106244768*^9}}], 705 | 706 | Cell[CellGroupData[{ 707 | 708 | Cell[BoxData[ 709 | RowBox[{"Take", "[", 710 | 
RowBox[{"sortedFrequencyList", ",", "20"}], "]"}]], "Input", 711 | CellChangeTimes->{{3.5135118639232492`*^9, 3.513511878034113*^9}}], 712 | 713 | Cell[BoxData[ 714 | RowBox[{"{", 715 | RowBox[{ 716 | RowBox[{"{", 717 | RowBox[{"\<\"the\"\>", ",", "100"}], "}"}], ",", 718 | RowBox[{"{", 719 | RowBox[{"\<\"of\"\>", ",", "91"}], "}"}], ",", 720 | RowBox[{"{", 721 | RowBox[{"\<\"to\"\>", ",", "54"}], "}"}], ",", 722 | RowBox[{"{", 723 | RowBox[{"\<\"and\"\>", ",", "52"}], "}"}], ",", 724 | RowBox[{"{", 725 | RowBox[{"\<\"i\"\>", ",", "44"}], "}"}], ",", 726 | RowBox[{"{", 727 | RowBox[{"\<\"in\"\>", ",", "37"}], "}"}], ",", 728 | RowBox[{"{", 729 | RowBox[{"\<\"that\"\>", ",", "27"}], "}"}], ",", 730 | RowBox[{"{", 731 | RowBox[{"\<\"a\"\>", ",", "24"}], "}"}], ",", 732 | RowBox[{"{", 733 | RowBox[{"\<\"this\"\>", ",", "20"}], "}"}], ",", 734 | RowBox[{"{", 735 | RowBox[{"\<\"it\"\>", ",", "20"}], "}"}], ",", 736 | RowBox[{"{", 737 | RowBox[{"\<\"be\"\>", ",", "20"}], "}"}], ",", 738 | RowBox[{"{", 739 | RowBox[{"\<\"which\"\>", ",", "18"}], "}"}], ",", 740 | RowBox[{"{", 741 | RowBox[{"\<\"have\"\>", ",", "18"}], "}"}], ",", 742 | RowBox[{"{", 743 | RowBox[{"\<\"species\"\>", ",", "17"}], "}"}], ",", 744 | RowBox[{"{", 745 | RowBox[{"\<\"on\"\>", ",", "17"}], "}"}], ",", 746 | RowBox[{"{", 747 | RowBox[{"\<\"is\"\>", ",", "17"}], "}"}], ",", 748 | RowBox[{"{", 749 | RowBox[{"\<\"as\"\>", ",", "17"}], "}"}], ",", 750 | RowBox[{"{", 751 | RowBox[{"\<\"my\"\>", ",", "13"}], "}"}], ",", 752 | RowBox[{"{", 753 | RowBox[{"\<\"been\"\>", ",", "13"}], "}"}], ",", 754 | RowBox[{"{", 755 | RowBox[{"\<\"for\"\>", ",", "11"}], "}"}]}], "}"}]], "Output", 756 | CellChangeTimes->{3.5623241103469276`*^9}] 757 | }, Open ]], 758 | 759 | Cell[TextData[{ 760 | "The ", 761 | StyleBox["Cases", 762 | FontWeight->"Bold"], 763 | " statement pulls every item from a list that matches a pattern. 
Here we \ 764 | are looking to see how often the word \[OpenCurlyDoubleQuote]modification\ 765 | \[CloseCurlyDoubleQuote] appears." 766 | }], "Text", 767 | CellChangeTimes->{{3.562324123165251*^9, 3.56232412615028*^9}}], 768 | 769 | Cell[CellGroupData[{ 770 | 771 | Cell[BoxData[ 772 | RowBox[{"Cases", "[", 773 | RowBox[{"wordFreq", ",", 774 | RowBox[{"{", 775 | RowBox[{"\"\\"", ",", "_"}], "}"}]}], "]"}]], "Input", 776 | CellChangeTimes->{{3.513511326034938*^9, 3.513511345819559*^9}, 777 | 3.513511410229457*^9, {3.547024576379676*^9, 3.547024577734756*^9}}], 778 | 779 | Cell[BoxData[ 780 | RowBox[{"{", 781 | RowBox[{"{", 782 | RowBox[{"\<\"modification\"\>", ",", "4"}], "}"}], "}"}]], "Output", 783 | CellChangeTimes->{3.562324134832981*^9}] 784 | }, Open ]] 785 | }, Open ]], 786 | 787 | Cell[CellGroupData[{ 788 | 789 | Cell["Aside: Anonymous Functions", "Subsection", 790 | CellChangeTimes->{{3.562319706955283*^9, 3.56231971589673*^9}}], 791 | 792 | Cell[TextData[{ 793 | "Most programming languages let you define new functions, and ", 794 | StyleBox["Mathematica", 795 | FontSlant->"Italic"], 796 | " is no exception. You can use these new functions with built-in commands \ 797 | like ", 798 | StyleBox["Map", 799 | FontWeight->"Bold"], 800 | "." 
801 | }], "Text", 802 | CellChangeTimes->{{3.5623197718416767`*^9, 3.562319821544907*^9}}], 803 | 804 | Cell[BoxData[ 805 | RowBox[{ 806 | RowBox[{"plus2", "[", "x_", "]"}], ":=", "\[IndentingNewLine]", 807 | RowBox[{"Return", "[", 808 | RowBox[{"x", "+", "2"}], "]"}]}]], "Input", 809 | CellChangeTimes->{{3.513501293231903*^9, 3.5135013115776653`*^9}, 810 | 3.513511042853307*^9, {3.562319830467196*^9, 3.5623198333294573`*^9}}], 811 | 812 | Cell[CellGroupData[{ 813 | 814 | Cell[BoxData[ 815 | RowBox[{"Map", "[", 816 | RowBox[{"plus2", ",", 817 | RowBox[{"{", 818 | RowBox[{"1", ",", "2", ",", "3"}], "}"}]}], "]"}]], "Input", 819 | CellChangeTimes->{{3.513501316317018*^9, 3.513501326328458*^9}}], 820 | 821 | Cell[BoxData[ 822 | RowBox[{"{", 823 | RowBox[{"3", ",", "4", ",", "5"}], "}"}]], "Output", 824 | CellChangeTimes->{3.562319841212895*^9}] 825 | }, Open ]], 826 | 827 | Cell[TextData[{ 828 | "Being able to define functions allows you to \[LineSeparator]\t", 829 | StyleBox["hide details", 830 | FontSlant->"Italic"], 831 | ": as long as you can use a function like ", 832 | StyleBox["plus2", 833 | FontWeight->"Bold"], 834 | " you may not care how it works\n\t", 835 | StyleBox["reuse and share code", 836 | FontSlant->"Italic"], 837 | ": so you don\[CloseCurlyQuote]t have to keep reinventing the wheel." 838 | }], "Text", 839 | CellChangeTimes->{{3.562319865835971*^9, 3.562319888789777*^9}}], 840 | 841 | Cell[TextData[{ 842 | "In ", 843 | StyleBox["Mathematica", 844 | FontSlant->"Italic"], 845 | ", you can also create ", 846 | StyleBox["anonymous", 847 | FontSlant->"Italic"], 848 | " functions. One way of writing an anonymous function in ", 849 | StyleBox["Mathematica", 850 | FontSlant->"Italic"], 851 | " is to use a ", 852 | StyleBox["Slot", 853 | FontWeight->"Bold"], 854 | " in place of a variable." 
855 | }], "Text", 856 | CellChangeTimes->{{3.5623199161242*^9, 3.562319919861532*^9}}], 857 | 858 | Cell["# + 2 &", "Text", 859 | CellChangeTimes->{3.5623199325938053`*^9}], 860 | 861 | Cell["\<\ 862 | So we don\[CloseCurlyQuote]t have to define our function in advance, we can \ 863 | just write it where we need it.\ 864 | \>", "Text", 865 | CellChangeTimes->{{3.5623199427263813`*^9, 3.5623199452594347`*^9}}], 866 | 867 | Cell[CellGroupData[{ 868 | 869 | Cell[BoxData[ 870 | RowBox[{"Map", "[", 871 | RowBox[{ 872 | RowBox[{ 873 | RowBox[{"#", "+", "2"}], "&"}], ",", 874 | RowBox[{"{", 875 | RowBox[{"1", ",", "2", ",", "3"}], "}"}]}], "]"}]], "Input", 876 | CellChangeTimes->{{3.51350155378265*^9, 3.513501563382359*^9}}], 877 | 878 | Cell[BoxData[ 879 | RowBox[{"{", 880 | RowBox[{"3", ",", "4", ",", "5"}], "}"}]], "Output", 881 | CellChangeTimes->{3.562319956896357*^9}] 882 | }, Open ]], 883 | 884 | Cell["\<\ 885 | We can apply an anonymous function to an argument like this:\ 886 | \>", "Text", 887 | CellChangeTimes->{{3.562320000969887*^9, 3.562320003772716*^9}}], 888 | 889 | Cell[CellGroupData[{ 890 | 891 | Cell[BoxData[ 892 | RowBox[{ 893 | RowBox[{"(", 894 | RowBox[{ 895 | RowBox[{"#", "+", "2"}], "&"}], ")"}], "[", "40", "]"}]], "Input", 896 | CellChangeTimes->{{3.513501657581882*^9, 3.513501687602624*^9}}], 897 | 898 | Cell[BoxData["42"], "Output", 899 | CellChangeTimes->{3.562320016370151*^9}] 900 | }, Open ]], 901 | 902 | Cell[TextData[{ 903 | "A named function like ", 904 | StyleBox["plus2", 905 | FontWeight->"Bold"], 906 | " is still sitting there when we\[CloseCurlyQuote]re done with it. An \ 907 | anonymous function disappears immediately after use." 
908 | }], "Text", 909 | CellChangeTimes->{{3.5623200303562593`*^9, 3.562320038914756*^9}}] 910 | }, Open ]], 911 | 912 | Cell[CellGroupData[{ 913 | 914 | Cell["N-grams", "Subsection", 915 | CellChangeTimes->{{3.562324151076453*^9, 3.562324152084149*^9}, { 916 | 3.5623243359816313`*^9, 3.562324337492695*^9}}], 917 | 918 | Cell[TextData[{ 919 | "The ", 920 | StyleBox["Partition", 921 | FontWeight->"Bold"], 922 | " command can be used to create n-grams. This tells ", 923 | StyleBox["Mathematica", 924 | FontSlant->"Italic"], 925 | " to give us all of the partitions of a list that are two elements long and \ 926 | that are offset by one." 927 | }], "Text", 928 | CellChangeTimes->{{3.5623241705874157`*^9, 3.5623241812758408`*^9}, { 929 | 3.562324339772488*^9, 3.562324340299816*^9}}], 930 | 931 | Cell[CellGroupData[{ 932 | 933 | Cell[BoxData[{ 934 | RowBox[{ 935 | RowBox[{"bigrams", "=", 936 | RowBox[{"Partition", "[", 937 | RowBox[{"lowerIntroList", ",", "2", ",", "1"}], "]"}]}], ";"}], "\n", 938 | RowBox[{"Short", "[", 939 | RowBox[{"bigrams", ",", "8"}], "]"}]}], "Input", 940 | CellChangeTimes->{{3.513510236912479*^9, 3.513510267416089*^9}, 941 | 3.513510309386425*^9, {3.5135105226742773`*^9, 3.513510526055801*^9}, { 942 | 3.547024603386063*^9, 3.547024604867043*^9}, 3.547034020875515*^9, { 943 | 3.562324194997053*^9, 3.562324215246892*^9}}], 944 | 945 | Cell[BoxData[ 946 | TagBox[ 947 | RowBox[{"{", 948 | RowBox[{ 949 | RowBox[{"{", 950 | RowBox[{"\<\"introduction\"\>", ",", "\<\"when\"\>"}], "}"}], ",", 951 | RowBox[{"{", 952 | RowBox[{"\<\"when\"\>", ",", "\<\"on\"\>"}], "}"}], ",", 953 | RowBox[{"{", 954 | RowBox[{"\<\"on\"\>", ",", "\<\"board\"\>"}], "}"}], ",", 955 | RowBox[{"{", 956 | RowBox[{"\<\"board\"\>", ",", "\<\"h\"\>"}], "}"}], ",", 957 | RowBox[{"{", 958 | RowBox[{"\<\"h\"\>", ",", "\<\"m\"\>"}], "}"}], ",", 959 | RowBox[{"{", 960 | RowBox[{"\<\"m\"\>", ",", "\<\"s\"\>"}], "}"}], ",", 961 | RowBox[{"{", 962 | RowBox[{"\<\"s\"\>", ",", 
"\<\"beagle\"\>"}], "}"}], ",", 963 | RowBox[{"{", 964 | RowBox[{"\<\"beagle\"\>", ",", "\<\"as\"\>"}], "}"}], ",", 965 | RowBox[{"{", 966 | RowBox[{"\<\"as\"\>", ",", "\<\"naturalist\"\>"}], "}"}], ",", 967 | RowBox[{"\[LeftSkeleton]", "1695", "\[RightSkeleton]"}], ",", 968 | RowBox[{"{", 969 | RowBox[{"\<\"been\"\>", ",", "\<\"the\"\>"}], "}"}], ",", 970 | RowBox[{"{", 971 | RowBox[{"\<\"the\"\>", ",", "\<\"main\"\>"}], "}"}], ",", 972 | RowBox[{"{", 973 | RowBox[{"\<\"main\"\>", ",", "\<\"but\"\>"}], "}"}], ",", 974 | RowBox[{"{", 975 | RowBox[{"\<\"but\"\>", ",", "\<\"not\"\>"}], "}"}], ",", 976 | RowBox[{"{", 977 | RowBox[{"\<\"not\"\>", ",", "\<\"exclusive\"\>"}], "}"}], ",", 978 | RowBox[{"{", 979 | RowBox[{"\<\"exclusive\"\>", ",", "\<\"means\"\>"}], "}"}], ",", 980 | RowBox[{"{", 981 | RowBox[{"\<\"means\"\>", ",", "\<\"of\"\>"}], "}"}], ",", 982 | RowBox[{"{", 983 | RowBox[{"\<\"of\"\>", ",", "\<\"modification\"\>"}], "}"}], ",", 984 | RowBox[{"{", 985 | RowBox[{"\<\"modification\"\>", ",", "\<\"chapter\"\>"}], "}"}]}], "}"}], 986 | 987 | Short[#, 8]& ]], "Output", 988 | CellChangeTimes->{{3.562324191457252*^9, 3.562324215811686*^9}}] 989 | }, Open ]], 990 | 991 | Cell["\<\ 992 | We can tally and sort bigrams, too.\ 993 | \>", "Text", 994 | CellChangeTimes->{3.562324227500272*^9}], 995 | 996 | Cell[CellGroupData[{ 997 | 998 | Cell[BoxData[{ 999 | RowBox[{ 1000 | RowBox[{"sortedBigrams", "=", 1001 | RowBox[{"Sort", "[", 1002 | RowBox[{ 1003 | RowBox[{"Tally", "[", "bigrams", "]"}], ",", 1004 | RowBox[{ 1005 | RowBox[{ 1006 | RowBox[{"#1", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}], 1007 | ">", 1008 | RowBox[{"#2", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 1009 | "&"}]}], "]"}]}], ";"}], "\n", 1010 | RowBox[{"Short", "[", 1011 | RowBox[{"sortedBigrams", ",", "8"}], "]"}]}], "Input", 1012 | CellChangeTimes->{{3.513510290365209*^9, 3.5135103324186897`*^9}, { 1013 | 3.5135103893801107`*^9, 3.513510399579616*^9}, 
{3.5135104731676073`*^9, 1014 | 3.513510496853149*^9}, {3.5135105329127913`*^9, 3.5135105359846373`*^9}, 1015 | 3.547034029436455*^9, {3.5623242414631033`*^9, 3.5623242416709146`*^9}}], 1016 | 1017 | Cell[BoxData[ 1018 | TagBox[ 1019 | RowBox[{"{", 1020 | RowBox[{ 1021 | RowBox[{"{", 1022 | RowBox[{ 1023 | RowBox[{"{", 1024 | RowBox[{"\<\"of\"\>", ",", "\<\"the\"\>"}], "}"}], ",", "21"}], "}"}], 1025 | ",", 1026 | RowBox[{"{", 1027 | RowBox[{ 1028 | RowBox[{"{", 1029 | RowBox[{"\<\"in\"\>", ",", "\<\"the\"\>"}], "}"}], ",", "13"}], "}"}], 1030 | ",", 1031 | RowBox[{"{", 1032 | RowBox[{ 1033 | RowBox[{"{", 1034 | RowBox[{"\<\"i\"\>", ",", "\<\"have\"\>"}], "}"}], ",", "11"}], "}"}], 1035 | ",", 1036 | RowBox[{"{", 1037 | RowBox[{ 1038 | RowBox[{"{", 1039 | RowBox[{"\<\"to\"\>", ",", "\<\"the\"\>"}], "}"}], ",", "11"}], "}"}], 1040 | ",", 1041 | RowBox[{"{", 1042 | RowBox[{ 1043 | RowBox[{"{", 1044 | RowBox[{"\<\"which\"\>", ",", "\<\"i\"\>"}], "}"}], ",", "7"}], "}"}], 1045 | ",", 1046 | RowBox[{"{", 1047 | RowBox[{ 1048 | RowBox[{"{", 1049 | RowBox[{"\<\"to\"\>", ",", "\<\"me\"\>"}], "}"}], ",", "7"}], "}"}], 1050 | ",", 1051 | RowBox[{"{", 1052 | RowBox[{ 1053 | RowBox[{"{", 1054 | RowBox[{"\<\"of\"\>", ",", "\<\"species\"\>"}], "}"}], ",", "6"}], 1055 | "}"}], ",", 1056 | RowBox[{"{", 1057 | RowBox[{ 1058 | RowBox[{"{", 1059 | RowBox[{"\<\"i\"\>", ",", "\<\"shall\"\>"}], "}"}], ",", "5"}], "}"}], 1060 | ",", 1061 | RowBox[{"\[LeftSkeleton]", "1420", "\[RightSkeleton]"}], ",", 1062 | RowBox[{"{", 1063 | RowBox[{ 1064 | RowBox[{"{", 1065 | RowBox[{"\<\"beagle\"\>", ",", "\<\"as\"\>"}], "}"}], ",", "1"}], 1066 | "}"}], ",", 1067 | RowBox[{"{", 1068 | RowBox[{ 1069 | RowBox[{"{", 1070 | RowBox[{"\<\"s\"\>", ",", "\<\"beagle\"\>"}], "}"}], ",", "1"}], "}"}], 1071 | ",", 1072 | RowBox[{"{", 1073 | RowBox[{ 1074 | RowBox[{"{", 1075 | RowBox[{"\<\"m\"\>", ",", "\<\"s\"\>"}], "}"}], ",", "1"}], "}"}], ",", 1076 | RowBox[{"{", 1077 | RowBox[{ 1078 | RowBox[{"{", 1079 | 
RowBox[{"\<\"h\"\>", ",", "\<\"m\"\>"}], "}"}], ",", "1"}], "}"}], ",", 1080 | RowBox[{"{", 1081 | RowBox[{ 1082 | RowBox[{"{", 1083 | RowBox[{"\<\"board\"\>", ",", "\<\"h\"\>"}], "}"}], ",", "1"}], "}"}], 1084 | ",", 1085 | RowBox[{"{", 1086 | RowBox[{ 1087 | RowBox[{"{", 1088 | RowBox[{"\<\"on\"\>", ",", "\<\"board\"\>"}], "}"}], ",", "1"}], "}"}], 1089 | ",", 1090 | RowBox[{"{", 1091 | RowBox[{ 1092 | RowBox[{"{", 1093 | RowBox[{"\<\"when\"\>", ",", "\<\"on\"\>"}], "}"}], ",", "1"}], "}"}], 1094 | ",", 1095 | RowBox[{"{", 1096 | RowBox[{ 1097 | RowBox[{"{", 1098 | RowBox[{"\<\"introduction\"\>", ",", "\<\"when\"\>"}], "}"}], ",", 1099 | "1"}], "}"}]}], "}"}], 1100 | Short[#, 8]& ]], "Output", 1101 | CellChangeTimes->{{3.5623242378483143`*^9, 3.5623242422323437`*^9}}] 1102 | }, Open ]] 1103 | }, Open ]], 1104 | 1105 | Cell[CellGroupData[{ 1106 | 1107 | Cell["\<\ 1108 | Concordance (Keyword in Context)\ 1109 | \>", "Subsection", 1110 | CellChangeTimes->{{3.5623242592493896`*^9, 3.562324280830934*^9}}], 1111 | 1112 | Cell[TextData[{ 1113 | "A concordance shows keywords in the context of surrounding words. We can \ 1114 | make one of these quite easily if we start by generating n-grams. Then we \ 1115 | use ", 1116 | StyleBox["Cases", 1117 | FontWeight->"Bold"], 1118 | " to pull out all of the 5-grams in the Introduction that have \ 1119 | \[OpenCurlyDoubleQuote]organic\[CloseCurlyDoubleQuote] as the middle word \ 1120 | (for example), and format the output with the ", 1121 | StyleBox["TableForm", 1122 | FontWeight->"Bold"], 1123 | " command." 
1124 | }], "Text", 1125 | CellChangeTimes->{{3.562324326798587*^9, 3.562324385171234*^9}, { 1126 | 3.56232446163906*^9, 3.562324461677747*^9}}], 1127 | 1128 | Cell[CellGroupData[{ 1129 | 1130 | Cell[BoxData[{ 1131 | RowBox[{ 1132 | RowBox[{"fivegrams", "=", 1133 | RowBox[{"Partition", "[", 1134 | RowBox[{"lowerIntroList", ",", "5", ",", "1"}], "]"}]}], 1135 | ";"}], "\[IndentingNewLine]", 1136 | RowBox[{"TableForm", "[", 1137 | RowBox[{"Cases", "[", 1138 | RowBox[{"fivegrams", ",", 1139 | RowBox[{"{", 1140 | RowBox[{"_", ",", "_", ",", "\"\\"", ",", "_", ",", "_"}], 1141 | "}"}]}], "]"}], "]"}]}], "Input", 1142 | CellChangeTimes->{{3.513511248311277*^9, 3.513511261603615*^9}, { 1143 | 3.513532398286975*^9, 3.513532403940528*^9}, {3.5470253189831877`*^9, 1144 | 3.547025321636856*^9}, {3.5623243963340387`*^9, 3.562324454392639*^9}}], 1145 | 1146 | Cell[BoxData[ 1147 | TagBox[GridBox[{ 1148 | {"\<\"affinities\"\>", "\<\"of\"\>", "\<\"organic\"\>", "\<\"beings\"\>", \ 1149 | "\<\"on\"\>"}, 1150 | {"\<\"several\"\>", "\<\"distinct\"\>", "\<\"organic\"\>", \ 1151 | "\<\"beings\"\>", "\<\"by\"\>"}, 1152 | {"\<\"coadaptations\"\>", "\<\"of\"\>", "\<\"organic\"\>", \ 1153 | "\<\"beings\"\>", "\<\"to\"\>"}, 1154 | {"\<\"amongst\"\>", "\<\"all\"\>", "\<\"organic\"\>", "\<\"beings\"\>", "\ 1155 | \<\"throughout\"\>"}, 1156 | {"\<\"succession\"\>", "\<\"of\"\>", "\<\"organic\"\>", "\<\"beings\"\>", \ 1157 | "\<\"throughout\"\>"} 1158 | }, 1159 | GridBoxAlignment->{ 1160 | "Columns" -> {{Left}}, "ColumnsIndexed" -> {}, "Rows" -> {{Baseline}}, 1161 | "RowsIndexed" -> {}}, 1162 | GridBoxSpacings->{"Columns" -> { 1163 | Offset[0.27999999999999997`], { 1164 | Offset[2.0999999999999996`]}, 1165 | Offset[0.27999999999999997`]}, "ColumnsIndexed" -> {}, "Rows" -> { 1166 | Offset[0.2], { 1167 | Offset[0.4]}, 1168 | Offset[0.2]}, "RowsIndexed" -> {}}], 1169 | Function[BoxForm`e$, 1170 | TableForm[BoxForm`e$]]]], "Output", 1171 | CellChangeTimes->{{3.562324406004394*^9, 
3.562324456843721*^9}}] 1172 | }, Open ]] 1173 | }, Open ]], 1174 | 1175 | Cell[CellGroupData[{ 1176 | 1177 | Cell["Removing stop words", "Subsection", 1178 | CellChangeTimes->{{3.562324522909973*^9, 3.562324525212741*^9}}], 1179 | 1180 | Cell[TextData[{ 1181 | StyleBox["Mathematica", 1182 | FontSlant->"Italic"], 1183 | " has access to a lot of built-in, curated data. Here we grab a list of \ 1184 | English stopwords." 1185 | }], "Text", 1186 | CellChangeTimes->{3.562324539869426*^9}], 1187 | 1188 | Cell[CellGroupData[{ 1189 | 1190 | Cell[BoxData[{ 1191 | RowBox[{ 1192 | RowBox[{"stopWords", "=", 1193 | RowBox[{"WordData", "[", 1194 | RowBox[{"All", ",", "\"\\""}], "]"}]}], ";"}], "\n", 1195 | RowBox[{"Short", "[", 1196 | RowBox[{"stopWords", ",", "4"}], "]"}]}], "Input", 1197 | CellChangeTimes->{{3.499275571943419*^9, 3.499275583703279*^9}, 1198 | 3.499496072207768*^9, 3.5135124310886517`*^9, 3.5470340537170887`*^9, { 1199 | 3.5623245520613337`*^9, 3.562324552269104*^9}}], 1200 | 1201 | Cell[BoxData[ 1202 | TagBox[ 1203 | RowBox[{"{", 1204 | RowBox[{"\<\"0\"\>", ",", "\<\"1\"\>", ",", "\<\"2\"\>", ",", "\<\"3\"\>", 1205 | ",", "\<\"4\"\>", ",", "\<\"5\"\>", ",", "\<\"6\"\>", ",", "\<\"7\"\>", 1206 | ",", "\<\"8\"\>", ",", "\<\"9\"\>", ",", "\<\"a\"\>", ",", "\<\"A\"\>", 1207 | ",", "\<\"about\"\>", ",", "\<\"above\"\>", ",", 1208 | RowBox[{"\[LeftSkeleton]", "234", "\[RightSkeleton]"}], 1209 | ",", "\<\"with\"\>", ",", "\<\"within\"\>", ",", "\<\"without\"\>", 1210 | ",", "\<\"would\"\>", ",", "\<\"x\"\>", ",", "\<\"X\"\>", 1211 | ",", "\<\"y\"\>", ",", "\<\"Y\"\>", ",", "\<\"yet\"\>", 1212 | ",", "\<\"you\"\>", ",", "\<\"your\"\>", ",", "\<\"yours\"\>", 1213 | ",", "\<\"z\"\>", ",", "\<\"Z\"\>"}], "}"}], 1214 | Short[#, 4]& ]], "Output", 1215 | CellChangeTimes->{3.562324552772784*^9}] 1216 | }, Open ]], 1217 | 1218 | Cell[TextData[{ 1219 | "The ", 1220 | StyleBox["Select", 1221 | FontWeight->"Bold"], 1222 | " command allows us to use a function to pull items from 
a list. We want \ 1223 | everything that is not a member of the list of stop words." 1224 | }], "Text", 1225 | CellChangeTimes->{{3.513512623332355*^9, 3.5135127088852262`*^9}, { 1226 | 3.5135127707119637`*^9, 3.513512771553851*^9}, 3.51351283002063*^9, { 1227 | 3.513513043781899*^9, 3.513513107505973*^9}, {3.513532653035204*^9, 1228 | 3.513532699274063*^9}}], 1229 | 1230 | Cell[CellGroupData[{ 1231 | 1232 | Cell[BoxData[ 1233 | RowBox[{"Short", "[", 1234 | RowBox[{"lowerIntroList", ",", "8"}], "]"}]], "Input", 1235 | CellChangeTimes->{{3.513513147106862*^9, 3.513513160617852*^9}, { 1236 | 3.547025477804221*^9, 3.5470254801483927`*^9}, {3.562324595666078*^9, 1237 | 3.562324600706121*^9}}], 1238 | 1239 | Cell[BoxData[ 1240 | TagBox[ 1241 | RowBox[{"{", 1242 | RowBox[{"\<\"introduction\"\>", ",", "\<\"when\"\>", ",", "\<\"on\"\>", 1243 | ",", "\<\"board\"\>", ",", "\<\"h\"\>", ",", "\<\"m\"\>", 1244 | ",", "\<\"s\"\>", ",", "\<\"beagle\"\>", ",", "\<\"as\"\>", 1245 | ",", "\<\"naturalist\"\>", ",", "\<\"i\"\>", ",", "\<\"was\"\>", 1246 | ",", "\<\"much\"\>", ",", "\<\"struck\"\>", ",", "\<\"with\"\>", 1247 | ",", "\<\"certain\"\>", ",", "\<\"facts\"\>", ",", "\<\"in\"\>", 1248 | ",", "\<\"the\"\>", ",", 1249 | RowBox[{"\[LeftSkeleton]", "1676", "\[RightSkeleton]"}], 1250 | ",", "\<\"species\"\>", ",", "\<\"furthermore\"\>", ",", "\<\"i\"\>", 1251 | ",", "\<\"am\"\>", ",", "\<\"convinced\"\>", ",", "\<\"that\"\>", 1252 | ",", "\<\"natural\"\>", ",", "\<\"selection\"\>", ",", "\<\"has\"\>", 1253 | ",", "\<\"been\"\>", ",", "\<\"the\"\>", ",", "\<\"main\"\>", 1254 | ",", "\<\"but\"\>", ",", "\<\"not\"\>", ",", "\<\"exclusive\"\>", 1255 | ",", "\<\"means\"\>", ",", "\<\"of\"\>", ",", "\<\"modification\"\>", 1256 | ",", "\<\"chapter\"\>"}], "}"}], 1257 | Short[#, 8]& ]], "Output", 1258 | CellChangeTimes->{{3.562324596762402*^9, 3.562324602145632*^9}}] 1259 | }, Open ]], 1260 | 1261 | Cell[CellGroupData[{ 1262 | 1263 | Cell[BoxData[{ 1264 | RowBox[{ 1265 | 
RowBox[{"lowerIntroNoStopwords", "=", 1266 | RowBox[{"Select", "[", 1267 | RowBox[{"lowerIntroList", ",", 1268 | RowBox[{ 1269 | RowBox[{"Not", "[", 1270 | RowBox[{"MemberQ", "[", 1271 | RowBox[{"stopWords", ",", "#"}], "]"}], "]"}], "&"}]}], "]"}]}], 1272 | ";"}], "\n", 1273 | RowBox[{"Short", "[", 1274 | RowBox[{"lowerIntroNoStopwords", ",", "8"}], "]"}]}], "Input", 1275 | CellChangeTimes->{{3.5135129300288277`*^9, 3.513513010646536*^9}, { 1276 | 3.547025491678665*^9, 3.5470254984043627`*^9}, 3.547034064615868*^9, { 1277 | 3.562324615740595*^9, 3.5623246162252617`*^9}}], 1278 | 1279 | Cell[BoxData[ 1280 | TagBox[ 1281 | RowBox[{"{", 1282 | RowBox[{"\<\"introduction\"\>", ",", "\<\"board\"\>", 1283 | ",", "\<\"beagle\"\>", ",", "\<\"naturalist\"\>", ",", "\<\"struck\"\>", 1284 | ",", "\<\"certain\"\>", ",", "\<\"facts\"\>", ",", "\<\"distribution\"\>", 1285 | ",", "\<\"inhabitants\"\>", ",", "\<\"south\"\>", ",", "\<\"america\"\>", 1286 | ",", "\<\"geological\"\>", ",", "\<\"relations\"\>", 1287 | ",", "\<\"present\"\>", ",", 1288 | RowBox[{"\[LeftSkeleton]", "697", "\[RightSkeleton]"}], 1289 | ",", "\<\"species\"\>", ",", "\<\"descendants\"\>", 1290 | ",", "\<\"species\"\>", ",", "\<\"furthermore\"\>", ",", "\<\"am\"\>", 1291 | ",", "\<\"convinced\"\>", ",", "\<\"natural\"\>", 1292 | ",", "\<\"selection\"\>", ",", "\<\"main\"\>", ",", "\<\"exclusive\"\>", 1293 | ",", "\<\"means\"\>", ",", "\<\"modification\"\>", 1294 | ",", "\<\"chapter\"\>"}], "}"}], 1295 | Short[#, 8]& ]], "Output", 1296 | CellChangeTimes->{3.562324620666235*^9}] 1297 | }, Open ]] 1298 | }, Open ]], 1299 | 1300 | Cell[CellGroupData[{ 1301 | 1302 | Cell["\<\ 1303 | Bigrams containing the most frequent words\ 1304 | \>", "Subsection", 1305 | CellChangeTimes->{{3.562324646137566*^9, 3.562324651472574*^9}}], 1306 | 1307 | Cell["\<\ 1308 | Here is a more complicated example built mostly from functions we've already \ 1309 | seen. 
We start by finding the ten most frequently occurring words once we have \ 1310 | gotten rid of stop words.\ 1311 | \>", "Text", 1312 | CellChangeTimes->{{3.513532746012711*^9, 3.513532772355875*^9}, { 1313 | 3.513532831983807*^9, 3.51353284073367*^9}, {3.5135330147323303`*^9, 1314 | 3.51353303978685*^9}, {3.547025684554604*^9, 3.5470256861837*^9}, { 1315 | 3.5623246655860147`*^9, 3.5623247109837523`*^9}}], 1316 | 1317 | Cell[CellGroupData[{ 1318 | 1319 | Cell[BoxData[ 1320 | RowBox[{"freqWordCounts", "=", 1321 | RowBox[{"Take", "[", 1322 | RowBox[{ 1323 | RowBox[{"Sort", "[", 1324 | RowBox[{ 1325 | RowBox[{"Tally", "[", 1326 | RowBox[{"Take", "[", 1327 | RowBox[{"lowerIntroNoStopwords", ",", 1328 | RowBox[{"{", 1329 | RowBox[{"1", ",", 1330 | RowBox[{"-", "120"}]}], "}"}]}], "]"}], "]"}], ",", 1331 | RowBox[{ 1332 | RowBox[{ 1333 | RowBox[{"#1", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}], 1334 | ">", 1335 | RowBox[{ 1336 | "#2", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 1337 | "&"}]}], "]"}], ",", "10"}], "]"}]}]], "Input", 1338 | CellChangeTimes->{{3.5135145532797327`*^9, 3.513514594887085*^9}, { 1339 | 3.513514672972118*^9, 3.5135147448911324`*^9}, {3.5135150926695423`*^9, 1340 | 3.51351510454635*^9}, {3.513515135182817*^9, 3.513515151272627*^9}, { 1341 | 3.5135151814001083`*^9, 3.513515191505727*^9}, {3.54702569879628*^9, 1342 | 3.5470257000921593`*^9}, {3.547026013587479*^9, 3.5470260138383827`*^9}, 1343 | 3.5470261101556463`*^9, 3.547026183747919*^9, 3.5470269886790047`*^9, { 1344 | 3.54702708380752*^9, 3.547027096592635*^9}}], 1345 | 1346 | Cell[BoxData[ 1347 | RowBox[{"{", 1348 | RowBox[{ 1349 | RowBox[{"{", 1350 | RowBox[{"\<\"shall\"\>", ",", "9"}], "}"}], ",", 1351 | RowBox[{"{", 1352 | RowBox[{"\<\"species\"\>", ",", "9"}], "}"}], ",", 1353 | RowBox[{"{", 1354 | RowBox[{"\<\"facts\"\>", ",", "9"}], "}"}], ",", 1355 | RowBox[{"{", 1356 | RowBox[{"\<\"chapter\"\>", ",", "5"}], "}"}], ",", 1357 | RowBox[{"{", 1358 | 
RowBox[{"\<\"variation\"\>", ",", "5"}], "}"}], ",", 1359 | RowBox[{"{", 1360 | RowBox[{"\<\"conditions\"\>", ",", "5"}], "}"}], ",", 1361 | RowBox[{"{", 1362 | RowBox[{"\<\"beings\"\>", ",", "5"}], "}"}], ",", 1363 | RowBox[{"{", 1364 | RowBox[{"\<\"organic\"\>", ",", "5"}], "}"}], ",", 1365 | RowBox[{"{", 1366 | RowBox[{"\<\"conclusions\"\>", ",", "5"}], "}"}], ",", 1367 | RowBox[{"{", 1368 | RowBox[{"\<\"subject\"\>", ",", "5"}], "}"}]}], "}"}]], "Output", 1369 | CellChangeTimes->{3.562324686758062*^9}] 1370 | }, Open ]], 1371 | 1372 | Cell["\<\ 1373 | We remove a few of the words we are not interested in.\ 1374 | \>", "Text", 1375 | CellChangeTimes->{{3.5623247241103897`*^9, 3.5623247322703*^9}}], 1376 | 1377 | Cell[BoxData[ 1378 | RowBox[{ 1379 | RowBox[{"freqWords", "=", 1380 | RowBox[{"Complement", "[", 1381 | RowBox[{ 1382 | RowBox[{"Map", "[", 1383 | RowBox[{"First", ",", "freqWordCounts"}], "]"}], ",", 1384 | RowBox[{"{", 1385 | RowBox[{"\"\\"", ",", "\"\\""}], "}"}]}], "]"}]}], 1386 | ";"}]], "Input", 1387 | CellChangeTimes->{{3.513514730041527*^9, 3.5135147557389517`*^9}, 1388 | 3.513514834542293*^9, {3.547026086755148*^9, 3.5470261506004257`*^9}, { 1389 | 3.547026194468732*^9, 3.547026218639434*^9}, {3.5470262516109037`*^9, 1390 | 3.547026252633079*^9}, 3.5470269632673264`*^9, {3.547027021401208*^9, 1391 | 3.547027024152776*^9}, {3.547027077013763*^9, 3.547027080061672*^9}, { 1392 | 3.5470271199948263`*^9, 3.547027121001843*^9}}], 1393 | 1394 | Cell["\<\ 1395 | We rewrite the bigrams as a list of graph edges. 
This will be useful for \ 1396 | visualizing the results as a network.\ 1397 | \>", "Text", 1398 | CellChangeTimes->{{3.5623247459347143`*^9, 3.562324786933663*^9}}], 1399 | 1400 | Cell[CellGroupData[{ 1401 | 1402 | Cell[BoxData[{ 1403 | RowBox[{ 1404 | RowBox[{"edgeList", "=", 1405 | RowBox[{"Map", "[", 1406 | RowBox[{ 1407 | RowBox[{ 1408 | RowBox[{ 1409 | RowBox[{"#", "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}], 1410 | "\[Rule]", 1411 | RowBox[{"#", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 1412 | "&"}], ",", 1413 | RowBox[{"Partition", "[", 1414 | RowBox[{"lowerIntroNoStopwords", ",", "2", ",", "1"}], "]"}]}], "]"}]}], 1415 | ";"}], "\n", 1416 | RowBox[{"Short", "[", 1417 | RowBox[{"edgeList", ",", "4"}], "]"}]}], "Input", 1418 | CellChangeTimes->{{3.513513320050641*^9, 3.513513364871571*^9}, { 1419 | 3.513513445922579*^9, 3.5135134473689947`*^9}, {3.51351348391549*^9, 1420 | 3.513513486475466*^9}, {3.5135138218684683`*^9, 3.5135138234836407`*^9}, { 1421 | 3.513513872189485*^9, 3.5135138747647057`*^9}, 3.513532794696472*^9, { 1422 | 3.5135329384986773`*^9, 3.5135329394772797`*^9}, {3.547025738865313*^9, 1423 | 3.547025739360914*^9}, 3.547034082827572*^9, {3.56232476992789*^9, 1424 | 3.5623247708726587`*^9}}], 1425 | 1426 | Cell[BoxData[ 1427 | TagBox[ 1428 | RowBox[{"{", 1429 | RowBox[{ 1430 | RowBox[{"\<\"introduction\"\>", "\[Rule]", "\<\"board\"\>"}], ",", 1431 | RowBox[{"\<\"board\"\>", "\[Rule]", "\<\"beagle\"\>"}], ",", 1432 | RowBox[{"\<\"beagle\"\>", "\[Rule]", "\<\"naturalist\"\>"}], ",", 1433 | RowBox[{"\[LeftSkeleton]", "717", "\[RightSkeleton]"}], ",", 1434 | RowBox[{"\<\"exclusive\"\>", "\[Rule]", "\<\"means\"\>"}], ",", 1435 | RowBox[{"\<\"means\"\>", "\[Rule]", "\<\"modification\"\>"}], ",", 1436 | RowBox[{"\<\"modification\"\>", "\[Rule]", "\<\"chapter\"\>"}]}], "}"}], 1437 | Short[#, 4]& ]], "Output", 1438 | CellChangeTimes->{{3.562324765816613*^9, 3.562324771351297*^9}}] 1439 | }, Open ]], 1440 | 1441 | Cell["\<\ 
1442 | We keep only the bigrams in which at least one of the two words is among our most frequent words.\ 1443 | \>", "Text", 1444 | CellChangeTimes->{{3.562324796885623*^9, 3.562324805757053*^9}}], 1445 | 1446 | Cell[CellGroupData[{ 1447 | 1448 | Cell[BoxData[{ 1449 | RowBox[{ 1450 | RowBox[{"freqBigrams", "=", 1451 | RowBox[{"Union", "[", 1452 | RowBox[{ 1453 | RowBox[{"Select", "[", 1454 | RowBox[{"edgeList", ",", 1455 | RowBox[{ 1456 | RowBox[{"MemberQ", "[", 1457 | RowBox[{"freqWords", ",", 1458 | RowBox[{ 1459 | "#", "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}]}], 1460 | "]"}], "&"}]}], "]"}], ",", "\[IndentingNewLine]", 1461 | RowBox[{"Select", "[", 1462 | RowBox[{"edgeList", ",", 1463 | RowBox[{ 1464 | RowBox[{"MemberQ", "[", 1465 | RowBox[{"freqWords", ",", 1466 | RowBox[{ 1467 | "#", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 1468 | "]"}], "&"}]}], "]"}]}], "]"}]}], ";"}], "\n", 1469 | RowBox[{"Short", "[", 1470 | RowBox[{"freqBigrams", ",", " ", "4"}], "]"}]}], "Input", 1471 | CellChangeTimes->{{3.513514764485812*^9, 3.513514992882855*^9}, { 1472 | 3.5135329761651907`*^9, 3.51353298061662*^9}, 3.547034090860498*^9, { 1473 | 3.56232482031118*^9, 3.562324826774181*^9}}], 1474 | 1475 | Cell[BoxData[ 1476 | TagBox[ 1477 | RowBox[{"{", 1478 | RowBox[{ 1479 | RowBox[{"\<\"abstract\"\>", "\[Rule]", "\<\"variation\"\>"}], ",", 1480 | RowBox[{"\<\"affinities\"\>", "\[Rule]", "\<\"organic\"\>"}], ",", 1481 | RowBox[{"\<\"allied\"\>", "\[Rule]", "\<\"species\"\>"}], ",", 1482 | RowBox[{"\[LeftSkeleton]", "87", "\[RightSkeleton]"}], ",", 1483 | RowBox[{"\<\"varieties\"\>", "\[Rule]", "\<\"species\"\>"}], ",", 1484 | RowBox[{"\<\"varying\"\>", "\[Rule]", "\<\"conditions\"\>"}], ",", 1485 | RowBox[{"\<\"volume\"\>", "\[Rule]", "\<\"facts\"\>"}]}], "}"}], 1486 | Short[#, 4]& ]], "Output", 1487 | CellChangeTimes->{{3.562324816540305*^9, 3.562324827490951*^9}}] 1488 | }, Open ]], 1489 | 1490 | Cell["\<\ 1491 | Finally we can visualize the results as a network. 
When you are exploring a \ 1492 | text this way, you often want to keep tweaking your parameters and see if \ 1493 | anything interesting comes up.\ 1494 | \>", "Text", 1495 | CellChangeTimes->{{3.562324834709869*^9, 3.5623249033215113`*^9}}], 1496 | 1497 | Cell[CellGroupData[{ 1498 | 1499 | Cell[BoxData[ 1500 | RowBox[{"Framed", "[", 1501 | RowBox[{"Pane", "[", 1502 | RowBox[{ 1503 | RowBox[{"GraphPlot", "[", 1504 | RowBox[{"freqBigrams", ",", 1505 | RowBox[{"Method", "\[Rule]", 1506 | RowBox[{"{", 1507 | RowBox[{"\"\\"", ",", 1508 | RowBox[{"\"\\"", "\[Rule]", ".1"}], ",", 1509 | RowBox[{"\"\\"", "\[Rule]", 1510 | RowBox[{"-", "4"}]}]}], "}"}]}], ",", 1511 | RowBox[{"VertexLabeling", "\[Rule]", "True"}], ",", 1512 | RowBox[{"DirectedEdges", "\[Rule]", "True"}], ",", 1513 | RowBox[{"ImageSize", "\[Rule]", 1514 | RowBox[{"{", 1515 | RowBox[{"1100", ",", "800"}], "}"}]}]}], "]"}], ",", 1516 | RowBox[{"{", 1517 | RowBox[{"400", ",", "400"}], "}"}], ",", 1518 | RowBox[{"Scrollbars", "\[Rule]", "True"}], ",", " ", 1519 | RowBox[{"ScrollPosition", "\[Rule]", 1520 | RowBox[{"{", 1521 | RowBox[{"400", ",", "200"}], "}"}]}]}], "]"}], "]"}]], "Input", 1522 | CellChangeTimes->CompressedData[" 1523 | 1:eJxTTMoPSmViYGAQBmIQ/dKFpale95Wj9kH1FhD9Y8udmSD6jYvcXhC9pP/P 1524 | MRDdplRyDUTvPXaBuwFILwuapAiieU5HqILoJ5OXaoBo9o4OExAdu+WoNYi+ 1525 | 9O+BK4gul98TCqIPHJwXDaJXuEzIAdP/bphm5b5yPMNwyR5EJ+nY5oFokbLI 1526 | YhA97fajChAdwX2kEkSvc/5dD6Izi9J6QfTna84LQbQby40NIPrFQo5tIFqX 1527 | U2k7iN7y4vtuEH1qk+8BEG2xNPQOiPY64fEBRJs6LW2b0fXKUSXPoh1E37lV 1528 | NAdE2+wLWASiOXbsXAaiq6zXbQfRAF7PnCQ= 1529 | "]], 1530 | 1531 | Cell[BoxData[ 1532 | FrameBox[ 1533 | PaneBox[ 1534 | GraphicsBox[ 1535 | TagBox[GraphicsComplexBox[CompressedData[" 1536 | 1:eJw1lns41AkXx2fmNzeMy5BLblEpt0QJK3VOkRCtShm3TXIpt5Iol1KSskvk 1537 | UstKSGRlddmK0qIt4tUIbchLkYqR61BjjJnX+zzve57nPN//z+f5fr9H1+/I 1538 | 7gAKiUTKW9z/ajF/Yev1SiUs4Bdy+t1n4HtvfOdPPSqoXqOmoNEwCnbvTEo2 1539 | 
J7Fx86SAqdwgh4bPc0mejgoo/iB+cRRYeL4lZVOksyzWXcifO2Q3DYOXdBr1 1540 | /ChYFIrW5xPFkPpD4s50fSXMDiHbx3FlcaV7QtdolRT2a+tdm2im4GRqrVHB 1541 | MhkMtDjNC2hkYO621ljyuBQmZiw5xRDLYMP6mpq/a+iYx51qci9k4s4wad34 1542 | QAJLAzp2hM3K4GDxPedMXRI+NNkn0y3PwuwlFxIXNkhjVdycukBVAr9oqo3e 1543 | 0heClVsG9YGmGB6d8o///JqFZ1+0pWw0pmO9dkzVw0omDoyPxvnKMfC5vqt7 1544 | 1Y8qaNX5U7D4QA+kRJoqWp5noX5c+pceVzbWZ1WYWZxRQPUw8ztdr4SgPaHT 1545 | f3i3Esr7rzgRJEdD6XWB1b9+Z6PGtssz5X00FFyir+kKl8NE74zOugMU/Pfv 1546 | Oi61TnJ4qKSeOx4njR2WW3eamM2DkeJ+XT/1KZirzbrLpc5AZLexH7mIjK25 1547 | Fxxf9/Pg2EddDlmFgldMvPpeMkiYsPC21cFdDA9zjDOt+RQ0argeej+OiQ7x 1548 | pbs8V/PAUOeQZEnpAjRz2KxrR4VwOSZeeWAXBe8kpMbmn+iHHwxlwz76TgLp 1549 | f9NYuc3+xbKPUJ073KyxmQfF2W1mY6Yj4At2cxKPe8BUSYhRkhqHYV5qpwaZ 1550 | D6x7k40Bht+A+iY+kUmTRahxPJB0QH6RD1WnS5HA52bY2GNNwxGfA6W9dCZG 1551 | FT1JK19HRslUa+w6GymMNn3ZqhYshYZ3i97VBSngurHB9iOxFHTz3phLpiki 1552 | 75X3aJ8hgbZl5cdhSghZmb5e0QXfYZJvHlvFewfTfvOe+658BOeSP2sJJxIO 1553 | NrgYBenSsHs59UZYgQw6KClYP7OQRnrq5cfx2ixcFapCChiioOjxlbuCWXn8 1554 | kPVGy3KDFIpuioyohxVQbtOD/dfbZsF6f+6T51IK2FQYOhCwg47dSc5U2h5p 1555 | 3JzQ9MTEgYbVlxU/cAylEf1DYlgLn6EpZuHouV/ZmMmb362sKY1ycY6q3rIq 1556 | aG1VRnq+ZhrEx4YcxOWy+IUZMiJjKgauiEhXuzoN74+VBPF6qcjT9UhV5aji 1557 | /zlk3+9YphPLg08vBZz6Z1T8R7Bz21/hFNQSrH2m1TYDaicFh3Pe09DOpSKS 1558 | UULBbJcIZTi+FLfOXxtS8hkGr3Yv2s9Kc2DakMsJYdNw39vzDNdXbEzanvO6 1559 | qFgM7RFrslNiqKglzT/FjBLBmo9Mq9BuNlaEeQ4o2c9BXe/taEYpHQcn3NYH 1560 | Dckgtak67zqNDxk3eyzLexdgyzbnjKFXUmhleu7kHVt5/Muq0U3GjMB7K8X0 1561 | IAEZ0zZ0ReZ/lcLTZ2N+3JImgOeE+hKSjDQ6sTLAXGoa3NX1jD8l8SFp7ojd 1562 | VT0mRocNv2lVp6Ky0O5WftYUBFZ0nKTdpmPq4LNf5B0FcNCjc+OuFjpyPJu5 1563 | eq+mYCVfOnCTmMA/3ndHuaoI4LCqdmh1LQnh6f78o5wpePzp7E1NAwUc6X7j 1564 | sZAsgeDYO7dUk0j4OeKR1kQ/GdNd0rSajGRxOOLOILdRALdlGgJ2pi5yjh9Z 1565 | rfdGDNszPMo5+TQ8rhdXNL7vM+j+vGw79zEJdat5nq0D38FQ/CCCYSqLibN5 1566 | zlxdFu5tLUv5souOULe0zdWDhju1mvpvr1XDD6ZN8Yk/8iDazO5FhcZSpEfI 1567 | uGuXTcPHgQsT22WX4qGiW741sr3gW2r7NZhcD+5kP/ra1FloX51Yv95NFtU0 1568 | 
tG6IXJjYZmS2QYdHQWYOk+6zXgIp4TN503KL+cdLXXb0gAQqh2py0swJrOUg 1569 | ZzhVAg8f7PL65ElgtYWTfPUDCZhVen0eO0ug/6hhTEevBFQZLGFeOYExb/+6 1570 | YSSRwJZ88qoP7QRm/Sy2+bc8CbusjVvuCAkMjq0++EibhD3171dk2dCxf3OG 1571 | T+UnEg6ciDo+GEBHXdtLV9X4JDT4dfbUjXQ6+sp2VslLSHhD38qgeTGvfQ6y 1572 | Pl+SIqP1xYtgP0THAbP6iUxFMvbxm8Nl5RkYWek2p6ZORp2SEhe2NQM1TPil 1573 | 2jpk9Fn/eL9dAAN1E7YpF68kYwGHflDwjYJRq71FxlUi2NYb0VGtReB9Wk9n 1574 | qakIrsRqnbfZSuC7+/b2p6vmgeJ5iHnSn8DCDZ3tvUbz8Jugq9A3iUAn/cNP 1575 | hm8KQSUjyuVrEYFXkurIbVpC2COcbGE/JfCczlrXmqw5uKR78crLfwhsZu+e 1576 | 6mDMgcbyDndbHRq+HXMVBu6Ygk5F0d1zFjQsER9p6X06Cat63hbaO9FQGPXM 1577 | oNtkEmoD3afPe9HwDF2qj71pAu61W2+2CaZh4p72Sgv1cShyrf7iH01DB/32 1578 | 2ZjZr/B7CF9blEDDziR+nGXbKFwaXzMoTqahjcbes/8q44G43tVy4DQDbdO6 1579 | yja9I6Hd6bR2OU8GRvkpa5j/TUId5su3keYMDK54Ij5RSUJVS1O25mKPhdRa 1580 | rFDIJWE+rEpW+kJHFT/7cUryYk9+ijoWWEfHX0JFnH3HSZjqesZe9yodzV36 1581 | xuT9Sfjt7t5i13A6/uSzosl8LwlnjIodwo2p+Nrsat4pTQl8Pc4SaBNUdDL4 1582 | bVUZWQIlbqnjPj0EcrPZ36pHxGD43NHWqorAN8MFKY8X/ZTpP2f8ZPH+LTss 1583 | o+89E8Ok4NEC34PAW25yB0vvi+EPhz7tSRMC3fe0M34rE8PRmibWU4JA57vG 1584 | RtkFi/9DYyQrmUFDbpWt1vWFERiyZV578YGKu/mO4RVCHuSUePO21FKRuBqe 1585 | IhKNgv4LdhA9j4ons/Ye20Idg/RVxVytWCqu+MPg+mqFcVBKpy7P9KZi6my8 1586 | opfOBHSvTCkJ3ULFnn6uta36JOQvTHU/0Kdi1lhFrkHpJMgvlKlu7KdgMrsy 1587 | I8dPABzW6afCJgpWky7V60wKgGp/g2v5JwUPpmw8sDJhDsKrl58SFFNQMz4m 1588 | P1JeCCdcwGxrNgXz9yqbDxQKIYXbcGHpRQr+EKKcAevn4cz9ZNfE0xScsaG+ 1589 | 8m2ch3xT57SEExSsi9jTouIpgv8A8MA6ag== 1590 | "], { 1591 | {RGBColor[0.5, 0., 0.], Arrowheads[{{0.020584615674281364`, 0.7}}], 1592 | ArrowBox[{1, 2}], ArrowBox[{2, 20}], ArrowBox[{2, 86}], 1593 | ArrowBox[{2, 87}], ArrowBox[{2, 88}], ArrowBox[{3, 4}], 1594 | ArrowBox[{4, 11}], ArrowBox[{5, 6}], ArrowBox[{6, 69}], 1595 | ArrowBox[{6, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 1596 | 104, 105, 106, 39}], ArrowBox[{6, 70}], ArrowBox[{6, 71}], 1597 | ArrowBox[{6, 72}], ArrowBox[{6, 73}], ArrowBox[{6, 74}], 1598 | ArrowBox[{6, 75}], ArrowBox[{6, 76}], 
ArrowBox[{6, 77}], 1599 | ArrowBox[{6, 78}], ArrowBox[{6, 79}], ArrowBox[{6, 80}], 1600 | ArrowBox[{6, 81}], 1601 | ArrowBox[{6, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 1602 | 118, 119, 120, 121, 122, 82}], ArrowBox[{6, 83}], ArrowBox[{7, 4}], 1603 | ArrowBox[{8, 9}], ArrowBox[{9, 46}], ArrowBox[{9, 47}], 1604 | ArrowBox[{9, 48}], ArrowBox[{9, 49}], ArrowBox[{9, 50}], 1605 | ArrowBox[{9, 51}], ArrowBox[{9, 52}], ArrowBox[{9, 21}], 1606 | ArrowBox[{9, 53}], ArrowBox[{10, 9}], ArrowBox[{11, 12}], 1607 | ArrowBox[{11, 13}], ArrowBox[{11, 14}], ArrowBox[{11, 15}], 1608 | ArrowBox[{11, 16}], ArrowBox[{15, 32}], ArrowBox[{17, 9}], 1609 | ArrowBox[{18, 2}], ArrowBox[{19, 9}], ArrowBox[{20, 1}], 1610 | ArrowBox[{20, 21}], ArrowBox[{20, 22}], ArrowBox[{23, 20}], 1611 | ArrowBox[{24, 4}], ArrowBox[{25, 6}], ArrowBox[{26, 8}], 1612 | ArrowBox[{26, 27}], ArrowBox[{26, 28}], ArrowBox[{26, 29}], 1613 | ArrowBox[{26, 30}], ArrowBox[{29, 6}], ArrowBox[{31, 20}], 1614 | ArrowBox[{32, 33}], ArrowBox[{32, 34}], ArrowBox[{32, 35}], 1615 | ArrowBox[{32, 36}], ArrowBox[{37, 9}], ArrowBox[{38, 6}], 1616 | ArrowBox[{39, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 1617 | 134, 135, 136, 137, 138, 6}], ArrowBox[{40, 9}], ArrowBox[{41, 20}], 1618 | ArrowBox[{42, 4}], ArrowBox[{43, 6}], ArrowBox[{44, 32}], 1619 | ArrowBox[{45, 6}], ArrowBox[{52, 26}], ArrowBox[{54, 2}], 1620 | ArrowBox[{55, 20}], ArrowBox[{56, 26}], ArrowBox[{57, 2}], 1621 | ArrowBox[{58, 6}], ArrowBox[{59, 6}], ArrowBox[{60, 6}], 1622 | ArrowBox[{61, 2}], ArrowBox[{62, 26}], ArrowBox[{63, 20}], 1623 | ArrowBox[{64, 6}], ArrowBox[{65, 20}], ArrowBox[{66, 11}], 1624 | ArrowBox[{67, 26}], ArrowBox[{68, 9}], 1625 | ArrowBox[{82, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 1626 | 150, 151, 152, 153, 154, 6}], ArrowBox[{84, 9}], ArrowBox[{84, 4}], 1627 | ArrowBox[{85, 6}], ArrowBox[{89, 32}], ArrowBox[{90, 9}]}, {InsetBox[ 1628 | FrameBox["\<\"abstract\"\>", 1629 | Background->RGBColor[1, 1, 
0.8], 1630 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1631 | StripOnInput->False], 1], InsetBox[ 1632 | FrameBox["\<\"variation\"\>", 1633 | Background->RGBColor[1, 1, 0.8], 1634 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1635 | StripOnInput->False], 2], InsetBox[ 1636 | FrameBox["\<\"affinities\"\>", 1637 | Background->RGBColor[1, 1, 0.8], 1638 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1639 | StripOnInput->False], 3], InsetBox[ 1640 | FrameBox["\<\"organic\"\>", 1641 | Background->RGBColor[1, 1, 0.8], 1642 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1643 | StripOnInput->False], 4], InsetBox[ 1644 | FrameBox["\<\"allied\"\>", 1645 | Background->RGBColor[1, 1, 0.8], 1646 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1647 | StripOnInput->False], 5], InsetBox[ 1648 | FrameBox["\<\"species\"\>", 1649 | Background->RGBColor[1, 1, 0.8], 1650 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1651 | StripOnInput->False], 6], InsetBox[ 1652 | FrameBox["\<\"amongst\"\>", 1653 | Background->RGBColor[1, 1, 0.8], 1654 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1655 | StripOnInput->False], 7], InsetBox[ 1656 | FrameBox["\<\"arrived\"\>", 1657 | Background->RGBColor[1, 1, 0.8], 1658 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1659 | StripOnInput->False], 8], InsetBox[ 1660 | FrameBox["\<\"facts\"\>", 1661 | Background->RGBColor[1, 1, 0.8], 1662 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1663 | StripOnInput->False], 9], InsetBox[ 1664 | FrameBox["\<\"balancing\"\>", 1665 | Background->RGBColor[1, 1, 0.8], 1666 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1667 | StripOnInput->False], 10], InsetBox[ 1668 | FrameBox["\<\"beings\"\>", 1669 | Background->RGBColor[1, 1, 0.8], 1670 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1671 | StripOnInput->False], 11], InsetBox[ 1672 | FrameBox["\<\"effects\"\>", 1673 | Background->RGBColor[1, 1, 0.8], 1674 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1675 | StripOnInput->False], 12], InsetBox[ 1676 | FrameBox["\<\"embryological\"\>", 1677 | Background->RGBColor[1, 1, 0.8], 
1678 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1679 | StripOnInput->False], 13], InsetBox[ 1680 | FrameBox["\<\"live\"\>", 1681 | Background->RGBColor[1, 1, 0.8], 1682 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1683 | StripOnInput->False], 14], InsetBox[ 1684 | FrameBox["\<\"physical\"\>", 1685 | Background->RGBColor[1, 1, 0.8], 1686 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1687 | StripOnInput->False], 15], InsetBox[ 1688 | FrameBox["\<\"throughout\"\>", 1689 | Background->RGBColor[1, 1, 0.8], 1690 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1691 | StripOnInput->False], 16], InsetBox[ 1692 | FrameBox["\<\"catalogues\"\>", 1693 | Background->RGBColor[1, 1, 0.8], 1694 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1695 | StripOnInput->False], 17], InsetBox[ 1696 | FrameBox["\<\"cause\"\>", 1697 | Background->RGBColor[1, 1, 0.8], 1698 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1699 | StripOnInput->False], 18], InsetBox[ 1700 | FrameBox["\<\"certain\"\>", 1701 | Background->RGBColor[1, 1, 0.8], 1702 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1703 | StripOnInput->False], 19], InsetBox[ 1704 | FrameBox["\<\"chapter\"\>", 1705 | Background->RGBColor[1, 1, 0.8], 1706 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1707 | StripOnInput->False], 20], InsetBox[ 1708 | FrameBox["\<\"shall\"\>", 1709 | Background->RGBColor[1, 1, 0.8], 1710 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1711 | StripOnInput->False], 21], InsetBox[ 1712 | FrameBox["\<\"struggle\"\>", 1713 | Background->RGBColor[1, 1, 0.8], 1714 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1715 | StripOnInput->False], 22], InsetBox[ 1716 | FrameBox["\<\"character\"\>", 1717 | Background->RGBColor[1, 1, 0.8], 1718 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1719 | StripOnInput->False], 23], InsetBox[ 1720 | FrameBox["\<\"coadaptations\"\>", 1721 | Background->RGBColor[1, 1, 0.8], 1722 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1723 | StripOnInput->False], 24], InsetBox[ 1724 | FrameBox["\<\"conclusion\"\>", 1725 | Background->RGBColor[1, 1, 
0.8], 1726 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1727 | StripOnInput->False], 25], InsetBox[ 1728 | FrameBox["\<\"conclusions\"\>", 1729 | Background->RGBColor[1, 1, 0.8], 1730 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1731 | StripOnInput->False], 26], InsetBox[ 1732 | FrameBox["\<\"directly\"\>", 1733 | Background->RGBColor[1, 1, 0.8], 1734 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1735 | StripOnInput->False], 27], InsetBox[ 1736 | FrameBox["\<\"grounded\"\>", 1737 | Background->RGBColor[1, 1, 0.8], 1738 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1739 | StripOnInput->False], 28], InsetBox[ 1740 | FrameBox["\<\"origin\"\>", 1741 | Background->RGBColor[1, 1, 0.8], 1742 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1743 | StripOnInput->False], 29], InsetBox[ 1744 | FrameBox["\<\"probable\"\>", 1745 | Background->RGBColor[1, 1, 0.8], 1746 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1747 | StripOnInput->False], 30], InsetBox[ 1748 | FrameBox["\<\"condition\"\>", 1749 | Background->RGBColor[1, 1, 0.8], 1750 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1751 | StripOnInput->False], 31], InsetBox[ 1752 | FrameBox["\<\"conditions\"\>", 1753 | Background->RGBColor[1, 1, 0.8], 1754 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1755 | StripOnInput->False], 32], InsetBox[ 1756 | FrameBox["\<\"climate\"\>", 1757 | Background->RGBColor[1, 1, 0.8], 1758 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1759 | StripOnInput->False], 33], InsetBox[ 1760 | FrameBox["\<\"habit\"\>", 1761 | Background->RGBColor[1, 1, 0.8], 1762 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1763 | StripOnInput->False], 34], InsetBox[ 1764 | FrameBox["\<\"life\"\>", 1765 | Background->RGBColor[1, 1, 0.8], 1766 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1767 | StripOnInput->False], 35], InsetBox[ 1768 | FrameBox["\<\"structure\"\>", 1769 | Background->RGBColor[1, 1, 0.8], 1770 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1771 | StripOnInput->False], 36], InsetBox[ 1772 | FrameBox["\<\"continent\"\>", 1773 | Background->RGBColor[1, 
1, 0.8], 1774 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1775 | StripOnInput->False], 37], InsetBox[ 1776 | FrameBox["\<\"convinced\"\>", 1777 | Background->RGBColor[1, 1, 0.8], 1778 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1779 | StripOnInput->False], 38], InsetBox[ 1780 | FrameBox["\<\"descendants\"\>", 1781 | Background->RGBColor[1, 1, 0.8], 1782 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1783 | StripOnInput->False], 39], InsetBox[ 1784 | FrameBox["\<\"detail\"\>", 1785 | Background->RGBColor[1, 1, 0.8], 1786 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1787 | StripOnInput->False], 40], InsetBox[ 1788 | FrameBox["\<\"devote\"\>", 1789 | Background->RGBColor[1, 1, 0.8], 1790 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1791 | StripOnInput->False], 41], InsetBox[ 1792 | FrameBox["\<\"distinct\"\>", 1793 | Background->RGBColor[1, 1, 0.8], 1794 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1795 | StripOnInput->False], 42], InsetBox[ 1796 | FrameBox["\<\"explain\"\>", 1797 | Background->RGBColor[1, 1, 0.8], 1798 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1799 | StripOnInput->False], 43], InsetBox[ 1800 | FrameBox["\<\"external\"\>", 1801 | Background->RGBColor[1, 1, 0.8], 1802 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1803 | StripOnInput->False], 44], InsetBox[ 1804 | FrameBox["\<\"extinct\"\>", 1805 | Background->RGBColor[1, 1, 0.8], 1806 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1807 | StripOnInput->False], 45], InsetBox[ 1808 | FrameBox["\<\"adduced\"\>", 1809 | Background->RGBColor[1, 1, 0.8], 1810 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1811 | StripOnInput->False], 46], InsetBox[ 1812 | FrameBox["\<\"arguments\"\>", 1813 | Background->RGBColor[1, 1, 0.8], 1814 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1815 | StripOnInput->False], 47], InsetBox[ 1816 | FrameBox["\<\"come\"\>", 1817 | Background->RGBColor[1, 1, 0.8], 1818 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1819 | StripOnInput->False], 48], InsetBox[ 1820 | FrameBox["\<\"distribution\"\>", 1821 | 
Background->RGBColor[1, 1, 0.8], 1822 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1823 | StripOnInput->False], 49], InsetBox[ 1824 | FrameBox["\<\"illustration\"\>", 1825 | Background->RGBColor[1, 1, 0.8], 1826 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1827 | StripOnInput->False], 50], InsetBox[ 1828 | FrameBox["\<\"possibly\"\>", 1829 | Background->RGBColor[1, 1, 0.8], 1830 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1831 | StripOnInput->False], 51], InsetBox[ 1832 | FrameBox["\<\"references\"\>", 1833 | Background->RGBColor[1, 1, 0.8], 1834 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1835 | StripOnInput->False], 52], InsetBox[ 1836 | FrameBox["\<\"throw\"\>", 1837 | Background->RGBColor[1, 1, 0.8], 1838 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1839 | StripOnInput->False], 53], InsetBox[ 1840 | FrameBox["\<\"favourable\"\>", 1841 | Background->RGBColor[1, 1, 0.8], 1842 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1843 | StripOnInput->False], 54], InsetBox[ 1844 | FrameBox["\<\"fourth\"\>", 1845 | Background->RGBColor[1, 1, 0.8], 1846 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1847 | StripOnInput->False], 55], InsetBox[ 1848 | FrameBox["\<\"general\"\>", 1849 | Background->RGBColor[1, 1, 0.8], 1850 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1851 | StripOnInput->False], 56], InsetBox[ 1852 | FrameBox["\<\"imperfect\"\>", 1853 | Background->RGBColor[1, 1, 0.8], 1854 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1855 | StripOnInput->False], 57], InsetBox[ 1856 | FrameBox["\<\"individuals\"\>", 1857 | Background->RGBColor[1, 1, 0.8], 1858 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1859 | StripOnInput->False], 58], InsetBox[ 1860 | FrameBox["\<\"infertility\"\>", 1861 | Background->RGBColor[1, 1, 0.8], 1862 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1863 | StripOnInput->False], 59], InsetBox[ 1864 | FrameBox["\<\"innumerable\"\>", 1865 | Background->RGBColor[1, 1, 0.8], 1866 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1867 | StripOnInput->False], 60], InsetBox[ 1868 | 
FrameBox["\<\"laws\"\>", 1869 | Background->RGBColor[1, 1, 0.8], 1870 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1871 | StripOnInput->False], 61], InsetBox[ 1872 | FrameBox["\<\"leading\"\>", 1873 | Background->RGBColor[1, 1, 0.8], 1874 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1875 | StripOnInput->False], 62], InsetBox[ 1876 | FrameBox["\<\"modification\"\>", 1877 | Background->RGBColor[1, 1, 0.8], 1878 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1879 | StripOnInput->False], 63], InsetBox[ 1880 | FrameBox["\<\"namely\"\>", 1881 | Background->RGBColor[1, 1, 0.8], 1882 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1883 | StripOnInput->False], 64], InsetBox[ 1884 | FrameBox["\<\"record\"\>", 1885 | Background->RGBColor[1, 1, 0.8], 1886 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1887 | StripOnInput->False], 65], InsetBox[ 1888 | FrameBox["\<\"relations\"\>", 1889 | Background->RGBColor[1, 1, 0.8], 1890 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1891 | StripOnInput->False], 66], InsetBox[ 1892 | FrameBox["\<\"sketch\"\>", 1893 | Background->RGBColor[1, 1, 0.8], 1894 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1895 | StripOnInput->False], 67], InsetBox[ 1896 | FrameBox["\<\"sorts\"\>", 1897 | Background->RGBColor[1, 1, 0.8], 1898 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1899 | StripOnInput->False], 68], InsetBox[ 1900 | FrameBox["\<\"born\"\>", 1901 | Background->RGBColor[1, 1, 0.8], 1902 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1903 | StripOnInput->False], 69], InsetBox[ 1904 | FrameBox["\<\"fertility\"\>", 1905 | Background->RGBColor[1, 1, 0.8], 1906 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1907 | StripOnInput->False], 70], InsetBox[ 1908 | FrameBox["\<\"furthermore\"\>", 1909 | Background->RGBColor[1, 1, 0.8], 1910 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1911 | StripOnInput->False], 71], InsetBox[ 1912 | FrameBox["\<\"immutable\"\>", 1913 | Background->RGBColor[1, 1, 0.8], 1914 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1915 | StripOnInput->False], 72], InsetBox[ 1916 | 
FrameBox["\<\"independently\"\>", 1917 | Background->RGBColor[1, 1, 0.8], 1918 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1919 | StripOnInput->False], 73], InsetBox[ 1920 | FrameBox["\<\"inhabiting\"\>", 1921 | Background->RGBColor[1, 1, 0.8], 1922 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1923 | StripOnInput->False], 74], InsetBox[ 1924 | FrameBox["\<\"manner\"\>", 1925 | Background->RGBColor[1, 1, 0.8], 1926 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1927 | StripOnInput->False], 75], InsetBox[ 1928 | FrameBox["\<\"mystery\"\>", 1929 | Background->RGBColor[1, 1, 0.8], 1930 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1931 | StripOnInput->False], 76], InsetBox[ 1932 | FrameBox["\<\"narrow\"\>", 1933 | Background->RGBColor[1, 1, 0.8], 1934 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1935 | StripOnInput->False], 77], InsetBox[ 1936 | FrameBox["\<\"nevertheless\"\>", 1937 | Background->RGBColor[1, 1, 0.8], 1938 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1939 | StripOnInput->False], 78], InsetBox[ 1940 | FrameBox["\<\"quite\"\>", 1941 | Background->RGBColor[1, 1, 0.8], 1942 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1943 | StripOnInput->False], 79], InsetBox[ 1944 | FrameBox["\<\"ranges\"\>", 1945 | Background->RGBColor[1, 1, 0.8], 1946 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1947 | StripOnInput->False], 80], InsetBox[ 1948 | FrameBox["\<\"state\"\>", 1949 | Background->RGBColor[1, 1, 0.8], 1950 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1951 | StripOnInput->False], 81], InsetBox[ 1952 | FrameBox["\<\"varieties\"\>", 1953 | Background->RGBColor[1, 1, 0.8], 1954 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1955 | StripOnInput->False], 82], InsetBox[ 1956 | FrameBox["\<\"year\"\>", 1957 | Background->RGBColor[1, 1, 0.8], 1958 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1959 | StripOnInput->False], 83], InsetBox[ 1960 | FrameBox["\<\"succession\"\>", 1961 | Background->RGBColor[1, 1, 0.8], 1962 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1963 | StripOnInput->False], 84], InsetBox[ 1964 
| FrameBox["\<\"variability\"\>", 1965 | Background->RGBColor[1, 1, 0.8], 1966 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1967 | StripOnInput->False], 85], InsetBox[ 1968 | FrameBox["\<\"correlation\"\>", 1969 | Background->RGBColor[1, 1, 0.8], 1970 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1971 | StripOnInput->False], 86], InsetBox[ 1972 | FrameBox["\<\"domestication\"\>", 1973 | Background->RGBColor[1, 1, 0.8], 1974 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1975 | StripOnInput->False], 87], InsetBox[ 1976 | FrameBox["\<\"limited\"\>", 1977 | Background->RGBColor[1, 1, 0.8], 1978 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1979 | StripOnInput->False], 88], InsetBox[ 1980 | FrameBox["\<\"varying\"\>", 1981 | Background->RGBColor[1, 1, 0.8], 1982 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1983 | StripOnInput->False], 89], InsetBox[ 1984 | FrameBox["\<\"volume\"\>", 1985 | Background->RGBColor[1, 1, 0.8], 1986 | FrameStyle->RGBColor[0.94, 0.85, 0.36], 1987 | StripOnInput->False], 90]}}], 1988 | Annotation[#, VertexCoordinateRules -> CompressedData[" 1989 | 1:eJw10Yk7FAgcxvG572HG5IjI7GazQxpdpK3fr6IopNZm6HBsKEeRUsyUbdL2 1990 | qKkUHXp0KDmWlrQVZXOsTPWMUbEhm1yJUa7JNsTY9nl23+f5Pu8f8BGG7NoQ 1991 | SiIQCAFf+vevaSdXXLkpwMvaq5I2v4/wqVXWsLXFBM3LzHgWVf3g+sohe1ky 1992 | H5cN6RjGVQYoqskgBHjwUN+ufxQDHDzyNGVpnCcXK45mjm13HYHOk9a1NiEk 1993 | zIpClyNyPSgWy71P2QowPZK4Sqrm4iy/pKb+Iia2WdlcGnxCwiFFud3lmWwM 1994 | W3RQE1pLxww3VSJxgIny1GkH6Ho2Vs0vK/ujjIYX1cNKv6sM9I5mCWVhZMwJ 1995 | fbE2epSNnddKPM8ICXjXYSO72ZCD6dOOyicXsrBIOmauM52C4zPM+vNsx8HZ 1996 | N5VyZ4Ye7h3YJut5xsFDj+pTltjTsNIqoejuTQZ2DPRLgwzoWGPr41e0zgSd 1997 | G7ZG6INbICVObOR0hIO20lPvWnz4WJlW4LjoJx6aRy8obqobB6tB67YdGwRo 1998 | uO3rfeEGVGTNCyu98ImPFm6nP+a/pqLuJG1O004DlG9ObagIJuFfv1h7la8x 1999 | wO3ZleoBKQtfOK3wdnD8DHZGgcIQ82EYK0+7paZ8hLhm+xBiFhFVGUc9nrVp 2000 | YHeXUEI0IeE5h02vH9MJmDT5UuXup4e7Z+3PuGhJaFd1Jeq2lIHuspz1AbM1 2001 | ILLePjUtZxKeSPicSzHjcDpBZtyxnoTFSYrEzH1tsFjEje4KGgLCf6u96bbq 2002 | 
0cwuKM3ofWKxTAPX0usdP4j7IAhcx6b8S4BhkpQgYA5Ar0bRYEHUAqdkqDZU 2003 | 9DdQGmVyBpWLUOYRnBxs+MWHYt1kRMYaR6xtcaFi35bgnFYaA/dmPTiRP4+I 2004 | U8OqxHnfMTFe/FhlFsFE0a2sVxXhPJz3ofP5rkQS+m5ekkGkGqGmbnP/axEZ 2005 | V+bm74HhcUg7E7Qp/vInGNIuSCzSvIKRkM8BG891gWf2b+XkNQTsrPKyCxdS 2006 | sfkryvXoy2x0F/BcqhexkKY4fV9mxcFvokwIod0knLh/7pZu1BDb0xotnRYy 2007 | ceLGhB1lBw8Nlt4JvFI/Ci6BGQ9qmDxUXo3qCF1Lw+ZkTwr1exYuS1I+cHCn 2008 | Yulpo3aJiIW4LTKBM9kDyoTJmMMX+HhG83mD8QwWGkg9TDdzTdDFOZdQM2cE 2009 | 9Lu73fX5XHzHiOxji/WgniCfMjs/Am92Z4drWimoEforTCWm+L9D+u0XM60T 2010 | NfD2sU5SWU3BP3Xebg93ktBSN7fasv4jmO3X7Tj7hoquXgVx9GwSpnvFGsOe 2011 | 6bji86VuwZZe2PR8E/WYYAzEVRmSSD4VN748Qvep42Py6rPPsq7p4XnsnPSU 2012 | BApasrQHGHsnYE4XwzmqmY8F0QEdglVjUNFaGE/PoWHnoO/88G42UpSlF69Q 2013 | tZB6o8Upv3USlrt5pnbXMdFZfHh/8UpDfOhc68t2JGPJLD0tXEfEEwub4jLf 2014 | M/HgoYR1y0/ooIZsPo3AZuEaTiosYI6An7mN/dtkLSSP7XI9b8PA+OjeRpU5 2015 | BY3HXfMy04YhrODFfmohDRWd1ccNPXTwo3/DkvVPaSgJeKK2qRuGWVpW2FI9 2016 | GX9907zXx0QHO0ytokrLCQi/B2bGSIbh/ttDN2Z8y8O+5kb/yZ+nICKxOM80 2017 | mYA9sfcsB9uIeMrrhKXSjou9scWd6lodFLKrQr0VX5xlfbNtGvWwOtU/X5JJ 2018 | xT020qyBjT0gPDZztfo+AYWlmgBVxycQ6e/E0sVclI9e9FQLOfiDKjfl3Xoa 2019 | QsX0eh9/KnpbKtsK55phu1gpk6/TQLyj66MCi+lIi2X7WeWOQFfH0cHV3Om4 2020 | PSsvqIzbCkE5K99HECvBjxhCm6sYheez5ZXzfbloZmF5fcKLgf8AKgR0tQ== 2021 | 2022 | "]]& ], 2023 | AspectRatio->Automatic, 2024 | FrameTicks->None, 2025 | ImageSize->{1100, 800}, 2026 | PlotRange->All, 2027 | PlotRangePadding->Scaled[0.1]], 2028 | ImageSize->{400, 400}, 2029 | ScrollPosition->{400., 200.}, 2030 | Scrollbars->True], 2031 | StripOnInput->False]], "Output", 2032 | CellChangeTimes->{ 2033 | 3.5623249178058167`*^9, 3.5623250515951033`*^9, {3.562325226134234*^9, 2034 | 3.562325249139447*^9}, {3.5623253065565357`*^9, 3.562325361729618*^9}, { 2035 | 3.562325411929564*^9, 3.562325438116972*^9}}] 2036 | }, Open ]] 2037 | }, Open ]], 2038 | 2039 | Cell[CellGroupData[{ 2040 | 2041 | Cell["Document Frequencies", "Subsection", 2042 | 
CellChangeTimes->{{3.562325731375288*^9, 3.562325738171823*^9}}], 2043 | 2044 | Cell[TextData[{ 2045 | "We have been looking at the Introduction to ", 2046 | StyleBox["Origin", 2047 | FontSlant->"Italic"], 2048 | ". We can also calculate word frequencies for the whole document. When we \ 2049 | list the fifty most common words (not including stop words) we can get a \ 2050 | better sense of what the whole book is about." 2051 | }], "Text", 2052 | CellChangeTimes->{{3.562325749817523*^9, 3.56232579854653*^9}}], 2053 | 2054 | Cell[CellGroupData[{ 2055 | 2056 | Cell[BoxData[{ 2057 | RowBox[{ 2058 | RowBox[{"sampleList", "=", 2059 | RowBox[{"Map", "[", 2060 | RowBox[{"ToLowerCase", ",", 2061 | RowBox[{"StringSplit", "[", 2062 | RowBox[{"sample", ",", 2063 | RowBox[{ 2064 | RowBox[{"Except", "[", "WordCharacter", "]"}], ".."}]}], "]"}]}], 2065 | "]"}]}], ";"}], "\n", 2066 | RowBox[{ 2067 | RowBox[{"docFreq", "=", 2068 | RowBox[{"Sort", "[", 2069 | RowBox[{ 2070 | RowBox[{"Tally", "[", 2071 | RowBox[{"Sort", "[", "sampleList", "]"}], "]"}], ",", 2072 | RowBox[{ 2073 | RowBox[{ 2074 | RowBox[{"#1", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}], 2075 | ">", 2076 | RowBox[{"#2", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 2077 | "&"}]}], "]"}]}], ";"}], "\[IndentingNewLine]", 2078 | RowBox[{"Take", "[", 2079 | RowBox[{ 2080 | RowBox[{"Select", "[", 2081 | RowBox[{ 2082 | RowBox[{"Take", "[", 2083 | RowBox[{"docFreq", ",", "200"}], "]"}], ",", 2084 | RowBox[{ 2085 | RowBox[{"Not", "[", 2086 | RowBox[{"MemberQ", "[", 2087 | RowBox[{"stopWords", ",", 2088 | RowBox[{"First", "[", "#", "]"}]}], "]"}], "]"}], "&"}]}], "]"}], 2089 | ",", "50"}], "]"}]}], "Input", 2090 | CellChangeTimes->{{3.547022048665861*^9, 3.54702206566894*^9}, { 2091 | 3.547022219499251*^9, 3.54702222715679*^9}, {3.5470283296396093`*^9, 2092 | 3.547028392094002*^9}, 3.547034119088517*^9, {3.562325806406304*^9, 2093 | 3.562325807559642*^9}}], 2094 | 2095 | Cell[BoxData[ 2096 | 
RowBox[{"{", 2097 | RowBox[{ 2098 | RowBox[{"{", 2099 | RowBox[{"\<\"species\"\>", ",", "1489"}], "}"}], ",", 2100 | RowBox[{"{", 2101 | RowBox[{"\<\"forms\"\>", ",", "397"}], "}"}], ",", 2102 | RowBox[{"{", 2103 | RowBox[{"\<\"varieties\"\>", ",", "396"}], "}"}], ",", 2104 | RowBox[{"{", 2105 | RowBox[{"\<\"selection\"\>", ",", "383"}], "}"}], ",", 2106 | RowBox[{"{", 2107 | RowBox[{"\<\"natural\"\>", ",", "361"}], "}"}], ",", 2108 | RowBox[{"{", 2109 | RowBox[{"\<\"life\"\>", ",", "298"}], "}"}], ",", 2110 | RowBox[{"{", 2111 | RowBox[{"\<\"plants\"\>", ",", "297"}], "}"}], ",", 2112 | RowBox[{"{", 2113 | RowBox[{"\<\"different\"\>", ",", "282"}], "}"}], ",", 2114 | RowBox[{"{", 2115 | RowBox[{"\<\"case\"\>", ",", "281"}], "}"}], ",", 2116 | RowBox[{"{", 2117 | RowBox[{"\<\"animals\"\>", ",", "280"}], "}"}], ",", 2118 | RowBox[{"{", 2119 | RowBox[{"\<\"great\"\>", ",", "260"}], "}"}], ",", 2120 | RowBox[{"{", 2121 | RowBox[{"\<\"distinct\"\>", ",", "255"}], "}"}], ",", 2122 | RowBox[{"{", 2123 | RowBox[{"\<\"nature\"\>", ",", "253"}], "}"}], ",", 2124 | RowBox[{"{", 2125 | RowBox[{"\<\"having\"\>", ",", "252"}], "}"}], ",", 2126 | RowBox[{"{", 2127 | RowBox[{"\<\"new\"\>", ",", "244"}], "}"}], ",", 2128 | RowBox[{"{", 2129 | RowBox[{"\<\"long\"\>", ",", "243"}], "}"}], ",", 2130 | RowBox[{"{", 2131 | RowBox[{"\<\"period\"\>", ",", "238"}], "}"}], ",", 2132 | RowBox[{"{", 2133 | RowBox[{"\<\"cases\"\>", ",", "224"}], "}"}], ",", 2134 | RowBox[{"{", 2135 | RowBox[{"\<\"believe\"\>", ",", "216"}], "}"}], ",", 2136 | RowBox[{"{", 2137 | RowBox[{"\<\"structure\"\>", ",", "214"}], "}"}], ",", 2138 | RowBox[{"{", 2139 | RowBox[{"\<\"conditions\"\>", ",", "211"}], "}"}], ",", 2140 | RowBox[{"{", 2141 | RowBox[{"\<\"genera\"\>", ",", "210"}], "}"}], ",", 2142 | RowBox[{"{", 2143 | RowBox[{"\<\"generally\"\>", ",", "199"}], "}"}], ",", 2144 | RowBox[{"{", 2145 | RowBox[{"\<\"number\"\>", ",", "198"}], "}"}], ",", 2146 | RowBox[{"{", 2147 | RowBox[{"\<\"common\"\>", ",", 
"194"}], "}"}], ",", 2148 | RowBox[{"{", 2149 | RowBox[{"\<\"far\"\>", ",", "193"}], "}"}], ",", 2150 | RowBox[{"{", 2151 | RowBox[{"\<\"time\"\>", ",", "191"}], "}"}], ",", 2152 | RowBox[{"{", 2153 | RowBox[{"\<\"degree\"\>", ",", "190"}], "}"}], ",", 2154 | RowBox[{"{", 2155 | RowBox[{"\<\"groups\"\>", ",", "173"}], "}"}], ",", 2156 | RowBox[{"{", 2157 | RowBox[{"\<\"characters\"\>", ",", "170"}], "}"}], ",", 2158 | RowBox[{"{", 2159 | RowBox[{"\<\"certain\"\>", ",", "169"}], "}"}], ",", 2160 | RowBox[{"{", 2161 | RowBox[{"\<\"view\"\>", ",", "168"}], "}"}], ",", 2162 | RowBox[{"{", 2163 | RowBox[{"\<\"large\"\>", ",", "168"}], "}"}], ",", 2164 | RowBox[{"{", 2165 | RowBox[{"\<\"instance\"\>", ",", "165"}], "}"}], ",", 2166 | RowBox[{"{", 2167 | RowBox[{"\<\"modification\"\>", ",", "161"}], "}"}], ",", 2168 | RowBox[{"{", 2169 | RowBox[{"\<\"facts\"\>", ",", "157"}], "}"}], ",", 2170 | RowBox[{"{", 2171 | RowBox[{"\<\"closely\"\>", ",", "155"}], "}"}], ",", 2172 | RowBox[{"{", 2173 | RowBox[{"\<\"parts\"\>", ",", "154"}], "}"}], ",", 2174 | RowBox[{"{", 2175 | RowBox[{"\<\"intermediate\"\>", ",", "154"}], "}"}], ",", 2176 | RowBox[{"{", 2177 | RowBox[{"\<\"modified\"\>", ",", "153"}], "}"}], ",", 2178 | RowBox[{"{", 2179 | RowBox[{"\<\"genus\"\>", ",", "147"}], "}"}], ",", 2180 | RowBox[{"{", 2181 | RowBox[{"\<\"present\"\>", ",", "143"}], "}"}], ",", 2182 | RowBox[{"{", 2183 | RowBox[{"\<\"birds\"\>", ",", "143"}], "}"}], ",", 2184 | RowBox[{"{", 2185 | RowBox[{"\<\"produced\"\>", ",", "141"}], "}"}], ",", 2186 | RowBox[{"{", 2187 | RowBox[{"\<\"individuals\"\>", ",", "140"}], "}"}], ",", 2188 | RowBox[{"{", 2189 | RowBox[{"\<\"inhabitants\"\>", ",", "139"}], "}"}], ",", 2190 | RowBox[{"{", 2191 | RowBox[{"\<\"parent\"\>", ",", "138"}], "}"}], ",", 2192 | RowBox[{"{", 2193 | RowBox[{"\<\"world\"\>", ",", "136"}], "}"}], ",", 2194 | RowBox[{"{", 2195 | RowBox[{"\<\"character\"\>", ",", "136"}], "}"}], ",", 2196 | RowBox[{"{", 2197 | RowBox[{"\<\"organic\"\>", 
",", "135"}], "}"}]}], "}"}]], "Output", 2198 | CellChangeTimes->{3.5623258110069304`*^9}] 2199 | }, Open ]] 2200 | }, Open ]], 2201 | 2202 | Cell[CellGroupData[{ 2203 | 2204 | Cell["\<\ 2205 | TF-IDF: Term frequency-Inverse document frequency\ 2206 | \>", "Subsection", 2207 | CellChangeTimes->{3.562325837979517*^9}], 2208 | 2209 | Cell["\<\ 2210 | The basic intuition behind tf-idf is as follows...\ 2211 | \>", "Text", 2212 | CellChangeTimes->{{3.5623258544008408`*^9, 3.562325862301536*^9}}], 2213 | 2214 | Cell["\<\ 2215 | A word that occurs frequently on every page doesn't tell you anything special \ 2216 | about that page. It is a stop word. 2217 | A word that occurs only a few times in the whole document or corpus can be \ 2218 | ignored. 2219 | A word that occurs a number of times on one page but is relatively rare in \ 2220 | the document or corpus overall can give you some idea what the page is about.\ 2221 | \ 2222 | \>", "Text", 2223 | CellChangeTimes->{{3.5623258544008408`*^9, 3.56232588643292*^9}}], 2224 | 2225 | Cell["\<\ 2226 | Here is one way to calculate tf-idf (there are lots of different versions)\ 2227 | \>", "Text", 2228 | CellChangeTimes->{{3.5623258544008408`*^9, 3.5623258769530573`*^9}}], 2229 | 2230 | Cell[BoxData[ 2231 | RowBox[{ 2232 | RowBox[{"tfidf", "[", 2233 | RowBox[{"termfreq_", ",", "docfreq_", ",", "numdocs_"}], "]"}], ":=", 2234 | "\[IndentingNewLine]", 2235 | RowBox[{ 2236 | RowBox[{"Log", "[", 2237 | RowBox[{"termfreq", "+", "1.0"}], "]"}], " ", 2238 | RowBox[{"Log", "[", 2239 | RowBox[{"numdocs", "/", "docfreq"}], "]"}]}]}]], "Input", 2240 | CellChangeTimes->{{3.5124909962548237`*^9, 3.5124910390188837`*^9}}], 2241 | 2242 | Cell["\<\ 2243 | Using document frequencies and TF-IDF we can get a sense of what different \ 2244 | parts of a text are about. 
Here is how we would analyze chapter 9 (there are \ 2245 | 15 chapters in all).\ 2246 | \>", "Text", 2247 | CellChangeTimes->{{3.562325908168535*^9, 3.562325977079001*^9}}], 2248 | 2249 | Cell[BoxData[{ 2250 | RowBox[{ 2251 | RowBox[{"ch9", "=", 2252 | RowBox[{ 2253 | RowBox[{"StringCases", "[", 2254 | RowBox[{"sample", ",", 2255 | RowBox[{"Shortest", "[", 2256 | RowBox[{"\"\\"", "~~", "__", "~~", "\"\\""}], 2257 | "]"}]}], "]"}], "\[LeftDoubleBracket]", "1", 2258 | "\[RightDoubleBracket]"}]}], ";"}], "\n", 2259 | RowBox[{ 2260 | RowBox[{"ch9List", "=", 2261 | RowBox[{"Map", "[", 2262 | RowBox[{"ToLowerCase", ",", 2263 | RowBox[{"StringSplit", "[", 2264 | RowBox[{"ch9", ",", 2265 | RowBox[{ 2266 | RowBox[{"Except", "[", "WordCharacter", "]"}], ".."}]}], "]"}]}], 2267 | "]"}]}], ";"}], "\[IndentingNewLine]", 2268 | RowBox[{ 2269 | RowBox[{"ch9Terms", "=", 2270 | RowBox[{"Union", "[", "ch9List", "]"}]}], ";"}], "\[IndentingNewLine]", 2271 | RowBox[{ 2272 | RowBox[{"ch9TermFreq", "=", 2273 | RowBox[{"Sort", "[", 2274 | RowBox[{ 2275 | RowBox[{"Tally", "[", "ch9List", "]"}], ",", 2276 | RowBox[{ 2277 | RowBox[{ 2278 | RowBox[{"#1", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}], 2279 | ">", 2280 | RowBox[{"#2", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}], 2281 | "&"}]}], "]"}]}], ";"}], "\n", 2282 | RowBox[{ 2283 | RowBox[{"ch9DocFreq", "=", 2284 | RowBox[{"Select", "[", 2285 | RowBox[{"docFreq", ",", 2286 | RowBox[{ 2287 | RowBox[{"MemberQ", "[", 2288 | RowBox[{"ch9Terms", ",", 2289 | RowBox[{"#", "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}]}], 2290 | "]"}], "&"}]}], "]"}]}], ";"}]}], "Input", 2291 | CellChangeTimes->{{3.5470292455489273`*^9, 3.547029295676169*^9}, 2292 | 3.5470293735149612`*^9, 3.5470294370036583`*^9, {3.547029501585433*^9, 2293 | 3.547029523385488*^9}, {3.54702960971285*^9, 3.547029617049231*^9}, { 2294 | 3.547029698870226*^9, 3.547029703658392*^9}, 3.547029782244587*^9, { 2295 | 3.547030322879662*^9, 
3.547030379302886*^9}, {3.547031097624631*^9, 2296 | 3.547031110569645*^9}, {3.547032843403961*^9, 3.547032871142591*^9}, 2297 | 3.547034164068964*^9}], 2298 | 2299 | Cell[BoxData[ 2300 | RowBox[{ 2301 | RowBox[{"computeTFIDF", "[", 2302 | RowBox[{"termlist_", ",", "tflist_", ",", "dflist_"}], "]"}], ":=", 2303 | "\[IndentingNewLine]", 2304 | RowBox[{"Module", "[", 2305 | RowBox[{ 2306 | RowBox[{"{", 2307 | RowBox[{"outlist", ",", "tf", ",", "df"}], "}"}], ",", 2308 | "\[IndentingNewLine]", 2309 | RowBox[{ 2310 | RowBox[{"outlist", "=", 2311 | RowBox[{"{", "}"}]}], ";", "\[IndentingNewLine]", 2312 | RowBox[{"Do", "[", "\[IndentingNewLine]", 2313 | RowBox[{ 2314 | RowBox[{ 2315 | RowBox[{"tf", "=", 2316 | RowBox[{ 2317 | RowBox[{"Cases", "[", 2318 | RowBox[{"tflist", ",", 2319 | RowBox[{ 2320 | RowBox[{"{", 2321 | RowBox[{"t", ",", "x_"}], "}"}], "\[Rule]", "x"}]}], "]"}], 2322 | "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}]}], ";", 2323 | "\[IndentingNewLine]", 2324 | RowBox[{"df", "=", 2325 | RowBox[{ 2326 | RowBox[{"Cases", "[", 2327 | RowBox[{"dflist", ",", 2328 | RowBox[{ 2329 | RowBox[{"{", 2330 | RowBox[{"t", ",", "x_"}], "}"}], "\[Rule]", "x"}]}], "]"}], 2331 | "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}]}], ";", 2332 | "\[IndentingNewLine]", 2333 | RowBox[{"outlist", "=", 2334 | RowBox[{"Append", "[", 2335 | RowBox[{"outlist", ",", 2336 | RowBox[{"{", 2337 | RowBox[{"t", ",", "tf", ",", "df", ",", 2338 | RowBox[{"tfidf", "[", 2339 | RowBox[{"tf", ",", "df", ",", "15.0"}], "]"}]}], "}"}]}], 2340 | "]"}]}]}], ",", "\[IndentingNewLine]", 2341 | RowBox[{"{", 2342 | RowBox[{"t", ",", "termlist"}], "}"}]}], "]"}], ";", 2343 | "\[IndentingNewLine]", 2344 | RowBox[{"Return", "[", "outlist", "]"}]}]}], "]"}]}]], "Input", 2345 | CellChangeTimes->{{3.54703151452752*^9, 3.54703152014762*^9}, { 2346 | 3.547031561185525*^9, 3.5470315917716227`*^9}, {3.547031736837302*^9, 2347 | 3.547031838174638*^9}, {3.5470318868395348`*^9, 3.547031914335375*^9}, { 
2348 | 3.5470319821810713`*^9, 3.547032083256281*^9}, {3.547032127995277*^9, 2349 | 3.547032171050722*^9}, {3.5470322070789347`*^9, 3.5470322105749903`*^9}, { 2350 | 3.547032402580517*^9, 3.547032403021102*^9}, 3.547032714540661*^9, { 2351 | 3.547033262948254*^9, 3.5470332630348463`*^9}, 3.547035928521545*^9}], 2352 | 2353 | Cell[CellGroupData[{ 2354 | 2355 | Cell[BoxData[{ 2356 | RowBox[{ 2357 | RowBox[{"ch9TFIDF", "=", 2358 | RowBox[{"Sort", "[", 2359 | RowBox[{ 2360 | RowBox[{"computeTFIDF", "[", 2361 | RowBox[{"ch9Terms", ",", "ch9TermFreq", ",", "ch9DocFreq"}], "]"}], ",", 2362 | RowBox[{ 2363 | RowBox[{ 2364 | RowBox[{"#1", "\[LeftDoubleBracket]", "4", "\[RightDoubleBracket]"}], 2365 | ">", 2366 | RowBox[{"#2", "\[LeftDoubleBracket]", "4", "\[RightDoubleBracket]"}]}], 2367 | "&"}]}], "]"}]}], ";"}], "\[IndentingNewLine]", 2368 | RowBox[{ 2369 | RowBox[{"Take", "[", 2370 | RowBox[{"ch9TFIDF", ",", "50"}], "]"}], "\[LeftDoubleBracket]", 2371 | RowBox[{"All", ",", "1"}], "\[RightDoubleBracket]"}]}], "Input", 2372 | CellChangeTimes->{{3.5470315943677197`*^9, 3.5470316620577793`*^9}, { 2373 | 3.547032323827345*^9, 3.547032337529541*^9}, {3.547032911696452*^9, 2374 | 3.547032922240966*^9}, {3.56232602926597*^9, 3.5623260303144693`*^9}}], 2375 | 2376 | Cell[BoxData[ 2377 | RowBox[{"{", 2378 | RowBox[{"\<\"teleostean\"\>", ",", "\<\"tapir\"\>", ",", "\<\"richest\"\>", 2379 | ",", "\<\"pebbles\"\>", ",", "\<\"mississippi\"\>", ",", "\<\"downs\"\>", 2380 | ",", "\<\"decay\"\>", ",", "\<\"conchologists\"\>", ",", "\<\"wear\"\>", 2381 | ",", "\<\"thinner\"\>", ",", "\<\"tear\"\>", ",", "\<\"supplement\"\>", 2382 | ",", "\<\"superimposed\"\>", ",", "\<\"sedgwick\"\>", 2383 | ",", "\<\"rolled\"\>", ",", "\<\"poorness\"\>", ",", "\<\"nodules\"\>", 2384 | ",", "\<\"mineralogical\"\>", ",", "\<\"levels\"\>", 2385 | ",", "\<\"inadequate\"\>", ",", "\<\"grinding\"\>", ",", "\<\"gravel\"\>", 2386 | ",", "\<\"downward\"\>", ",", "\<\"denuded\"\>", ",", "\<\"comprehend\"\>", 2387 
| ",", "\<\"chthamalus\"\>", ",", "\<\"atom\"\>", 2388 | ",", "\<\"accumulations\"\>", ",", "\<\"sand\"\>", ",", "\<\"ramsay\"\>", 2389 | ",", "\<\"littoral\"\>", ",", "\<\"sedimentary\"\>", ",", "\<\"wears\"\>", 2390 | ",", "\<\"wearing\"\>", ",", "\<\"wealden\"\>", ",", "\<\"watermark\"\>", 2391 | ",", "\<\"watch\"\>", ",", "\<\"vehemently\"\>", ",", "\<\"valve\"\>", 2392 | ",", "\<\"upright\"\>", ",", "\<\"unimproved\"\>", 2393 | ",", "\<\"unfathomable\"\>", ",", "\<\"undermined\"\>", 2394 | ",", "\<\"underlies\"\>", ",", "\<\"unanimously\"\>", 2395 | ",", "\<\"ubiquitous\"\>", ",", "\<\"transmutation\"\>", 2396 | ",", "\<\"tides\"\>", ",", "\<\"tidal\"\>", ",", "\<\"swarmed\"\>"}], 2397 | "}"}]], "Output", 2398 | CellChangeTimes->{3.562326034809412*^9}] 2399 | }, Open ]], 2400 | 2401 | Cell["\<\ 2402 | Whether or not you are familiar with nineteenth-century science, it should be \ 2403 | clear that the chapter has something to do with geology. Darwin also provided \ 2404 | chapter summaries of his own:\ 2405 | \>", "Text", 2406 | CellChangeTimes->{{3.562326163890539*^9, 3.5623262142637444`*^9}}], 2407 | 2408 | Cell[CellGroupData[{ 2409 | 2410 | Cell[BoxData[ 2411 | RowBox[{"StringTake", "[", 2412 | RowBox[{"ch9", ",", "548"}], "]"}]], "Input", 2413 | CellChangeTimes->{{3.547033000279071*^9, 3.547033001630723*^9}, { 2414 | 3.54703304661331*^9, 3.547033086303193*^9}, {3.562326228946951*^9, 2415 | 3.562326254744359*^9}}], 2416 | 2417 | Cell[BoxData["\<\"CHAPTER 9. ON THE IMPERFECTION OF THE GEOLOGICAL RECORD. On \ 2418 | the absence of intermediate varieties at the present day. On the nature of \ 2419 | extinct intermediate varieties; on their number. On the vast lapse of time, \ 2420 | as inferred from the rate of deposition and of denudation. On the poorness of \ 2421 | our palaeontological collections. On the intermittence of geological \ 2422 | formations. On the absence of intermediate varieties in any one formation. 
On \ 2423 | the sudden appearance of groups of species. On their sudden appearance in the \ 2424 | lowest known fossiliferous strata.\"\>"], "Output", 2425 | CellChangeTimes->{{3.562326223729416*^9, 3.562326255285418*^9}}] 2426 | }, Open ]] 2427 | }, Open ]] 2428 | }, Open ]] 2429 | }, 2430 | WindowSize->{801, 803}, 2431 | WindowMargins->{{51, Automatic}, {Automatic, 19}}, 2432 | Magnification:>FEPrivate`If[ 2433 | FEPrivate`Equal[FEPrivate`$VersionNumber, 6.], 1.25, 1.25 Inherited], 2434 | FrontEndVersion->"8.0 for Mac OS X x86 (32-bit, 64-bit Kernel) (October 5, \ 2435 | 2011)", 2436 | StyleDefinitions->"Default.nb" 2437 | ] 2438 | (* End of Notebook Content *) 2439 | 2440 | (* Internal cache information *) 2441 | (*CellTagsOutline 2442 | CellTagsIndex->{} 2443 | *) 2444 | (*CellTagsIndex 2445 | CellTagsIndex->{} 2446 | *) 2447 | (*NotebookFileOutline 2448 | Notebook[{ 2449 | Cell[CellGroupData[{ 2450 | Cell[579, 22, 170, 5, 154, "Title"], 2451 | Cell[752, 29, 342, 9, 89, "Subtitle"], 2452 | Cell[1097, 40, 101, 1, 57, "Subtitle"], 2453 | Cell[CellGroupData[{ 2454 | Cell[1223, 45, 102, 1, 42, "Subsection"], 2455 | Cell[1328, 48, 1547, 31, 201, "Text"] 2456 | }, Open ]], 2457 | Cell[CellGroupData[{ 2458 | Cell[2912, 84, 111, 1, 42, "Subsection"], 2459 | Cell[3026, 87, 529, 15, 70, "Text"], 2460 | Cell[CellGroupData[{ 2461 | Cell[3580, 106, 455, 11, 53, "Input"], 2462 | Cell[4038, 119, 240, 5, 47, "Output"] 2463 | }, Open ]], 2464 | Cell[4293, 127, 295, 9, 51, "Text"], 2465 | Cell[CellGroupData[{ 2466 | Cell[4613, 140, 62, 1, 33, "Input"], 2467 | Cell[4678, 143, 99, 1, 33, "Output"] 2468 | }, Open ]] 2469 | }, Open ]], 2470 | Cell[CellGroupData[{ 2471 | Cell[4826, 150, 117, 1, 42, "Subsection"], 2472 | Cell[4946, 153, 1020, 22, 126, "Text"], 2473 | Cell[CellGroupData[{ 2474 | Cell[5991, 179, 669, 15, 72, "Input"], 2475 | Cell[6663, 196, 362, 7, 105, "Output"] 2476 | }, Open ]], 2477 | Cell[7040, 206, 745, 15, 89, "Text"], 2478 | Cell[CellGroupData[{ 2479 | 
Cell[7810, 225, 274, 5, 33, "Input"], 2480 | Cell[8087, 232, 160, 3, 33, "Output"] 2481 | }, Open ]], 2482 | Cell[CellGroupData[{ 2483 | Cell[8284, 240, 361, 8, 33, "Input"], 2484 | Cell[8648, 250, 152, 3, 33, "Output"] 2485 | }, Open ]] 2486 | }, Open ]], 2487 | Cell[CellGroupData[{ 2488 | Cell[8849, 259, 130, 3, 42, "Subsection"], 2489 | Cell[8982, 264, 366, 6, 70, "Text"], 2490 | Cell[CellGroupData[{ 2491 | Cell[9373, 274, 456, 12, 53, "Input"], 2492 | Cell[9832, 288, 660, 13, 67, "Output"] 2493 | }, Open ]], 2494 | Cell[10507, 304, 393, 10, 51, "Text"], 2495 | Cell[CellGroupData[{ 2496 | Cell[10925, 318, 253, 5, 33, "Input"], 2497 | Cell[11181, 325, 1034, 16, 91, "Output"] 2498 | }, Open ]], 2499 | Cell[12230, 344, 376, 12, 51, "Text"], 2500 | Cell[CellGroupData[{ 2501 | Cell[12631, 360, 181, 3, 33, "Input"], 2502 | Cell[12815, 365, 111, 1, 33, "Output"] 2503 | }, Open ]], 2504 | Cell[CellGroupData[{ 2505 | Cell[12963, 371, 262, 5, 33, "Input"], 2506 | Cell[13228, 378, 303, 7, 47, "Output"] 2507 | }, Open ]], 2508 | Cell[CellGroupData[{ 2509 | Cell[13568, 390, 133, 2, 33, "Input"], 2510 | Cell[13704, 394, 108, 1, 33, "Output"] 2511 | }, Open ]], 2512 | Cell[13827, 398, 195, 6, 32, "Text"], 2513 | Cell[CellGroupData[{ 2514 | Cell[14047, 408, 271, 5, 33, "Input"], 2515 | Cell[14321, 415, 133, 2, 33, "Output"] 2516 | }, Open ]], 2517 | Cell[14469, 420, 228, 6, 32, "Text"], 2518 | Cell[CellGroupData[{ 2519 | Cell[14722, 430, 275, 5, 33, "Input"], 2520 | Cell[15000, 437, 95, 1, 33, "Output"] 2521 | }, Open ]], 2522 | Cell[CellGroupData[{ 2523 | Cell[15132, 443, 223, 4, 33, "Input"], 2524 | Cell[15358, 449, 95, 1, 33, "Output"] 2525 | }, Open ]] 2526 | }, Open ]], 2527 | Cell[CellGroupData[{ 2528 | Cell[15502, 456, 152, 3, 42, "Subsection"], 2529 | Cell[15657, 461, 571, 15, 70, "Text"], 2530 | Cell[CellGroupData[{ 2531 | Cell[16253, 480, 272, 6, 33, "Input"], 2532 | Cell[16528, 488, 1014, 16, 91, "Output"] 2533 | }, Open ]], 2534 | Cell[CellGroupData[{ 2535 | 
Cell[17579, 509, 293, 6, 33, "Input"], 2536 | Cell[17875, 517, 1012, 16, 91, "Output"] 2537 | }, Open ]], 2538 | Cell[CellGroupData[{ 2539 | Cell[18924, 538, 252, 6, 33, "Input"], 2540 | Cell[19179, 546, 526, 9, 53, "Output"] 2541 | }, Open ]] 2542 | }, Open ]], 2543 | Cell[CellGroupData[{ 2544 | Cell[19754, 561, 116, 1, 42, "Subsection"], 2545 | Cell[19873, 564, 429, 9, 70, "Text"], 2546 | Cell[CellGroupData[{ 2547 | Cell[20327, 577, 654, 16, 91, "Input"], 2548 | Cell[20984, 595, 470, 14, 47, "Output"] 2549 | }, Open ]], 2550 | Cell[21469, 612, 625, 13, 89, "Text"], 2551 | Cell[CellGroupData[{ 2552 | Cell[22119, 629, 582, 14, 53, "Input"], 2553 | Cell[22704, 645, 1915, 52, 124, "Output"] 2554 | }, Open ]], 2555 | Cell[24634, 700, 130, 3, 32, "Text"], 2556 | Cell[CellGroupData[{ 2557 | Cell[24789, 707, 167, 3, 33, "Input"], 2558 | Cell[24959, 712, 1495, 43, 72, "Output"] 2559 | }, Open ]], 2560 | Cell[26469, 758, 330, 8, 51, "Text"], 2561 | Cell[CellGroupData[{ 2562 | Cell[26824, 770, 294, 6, 33, "Input"], 2563 | Cell[27121, 778, 158, 4, 33, "Output"] 2564 | }, Open ]] 2565 | }, Open ]], 2566 | Cell[CellGroupData[{ 2567 | Cell[27328, 788, 113, 1, 42, "Subsection"], 2568 | Cell[27444, 791, 340, 10, 51, "Text"], 2569 | Cell[27787, 803, 305, 6, 53, "Input"], 2570 | Cell[CellGroupData[{ 2571 | Cell[28117, 813, 208, 5, 33, "Input"], 2572 | Cell[28328, 820, 125, 3, 33, "Output"] 2573 | }, Open ]], 2574 | Cell[28468, 826, 467, 12, 70, "Text"], 2575 | Cell[28938, 840, 419, 15, 51, "Text"], 2576 | Cell[29360, 857, 67, 1, 32, "Text"], 2577 | Cell[29430, 860, 203, 4, 32, "Text"], 2578 | Cell[CellGroupData[{ 2579 | Cell[29658, 868, 247, 7, 33, "Input"], 2580 | Cell[29908, 877, 125, 3, 33, "Output"] 2581 | }, Open ]], 2582 | Cell[30048, 883, 150, 3, 32, "Text"], 2583 | Cell[CellGroupData[{ 2584 | Cell[30223, 890, 188, 5, 33, "Input"], 2585 | Cell[30414, 897, 71, 1, 33, "Output"] 2586 | }, Open ]], 2587 | Cell[30500, 901, 292, 7, 51, "Text"] 2588 | }, Open ]], 2589 | 
Cell[CellGroupData[{ 2590 | Cell[30829, 913, 146, 2, 42, "Subsection"], 2591 | Cell[30978, 917, 411, 11, 51, "Text"], 2592 | Cell[CellGroupData[{ 2593 | Cell[31414, 932, 493, 10, 53, "Input"], 2594 | Cell[31910, 944, 1727, 43, 105, "Output"] 2595 | }, Open ]], 2596 | Cell[33652, 990, 101, 3, 32, "Text"], 2597 | Cell[CellGroupData[{ 2598 | Cell[33778, 997, 763, 17, 53, "Input"], 2599 | Cell[34544, 1016, 2476, 84, 124, "Output"] 2600 | }, Open ]] 2601 | }, Open ]], 2602 | Cell[CellGroupData[{ 2603 | Cell[37069, 1106, 130, 3, 42, "Subsection"], 2604 | Cell[37202, 1111, 597, 14, 70, "Text"], 2605 | Cell[CellGroupData[{ 2606 | Cell[37824, 1129, 607, 14, 53, "Input"], 2607 | Cell[38434, 1145, 1063, 25, 123, "Output"] 2608 | }, Open ]] 2609 | }, Open ]], 2610 | Cell[CellGroupData[{ 2611 | Cell[39546, 1176, 107, 1, 42, "Subsection"], 2612 | Cell[39656, 1179, 214, 6, 32, "Text"], 2613 | Cell[CellGroupData[{ 2614 | Cell[39895, 1189, 421, 9, 53, "Input"], 2615 | Cell[40319, 1200, 771, 14, 67, "Output"] 2616 | }, Open ]], 2617 | Cell[41105, 1217, 457, 10, 51, "Text"], 2618 | Cell[CellGroupData[{ 2619 | Cell[41587, 1231, 260, 5, 33, "Input"], 2620 | Cell[41850, 1238, 1110, 19, 105, "Output"] 2621 | }, Open ]], 2622 | Cell[CellGroupData[{ 2623 | Cell[42997, 1262, 574, 14, 72, "Input"], 2624 | Cell[43574, 1278, 916, 17, 105, "Output"] 2625 | }, Open ]] 2626 | }, Open ]], 2627 | Cell[CellGroupData[{ 2628 | Cell[44539, 1301, 138, 3, 42, "Subsection"], 2629 | Cell[44680, 1306, 469, 8, 51, "Text"], 2630 | Cell[CellGroupData[{ 2631 | Cell[45174, 1318, 1103, 25, 72, "Input"], 2632 | Cell[46280, 1345, 829, 23, 53, "Output"] 2633 | }, Open ]], 2634 | Cell[47124, 1371, 144, 3, 32, "Text"], 2635 | Cell[47271, 1376, 713, 15, 33, "Input"], 2636 | Cell[47987, 1393, 204, 4, 32, "Text"], 2637 | Cell[CellGroupData[{ 2638 | Cell[48216, 1401, 984, 22, 53, "Input"], 2639 | Cell[49203, 1425, 638, 12, 67, "Output"] 2640 | }, Open ]], 2641 | Cell[49856, 1440, 121, 3, 32, "Text"], 2642 | 
Cell[CellGroupData[{ 2643 | Cell[50002, 1447, 962, 25, 72, "Input"], 2644 | Cell[50967, 1474, 634, 12, 67, "Output"] 2645 | }, Open ]], 2646 | Cell[51616, 1489, 276, 5, 51, "Text"], 2647 | Cell[CellGroupData[{ 2648 | Cell[51917, 1498, 1315, 30, 129, "Input"], 2649 | Cell[53235, 1530, 24404, 504, 532, "Output"] 2650 | }, Open ]] 2651 | }, Open ]], 2652 | Cell[CellGroupData[{ 2653 | Cell[77688, 2040, 108, 1, 42, "Subsection"], 2654 | Cell[77799, 2043, 384, 8, 70, "Text"], 2655 | Cell[CellGroupData[{ 2656 | Cell[78208, 2055, 1321, 37, 110, "Input"], 2657 | Cell[79532, 2094, 3837, 103, 243, "Output"] 2658 | }, Open ]] 2659 | }, Open ]], 2660 | Cell[CellGroupData[{ 2661 | Cell[83418, 2203, 121, 3, 42, "Subsection"], 2662 | Cell[83542, 2208, 142, 3, 32, "Text"], 2663 | Cell[83687, 2213, 449, 9, 107, "Text"], 2664 | Cell[84139, 2224, 168, 3, 32, "Text"], 2665 | Cell[84310, 2229, 390, 10, 53, "Input"], 2666 | Cell[84703, 2241, 266, 5, 51, "Text"], 2667 | Cell[84972, 2248, 1864, 48, 110, "Input"], 2668 | Cell[86839, 2298, 2194, 52, 186, "Input"], 2669 | Cell[CellGroupData[{ 2670 | Cell[89058, 2354, 844, 19, 72, "Input"], 2671 | Cell[89905, 2375, 1420, 22, 167, "Output"] 2672 | }, Open ]], 2673 | Cell[91340, 2400, 281, 5, 51, "Text"], 2674 | Cell[CellGroupData[{ 2675 | Cell[91646, 2409, 253, 5, 33, "Input"], 2676 | Cell[91902, 2416, 663, 8, 186, "Output"] 2677 | }, Open ]] 2678 | }, Open ]] 2679 | }, Open ]] 2680 | } 2681 | ] 2682 | *) 2683 | 2684 | (* End of internal cache information *) 2685 | -------------------------------------------------------------------------------- /basic-text-analysis/bigram-network-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/basic-text-analysis/bigram-network-small.jpg -------------------------------------------------------------------------------- /basic-text-analysis/bigram-network.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/basic-text-analysis/bigram-network.jpg -------------------------------------------------------------------------------- /simple-acoustic-daq/sample-data.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/simple-acoustic-daq/sample-data.mp3 -------------------------------------------------------------------------------- /simple-acoustic-daq/sample-data.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/simple-acoustic-daq/sample-data.wav -------------------------------------------------------------------------------- /simple-acoustic-daq/v-to-f-circuit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamjturkel/Mathematica/3e1c635bccc7be29f7baaeda30a96bff80c26a02/simple-acoustic-daq/v-to-f-circuit.png --------------------------------------------------------------------------------