├── chapter1 ├── example.txt └── lines.text ├── chapter3 └── example.txt ├── chapter4 └── http.cap ├── chapter5 ├── inspect.png ├── hello_screen.png └── hello.html ├── exercises ├── traceroutes │ ├── .DS_Store │ └── PAM08 │ │ ├── .DS_Store │ │ ├── Portugal │ │ ├── Portugal_www.megaupload.com.html │ │ ├── Portugal_www.facebook.com.html │ │ ├── Portugal_www.rapidshare.com.html │ │ ├── Portugal_www.blogger.com.html │ │ ├── Portugal_www.wikipedia.org.html │ │ ├── Portugal_www.live.com.html │ │ ├── Portugal_www.msn.com.html │ │ ├── Portugal_www.google.com.html │ │ ├── Portugal_www.hi5.com.html │ │ ├── Portugal_www.baidu.com.html │ │ ├── Portugal_www.youtube.com.html │ │ ├── Portugal_www.friendster.com.html │ │ ├── Portugal_www.fotolog.net.html │ │ ├── Portugal_www.yahoo.co.jp.html │ │ └── Portugal_www.orkut.com.html │ │ ├── CanadaW │ │ ├── CanadaW_www.youtube.com.html │ │ ├── CanadaW_www.megaupload.com.html │ │ ├── CanadaW_www.wikipedia.org.html │ │ ├── CanadaW_www.fotolog.net.html │ │ ├── CanadaW_www.friendster.com.html │ │ ├── CanadaW_www.hi5.com.html │ │ ├── CanadaW_www.msn.com.html │ │ ├── CanadaW_www.live.com.html │ │ ├── CanadaW_www.yahoo.com.html │ │ ├── CanadaW_www.myspace.com.html │ │ ├── CanadaW_www.qq.com.html │ │ ├── CanadaW_www.rapidshare.com.html │ │ ├── CanadaW_www.sina.com.cn.html │ │ ├── CanadaW_www.yahoo.co.jp.html │ │ ├── CanadaW_www.blogger.com.html │ │ ├── CanadaW_www.microsoft.com.html │ │ └── CanadaW_www.orkut.com.html │ │ ├── MassachusettsC │ │ ├── MassachusettsC_www.fotolog.net.html │ │ ├── MassachusettsC_www.megaupload.com.html │ │ ├── MassachusettsC_www.blogger.com.html │ │ ├── MassachusettsC_www.msn.com.html │ │ ├── MassachusettsC_www.orkut.com.html │ │ ├── MassachusettsC_www.google.com.html │ │ └── MassachusettsC_www.rapidshare.com.html │ │ ├── Japan │ │ ├── Japan_www.sina.com.cn.html │ │ ├── Japan_www.youtube.com.html │ │ ├── Japan_www.friendster.com.html │ │ ├── Japan_www.facebook.com.html │ │ ├── Japan_www.wikipedia.org.html │ │ ├── 
Japan_www.yahoo.co.jp.html │ │ ├── Japan_www.fotolog.net.html │ │ ├── Japan_www.megaupload.com.html │ │ ├── Japan_www.myspace.com.html │ │ ├── Japan_www.live.com.html │ │ ├── Japan_www.msn.com.html │ │ ├── Japan_www.yahoo.com.html │ │ ├── Japan_www.microsoft.com.html │ │ └── Japan_www.blogger.com.html │ │ ├── UnitedKingdom │ │ ├── UnitedKingdom_www.megaupload.com.html │ │ ├── UnitedKingdom_www.fotolog.net.html │ │ ├── UnitedKingdom_www.hi5.com.html │ │ ├── UnitedKingdom_www.rapidshare.com.html │ │ ├── UnitedKingdom_www.blogger.com.html │ │ ├── UnitedKingdom_www.msn.com.html │ │ ├── UnitedKingdom_www.wikipedia.org.html │ │ ├── UnitedKingdom_www.yahoo.co.jp.html │ │ ├── UnitedKingdom_www.microsoft.com.html │ │ ├── UnitedKingdom_www.live.com.html │ │ └── UnitedKingdom_www.yahoo.com.html │ │ ├── NewYorkC │ │ ├── NewYorkC_www.hi5.com.html │ │ ├── NewYorkC_www.yahoo.com.html │ │ ├── NewYorkC_www.rapidshare.com.html │ │ ├── NewYorkC_www.youtube.com.html │ │ ├── NewYorkC_www.microsoft.com.html │ │ ├── NewYorkC_www.megaupload.com.html │ │ ├── NewYorkC_www.fotolog.net.html │ │ ├── NewYorkC_www.wikipedia.org.html │ │ ├── NewYorkC_www.live.com.html │ │ ├── NewYorkC_www.blogger.com.html │ │ └── NewYorkC_www.msn.com.html │ │ ├── PennsylvaniaC │ │ ├── PennsylvaniaC_www.megaupload.com.html │ │ ├── PennsylvaniaC_www.youtube.com.html │ │ ├── PennsylvaniaC_www.wikipedia.org.html │ │ ├── PennsylvaniaC_www.blogger.com.html │ │ └── PennsylvaniaC_www.facebook.com.html │ │ ├── UtahC │ │ ├── UtahC_www.fotolog.net.html │ │ ├── UtahC_www.facebook.com.html │ │ ├── UtahC_www.megaupload.com.html │ │ ├── UtahC_www.friendster.com.html │ │ ├── UtahC_www.youtube.com.html │ │ ├── UtahC_www.yahoo.com.html │ │ ├── UtahC_www.blogger.com.html │ │ ├── UtahC_www.google.com.html │ │ ├── UtahC_www.live.com.html │ │ ├── UtahC_www.sina.com.cn.html │ │ ├── UtahC_www.microsoft.com.html │ │ ├── UtahC_www.rapidshare.com.html │ │ └── UtahC_www.orkut.com.html │ │ ├── Germany │ │ ├── Germany_www.megaupload.com.html 
│ │ ├── Germany_www.facebook.com.html │ │ ├── Germany_www.fotolog.net.html │ │ ├── Germany_www.friendster.com.html │ │ ├── Germany_www.rapidshare.com.html │ │ ├── Germany_www.blogger.com.html │ │ └── Germany_www.live.com.html │ │ ├── Korea │ │ ├── Korea_www.megaupload.com.html │ │ ├── Korea_www.youtube.com.html │ │ ├── Korea_www.wikipedia.org.html │ │ ├── Korea_www.msn.com.html │ │ ├── Korea_www.hi5.com.html │ │ ├── Korea_www.yahoo.co.jp.html │ │ ├── Korea_www.microsoft.com.html │ │ ├── Korea_www.orkut.com.html │ │ ├── Korea_www.google.com.html │ │ ├── Korea_www.rapidshare.com.html │ │ └── Korea_www.live.com.html │ │ ├── Poland │ │ ├── Poland_www.megaupload.com.html │ │ └── Poland_www.rapidshare.com.html │ │ ├── Argentina │ │ ├── Argentina_www.youtube.com.html │ │ ├── Argentina_www.yahoo.com.html │ │ └── Argentina_www.megaupload.com.html │ │ ├── India │ │ ├── India_www.megaupload.com.html │ │ ├── India_www.youtube.com.html │ │ ├── India_www.fotolog.net.html │ │ ├── India_www.rapidshare.com.html │ │ └── India_www.blogger.com.html │ │ ├── CaliforniaC │ │ ├── CaliforniaC_www.megaupload.com.html │ │ ├── CaliforniaC_www.facebook.com.html │ │ ├── CaliforniaC_www.wikipedia.org.html │ │ ├── CaliforniaC_www.fotolog.net.html │ │ └── CaliforniaC_www.hi5.com.html │ │ ├── Indonesia │ │ ├── Indonesia_www.megaupload.com.html │ │ └── Indonesia_www.wikipedia.org.html │ │ ├── Australia │ │ ├── Australia_www.youtube.com.html │ │ ├── Australia_www.megaupload.com.html │ │ ├── Australia_www.orkut.com.html │ │ ├── Australia_www.google.com.html │ │ └── Australia_www.rapidshare.com.html │ │ ├── Mexico │ │ ├── Mexico_www.megaupload.com.html │ │ ├── Mexico_www.youtube.com.html │ │ └── Mexico_www.wikipedia.org.html │ │ ├── WashingtonDCC │ │ ├── WashingtonDCC_www.megaupload.com.html │ │ ├── WashingtonDCC_www.wikipedia.org.html │ │ ├── WashingtonDCC_www.youtube.com.html │ │ ├── WashingtonDCC_www.hi5.com.html │ │ ├── WashingtonDCC_www.fotolog.net.html │ │ └── WashingtonDCC_www.microsoft.com.html 
│ │ ├── NorthCarolinaC │ │ ├── NorthCarolinaC_www.youtube.com.html │ │ ├── NorthCarolinaC_www.blogger.com.html │ │ ├── NorthCarolinaC_www.megaupload.com.html │ │ ├── NorthCarolinaC_www.hi5.com.html │ │ └── NorthCarolinaC_www.rapidshare.com.html │ │ ├── SouthAfrica │ │ ├── SouthAfrica_www.facebook.com.html │ │ ├── SouthAfrica_www.megaupload.com.html │ │ └── SouthAfrica_www.youtube.com.html │ │ ├── Pakistan │ │ └── Pakistan_www.megaupload.com.html │ │ ├── ArizonaC │ │ ├── ArizonaC_www.hi5.com.html │ │ ├── ArizonaC_www.megaupload.com.html │ │ └── ArizonaC_www.facebook.com.html │ │ ├── Italy │ │ └── Italy_www.megaupload.com.html │ │ └── Sweden │ │ └── Sweden_www.megaupload.com.html ├── network_traffic │ ├── dns.cap │ └── README.md ├── weather_forecast │ ├── forecast.jpg │ └── README.md ├── ini_files │ ├── example.ini │ └── README.md ├── reg_ex │ └── README.md ├── README.md ├── reports │ └── example.txt └── video_log_files │ ├── logs.txt │ └── README.md └── chapter2 └── example.txt /chapter1/example.txt: -------------------------------------------------------------------------------- 1 | test file for basic commands 2 | -------------------------------------------------------------------------------- /chapter1/lines.text: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | -------------------------------------------------------------------------------- /chapter3/example.txt: -------------------------------------------------------------------------------- 1 | test 2 | There is nothing here 3 | Let us have a test here 4 | 12 5 | -------------------------------------------------------------------------------- /chapter4/http.cap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/chapter4/http.cap -------------------------------------------------------------------------------- 
/chapter5/inspect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/chapter5/inspect.png -------------------------------------------------------------------------------- /chapter5/hello_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/chapter5/hello_screen.png -------------------------------------------------------------------------------- /exercises/traceroutes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/exercises/traceroutes/.DS_Store -------------------------------------------------------------------------------- /exercises/network_traffic/dns.cap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/exercises/network_traffic/dns.cap -------------------------------------------------------------------------------- /exercises/traceroutes/PAM08/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/exercises/traceroutes/PAM08/.DS_Store -------------------------------------------------------------------------------- /exercises/weather_forecast/forecast.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/Parsing-Workshop/HEAD/exercises/weather_forecast/forecast.jpg -------------------------------------------------------------------------------- /chapter2/example.txt: -------------------------------------------------------------------------------- 1 | This file is helpful to play around with the Exercises 2 | Definitely loop 
through me and apply regular expressions. 3 | Let me also have some numbers: 1, 120 4 | 78 5 | -------------------------------------------------------------------------------- /exercises/ini_files/example.ini: -------------------------------------------------------------------------------- 1 | home=$TOOL_HOME 2 | # comment 3 | [memory] 4 | 5 | base = 5g 6 | gc= advanced 7 | 8 | [logs] 9 | # what is this? 10 | loc=/somewhere/in/the/middle/of/nowhere/ 11 | -------------------------------------------------------------------------------- /chapter5/hello.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |Here is a paragraph
9 |Here is another one!
10 | 11 | 12 | -------------------------------------------------------------------------------- /exercises/traceroutes/PAM08/Portugal/Portugal_www.megaupload.com.html: -------------------------------------------------------------------------------- 1 |2 | 1 lis1-br1-gi-6-0-10.cprm.net (195.8.0.118) 0.235 ms 0.162 ms 0.139 ms 3 | 2 lis2-cr1-po0-0-4-0.cprm.net (195.8.0.69) 0.769 ms 0.774 ms 0.720 ms 4 | 3 lon1-cr1-po12-0-0.cprm.net (195.8.0.70) 27.329 ms 27.376 ms 27.378 ms 5 | 4 ae0-4.lon11.ip.tiscali.net (213.200.79.13) 27.409 ms 27.525 ms 27.444 ms 6 | 5 so-2-0-0.was11.ip.tiscali.net (213.200.80.25) 104.030 ms 104.006 ms 104.068 ms 7 | 6 carpathia-gw158-1.ip.tiscali.net (213.200.66.66) 104.390 ms 104.327 ms 104.595 ms 8 | 7 mail.megaupload.com (69.5.88.74) 104.184 ms 104.271 ms 103.986 ms 9 | -------------------------------------------------------------------------------- /exercises/reg_ex/README.md: -------------------------------------------------------------------------------- 1 | # Regular Expressions Exercises 2 | 3 | In this folder, you can find the famous short story by Mark Twain: 4 | [The Celebrated Jumping Frog of Calaveras County](http://twain.lib.virginia.edu/projects/price/frog.htm). 5 | 6 | We will use this as a training text file to work on our regular expression skills! 7 | 8 | ## Exercises 9 | 10 | - Write a script that prints out all lines in the text that contain a 'q'. 11 | - Write a script that prints out all lines in the text that contain a word that starts with a 'q' or a 'Q'. 12 | - Count the number of words that contain two consecutive vowels. 13 | - Count the number of lines where one word occurs twice. 
14 | -------------------------------------------------------------------------------- /exercises/traceroutes/PAM08/Portugal/Portugal_www.facebook.com.html: -------------------------------------------------------------------------------- 1 |2 | 1 lis1-br1-gi-6-0-10.cprm.net (195.8.0.118) 0.256 ms 0.162 ms 0.138 ms 3 | 2 lis2-cr1-po0-0-4-0.cprm.net (195.8.0.69) 0.847 ms 0.946 ms 0.955 ms 4 | 3 lon1-cr1-po12-0-0.cprm.net (195.8.0.70) 27.997 ms 27.486 ms 27.670 ms 5 | 4 ldn-s2-rou-1021.UK.eurorings.net (134.222.109.53) 27.422 ms 30.446 ms 27.409 ms 6 | 5 ldn-s2-rou-1003.UK.eurorings.net (134.222.231.226) 28.126 ms 28.034 ms 27.714 ms 7 | 6 89.167.141.241 (89.167.141.241) 175.571 ms 175.682 ms 175.684 ms 8 | 7 v526.bsw01.sctm.tfbnw.net (204.15.21.125) 170.755 ms 180.746 ms 180.173 ms 9 | 8 www-b.facebook.com (204.15.20.26) 170.600 ms 171.290 ms 170.458 ms 10 | -------------------------------------------------------------------------------- /exercises/README.md: -------------------------------------------------------------------------------- 1 | # Exercises for the Parsing Workshop 2 | 3 | While the individual chapters contain their own little exercises, a collection of more involved exercises together with test data to try them out is linked here. 4 | 5 | Many of the exercises are already linked to in the individual chapters and we encourage you to solve them as you work through the chapters. 6 | 7 | The intended way of solving the exercises is by adding your **Python and Bash** scripts to the corresponding folders. 8 | This will also allow you to easily do code reviews on each other's solutions. 9 | 10 | ## Contribution 11 | 12 | If you have interesting challenges in the world of parsing, please feel free to add your own challenges and submit a pull request! 
13 | -------------------------------------------------------------------------------- /exercises/traceroutes/PAM08/Portugal/Portugal_www.rapidshare.com.html: -------------------------------------------------------------------------------- 1 |2 | 1 lis1-br1-gi-6-0-10.cprm.net (195.8.0.118) 0.271 ms 0.160 ms 0.168 ms 3 | 2 213.242.71.50 (213.242.71.50) 0.522 ms 0.487 ms 0.466 ms 4 | 3 213.242.71.49 (213.242.71.49) 45.928 ms 45.778 ms 46.110 ms 5 | 4 ge-0-0-0.mp1.Madrid1.Level3.net (4.68.115.229) 51.833 ms 51.269 ms 51.749 ms 6 | 5 ae-1-0.bbr2.Frankfurt1.Level3.net (212.187.128.29) 137.020 ms 53.101 ms so-3-1-0.bbr1.Frankfurt1.Level3.net (4.68.128.253) 47.943 ms 7 | 6 ae-23-54.car3.Frankfurt1.Level3.net (4.68.118.113) 69.358 ms ae-13-51.car3.Frankfurt1.Level3.net (4.68.118.17) 45.966 ms ae-13-55.car3.Frankfurt1.Level3.net (4.68.118.145) 51.732 ms 8 | 7 rapidshare.com (195.122.131.250) 46.224 ms 46.395 ms 46.670 ms 9 | -------------------------------------------------------------------------------- /exercises/traceroutes/PAM08/CanadaW/CanadaW_www.youtube.com.html: -------------------------------------------------------------------------------- 1 | 1 * * * 2 | 2 sevengate.cpsc.ucalgary.ca (136.159.7.1) 147.845 ms 10.817 ms 0.979 ms 3 | 3 * * * 4 | 4 campus.cpsc.ucalgary.ca (136.159.253.209) 9.275 ms 189.839 ms 1.969 ms 5 | 5 209.115.239.141 (209.115.239.141) 7.978 ms 7.186 ms 1.974 ms 6 | 6 208.38.22.186 (208.38.22.186) 7.980 ms 9.323 ms 9.968 ms 7 | 7 clgrab31dr01.bb.telus.com (154.11.97.1) 9.974 ms 7.969 ms 11.961 ms 8 | 8 clgrab21gr01.bb.telus.com (205.233.111.77) 69.954 ms 189.846 ms 189.872 ms 9 | 9 chcgildtgr00.bb.telus.com (154.11.11.30) 49.951 ms 41.938 ms 47.954 ms 10 | 10 154.11.2.142 (154.11.2.142) 39.959 ms 39.026 ms 39.943 ms 11 | 11 * * * 12 | 12 * * * 13 | 13 208.65.153.253 (208.65.153.253) 73.028 ms 1637.071 ms 69.861 ms 14 | -------------------------------------------------------------------------------- 
/exercises/traceroutes/PAM08/MassachusettsC/MassachusettsC_www.fotolog.net.html: -------------------------------------------------------------------------------- 1 |Traceroute to 2 | 65.118.195.131 3 |
4 | Fri Sep 28 17:49:45 EDT 20075 |
6 | 1 vineyard-gw (204.17.195.230) 0.858 ms 1.181 ms 1.510 ms 7 | 2 router6 (204.17.195.236) 0.566 ms 0.656 ms 0.501 ms 8 | 3 bos-edge-06.inet.qwest.net (65.115.97.141) 15.920 ms 11.108 ms 21.866 ms 9 | 4 bos-core-02.inet.qwest.net (205.171.28.73) 17.559 ms 13.187 ms 17.042 ms 10 | 5 jfk-core-01.inet.qwest.net (205.171.8.110) 22.309 ms 22.232 ms 18.494 ms 11 | 6 nyc-edge-01.inet.qwest.net (205.171.30.34) 22.559 ms 12.597 ms 12.142 ms 12 | 7 65-118-195-131.dia.static.qwest.net (65.118.195.131) 12.403 ms 24.113 ms 17.273 ms 13 |14 |
15 | Return to the Vineyard.NET home page 16 |