├── .gitignore ├── predeval ├── __init__.py ├── multifilter.py ├── progress.py ├── runstat.py └── util.py ├── paper ├── predeval.pdf ├── predeval-slides.pdf └── predeval-nycml-2015-04-16.pptx ├── setup.py ├── README.md ├── data ├── Trec_beta_annotations.txt ├── sk_results_ascii.txt ├── Trec_beta_results.txt ├── sk_results.txt ├── sk_annotation.txt └── labeler2.txt └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /predeval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /paper/predeval.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeloitteHux-Old/proficiency-metric/HEAD/paper/predeval.pdf -------------------------------------------------------------------------------- /paper/predeval-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeloitteHux-Old/proficiency-metric/HEAD/paper/predeval-slides.pdf -------------------------------------------------------------------------------- /paper/predeval-nycml-2015-04-16.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeloitteHux-Old/proficiency-metric/HEAD/paper/predeval-nycml-2015-04-16.pptx -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | An Information Theoretic Metric for Multi-Class Categorization 3 | """ 4 | import sys 5 | 6 | from setuptools import find_packages, setup 7 | 8 | 9 | install_requires = [ 10 | "cffi>=1.6.0,<2.0" 11 | ] 12 | if sys.version_info[:2] < (2, 7): 13 | 
install_requires.extend(["argparse", "ordereddict"]) 14 | 15 | setup( 16 | name="predeval", 17 | version="0.1.0", 18 | author="Magnetic Engineering", 19 | author_email="engineering@magnetic.com", 20 | description=__doc__.strip().splitlines()[0], 21 | long_description=__doc__, 22 | license="Apache 2.0", 23 | packages=find_packages(), 24 | install_requires=install_requires, 25 | ) 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # An Information Theoretic Metric for Multi-Class Categorization 2 | 3 | ## python 4 | 5 | The implementation of the Proficiency Metric in various settings: 6 | 7 | * `predeval.py:ConfusionMX`: Classification 8 | 9 | * `predeval.py:MuLabCat`: Multi-Label Categorization 10 | 11 | ### paper 12 | 13 | The research paper describing the Proficiency Metric. 14 | 15 | ### data 16 | 17 | The test data for the results in the paper: 18 | 19 | * `annotated`: human-labeled examples 20 | 21 | * `results`: algorithmically categorized examples 22 | 23 | * `labeler[123].txt`: KDD Cup 2005 24 | 25 | * `Trec_beta`: Example test data for ERD 2014 - http://web-ngram.research.microsoft.com/erd2014/Datasets.aspx 26 | 27 | * `sk`: 100 Magnetic queries annotated with Wikipedia concepts in similar format as ERD 2014 (Trec\_beta) 28 | 29 | * `magnetic`: hand-annotated examples from searches in the Magnetic.com database of search events 30 | -------------------------------------------------------------------------------- /predeval/multifilter.py: -------------------------------------------------------------------------------- 1 | # Remove a regexp from a set of parallel files. 
# Usage:
# python multifilter.py -rex '\tNegative' ../data/magnetic_annotated.txt ../data/magnetic_results*
#
# Every line of the main file that matches the regexp is dropped, together
# with the line at the same position in each of the other files.  All files
# must be "parallel": same number of lines, and the first tab-separated field
# of line N must be the same in every file.  The files are rewritten in place
# (each through a temporary file created in the same directory).

import argparse
import re
import tempfile
import os
import util
logger = util.get_logger("MF")


def _record_key (line):
    "Return the first tab-separated field of LINE without trailing whitespace."
    # The same normalization is applied to every file, so that a line
    # without any tab ("id\n") still matches "id\t..." in a parallel file.
    return line.split('\t')[0].rstrip()


def _open_temp (stream):
    "Create a temp file next to STREAM's file; return (text writer, path)."
    # Same directory as the target, so that the final os.rename never has
    # to cross a file system boundary.
    handle, path = tempfile.mkstemp(dir=os.path.dirname(stream.name) or '.')
    # A text-mode writer accepts the lines exactly as they are read from
    # the text-mode input streams (os.write would need bytes on Python 3).
    return os.fdopen(handle, 'w'), path


def finish (i, o, p):
    """Close input I and output O, then replace the file of I with P.

    I is the open input file object (its .name is the final destination),
    O is the output: either a raw OS-level file descriptor, as returned
    by tempfile.mkstemp, or a file object wrapping one,
    P is the path of the temporary file that O writes to.
    """
    i.close()
    if hasattr(o, 'close'):
        o.close()
    else:
        os.close(o)
    os.rename(p,i.name)
    util.wrote(i.name,logger)


def run (rex, main, others):
    """Drop the lines matching REX in MAIN from MAIN and all of OTHERS.

    REX is a compiled regular expression, searched for in lines of MAIN only.
    MAIN is an open text file object; OTHERS is a list of open text file
    objects parallel to MAIN.  All of them are rewritten in place.

    Raises ValueError if the files are not parallel (different number of
    lines or different first fields on the same line); in that case
    the original files are left untouched and the temporary files are removed.
    """
    outputs = list()  # (input stream, temp writer, temp path); main is first
    done = False
    try:
        for stream in [main] + list(others):
            util.reading(stream.name,logger)
            writer, path = _open_temp(stream)
            outputs.append((stream, writer, path))
        main_out = outputs[0][1]
        # read the files in parallel
        dropped = 0
        lines = 0
        for line in main:
            lines += 1
            prefix = _record_key(line)
            keep = rex.search(line) is None
            if keep:
                main_out.write(line)
            else:
                dropped += 1
            for i,o,_p in outputs[1:]:
                line1 = i.readline()
                if line1 == '':
                    # readline returns '' only at EOF: this file is shorter
                    raise ValueError('uneven files',line,i.name)
                prefix1 = _record_key(line1)
                if prefix1 != prefix:
                    raise ValueError("prefix mismatch",prefix,prefix1,i.name)
                if keep:
                    o.write(line1)
        # main is exhausted, so must be all the others
        for i in others:
            line1 = i.readline()
            if line1 != '':
                raise ValueError('uneven files',line1,i.name)
        logger.info("Dropped {:,d} lines out of {:,d}".format(dropped,lines))
        # close streams and rename
        for i,o,p in outputs:
            finish(i,o,p)
        done = True
    finally:
        if not done:
            # do not litter the data directory with partial temp files
            for _i,o,p in outputs:
                o.close()
                try:
                    os.remove(p)
                except OSError:
                    pass        # already renamed into place by finish


if __name__ == '__main__':
    ap = argparse.ArgumentParser(description='Filter several parallel files')
    ap.add_argument('-rex',help='the regular expression to filter on',
                    required=True)
    ap.add_argument('main',help='the main file searched in',
                    type=argparse.FileType('r'))
    ap.add_argument('others',help='other filtered files',nargs='*',
                    type=argparse.FileType('r'))
    args = ap.parse_args()
    run(re.compile(args.rex),args.main,args.others)
-------------------------------------------------------------------------------- /data/Trec_beta_annotations.txt: -------------------------------------------------------------------------------- 1 | id1 0 /m/0fd4x total recall 0.98 2 | id1 0 /m/0tc7 arnold schwarzenegger 0.95 3 | id2 0 /m/0fd4x total recall 0.98 4 | id2 1 /m/0gvrws1 total recall 0.95 5 | id3 0 /m/0tc7 the governator 0.9 6 | id3 1 /m/0gj5qr6 the governator 0.8 7 | TREC-7 0 /m/04cnvy bowflex 1 8 | TREC-8 0 /m/03d452 brooks brothers 1 9 | TREC-10 0 /m/01n7q california 1 10 | TREC-11 0 /m/0nfgq cass county missouri 1 11 | TREC-13 0 /m/020ys5 condo 1 12 | TREC-13 0 /m/02xry florida 1 13 | TREC-14 0 /m/0c4tkd culpeper national cemetery 1 14 | TREC-17 0 /m/02st88 discovery channel 1 15 | TREC-17 1 /m/0kc8y discovery channel 1 16 | TREC-20 0 /m/0dc3_ dutchess county 1 17 | TREC-22 0 /m/03ck4lv east ridge high school 1 18 | TREC-22 1 /m/027311j east ridge high school 1 19 | TREC-22 2 /m/0bs8gsb east ridge high school 1 20 | TREC-25 0 /m/0180mw er 1 21 | TREC-28 0 /m/02761b3 french lick resort and casino 1 22 | TREC-34 0 /m/03v0t illinois state 1 23 | TREC-36 0 /m/03v1s indiana 1 24 | TREC-37 0 /m/03v1s indiana state 1 25 | TREC-40 0 /m/03s0w iowa 1 26 | TREC-42 0 /m/04f_d kansas city mo 1 27 | TREC-43 0 /m/02qvp5f kenmore 1 28 | TREC-45 0 /m/03rk0 india 1 29 | TREC-48 0 /m/02dp7g madam cj walker 1 30 | TREC-49 0 /m/015fr brazil 1 31 | TREC-50 0 /m/09c7w0 united states 1 32 | TREC-51 0 /m/01b370 martha stewart 1 33 | TREC-51 0 /m/0j6rg imclone 1 34 | TREC-52 0 /m/0158xh mayo clinic 1 35 | TREC-52 0 /m/0ggh3 jacksonville fl 1 36 | TREC-53 0 /m/04p2zc milwaukee journal sentinel 1 37 | TREC-54 0 /m/0cjjt mothers day 1 38 | TREC-55 0 /m/02mjmr obama 1 39 | TREC-56 0 /m/0gyytc orange county convention center 1 40 | TREC-57 0 /m/0k1jw pacific northwest laboratory 1 41 | TREC-60 0 /m/066kp ps 2 1 42 | TREC-61 0 /m/04r3s1 ralph owen brewster 1 43 | TREC-62 0 /m/02_zt7 rincon 1 44 | TREC-63 0 /m/0288kpv ritz carlto 1 
45 | TREC-63 0 /m/06y9l6 lake las vegas 1 46 | TREC-65 0 /m/064m90 rocky mountain news 1 47 | TREC-69 0 /m/0l35f sonoma county 1 48 | TREC-70 0 /m/05b5w nile 1 49 | TREC-72 0 /m/07b_l texas 1 50 | TREC-72 0 /m/02qtlv border patrol 1 51 | TREC-73 0 /m/07c0j the beatles 1 52 | TREC-74 0 /m/0p4s9 the music man 1 53 | TREC-74 1 /m/0cqnss the music man 1 54 | TREC-74 2 /m/0gwypk the music man 1 55 | TREC-75 0 /m/0251j3 the secret garden 1 56 | TREC-75 1 /m/064xfg the secret garden 1 57 | TREC-75 2 /m/049cgk the secret garden 1 58 | TREC-75 3 /m/03cn0k6 the secret garden 1 59 | TREC-75 4 /m/0dj9x08 the secret garden 1 60 | TREC-76 0 /m/07h34 tn 1 61 | TREC-76 0 /m/024ckj highway partol 1 62 | TREC-81 0 /m/02gnrt university of phoenix 1 63 | TREC-82 0 /m/08874 yellowstone national park 1 64 | TREC-83 0 /m/07vth us capitol 1 65 | TREC-85 0 /m/01fmw0 uss yorktown 1 66 | TREC-85 0 /m/0gkgp charleston sc 1 67 | TREC-86 0 /m/05q4wt8 va dmv 1 68 | 69 | -------------------------------------------------------------------------------- /data/sk_results_ascii.txt: -------------------------------------------------------------------------------- 1 | 6 0 Upírske denníky (seriál) upirske denniky 0.32 2 | 7 0 Počasie počasie 0.26 3 | 7 0 Korfu (mesto) korfu 0.23 4 | 8 0 14. november 14 0.16 5 | 8 1 14. máj 14 0.1 6 | 8 2 14. február 14 0.1 7 | 8 3 14. jún 14 0.1 8 | 8 4 14. marec 14 0.09 9 | 9 0 Počasie počasie 0.28 10 | 10 0 Počasie počasie 0.17 11 | 11 0 Počasie počasie 0.23 12 | 12 0 Denník SME sme 0.37 13 | 12 0 .sk .sk 0.09 14 | 13 0 Obsah (filozofia) obsah 0.22 15 | 13 1 Rozloha obsah 0.13 16 | 15 0 .sk sk 0.15 17 | 19 0 Počasie počasie 0.22 18 | 20 0 Lefkada (ostrov) lefkada 0.3 19 | 21 0 Rakovina prostaty rakovina prostaty 0.39 20 | 24 0 Profesionáli (slovenský seriál) profesionáli 0.22 21 | 26 0 Nevidzany (okres Zlaté Moravce) nevidzany 0.29 22 | 26 1 Nevidzany (okres Prievidza) nevidzany 0.28 23 | 27 0 14. november 14 0.15 24 | 27 1 14. 
máj 14 0.09 25 | 28 0 Počasie počasie 0.26 26 | 28 0 Korfu (mesto) korfu 0.21 27 | 29 0 Počasie počasie 0.33 28 | 30 0 Žila zily 0.08 29 | 31 0 Voda vodu 0.11 30 | 32 0 Rapovce rapovce 0.29 31 | 34 0 Zuzana zuzana 0.21 32 | 35 0 Korfu korfu 0.2 33 | 35 0 Ostrov ostrov 0.1 34 | 35 1 Korfu (mesto) korfu 0.12 35 | 35 1 Ostrov ostrov 0.1 36 | 36 0 Počasie počasie 0.28 37 | 36 0 Tunis tunis 0.25 38 | 37 0 .cz cz 0.21 39 | 39 0 Denník SME sme 0.25 40 | 40 0 Kalkulačka kalkulačka 0.19 41 | 41 0 .sk sk 0.17 42 | 42 0 Škoda Octavia skoda octavia 0.24 43 | 42 1 Škoda Octavia (1959) skoda octavia 0.23 44 | 43 0 Denník SME sme 0.21 45 | 44 0 .sk sk 0.15 46 | 46 0 Index telesnej hmotnosti bmi 0.29 47 | 48 0 Počasie počasie 0.22 48 | 48 0 Korfu (mesto) korfu 0.2 49 | 50 0 Online online 0.15 50 | 51 0 Škoda Auto skoda 0.23 51 | 51 0 Karburátor karburátor 0.35 52 | 56 0 Upírske denníky (seriál) upírske denníky 0.31 53 | 57 0 Kalkulačka kalkulacka 0.24 54 | 60 0 Hudba hudba 0.16 55 | 61 0 Choroba choroby 0.07 56 | 64 0 Olej (všeobecne) olej 0.17 57 | 66 0 Počasie počasie 0.29 58 | 68 0 Rakovina (album) rakovina 0.14 59 | 69 0 Denník SME sme 0.25 60 | 70 0 Susedia (slovenský seriál) susedia 0.28 61 | 71 0 2014 2014 0.14 62 | 71 1 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.09 63 | 72 0 2014 2014 0.14 64 | 72 1 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.09 65 | 73 0 Význam vyznam 0.15 66 | 77 0 Počasie počasie 0.22 67 | 78 0 Celiakia celiakia 0.45 68 | 79 0 Peniaze peniaze 0.2 69 | 80 0 Palec (jednotka dĺžky) palec 0.33 70 | 80 1 Páleč palec 0.18 71 | 81 0 Nohy (album) nohy 0.15 72 | 82 0 .sk sk 0.17 73 | 85 0 Hurgada hurghada 0.22 74 | 85 0 Počasie počasie 0.19 75 | 85 0 Egypt egypt 0.16 76 | 86 0 Potravina potraviny 0.21 77 | 88 0 Korzár (denník) korzár 0.38 78 | 89 0 Rakovina (album) rakovina 0.14 79 | 90 0 Muž muz 0.2 80 | 90 0 Žena zena 0.19 81 | 91 0 Pohotovosť (seriál) pohotovost 0.35 82 | 92 0 Škoda Octavia skoda octavia 0.27 83 | 92 1 Škoda Octavia (1959) skoda 
octavia 0.21 84 | 93 0 Škoda Yeti skoda yeti 0.24 85 | 97 0 Diabetes mellitus cukrovka 0.19 86 | 98 0 Učiteľ ucitel 0.15 87 | 99 0 Hodiny (súhvezdie) hodiny 0.21 88 | 99 0 Svidník svidnik 0.13 89 | 99 1 Hodiny hodiny 0.16 90 | 99 1 Svidník svidnik 0.13 91 | 101 0 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.15 92 | 101 1 2014 2014 0.11 93 | -------------------------------------------------------------------------------- /data/Trec_beta_results.txt: -------------------------------------------------------------------------------- 1 | id1 0 /m/0tc7 arnold schwarzenegger 0.17 2 | id1 0 /m/0fd4x total recall 0.14 3 | id1 1 /m/0tc7 arnold schwarzenegger 0.17 4 | id1 1 /m/0gvrws1 total recall 0.15 5 | id2 0 /m/0gvrws1 total recall 0.17 6 | id2 1 /m/0fd4x total recall 0.17 7 | id3 0 /m/0gj5qr6 the governator 0.47 8 | id3 1 /m/0tc7 the governator 0.07 9 | TREC-1 0 /m/03rk0 india 1 10 | TREC-7 0 /m/04cnvy bowflex 0.33 11 | TREC-8 0 /m/03d452 brooks brothers 0.18 12 | TREC-10 0 /m/01n7q california 1 13 | TREC-11 0 /m/0nfgq cass county missouri 0.2 14 | TREC-13 0 /m/02xry florida 1 15 | TREC-14 0 /m/0c4tkd culpeper national cemetery 0.2 16 | TREC-15 0 /m/057lcq asbestos 0.13 17 | TREC-15 0 /m/03nqt5b dangers 0.15 18 | TREC-17 0 /m/0kc8y discovery channel store 0.2 19 | TREC-20 0 /m/0dc3_ dutchess county 0.17 20 | TREC-22 0 /m/027311j east ridge high school 0.1 21 | TREC-22 1 /m/0bs8gsb east ridge high school 0.1 22 | TREC-23 0 /m/0f6msb skeet shoot 0.24 23 | TREC-25 0 /m/0180mw er tv show 0.17 24 | TREC-26 0 /m/0fql_ uranus 0.07 25 | TREC-28 0 /m/02761b3 french lick resort 0.3 26 | TREC-34 0 /m/03v0t illinois 1 27 | TREC-36 0 /m/03v1s indiana 1 28 | TREC-37 0 /m/04shkp indiana state fairgrounds 0.2 29 | TREC-40 0 /m/03s0w iowa 1 30 | TREC-42 0 /m/04f_d kansas city mo 0.15 31 | TREC-43 0 /m/010rgv kenmore 0.06 32 | TREC-45 0 /m/03rk0 india 1 33 | TREC-48 0 /m/02dp7g madam cj walker 0.24 34 | TREC-49 0 /m/015fr brazil 1 35 | TREC-51 0 /m/01b370 martha stewart 0.22 36 | 
TREC-51 0 /m/0j6rg imclone 0.21 37 | TREC-51 1 /m/07kczv0 martha stewart 0.12 38 | TREC-51 1 /m/0j6rg imclone 0.21 39 | TREC-52 0 /m/0158xh mayo clinic 0.19 40 | TREC-52 0 /m/0ggh3 jacksonville fl 0.15 41 | TREC-53 0 /m/04p2zc milwaukee journal sentinel 0.26 42 | TREC-54 0 /m/0cjjt mothers day 0.1 43 | TREC-56 0 /m/0gyytc orange county convention center 0.22 44 | TREC-57 0 /m/0k1jw pacific northwest laboratory 0.18 45 | TREC-61 0 /m/04r3s1 ralph owen brewster 0.27 46 | TREC-62 0 /m/02_zt7 rincon puerto rico 0.16 47 | TREC-63 0 /m/0288kpv ritz carlton 0.23 48 | TREC-64 0 /m/01pvr35 gem 0.05 49 | TREC-65 0 /m/064m90 rocky mountain news 0.22 50 | TREC-69 0 /m/03c3c7h sonoma county 0.16 51 | TREC-69 1 /m/0l35f sonoma county 0.1 52 | TREC-70 0 /m/05b5w source of the nile 0.2 53 | TREC-72 0 /m/0c9jmt border patrol 0.11 54 | TREC-72 0 /m/07b_l texas 1 55 | TREC-73 0 /m/057xsbz the beatles rock band 0.15 56 | TREC-74 0 /m/0cqnss the music man 0.15 57 | TREC-74 1 /m/0gwypk the music man 0.15 58 | TREC-74 2 /m/0p4s9 the music man 0.1 59 | TREC-75 0 /m/049cgk the secret garden 0.15 60 | TREC-75 1 /m/0dj9x08 the secret garden 0.15 61 | TREC-75 2 /m/0251j3 the secret garden 0.15 62 | TREC-75 3 /m/0pcpkmn the secret garden 0.09 63 | TREC-76 0 /m/07h34 tn 1 64 | TREC-79 0 /m/05t3df tv 0.17 65 | TREC-80 0 /m/016tw3 universal 0.14 66 | TREC-80 1 /m/034130 universal 0.1 67 | TREC-81 0 /m/02gnrt university of phoenix 0.18 68 | TREC-82 0 /m/08874 yellowstone national park 0.16 69 | TREC-83 0 /m/07vth us capitol 0.16 70 | TREC-85 0 /m/0gkgp charleston sc 0.16 71 | TREC-85 0 /m/01fmw0 uss yorktown 0.06 72 | TREC-86 0 /m/07z1m va 1 73 | TREC-87 0 /m/0217wz vines 0.15 74 | TREC-90 0 /m/02y10n calculator 0.05 75 | -------------------------------------------------------------------------------- /data/sk_results.txt: -------------------------------------------------------------------------------- 1 | 6 0 Upírske denníky (seriál) upirske denniky 0.32 2 | 7 0 Korfu (mesto) korfu 0.33 3 | 7 1 
Korfu korfu 0.33 4 | 8 0 14. storočie 14 0.11 5 | 8 1 14. máj 14 0.1 6 | 8 2 14. február 14 0.1 7 | 8 3 14. jún 14 0.1 8 | 8 4 14. november 14 0.05 9 | 8 5 14 14 0.05 10 | 9 0 Chalkidiki chalkidiki 0.45 11 | 10 0 15. február 15 0.15 12 | 10 0 Marsa (Aude) marsa 0.17 13 | 10 1 15. storočie 15 0.11 14 | 10 1 Marsa (Aude) marsa 0.17 15 | 10 2 15. január 15 0.1 16 | 10 2 Marsa (Aude) marsa 0.17 17 | 10 3 15. marec 15 0.1 18 | 10 3 Marsa (Aude) marsa 0.17 19 | 10 4 15. september 15 0.1 20 | 10 4 Marsa (Aude) marsa 0.17 21 | 11 0 Bulharsko bulharsku 0.16 22 | 12 0 Denník SME denník sme 0.29 23 | 13 0 Obsah (filozofia) obsah 0.22 24 | 13 1 Rozloha obsah 0.13 25 | 15 0 .sk sk 0.15 26 | 19 0 Bulharsko bulharsko 0.19 27 | 20 0 Lefkada (ostrov) lefkada 0.55 28 | 21 0 Rakovina prostaty rakovina prostaty 0.39 29 | 26 0 Nevidzany (okres Zlaté Moravce) nevidzany 0.29 30 | 26 1 Nevidzany (okres Prievidza) nevidzany 0.28 31 | 27 0 Kos (sopka) kos 0.18 32 | 27 1 Koš (obec na Slovensku) kos 0.17 33 | 27 2 Kos (ostrov) kos 0.11 34 | 28 0 Korfu (mesto) korfu 0.28 35 | 28 1 Korfu korfu 0.24 36 | 30 0 Žila zily 0.08 37 | 32 0 Rapovce rapovce 0.45 38 | 34 0 Zuzana zuzana 0.21 39 | 35 0 Korfu korfu 0.2 40 | 35 0 Ostrov ostrov 0.1 41 | 35 1 Korfu (mesto) korfu 0.12 42 | 35 1 Ostrov ostrov 0.1 43 | 36 0 Tunis tunis 0.48 44 | 37 0 .cz cz 0.21 45 | 39 0 Denník SME sme 0.25 46 | 41 0 .sk sk 0.17 47 | 42 0 Škoda Octavia skoda octavia 0.24 48 | 42 1 Škoda Octavia (1959) skoda octavia 0.23 49 | 43 0 Denník SME sme 0.32 50 | 44 0 .sk sk 0.17 51 | 46 0 Index telesnej hmotnosti bmi 0.29 52 | 47 0 Online online 0.16 53 | 48 0 Korfu (mesto) korfu 0.24 54 | 48 1 Korfu korfu 0.23 55 | 50 0 Online online 0.15 56 | 56 0 4. storočie 4 0.16 57 | 56 1 4. január 4 0.15 58 | 56 2 4. máj 4 0.15 59 | 56 3 4. 
február 4 0.05 60 | 57 0 Kalkulačka kalkulacka 0.24 61 | 60 0 Hudba hudba 0.16 62 | 61 0 Choroba choroby 0.16 63 | 64 0 Olej (všeobecne) olej 0.17 64 | 66 0 Kréta krete 0.09 65 | 68 0 Rakovina (album) rakovina 0.14 66 | 69 0 Denník SME sme 0.33 67 | 70 0 Susedia (slovenský seriál) susedia 0.28 68 | 71 0 2014 2014 0.14 69 | 71 1 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.09 70 | 72 0 2014 2014 0.16 71 | 72 1 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.1 72 | 73 0 Význam vyznam 0.15 73 | 77 0 Bulharsko bulharsko 0.19 74 | 78 0 Celiakia celiakia 0.45 75 | 79 0 Peniaze peniaze 0.2 76 | 80 0 Palec (jednotka dĺžky) palec 0.33 77 | 80 1 Páleč palec 0.18 78 | 81 0 Nohy (album) nohy 0.18 79 | 82 0 .sk sk 0.17 80 | 85 0 Hurgada hurghada 0.24 81 | 85 0 Egypt egypt 0.17 82 | 86 0 Potravina potraviny 0.21 83 | 89 0 Rakovina (album) rakovina 0.14 84 | 90 0 Muž muz 0.2 85 | 90 0 Žena zena 0.19 86 | 91 0 Pohotovosť (seriál) pohotovost 0.47 87 | 92 0 Škoda Octavia skoda octavia 0.27 88 | 92 1 Škoda Octavia (1959) skoda octavia 0.21 89 | 93 0 Škoda Yeti skoda yeti 0.24 90 | 97 0 Diabetes mellitus cukrovka 0.19 91 | 98 0 Učiteľ ucitel 0.15 92 | 99 0 Hodiny (súhvezdie) hodiny 0.21 93 | 99 0 Svidník svidnik 0.13 94 | 99 1 Hodiny hodiny 0.16 95 | 99 1 Svidník svidnik 0.13 96 | 101 0 Majstrovstvá sveta v ľadovom hokeji 2014 2014 0.16 97 | 101 1 2014 2014 0.12 98 | -------------------------------------------------------------------------------- /data/sk_annotation.txt: -------------------------------------------------------------------------------- 1 | 1 0 Správa (žurnalistika) spravy 2 | 3 0 Žart vtipy 3 | 6 0 Upírske denníky (seriál) upirske denniky 4 | 6 0 Online online 5 | 7 0 Počasie počasie 6 | 7 0 Korfu korfu 7 | 8 0 Počasie počasie 8 | 8 0 Rodos (ostrov) rhodos 9 | 9 0 Počasie počasie 10 | 9 0 Chalkidiki chalkidiki 11 | 10 0 Počasie počasie 12 | 11 0 Počasie počasie 13 | 11 0 Bulharsko bulharsku 14 | 12 0 Denník SME denník sme 15 | 16 0 Rozprávka rozpravky 16 | 18 0 
Kartová hra karty 17 | 19 0 Počasie počasie 18 | 19 0 Bulharsko bulharsko 19 | 20 0 Počasie počasie 20 | 20 0 Lefkada (ostrov) lefkada 21 | 21 0 Zhubný nádor rakovina 22 | 21 0 Predstojnica prostaty 23 | 22 0 Šport sportove 24 | 24 0 Profesionáli (slovenský seriál) profesionáli 25 | 26 0 Nevidzany (okres Zlaté Moravce) nevidzany 26 | 26 1 Nevidzany (okres Prievidza) nevidzany 27 | 27 0 Počasie počasie 28 | 27 0 Kos (ostrov) kos 29 | 28 0 Počasie počasie 30 | 28 0 Korfu korfu 31 | 29 0 Počasie počasie 32 | 29 0 zakynthose Zakynthos (ostrov) 33 | 31 0 Voda vodu 1 34 | 32 0 Rapovce rapovce 1 35 | 32 0 Kúpalisko kúpalisko 1 36 | 34 0 Zuzana zuzana 1 37 | 35 0 Korfu ostrov korfu 1 38 | 36 0 Počasie počasie 1 39 | 36 0 Tunis tunis 1 40 | 38 0 Rozprávka rozpravky 1 41 | 39 0 Denník SME sme 1 42 | 40 0 Kalkulačka kalkulačka 1 43 | 40 0 Úver úverová 1 44 | 42 0 Škoda Octavia skoda octavia 1 45 | 42 1 Škoda Octavia (1959) skoda octavia 1 46 | 43 0 Denník SME sme 1 47 | 44 0 Správa (žurnalistika) správy 1 48 | 45 0 Žart vtipne 49 | 46 0 Index telesnej hmotnosti bmi 1 50 | 48 0 Počasie počasie 1 51 | 48 0 Korfu (mesto) korfu 1 52 | 48 1 Počasie počasie 1 53 | 48 1 Korfu korfu 1 54 | 49 0 Rodinné prípady rodinne pripady 1 55 | 50 0 Rozprávka rozpravky 1 56 | 50 0 Online online 1 57 | 51 0 Škoda Auto skoda 1 58 | 51 0 Karburátor karburátor 1 59 | 52 0 Slovensko slovenske 1 60 | 52 0 Správa (žurnalistika) spravy 1 61 | 55 0 Euromajdan ukrajinská kríza 1 62 | 56 0 Upírske denníky (seriál) upírske denníky 1 63 | 57 0 Kalkulačka kalkulacka 1 64 | 58 0 Kartová hra karty 1 65 | 60 0 Hudba hudba 1 66 | 61 0 Choroba choroby 1 67 | 62 0 Správa (žurnalistika) správy 1 68 | 63 0 Nicki Minaj nicky minaj 1 69 | 64 0 Olej (všeobecne) olej 1 70 | 66 0 Počasie počasie 1 71 | 66 0 Kréta krete 1 72 | 68 0 Zhubný nádor rakovina 1 73 | 68 0 Žalúdok žaludku 1 74 | 69 0 Denník SME sme 1 75 | 70 0 Susedia (slovenský seriál) susedia 1 76 | 71 0 Mzda mzdy 1 77 | 72 0 Mzda mzdy 1 78 | 73 0 Kartová hra 
kariet 1 79 | 76 0 Aloa pravá aloe 1 80 | 77 0 Bulharsko bulharsko 1 81 | 77 0 Počasie počasie 1 82 | 78 0 Celiakia celiakia 1 83 | 79 0 Česko-Slovensko cesko slovenske 1 84 | 79 0 Peniaze peniaze 1 85 | 81 0 Bolesť bolesti 1 86 | 83 0 Košice kosiciach 1 87 | 85 0 Hurgada hurghada 1 88 | 85 0 Egypt egypt 1 89 | 85 0 Počasie počasie 1 90 | 86 0 Draslík draslíka 1 91 | 86 0 Fosfor fosforu 1 92 | 86 0 Potravina potraviny 1 93 | 88 0 Korzár (denník) korzár 1 94 | 89 0 Zhubný nádor rakovina 1 95 | 89 0 Kvasinka kvasinky 1 96 | 90 0 Lev (súhvezdie) lev 1 97 | 90 0 Ryby (súhvezdie) ryby 1 98 | 90 0 Muž muz 1 99 | 90 0 Žena zena 1 100 | 91 0 Pohotovosť (seriál) pohotovost 1 101 | 92 0 Škoda Octavia skoda octavia 1 102 | 93 0 Škoda Yeti skoda yeti 1 103 | 95 0 Slovensko slovensku 1 104 | 97 0 Diabetes mellitus cukrovka 1 105 | 98 0 Učiteľ ucitel 1 106 | 98 0 Nemecko nemecku 1 107 | 99 0 Svidník svidnik 1 108 | -------------------------------------------------------------------------------- /predeval/progress.py: -------------------------------------------------------------------------------- 1 | # progress reporting 2 | 3 | # Author:: Sam Steingold () 4 | # Copyright:: Copyright (c) 2014, 2015, 2016 Magnetic Media Online, Inc. 5 | # License:: Apache License, Version 2.0 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | import argparse 20 | import time 21 | import datetime 22 | import re 23 | import util 24 | 25 | def difftime2string (x): 26 | ax = abs(x) 27 | if ax < 1: return "%.2fms" % (x*1000.0) 28 | if ax < 100: return "%.2fsec" % (x) 29 | if ax < 6000: return "%.2fmin" % (x/60.0) 30 | if ax < 108000: return "%.2fhrs" % (x/3600.0) 31 | if ax < 400*24*3600: return "%.2fdays" % (x/(24*3600.0)) 32 | return "%.2fyrs" % (x/(365.25*24*3600)) 33 | 34 | def elapsed (start): 35 | return difftime2string(time.time()-start) 36 | 37 | def processed (start,count,unit): 38 | spent = time.time() - start 39 | return "%d new %ss in %s%s" % ( 40 | count,unit,difftime2string(spent), 41 | (" (%s/%s)" % (difftime2string(spent/count),unit)) if count else "") 42 | 43 | def timing (func, logger = None): 44 | start = time.time() 45 | ret = func() 46 | util.info("Ran %s in %s" % (func, elapsed(start)),logger=logger) 47 | return ret 48 | 49 | difftime_rex = re.compile('^(-)?([0-9.]+)(ms|sec|min|hrs|days|yrs)$') 50 | def parse_difftime (s): 51 | if s is None: 52 | return None 53 | if isinstance(s,int): 54 | return s 55 | if not isinstance(s,str): 56 | raise TypeError("parse_difftime",s) 57 | m = difftime_rex.match(s) 58 | if m is None: 59 | raise ValueError("parse_difftime",s) 60 | sign,num,units = m.groups() 61 | num = float(num) * (1 if sign is None else -1) 62 | if units == "ms": return num / 1000.0 63 | if units == "sec": return num 64 | if units == "min": return num * 60 65 | if units == "hrs": return num * 3600 66 | if units == "days": return num * 3600 * 24 67 | if units == "yrs": return num * 3600 * 24 * 365.25 68 | raise ValueError("parse_difftime",s,units) 69 | 70 | def parse_ymdh (s): 71 | return datetime.datetime.strptime(s,"%Y/%m/%d/%H") 72 | 73 | def time2string (t = None): 74 | return time.strftime("%F %T",time.localtime(t)) 75 | 76 | def test (): 77 | print difftime2string(100) 78 | print parse_difftime("-45min") 79 | print time2string() 80 | 81 | class Done (Exception): 82 | 
pass 83 | 84 | class Progress (object): 85 | @staticmethod 86 | def get_parser (max_ticks = None, tick_report = None, 87 | max_time = None, time_report = None, 88 | flow_report = None): 89 | aparse = argparse.ArgumentParser(add_help=False) 90 | aparse.add_argument('-max-ticks',type=int, default=max_ticks, 91 | help='Iterate at most time many times') 92 | aparse.add_argument('-tick-report',type=int, default=tick_report, metavar='N', 93 | help='Report progress every N ticks') 94 | aparse.add_argument('-max-time',default=max_time, 95 | help='Iterate for at most this long (e.g., 4hrs)') 96 | aparse.add_argument('-time-report',type=int, default=time_report, metavar='S', 97 | help='Report progress every S seconds') 98 | aparse.add_argument('-flow-report', default=flow_report, 99 | help='Report progress based on data flow time interval, e.g., every 20min of data') 100 | return aparse 101 | 102 | def __init__ (self, logger, status, opts, max_possible = None): 103 | self.logger = logger 104 | self.status = status 105 | self.start = time.time() 106 | self.ticks = 0 107 | self.last_report_ticks = self.ticks 108 | self.last_report_time = self.start 109 | self.max_ticks = min(opts.max_ticks or max_possible, 110 | max_possible or opts.max_ticks) 111 | self.tick_report = opts.tick_report 112 | self.max_time = parse_difftime(opts.max_time) 113 | self.time_report = opts.time_report 114 | try: 115 | self.date_beg = opts.beg 116 | self.date_end = opts.end 117 | self.flow_beg = datetime.datetime.combine(opts.beg, datetime.time.min) 118 | self.flow_end = datetime.datetime.combine(opts.end, datetime.time.max) 119 | except AttributeError: 120 | self.date_beg = self.date_end = self.flow_beg = self.flow_end = None 121 | self.flow_now = self.flow_beg 122 | self.flow_report = None if opts.flow_report is None else parse_difftime(opts.flow_report) 123 | self.last_report_flow = self.flow_now 124 | 125 | def completed_ticks (self): 126 | if self.max_ticks is None: 127 | return None 128 | return 
float(self.ticks) / self.max_ticks 129 | def completed_flow (self): 130 | if self.flow_now is None: 131 | return None 132 | return (float((self.flow_now - self.flow_beg).total_seconds()) / 133 | (self.flow_end - self.flow_beg).total_seconds()) 134 | def completed (self): 135 | completed_ticks = self.completed_ticks() 136 | completed_flow = self.completed_flow() 137 | if completed_flow: 138 | if completed_ticks: 139 | return (completed_flow + completed_ticks) / 2 140 | return completed_flow 141 | if completed_ticks: 142 | return completed_ticks 143 | return None 144 | 145 | def __str__ (self): 146 | return ("".format(t=self.ticks)) 158 | 159 | # return (remaining-time, expected-time-at-end) 160 | def eta (self): 161 | completed = self.completed() 162 | if completed is None: 163 | if self.max_time is None: 164 | return (None, None) 165 | end = self.start + self.max_time 166 | return (end - time.time(), end) 167 | now = time.time() 168 | remains = (now - self.start) * (1-completed) / completed 169 | if self.max_time is None: 170 | return (remains, now + remains) 171 | end = self.start + self.max_time 172 | return (min(remains, end - now), min(now + remains, end)) 173 | 174 | # flow_now is the timestamp of the current record 175 | def tick (self, flow_now = None): 176 | now = time.time() 177 | if ((self.max_ticks is not None and self.ticks == self.max_ticks) or 178 | (self.max_time is not None and now > self.start + self.max_time)): 179 | raise Done() 180 | self.ticks += 1 181 | if flow_now is not None: 182 | self.flow_now = flow_now 183 | if ((self.tick_report is not None and 184 | self.ticks - self.last_report_ticks >= self.tick_report) or 185 | (self.flow_report is not None and self.flow_now is not None and 186 | ((self.flow_now - self.last_report_flow).total_seconds() 187 | >= self.flow_report)) or 188 | (self.time_report is not None and 189 | now - self.last_report_time >= self.time_report)): 190 | self.logger.info("%s",self.report()) 191 | self.last_report_time = 
now 192 | self.last_report_ticks = self.ticks 193 | self.last_report_flow = self.flow_now 194 | 195 | def report (self): 196 | remains, eta = self.eta() 197 | s = "" if self.flow_now is None else self.flow_now.strftime( 198 | "%Y-%m-%d %H:%M:%S ") 199 | s += "" if self.status is None else self.status() 200 | if remains is None or remains <= 0: 201 | return s + "{t:,d}".format(t=self.ticks) 202 | return s + "{t:,d} ({c:.2%}) ETA: {e:s} ({r:s})".format( 203 | t=self.ticks,c=self.completed() or 0,e=time2string(eta), 204 | r=difftime2string(remains)) 205 | 206 | @staticmethod 207 | def test (): 208 | p = Progress(None, None, Progress.get_parser().parse_args()) 209 | p.max_ticks = 1000 210 | p.ticks = 100 211 | p.start -= 100 212 | print p 213 | print p.report() 214 | p.tick() 215 | print p 216 | print p.report() 217 | 218 | if __name__ == '__main__': 219 | test() 220 | Progress.test() 221 | -------------------------------------------------------------------------------- /predeval/runstat.py: -------------------------------------------------------------------------------- 1 | # Running statistics 2 | 3 | # Author:: Sam Steingold () 4 | # Copyright:: Copyright (c) 2014, 2015, 2016 Magnetic Media Online, Inc. 5 | # License:: Apache License, Version 2.0 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
import math
import util
import sys


class RunStat (object):
    """Abstract base of the running statistics; identified by a title."""

    def __init__ (self, title):
        self.title = title

    def add (self, observation, n):
        "Record OBSERVATION seen N times (must be overridden)."
        raise NotImplementedError("RunStat.add")

    def num (self): # total number of observations added
        raise NotImplementedError("RunStat.num")

    def merge (self, rs):
        """Check that RS may be merged into self; subclasses do the merging.

        Raises ValueError when the titles differ."""
        if self.title != rs.title:
            raise ValueError("RunStat.merge: title mismatch",self.title,rs.title)

    def out (self):
        raise NotImplementedError("RunStat.out")


class Counter (RunStat):
    """Count occurrences of hashable observations.

    When the title is a tuple, every observation must be a tuple of the
    same length (see add)."""

    def __init__ (self, title, values = None, values_weights = None):
        """VALUES are added once each, VALUES_WEIGHTS are (value,count) pairs."""
        super(Counter, self).__init__(title)
        self.counts = dict()
        if values is not None:
            for v in values:
                self.add(v)
        if values_weights is not None:
            for v,w in values_weights:
                self.add(v,w)

    def add (self, observation, n = 1):
        "Add N to the count of OBSERVATION; ValueError if it does not fit the title."
        if isinstance(self.title,tuple) and (
                not isinstance(observation,tuple)
                or len(self.title) != len(observation)):
            raise ValueError("Counter.add: incompatible observation",
                             self.title,observation)
        self.counts[observation] = self.counts.get(observation,0) + n

    def num (self):
        return sum(self.counts.values())

    def entropy (self, scaledto1 = False):
        "Return the pair computed by util.dict_entropy for the counts."
        return util.dict_entropy(self.counts, scaledto1=scaledto1)

    def merge (self, c):
        "Add the counts of Counter C; ValueError on title or type mismatch."
        super(Counter, self).merge(c)
        if not isinstance(c, Counter):
            raise ValueError("Counter.merge: bad type",type(c))
        for o,n in c.counts.items():
            self.add(o, n)

    def split (self):
        "Split a counter of pairs into a pair of counters"
        (t1, t2) = self.title
        r1 = Counter(t1)
        r2 = Counter(t2)
        for (o1,o2),n in self.counts.items():
            r1.add(o1, n)
            r2.add(o2, n)
        return (r1,r2)

    def __str__ (self):
        return "%s (%s)" % (self.title,util.dict__str__(
            self.counts,util.title2missing(self.title)))

    def __repr__ (self):
        return "%s(%r)" % (self.__class__, self.__dict__)

    # pylint: disable=arguments-differ
    def out (self, pc = None):
        """Print the counts with PC and return what PC.out returns.

        PC defaults to a fresh util.PrintCounter: a default instance built
        in the signature would be created at import time and shared (with
        its mutable `total` dict) by every call."""
        if pc is None:
            pc = util.PrintCounter()
        return pc.out(self.counts, self.title)

    def csv (self, csv, logger=None, smallest=0):
        util.PrintCounter.csv(self.counts, self.title, csv,
                              logger=logger, smallest=smallest)

    def dump (self, pc, csv, logger=None, csvsmallest=0):
        "Print with PC; write the CSV too when PC.out reports truncation."
        if pc.out(self.counts,self.title) and csv is not None:
            self.csv(csv, logger=logger, smallest=csvsmallest)

    def short (self):
        return "{t:s}:{n:,d}/{c:,d}".format(
            t=self.title,n=self.num(),c=len(self.counts))

    @staticmethod
    def test ():
        c = Counter("foo")
        for x in ['a','b','a','c','a','b']:
            c.add(x)
        pc = util.PrintCounter()
        c.out(pc)
        d = Counter("foo", values=['c','d','e','c',None,'a','b',None])
        d.out(pc)
        d.csv(sys.stdout)
        c.merge(d)
        c.out(pc)
        e = Counter("bar")
        try:
            e.merge(c)
        except Exception as ex:
            print(ex)
        try:
            e.merge(c)
        except Exception as ex:
            print(ex)
        c.add("123")
        c.add("a",3)
        c.add("b",1)
        c.out()
        try:
            c.merge(e)
        except Exception as ex:
            print(ex)
        c = Counter(('a','b'))
        c.add(('a1','b1'))
        try:
            c.add(('a1','b1','d'))
        except Exception as ex:
            print(ex)
        c.add(('a1','b1'))
        c.add(('a1','b2'))
        c.out()
        c.csv(sys.stdout)
        p1,p2 = c.split()
        p1.out(pc)
        p2.out(pc)


class NumStat (RunStat):
    """Running count/min/max/mean/stdDev of numeric observations.

    NaNs are counted separately (nanCount); values that float() rejects
    are collected in the `bad` Counter, created on first use."""

    def __init__ (self, title, values = None, values_weights = None, integer = False):
        super(NumStat, self).__init__(title)
        self.count = 0
        self.minV = float("inf")
        self.minN = 0
        self.maxV = float("-inf")
        self.maxN = 0
        self.sumV = 0
        self.sum2 = 0
        self.nanCount = 0
        self.integer = integer  # if true, min/max is printed with {0:,d}
        self.bad = None
        if values is not None:
            for v in values:
                self.add(v)
        if values_weights is not None:
            for v,w in values_weights:
                self.add(v,w)

    def _add_bad (self, v, n):
        "Record N occurrences of the non-numeric value V."
        if self.bad is None:
            self.bad = Counter("{}(bad)".format(self.title))
        self.bad.add(v,n)

    def add (self, v, n = 1):
        "Record the value V observed N times."
        try:
            v = float(v)
        except (TypeError, ValueError):
            self._add_bad(v,n)
            return
        if math.isnan(v):
            self.nanCount += n
        else:
            self.count += n
            self.sumV += v * n
            self.sum2 += v*v * n
            if self.minV == v:
                self.minN += n
            elif self.minV > v:
                self.minV = v
                self.minN = n
            if self.maxV == v:
                self.maxN += n
            elif self.maxV < v:
                self.maxV = v
                self.maxN = n

    def num (self):
        return self.nanCount + self.count + (0 if self.bad is None else self.bad.num())

    def merge (self, ns):
        """Merge another NumStat, or the observations of a Counter, into self.

        Raises ValueError on a title mismatch."""
        super(NumStat, self).merge(ns)
        if isinstance(ns, NumStat):
            self.count += ns.count
            if self.minV == ns.minV:
                self.minN += ns.minN
            elif self.minV > ns.minV:
                self.minV = ns.minV
                self.minN = ns.minN
            if self.maxV == ns.maxV:
                self.maxN += ns.maxN
            elif self.maxV < ns.maxV:
                self.maxV = ns.maxV
                self.maxN = ns.maxN
            self.sumV += ns.sumV
            self.sum2 += ns.sum2
            self.nanCount += ns.nanCount
            # the bad values are observations too: without this num() of
            # the merged object would be less than the sum of the parts
            if ns.bad is not None:
                for x,n in list(ns.bad.counts.items()):
                    self._add_bad(x,n)
        elif isinstance(ns, Counter):
            for x,n in ns.counts.items():
                self.add(x,n)
        # NOTE(review): any other RunStat type is silently ignored here,
        # while Counter.merge raises ValueError -- confirm which is intended

    def mean (self):
        if self.count == 0:
            return float("NaN")
        return self.sumV / self.count

    def stdDev (self):
        "Return the population standard deviation, NaN if nothing was added."
        if self.count == 0:
            return float("NaN")
        if (self.maxV - self.minV) < sys.float_info.epsilon * self.sum2:
            return 0 # guard against roundoff errors producing sqrt(-eps)
        variance = (self.sum2 / self.count -
                    (self.sumV * self.sumV) / (self.count * self.count))
        # roundoff may still leave a tiny negative number here
        return math.sqrt(max(variance, 0.0))

    def __str__ (self, toString = str):
        if toString is None:
            toString = str
        return "{t:s} [{c:,.0f} {m:s}${d:s} {i:s}{I:s}:{a:s}{A:s}{n:s}{b:s}]".format(
            t=self.title,c=self.count,m=toString(self.mean()),
            d=toString(self.stdDev()),
            i=("{i:,.0f}".format(i=self.minV) if self.integer else
               toString(self.minV)),
            I=("" if self.minN == 1 else "*{i:,.0f}".format(i=self.minN)),
            a=("{i:,.0f}".format(i=self.maxV) if self.integer else
               toString(self.maxV)),
            A=("" if self.maxN == 1 else "*{i:,.0f}".format(i=self.maxN)),
            n=("" if self.nanCount==0 else " NaN={i:,.0f}".format(i=self.nanCount)),
            b=("" if self.bad is None else " Bad={i:,.0f}".format(i=self.bad.num())))

    def __repr__ (self):
        return "%s(%r)" % (self.__class__, self.__dict__)

    # pylint: disable=arguments-differ
    def out (self, pc = None, toString = str):
        """Print the summary line, then the bad values (if any) using PC.

        PC defaults to a fresh util.PrintCounter (not one shared instance
        created at import time).  Returns True if the bad values list was
        truncated by PC."""
        if pc is None:
            pc = util.PrintCounter()
        print(pc.header+" "+self.__str__(toString))
        if self.bad is None:
            return False        # full output, no truncation
        return self.bad.out(pc)

    def dump (self, pc, toString = str):
        self.out(pc,toString)

    def as_dict (self):
        """Return the state as a new dict; `bad` is replaced by its counts.

        Always a copy, so that the caller cannot corrupt the statistics."""
        ret = self.__dict__.copy()
        if self.bad is not None:
            ret["bad"] = dict(self.bad.counts)
        return ret

    @staticmethod
    def test ():
        c = NumStat("foo")
        for x in [1,2,1,3,0,0,0,0,float("NaN")]:
            c.add(x)
        c.out()
        print("c.num={n:,d}".format(n=c.num()))
        d = NumStat("foo",values=[5,6,8,3,4,5,2,4,5,6,7,4,4,5])
        d.out()
        print(c.as_dict())
        c.merge(d)
        c.out()
        print(c.as_dict())
        for x in [100,200,10000,300000]:
            c.add(x)
        import progress
        c.out(toString=progress.difftime2string)
        print("c.num={n:,d}".format(n=c.num()))
        print(c.as_dict())


def test():
    Counter.test()
    NumStat.test()
    c = Counter("foo")
    c.add(1)
c.add("2") 292 | c.add("a") 293 | n = NumStat("foo") 294 | n.add(1) 295 | try: 296 | c.merge(n) 297 | except Exception as ex: 298 | print ex 299 | n.merge(c) 300 | n.out() 301 | 302 | if __name__ == '__main__': 303 | test() 304 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /predeval/util.py: -------------------------------------------------------------------------------- 1 | # misc small utilities 2 | 3 | # Author:: Sam Steingold () 4 | # Copyright:: Copyright (c) 2014, 2015, 2016 Magnetic Media Online, Inc. 5 | # License:: Apache License, Version 2.0 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | import sys 20 | import collections 21 | import csv 22 | import os 23 | import math 24 | import re 25 | import logging 26 | import random 27 | 28 | def weighted_sample(x, weights, n=1): 29 | "Returns a weighted sample of length n with replacement. Weight do not need to sum to 1." 30 | length = len(x) 31 | assert length == len(weights) > 0 32 | assert n >= 1 33 | totalweight = sum(weights) 34 | cumulative_sum = 0 35 | sample = [] 36 | r = [random.random() * totalweight for i in xrange(n)] 37 | for i in xrange(length): 38 | cumulative_sum += weights[i] 39 | for j in xrange(n): 40 | if r[j] < cumulative_sum: 41 | sample.append(x[i]) 42 | r[j] = totalweight + 1 #make sure that it won't be triggered again 43 | if len(sample) >= n: 44 | break 45 | return sample 46 | 47 | def sample_file (inf, outf, rates, pos, logger=None, separator = "\t"): 48 | """Sample lines in INF according to RATES at position POS and write OUTF.""" 49 | reading(inf,logger) 50 | written = collections.defaultdict(int) 51 | wasread = collections.defaultdict(int) 52 | with open(inf) as i: 53 | with open(outf,"w") as o: 54 | for l in i: 55 | v = l.strip().split(separator)[pos] 56 | wasread[v] += 1 57 | try: 58 | r = rates[v] 59 | except KeyError: 60 | r = rates[v] = 1 61 | warn("Unexpected value [%s] in [%s], set rate=1" % (v,l.strip()), logger) 62 | if r == 1 or random.random() <= r: 63 | o.write(l) 64 | written[v] += 1 65 | wrote(outf,logger) 66 | info("Read {i:,d} lines: {c:s}".format( 67 | i=sum(wasread.itervalues()),c=counter2string(wasread)),logger) 68 | info("Wrote {i:,d} lines: {c:s}".format( 69 | i=sum(written.itervalues()),c=counter2string(written)),logger) 70 | 71 | def get_logger (name, level = logging.INFO): 72 | console = logging.StreamHandler() # stderr 73 | console.setFormatter(logging.Formatter( 74 | fmt='%(asctime)s %(levelname)s %(name)s/%(module)s %(message)s', 75 | datefmt='%Y-%m-%d %H:%M:%S')) 76 | # add the handler to the root logger 77 | logger = logging.getLogger(name) 
78 | logger.addHandler(console) 79 | logger.setLevel(level) 80 | return logger 81 | 82 | def debug (s,logger = None): 83 | if logger is None: 84 | print "DEBUG " + s 85 | elif isinstance(logger,logging.Logger): 86 | logger.debug(s) 87 | else: 88 | pass 89 | 90 | def info (s,logger = None): 91 | if logger is None: 92 | print s 93 | elif isinstance(logger,logging.Logger): 94 | logger.info(s) 95 | else: 96 | pass 97 | 98 | def warn (s,logger = None): 99 | if logger is None: 100 | print "WARNING " + s 101 | elif isinstance(logger,logging.Logger): 102 | logger.warn(s) 103 | else: 104 | pass 105 | 106 | # http://stackoverflow.com/questions/497885/python-element-wise-tuple-operations-like-sum 107 | def tuple_sum (a, b): 108 | return tuple(map(sum, zip(a, b))) 109 | 110 | # http://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-python-whilst-preserving-order 111 | def dedup (seq): 112 | seen = set() 113 | return [x for x in seq if x not in seen and not seen.add(x)] 114 | 115 | # http://stackoverflow.com/questions/15889131/how-to-find-the-cumulative-sum-of-numbers-in-a-list 116 | def accumu (seq): 117 | total = 0 118 | for x in seq: 119 | total += x 120 | yield total 121 | 122 | def cumsum (seq): return list(accumu(seq)) 123 | 124 | # similar to the system function, but 125 | # - does not support negative step 126 | # - does support dates and such, as long as they support "+" and "<" 127 | # - stop is included in the range 128 | def myrange (start, stop, step): 129 | while start <= stop: 130 | yield start 131 | start += step 132 | 133 | import ast 134 | import pprint 135 | class HumanReadable (object): 136 | # http://stackoverflow.com/questions/28055565/how-to-serialize-a-python-dict-to-text-in-a-human-readable-way 137 | @staticmethod 138 | def save (x, fname): 139 | with open(fname, 'w') as f: 140 | pprint.PrettyPrinter(stream=f).pprint(x) 141 | 142 | @staticmethod 143 | def load (fname): 144 | with open(fname, 'r') as f: 145 | return 
ast.literal_eval(f.read()) 146 | 147 | # execfile('util.py'); test() 148 | def test (): 149 | counter = collections.Counter(['a','b','a','c','a','b']) 150 | PrintCounter().out(counter,'test1') 151 | PrintCounter(max_row=2,min_omit=0,min_row=0).out(counter,'test2') 152 | PrintCounter.csv(counter,'test3',sys.stdout) 153 | PrintCounter.csv(counter,'test4',"foo-") 154 | os.remove("foo-test4.csv") 155 | counter[u'a\u0437'] = 3 156 | counter[7] = 5 157 | print counter 158 | PrintCounter.csv(counter,'test5',"foo-") 159 | os.remove("foo-test5.csv") 160 | print asBigNumberBin(123456789) 161 | print asBigNumberDec(123456789) 162 | print "bin_entropy" 163 | for x in range(10): 164 | print bin_entropy(10,x) 165 | print "bin_mutual_info" 166 | print bin_mutual_info(200,100,100,50) 167 | for x in range(10): 168 | print bin_mutual_info(200,20,20+0.8*x,(200-x)*0.1) 169 | x1 = dict([(a,2*a) for a in range(10)]) 170 | x1[(1,2,3)] = 6 171 | x1 = [x1] + [(x1,x1)] 172 | HumanReadable.save(x1, "tmp") 173 | x2 = HumanReadable.load("tmp") 174 | os.remove("tmp") 175 | if x1 != x2: 176 | raise Exception("HumanReadable",x1,x2) 177 | print x1 178 | 179 | def default_None (x, d): return d if x is None else x 180 | 181 | def empty2none (v): return (None if v == '' else v) 182 | 183 | # http://stackoverflow.com/questions/29127801/cross-version-portability-in-python 184 | if hasattr(1, 'bit_length'): 185 | def bitlen (x): return x.bit_length() 186 | else: 187 | def bitlen (x): return len(bin(x))-2 188 | 189 | # http://en.wikipedia.org/wiki/Binary_prefix 190 | binaryPrefixes = ['K','M','G','T','P','E','Z','Y'] 191 | 192 | asBigNumberBinCuts = [(10*(y+1),binaryPrefixes[y]) for y in range(len(binaryPrefixes))][::-1] 193 | def asBigNumberBin (v): # valid toString argument 194 | l = bitlen(v) 195 | for b,p in asBigNumberBinCuts: 196 | if l >= b: 197 | return "%.1f%si" % ((v >> (b-10)) / 1024.0, p) 198 | return str(v) 199 | 200 | asBigNumberDecCuts = [(10.0**(3*(y+1)),binaryPrefixes[y]) for y in 
range(len(binaryPrefixes))][::-1] 201 | def asBigNumberDec (v): # valid toString argument 202 | for c,p in asBigNumberDecCuts: 203 | if v >= c: 204 | return "%.1f%s" % (v / c, p) 205 | return str(v) 206 | 207 | def nicenum (s): # nice number presentation 208 | try: 209 | return "{n:,d}".format(n=int(s)) 210 | except ValueError: 211 | return s 212 | 213 | # not needed in python3 214 | def ensure_dir (path, logger = None): 215 | if path == "": # current directory is presumed to exist 216 | return 217 | try: 218 | os.makedirs(path) 219 | info("Created [%s]" % (path),logger) 220 | except OSError: 221 | if os.path.isdir(path): 222 | debug("Path [%s] already exists" % (path),logger) 223 | else: 224 | raise 225 | 226 | class DirLock (object): 227 | def __init__ (self, path): 228 | ensure_dir(path) 229 | self.dir = path 230 | self.lock = os.path.join(path,"locked") 231 | 232 | def __enter__ (self): 233 | if os.path.exists(self.lock): 234 | with open(self.lock) as l: 235 | raise ValueError("directory is in use",self.dir,l.read()) 236 | with open(self.lock,"w") as l: 237 | l.write("pid=%s logname=%s" % (os.getpid(),os.getenv("LOGNAME"))) 238 | return self.dir 239 | 240 | def __exit__ (self, _exc_type, _exc_value, _traceback): 241 | os.unlink(self.lock) 242 | 243 | # turn exceptions into None 244 | def catching_exceptions (logger,function,arguments): 245 | try: 246 | return function(*arguments) 247 | except Exception as e: 248 | logger.error("%s: %s",function.__name__,e) 249 | return None 250 | 251 | # http://nullege.com/codes/search/pyutil.strutil.commonsuffix 252 | def commonsuffix(l): 253 | cp = [] 254 | for i in range(min([len(element) for element in l])): 255 | c = l[0][-i-1] 256 | for s in l[1:]: 257 | if s[-i-1] != c: 258 | cp.reverse() 259 | return ''.join(cp) 260 | cp.append(c) 261 | cp.reverse() 262 | return ''.join(cp) 263 | 264 | def title_from_2paths (first, second): 265 | cp = os.path.commonprefix([first,second]) 266 | cs = commonsuffix([first,second]) 267 | 
return "%s(%s|%s)%s" % ( 268 | cp,first[len(cp):len(first)-len(cs)], 269 | second[len(cp):len(second)-len(cs)],cs) 270 | 271 | def canonicalize_domain (domain): 272 | if domain is None or domain == '': 273 | return None 274 | if re.match(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(:[0-9]+)?$',domain): 275 | return 'dotted.quad' 276 | domain = re.sub(r'(:[0-9]+|[.:]+)$','',domain.lower()) # strip port & downcase 277 | tld = re.sub(r'^.*\.([a-z]*)$',r'\1',domain) 278 | if len(tld) > 2: mindot = 1 # .com .info, .travel, .kitchen &c 279 | elif len(tld) == 2: mindot = 2 # gov.us com.cn &c 280 | else: 281 | # logger.info("weird domain [[%s]]",domain) 282 | return domain 283 | while domain.count('.') > mindot: 284 | domain1 = re.sub(r'^(pub|web|www*)?-?[0-9]*\.','',domain) 285 | if domain1 == domain: 286 | return domain 287 | else: 288 | domain = domain1 289 | return domain 290 | 291 | def url2host (url): 292 | if url is None or url == '': 293 | return None 294 | if re.match(r'https?://',url): 295 | return re.sub(r'^https?://([^/]*)(/.*)?',r'\1',url) 296 | return 'bad.url' 297 | 298 | def url2domain (url): 299 | return canonicalize_domain(url2host(url)) 300 | 301 | def sigmoid (v): 302 | return 1/(1+math.exp(-v)) 303 | def antisigmoid(v): 304 | return -math.log(1/v - 1) 305 | 306 | def bin_entropy (total, first): 307 | "Return the total entropy in nats." 308 | if total < 0 or first < 0 or first > total: 309 | raise ValueError("util.bin_entropy",total,first) 310 | if total == 0 or first == 0 or first == total: 311 | return 0 312 | second = total - first 313 | return math.log(total) - ( 314 | first * math.log(first) + second * math.log(second)) / total 315 | 316 | def bin_mutual_info (total, actual, predicted, tp): 317 | "Return the mutual information in nats." 
318 | fn = actual - tp 319 | fp = predicted - tp 320 | tn = total - actual - predicted + tp 321 | if (total < 0 or actual > total or actual < 0 or predicted > total 322 | or predicted < 0 or tp < 0 or fn < 0 or fp < 0 or tn < 0): 323 | raise ValueError("util.bin_mutual_info",total, actual, predicted, tp) 324 | if total == 0 or actual == 0 or actual == total or predicted == 0 or predicted == total: 325 | return 0 326 | mi = 0 327 | total = float(total) 328 | if tp > 0: 329 | mi += tp * math.log(total * tp / (actual * predicted)) 330 | if fn > 0: 331 | mi += fn * math.log(total * fn / (actual * (total-predicted))) 332 | if fp > 0: 333 | mi += fp * math.log(total * fp / ((total-actual) * predicted)) 334 | if tn > 0: 335 | mi += tn * math.log(total * tn / ((total-actual) * (total-predicted))) 336 | return mi / total 337 | 338 | def dict_entropy (counts, missing = None, scaledto1 = False): 339 | "Return total entropy and the entropy with missing dropped." 340 | n = sum(counts.itervalues()) 341 | if n == 0 or len(counts) <= 1: 342 | return (0,None) 343 | s = sum(c*math.log(c,2) for c in counts.itervalues()) 344 | entropy_total = math.log(n,2) - s / n 345 | if missing in counts: 346 | if len(counts) == 2: 347 | entropy_present = 0 348 | else: 349 | nonN = counts[missing] 350 | n -= nonN 351 | entropy_present = math.log(n,2) - (s - nonN * math.log(nonN,2)) / n 352 | else: entropy_present = None 353 | if scaledto1: 354 | if entropy_total is not None: 355 | entropy_total /= math.log(len(counts), 2) 356 | if entropy_present is not None: 357 | entropy_present /= math.log(len(counts), 2) 358 | return (entropy_total,entropy_present) 359 | 360 | def dict__str__ (counts, missing = None): 361 | "Return a short string describing the counter dictionary." 
362 | entropy_total,entropy_present = dict_entropy(counts,missing) 363 | return "len={l:,d}; sum={s:,d}; entropy={e:g}{p:s}".format( 364 | l=len(counts),s=sum(counts.itervalues()),e=entropy_total, 365 | p=("" if entropy_present is None else 366 | "/{p:g}".format(p=entropy_present))) 367 | 368 | def title2missing (title): 369 | return tuple([None] * len(title)) if isinstance(title,tuple) else None 370 | 371 | def title2string(title, sep='-'): 372 | return sep.join(str(o) for o in title) if not isinstance(title,str) else title 373 | 374 | # http://stackoverflow.com/questions/613183/python-sort-a-dictionary-by-value 375 | # http://stackoverflow.com/questions/28839182/sorting-dictionary-by-value-and-lexicographical 376 | def counter2pairs (counter): 377 | # count: reverse, value: lexicographical 378 | return sorted(counter.iteritems(), key=lambda (k,v): (-v,k)) 379 | 380 | def dict_drop_rare (counter, min_count): 381 | return dict((k,v) for (k,v) in counter.iteritems() if v >= min_count) 382 | 383 | def counter_aggregate (dicts): 384 | ret = dict() 385 | for di in dicts: 386 | for what,count in di.iteritems(): 387 | ret[what] = ret.get(what,0) + count 388 | return ret 389 | 390 | def counter2string (counter, sep="; ", maxlen=None): 391 | total = sum(counter.itervalues()) 392 | pairs = counter2pairs(counter) 393 | if maxlen and maxlen < len(pairs): 394 | suffix = "...%d omitted" % (len(pairs) - maxlen) 395 | pairs = pairs[:maxlen] 396 | else: 397 | suffix = "" 398 | return sep.join("[{k:s}: {v:,d} ({p:.2%})]".format( 399 | k=str(k),v=v,p=float(v)/total) for (k,v) in pairs)+suffix 400 | 401 | class PrintCounter (object): 402 | min_count_default = 0 # omit if count less that this OR 403 | max_row_default = sys.maxint # ... already have this many rows OR 404 | min_percent_default = 0. # ... percent less that this 405 | min_row_default = 10 # ... but ONLY IF already printed at least this much 406 | min_omit_default = 10 # ... 
_PY2 = sys.version_info[0] == 2
# text type: `unicode` on Python 2, plain `str` on Python 3
_unicode = unicode if _PY2 else str

def _as_ascii (o):
    "Return o as a native str (Python 2 unicode is encoded as UTF-8)."
    if isinstance(o,str):
        return o
    if isinstance(o,_unicode):  # reachable on Python 2 only
        return o.encode('utf-8')
    return str(o)

def _csv_cell (value):
    "Return value in the form csv.writer wants: UTF-8 bytes on Python 2, text on Python 3."
    text = _unicode(value)
    return text.encode('utf-8') if _PY2 else text

class PrintCounter (object):
    """Print a counter (dict observation --> count) as a table or a CSV file.

    The table printed by out() lists rows by decreasing count and is
    truncated according to the thresholds below; each threshold can be
    overridden with the `pc_<name>` option passed to the constructor.
    """
    min_count_default = 0      # omit if count less that this OR
    # sys.maxint is gone in Python 3; sys.maxsize is the closest equivalent
    max_row_default = getattr(sys,'maxint',sys.maxsize) # ... already have this many rows OR
    min_percent_default = 0.   # ... percent less that this
    min_row_default = 10       # ... but ONLY IF already printed at least this much
    min_omit_default = 10      # ... AND do NOT omit less than this much
    header_default = '==='     # printed before the counter title
    prefix_default = None      # printed before the list
    suffix_default = None      # printed after the list

    _option_names = ('min_count', 'max_row', 'min_percent', 'min_row',
                     'min_omit', 'header', 'prefix', 'suffix')

    def __init__ (self, *args, **kwargs):
        """Accept the options either as one mapping or as keyword arguments.

        Every option is looked up as 'pc_<name>' (the names match those
        created by add_arguments); unset options fall back to the class
        defaults.  Mixing the two calling styles raises Exception.
        """
        # args -- tuple of anonymous arguments
        # kwargs -- dictionary of named arguments
        if len(args) > 0:
            if len(kwargs) == 0:
                kwargs = args[0]
            else: raise Exception("PrintCounter: cannot mix anonymous & named")
        # default_None is defined elsewhere in this module; presumably it
        # returns its 2nd argument when the 1st is None -- TODO confirm
        for name in PrintCounter._option_names:
            setattr(self, name, default_None(
                kwargs.get('pc_' + name),
                getattr(PrintCounter, name + '_default')))
        self.total = dict() # fill it outside for cross-percentages

    @staticmethod
    def add_arguments (parser):
        "Add the -pc-* command line options to an argparse parser."
        # NB: the command line defaults for max_row and min_percent
        # differ from the class defaults above
        parser.add_argument('-pc-min_count', type=int, help='for PrintCounter')
        parser.add_argument('-pc-max_row', type=int, default=100, help='for PrintCounter')
        parser.add_argument('-pc-min_percent', type=float, default=1.0, help='for PrintCounter')
        parser.add_argument('-pc-min_row', type=int, help='for PrintCounter')
        parser.add_argument('-pc-min_omit', type=int, help='for PrintCounter')
        parser.add_argument('-pc-header', help='for PrintCounter')
        parser.add_argument('-pc-prefix', help='for PrintCounter')
        parser.add_argument('-pc-suffix', help='for PrintCounter')

    def out (self, counter, title, missing = None):
        """Print the counter as a table; return True iff the table was truncated.

        counter -- dict observation --> count
        title   -- a string, or a tuple of column names for tuple observations
        missing -- the "no value" key passed to the entropy summary
                   (default: derived from title by title2missing)

        When the sum of counts equals the number of rows, only the list
        of keys is printed and False is returned.
        """
        if missing is None:
            missing = title2missing(title)
        title = title2string(title)
        total = sum(counter.values())
        num_rows = len(counter)
        if total == num_rows:
            # str(list(...)): a key list does not accept the 's' format
            # spec on Python 3; the text is the same as on Python 2
            print("{h:s} {t:s} {n:,d} items: {i:s}".format(
                h=self.header,t=title,n=num_rows,i=str(list(counter.keys()))))
            return False
        small5 = dict_drop_rare(counter,5)
        if len(small5) == len(counter) or len(small5) < 2:
            print("{h:s} {t:s} ({a:s})".format(
                h=self.header,t=title,a=dict__str__(counter,missing)))
        else:
            # also summarize the counter without the rare (count<5) entries
            print("{h:s} {t:s} ({a:s})/({s:s})".format(
                h=self.header,t=title,a=dict__str__(counter,missing),
                s=dict__str__(small5,missing)))
        left = 1                # share of the total not printed yet
        if self.prefix is not None:
            print(self.prefix)
        for row, (obj, count) in enumerate(counter2pairs(counter), 1):
            # counts summing to 0 are shown as 0% (no ZeroDivisionError)
            percent = float(count) / total if total else 0.0
            omit = num_rows - row + 1   # rows not printed yet, this one included
            if ((count < self.min_count or row > self.max_row
                 or 100 * percent < self.min_percent)
                and omit > self.min_omit and row > self.min_row):
                print(" - omitted {o:,d} rows ({l:.2%})".format(o=omit,l=left))
                if self.suffix is not None:
                    print(self.suffix)
                return True     # truncated
            left -= percent
            # cross-percentage against the externally supplied totals
            xp = ("" if obj not in self.total else
                  " ({p:.2%})".format(p=float(count)/self.total[obj]))
            if isinstance(obj,tuple):
                print(" {r:5d} {o:s} {c:12,d} {p:6.2%}{xp:s}".format(
                    r=row, o=" ".join(_as_ascii(o).rjust(30) for o in obj),
                    c=count, p=percent, xp=xp))
            else:
                print(" {r:5d} {o:30s} {c:12,d} {p:6.2%}{xp:s}".format(
                    r=row, o=_as_ascii(obj), c=count, p=percent, xp=xp))
        if self.suffix is not None:
            print(self.suffix)
        return False            # no truncation

    @staticmethod
    def csv (counter, title, destination, logger=None, smallest = 0):
        """Write the counter as CSV, most frequent observations first.

        destination -- either a file name prefix (the title and ".csv"
                       are appended to it) or an object csv.writer can
                       write to
        smallest    -- rows with a smaller count are not written

        A tuple title means that observations are tuples of the same
        length; each component gets its own column.
        """
        if isinstance(destination, str):
            if isinstance(title,tuple):
                destination += "-".join(str(o) for o in title) + ".csv"
            else:
                destination += title + ".csv"
            info("Writing {r:,d} rows to [{d:s}]".format(
                r=len(counter),d=destination),logger)
            # the csv module wants a binary file on Python 2 and a text
            # file opened with newline="" on Python 3
            if _PY2:
                dest = open(destination,"wb")
            else:
                dest = open(destination,"w",newline="",encoding="utf-8")
            with dest:
                PrintCounter.csv(counter, title, dest, smallest=smallest)
            wrote(destination,logger=logger)
        else:
            writer = csv.writer(destination)
            compound = isinstance(title,tuple)
            writer.writerow((list(title) if compound else [title])+["count"])
            for observation,count in counter2pairs(counter):
                if count >= smallest:
                    fields = observation if compound else (observation,)
                    writer.writerow([_csv_cell(x) for x in fields]+[count])
            # chances are, wrote() above will write a better message than
            # anything that could be reported from here
# http://stackoverflow.com/questions/390250/elegant-ways-to-support-equivalence-equality-in-python-classes
class CommonMixin(object):
    """Give a class attribute-based equality and a dict-like str().

    Two objects are equal iff they have exactly the same type and equal
    instance dictionaries.
    NB: __hash__ is deliberately left alone here, so on Python 3 the
    instances are unhashable unless the class defines __hash__ itself.
    """
    def __eq__(self, other):
        return type(other) is type(self) and self.__dict__ == other.__dict__
    def __ne__(self, other):
        return not self.__eq__(other)
    def __str__(self):
        return str(self.__dict__)

def wilson (success, total, z = 1.96):
    """Return the center and the half-length of the Wilson confidence interval

    success -- number of successes
    total   -- number of trials
    z       -- normal quantile of the confidence level (1.96 ~ 95%)

    With no trials at all the interval is the whole of [0;1], i.e.,
    (0.5, 0.5) -- the limit of the formulas below as total --> 0.
    """
    if total == 0:
        return (0.5, 0.5)
    p = float(success) / total
    scale = 1 / (1 + z*z / total)
    center = ( p + z*z / (2 * total) ) * scale
    halfwidth = z * math.sqrt( p*(1-p) / total + z*z/(4*total*total) ) * scale
    return (center, halfwidth)
# pass an empty collections.defaultdict(int) as types
# and it will be filled with type counts
# NB: this will double count objects which appear multiple times in containers
def sizeof (obj, types = None, _active = None):
    """Return the deep size of obj in bytes as reported by sys.getsizeof.

    Lists, tuples, sets, frozensets and dicts (keys and values) are
    traversed recursively; everything else is measured shallowly.
    _active is internal: the ids of the containers being traversed right
    now, so that a container which (indirectly) contains itself is
    counted once instead of recursing forever.
    """
    if _active is None:
        _active = set()
    if id(obj) in _active:
        return 0                # a cycle: obj is already being counted
    ret = sys.getsizeof(obj)
    if types is not None:
        types[type(obj).__name__] += 1
    if isinstance(obj,(list,tuple,set,frozenset)):
        children = obj
    elif isinstance(obj,dict):
        children = [x for kv in obj.items() for x in kv]
    else:
        return ret
    _active.add(id(obj))
    try:
        for x in children:
            ret += sizeof(x, types = types, _active = _active)
    finally:
        # only the current path is tracked, so shared (non-cyclic)
        # objects are still double counted, as noted above
        _active.discard(id(obj))
    return ret

def bytes2string (s):
    "Return a human-readable byte count; large counts also get a short binary-unit form."
    # bitlen and asBigNumberBin are defined elsewhere in this module
    if bitlen(s) > 10:
        return "{b:,d} bytes ({a:s}B)".format(b=s,a=asBigNumberBin(s))
    else:
        return "{b:,d} bytes".format(b=s)

def filesize2string (f):
    "Return the size of the file named f, formatted by bytes2string."
    return bytes2string(os.path.getsize(f))

def reading (f,logger = None):
    "Log that the file named f (which must exist) is about to be read."
    info("Reading {s:s} from [{f:s}]".format(s=filesize2string(f),f=f),logger)

def wrote (f,logger = None):
    "Log that the file named f has been written."
    info("Wrote {s:s} into [{f:s}]".format(s=filesize2string(f),f=f),logger)

def enum (name, values):
    "Return a new class called name with an attribute X == 'X' for each X in values."
    return type(name, (), dict(zip(values,values)))

def enum_get (cl, val):
    "Return val if it is a member of the enum class cl, raise ValueError otherwise."
    ret = cl.__dict__.get(val)
    # `ret is not None`: a missing key must not match val=None
    if ret is not None and val == ret: return ret
    raise ValueError("enum_get: Bad value for Enum",cl.__name__,val)

def read_multimap (inf, delimiter, col1, col2, logger = None,
                   keyproc = None, valproc = None):
    '''Read a multi-map from a TSV/CSV stream with 2 columns.

    inf       -- a file name or an iterable of lines
    delimiter -- the field separator (backslash is the escape character)
    col1      -- index of the key column
    col2      -- index of the value column
    keyproc, valproc -- optional functions applied to the stripped key
                 and value strings

    Returns dict key --> set of values.  Reading stops at the first row
    which is too short to contain both columns; duplicate (key,value)
    pairs are reported with warn().
    '''
    if isinstance(inf,str):
        reading(inf,logger=logger)
        with open(inf) as ins:
            return read_multimap(ins,delimiter,col1,col2,logger=logger,
                                 keyproc=keyproc,valproc=valproc)
    ret = dict()
    lines = 0
    last_col = max(col1,col2)
    for row in csv.reader(inf,delimiter=delimiter,escapechar='\\'):
        if len(row) <= last_col:
            warn("Bad line %s, aborting" % (row,),logger)
            break
        lines += 1
        key = row[col1].strip()
        if keyproc is not None:
            key = keyproc(key)
        val = row[col2].strip()
        if valproc is not None:
            val = valproc(val)
        s = ret.setdefault(key,set())
        if val in s:
            warn("Duplicate value [%s] for key [%s]" % (val,key),logger)
        s.add(val)
    info("Read {l:,d} lines with {k:,d} keys and {v:,d} values".format(
        l=lines,k=len(ret),v=sum(len(s) for s in ret.values())),logger)
    return ret

if __name__ == '__main__':
    test()
Living\Finance & Investment 9 | address for clallam county sheriff Information\Local & Regional Information\Law & Politics 10 | adelphia power page Computers\Internet & Intranet Online Community\Homepages 11 | adidas campus Living\Fashion & Apparel Living\Health & Fitness Shopping\Stores & Products Shopping\Bargains & Discounts 12 | aerial advertising Information\Companies & Industries Living\Career & Jobs Shopping\Stores & Products 13 | Aerosols Information\Science & Technology Information\References & Libraries 14 | affiliateprograms Information\Companies & Industries Living\Career & Jobs 15 | african capitals Information\Local & Regional Information\References & Libraries 16 | aircargo tracking Shopping\Other 17 | airline carryon restrictions Living\Travel & Vacation Information\Other 18 | airline reservations Living\Travel & Vacation Shopping\Other 19 | alcoholic recipes Living\Food & Cooking Living\Other 20 | all music Entertainment\Music 21 | allan iverson Sports\Basketball Shopping\Auctions & Bids Entertainment\Games & Toys 22 | alpena high school Information\Local & Regional Information\Education 23 | american candle factory Shopping\Stores & Products Shopping\Auctions & Bids Living\Furnishing & Houseware 24 | american express company Information\Companies & Industries 25 | american girl Living\Book & Magazine Entertainment\Games & Toys Living\Gifts & Collectables 26 | amerisuites Living\Travel & Vacation Information\Companies & Industries 27 | amniocentisis Living\Health & Fitness Information\Science & Technology 28 | animated movies Entertainment\Movies Information\References & Libraries 29 | anne klein watches Living\Gifts & Collectables Living\Fashion & Apparel Shopping\Stores & Products 30 | anniversary ring Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 31 | annuity loans Living\Finance & Investment Information\References & Libraries Information\Companies & Industries Shopping\Stores & Products 32 | apha 
Living\Health & Fitness Information\References & Libraries 33 | applause Entertainment\Other Information\References & Libraries 34 | application servers Computers\Software Computers\Other Information\Companies & Industries 35 | appraiser forum Online Community\Forums & Groups 36 | aquarium Entertainment\Other Living\Furnishing & Houseware Living\Pets & Animals Shopping\Buying Guides & Researching 37 | arabic poems Information\Arts & Humanities Information\References & Libraries 38 | architecture competition Information\Arts & Humanities Information\References & Libraries 39 | arizona bar exam Information\Law & Politics Information\References & Libraries Information\Local & Regional 40 | arthritis pain Living\Health & Fitness Information\References & Libraries 41 | arthur tress Information\Arts & Humanities Entertainment\Other 42 | articles of incorperation;washington Information\Local & Regional Information\References & Libraries Information\Law & Politics 43 | artistic greetings Information\Arts & Humanities Living\Gifts & Collectables Shopping\Stores & Products 44 | ashley furniture outlet Living\Furnishing & Houseware Shopping\Stores & Products 45 | asia pacific Information\Local & Regional Information\References & Libraries Living\Travel & Vacation 46 | asking for a raise Living\Career & Jobs 47 | askjevies Computers\Internet & Intranet Online Community\Homepages 48 | ata hotels in italy Information\Local & Regional Living\Travel & Vacation Shopping\Other 49 | athlon xp Computers\Hardware Computers\Other Computers\Mobile Computing 50 | atv trails Sports\Outdoor Recreations Sports\Other 51 | aubergine Information\Local & Regional Living\Food & Cooking 52 | austin realtors Information\Local & Regional Living\Real Estate 53 | australian law foundation Information\Local & Regional Information\Law & Politics 54 | auto blue book prices Living\Car & Garage Shopping\Bargains & Discounts 55 | automotive lights Living\Car & Garage Shopping\Stores & Products 56 | autos 
under $200.00 Shopping\Bargains & Discounts Living\Car & Garage 57 | avecia Information\Companies & Industries Information\References & Libraries Information\Science & Technology 58 | azcentral Living\Book & Magazine Online Community\Homepages Computers\Internet & Intranet 59 | baby arab songs Entertainment\Music Living\Family & Kids 60 | babylonian numbers Information\References & Libraries Information\Education 61 | baker's rack Living\Furnishing & Houseware Living\Food & Cooking Shopping\Stores & Products 62 | balmorhea blue agate Information\Science & Technology Living\Gifts & Collectables Shopping\Stores & Products 63 | baltimore ravens Sports\American Football Sports\Schedules & Tickets Sports\News & Scores 64 | bankruptcy fraud Living\Finance & Investment Information\Companies & Industries 65 | bar accessories Living\Furnishing & Houseware 66 | barnyard animals Living\Pets & Animals Entertainment\Other 67 | barnyard pictures Entertainment\Pictures & Photos Information\References & Libraries Living\Landscaping & Gardening 68 | barstool Living\Furnishing & Houseware Shopping\Stores & Products 69 | basic basketball skills Sports\Basketball 70 | bass sandals Living\Fashion & Apparel Shopping\Stores & Products 71 | bateries or for or cell or phones Computers\Mobile Computing Shopping\Other Living\Tools & Hardware 72 | baylor hospital irving Living\Health & Fitness Information\Local & Regional Living\Gifts & Collectables 73 | beacon federal Living\Finance & Investment Information\Companies & Industries 74 | beaver collectibles Living\Gifts & Collectables Living\Pets & Animals 75 | beckhoff Computers\Hardware Computers\Software Information\Companies & Industries Shopping\Stores & Products 76 | bedroom idea master Living\Furnishing & Houseware Living\Health & Fitness Living\Real Estate Shopping\Buying Guides & Researching 77 | best water treatment Living\Health & Fitness Information\Other Living\Landscaping & Gardening 78 | bicycle rides party Sports\Outdoor 
Recreations Living\Health & Fitness 79 | biography beatrix potter Information\Arts & Humanities Information\References & Libraries Living\Family & Kids 80 | bird of paradise flower Living\Landscaping & Gardening Living\Gifts & Collectables Information\References & Libraries 81 | birthdaysandangels Living\Family & Kids Entertainment\Other 82 | black art Information\Arts & Humanities Information\References & Libraries Living\Gifts & Collectables Shopping\Buying Guides & Researching 83 | black boy Information\Arts & Humanities Living\Book & Magazine Information\Law & Politics 84 | black history month this week Information\References & Libraries Information\Arts & Humanities Information\Law & Politics 85 | black monday Entertainment\Games & Toys Living\Finance & Investment Information\Companies & Industries Living\Fashion & Apparel 86 | bladder cancer symptoms Living\Health & Fitness Information\Science & Technology Information\References & Libraries Information\Education 87 | blood work Living\Health & Fitness Information\Education Living\Career & Jobs Information\Local & Regional 88 | bob moore auto Shopping\Stores & Products Living\Car & Garage 89 | bodega bay sportfishing Sports\Outdoor Recreations Information\Local & Regional Living\Pets & Animals Living\Travel & Vacation 90 | bolt-on rollcages Living\Car & Garage Living\Tools & Hardware Shopping\Stores & Products 91 | bon ton Shopping\Stores & Products Information\Local & Regional 92 | book storage and preservation Information\References & Libraries Information\Education Living\Book & Magazine 93 | boostmobile Computers\Mobile Computing Shopping\Bargains & Discounts 94 | boreal ski Information\Local & Regional Sports\Outdoor Recreations 95 | boss pedals Shopping\Stores & Products Computers\Hardware Entertainment\Other 96 | boston colleges Information\Education Information\Local & Regional 97 | boston es lens Information\Local & Regional Shopping\Stores & Products Entertainment\Pictures & Photos 98 | bourbon 
reforms Information\Local & Regional Information\Law & Politics Information\References & Libraries 99 | bra size Living\Fashion & Apparel Information\Other Shopping\Buying Guides & Researching 100 | brainpop electricity Information\Education Information\References & Libraries 101 | brand management Living\Career & Jobs Information\Arts & Humanities Information\Companies & Industries Information\References & Libraries 102 | breeding hourse and raising Living\Pets & Animals Information\References & Libraries Information\Education 103 | brian lumley Living\Book & Magazine Entertainment\Humor & Fun Information\Arts & Humanities 104 | broadband internet Computers\Internet & Intranet Online Community\Other 105 | brokerage houses Living\Finance & Investment 106 | brooks Living\Fashion & Apparel Shopping\Stores & Products 107 | broyhill computer armoire Living\Furnishing & Houseware Computers\Other 108 | brush cutters Living\Furnishing & Houseware Living\Landscaping & Gardening 109 | bua chompoo Entertainment\Other 110 | bullmastiff photos Living\Pets & Animals Entertainment\Pictures & Photos 111 | bumble bee halloween costumes Living\Family & Kids Living\Fashion & Apparel Entertainment\Games & Toys Entertainment\Other Shopping\Stores & Products 112 | burberry brit Living\Gifts & Collectables Shopping\Stores & Products 113 | cacapon resort west virginia Living\Travel & Vacation Information\Local & Regional 114 | cag conference sacramento Information\Local & Regional 115 | cal state northridge Sports\Baseball Online Community\Homepages 116 | camouflage face paint Entertainment\Other 117 | Campbell Scott Entertainment\Music Entertainment\TV Entertainment\Other 118 | camper trailers Sports\Outdoor Recreations Living\Travel & Vacation 119 | cancell Information\Companies & Industries Information\References & Libraries Information\Science & Technology Living\Health & Fitness 120 | cancer of the eye Living\Health & Fitness Information\Education Information\Science & Technology 
Information\References & Libraries 121 | canton ohio old pewter plate Living\Gifts & Collectables Shopping\Auctions & Bids 122 | cappucino Living\Food & Cooking Shopping\Other 123 | car auction in flat rock michigan Shopping\Auctions & Bids Information\Local & Regional Living\Car & Garage 124 | car loans with bad credit Living\Finance & Investment Living\Car & Garage 125 | car seat laws Information\Law & Politics Living\Car & Garage 126 | car spy pics Living\Car & Garage 127 | caranddriver Living\Car & Garage Living\Book & Magazine Shopping\Buying Guides & Researching 128 | cardinal strich Information\Education Information\Local & Regional 129 | career as a teacher Living\Career & Jobs Information\Education 130 | careercrusing Living\Career & Jobs Living\Travel & Vacation 131 | carmen marc valvo Living\Fashion & Apparel Entertainment\Celebrities Shopping\Stores & Products 132 | carpet shampoo Living\Furnishing & Houseware Living\Pets & Animals Shopping\Stores & Products Shopping\Buying Guides & Researching Living\Real Estate 133 | carpet stains Living\Furnishing & Houseware Living\Pets & Animals Shopping\Stores & Products Shopping\Buying Guides & Researching Living\Real Estate 134 | casino blackjack Entertainment\Games & Toys Entertainment\Other Information\Other Living\Other 135 | casio keyboard Entertainment\Music Living\Furnishing & Houseware Computers\Hardware Information\Arts & Humanities 136 | castles Information\Arts & Humanities Information\References & Libraries Entertainment\Other Living\Gifts & Collectables Living\Real Estate 137 | cat claw decals Shopping\Stores & Products 138 | catholic charities lasvegas Living\Religion & Belief Information\Local & Regional 139 | catwalk hair Living\Other Entertainment\Other 140 | cecil county schools Information\Local & Regional Information\Education 141 | celebrity Entertainment\Celebrities Entertainment\Movies Entertainment\Music Entertainment\Pictures & Photos Entertainment\TV 142 | cellphone service 
Computers\Mobile Computing Shopping\Stores & Products Living\Tools & Hardware 143 | cerritos college Information\Education Information\Local & Regional 144 | chain maille Information\References & Libraries Living\Fashion & Apparel Entertainment\Other Living\Gifts & Collectables 145 | chalks airlines Living\Travel & Vacation Shopping\Bargains & Discounts 146 | champagne punch recipes Living\Food & Cooking 147 | chase mortgage company Living\Finance & Investment Information\Companies & Industries 148 | cheap international airfare Living\Travel & Vacation Shopping\Bargains & Discounts 149 | check credit Living\Finance & Investment 150 | cheese puffs Living\Food & Cooking Entertainment\Other 151 | chemical processing of hair Entertainment\Other Living\Other Information\References & Libraries 152 | chevy luv Living\Car & Garage 153 | chicago tribune Information\Local & Regional Living\Book & Magazine Online Community\Homepages 154 | chicken recipe websites Computers\Internet & Intranet Living\Food & Cooking 155 | childrens encyclopedia Information\Education Information\References & Libraries Living\Book & Magazine 156 | childrens musuem of minnesota Information\Local & Regional Living\Family & Kids Entertainment\Other 157 | china shanghai hotel Information\Local & Regional Living\Travel & Vacation Shopping\Stores & Products 158 | christoper china collection Living\Gifts & Collectables Living\Furnishing & Houseware 159 | christopher lowell collection Living\Furnishing & Houseware Living\Gifts & Collectables 160 | chuck klosterman Information\Local & Regional Living\Book & Magazine 161 | cigar wholesale Entertainment\Other Living\Other Shopping\Bargains & Discounts 162 | cincinnati bikers Information\Local & Regional Sports\Outdoor Recreations 163 | civil war Information\Arts & Humanities Information\References & Libraries 164 | claimspro Information\Companies & Industries Living\Health & Fitness 165 | classified room hold down clips Living\Other 166 | classroom 
accountability system Information\Education 167 | claudia mills Information\Arts & Humanities Living\Book & Magazine Online Community\People Search 168 | clay aiken web sites Entertainment\Music Online Community\People Search Entertainment\Celebrities 169 | clay poker chip Entertainment\Games & Toys Shopping\Stores & Products 170 | clean imesh Computers\Multimedia Computers\Software Entertainment\Music 171 | CLERKS Entertainment\Movies Entertainment\Celebrities Shopping\Stores & Products 172 | cnnasia Entertainment\TV Information\Local & Regional 173 | coach factory outlet Living\Fashion & Apparel Shopping\Bargains & Discounts 174 | coeurdalene Information\Local & Regional Sports\Outdoor Recreations Living\Travel & Vacation 175 | colgate university Information\Education 176 | college fight songs Entertainment\Other Sports\Other 177 | college search Information\Education Information\References & Libraries 178 | coloured contact lens Living\Health & Fitness Shopping\Stores & Products 179 | colouring donald duck page Living\Family & Kids Entertainment\Humor & Fun Entertainment\Games & Toys 180 | columbia missourian birth announcements Information\Local & Regional Living\Other 181 | columbia sandals Living\Fashion & Apparel Shopping\Stores & Products 182 | combat instinct3 Entertainment\Games & Toys Computers\Multimedia 183 | comfot inn Living\Travel & Vacation Information\Companies & Industries Shopping\Stores & Products 184 | comic images Entertainment\Pictures & Photos Entertainment\Humor & Fun 185 | commercial mortgage lender Living\Finance & Investment Information\Companies & Industries Living\Real Estate 186 | composition poses a problem for literature Information\Education Information\Arts & Humanities 187 | computer dictionary Computers\Software Living\Book & Magazine Information\References & Libraries 188 | consulting companies Information\Companies & Industries Living\Career & Jobs 189 | consumer credit counseling Living\Finance & Investment 
Information\Companies & Industries 190 | contactlens Living\Health & Fitness Shopping\Stores & Products 191 | contra roms Entertainment\Games & Toys Shopping\Stores & Products 192 | converting a 1972 beetle sedan into a convertible Living\Car & Garage Living\Tools & Hardware 193 | cooking supplies Living\Food & Cooking Living\Tools & Hardware 194 | copier repair Computers\Hardware Living\Tools & Hardware 195 | coquitlam centre Information\Local & Regional 196 | costa rica information Living\Travel & Vacation Information\Local & Regional 197 | costarica Living\Travel & Vacation Information\Local & Regional 198 | country music movies Entertainment\Movies Entertainment\Music 199 | cowan realtors Living\Real Estate Information\Companies & Industries 200 | credit reporting services Living\Finance & Investment Information\Companies & Industries Shopping\Stores & Products 201 | crockpot recipe Living\Food & Cooking 202 | cross pendant Information\Companies & Industries Shopping\Stores & Products Living\Gifts & Collectables Living\Religion & Belief 203 | cross stitch angels Information\Companies & Industries Shopping\Stores & Products Living\Gifts & Collectables Living\Religion & Belief 204 | cruise value Living\Travel & Vacation Shopping\Bargains & Discounts Shopping\Buying Guides & Researching 205 | cumberland times newspaper Information\Local & Regional Online Community\Homepages 206 | curling irons Living\Tools & Hardware Sports\Other 207 | custom mail boxes Computers\Internet & Intranet Information\Science & Technology Computers\Networks & Telecommunication Living\Tools & Hardware 208 | d'addario strings Entertainment\Music 209 | dail up connetion Computers\Internet & Intranet Computers\Networks & Telecommunication Information\Science & Technology 210 | david blaine Entertainment\Celebrities Entertainment\Other Entertainment\Games & Toys 211 | day after day Entertainment\Music Information\Arts & Humanities Shopping\Other 212 | DAYLIGHT Living\Health & Fitness 
Living\Family & Kids Information\Science & Technology Living\Travel & Vacation Living\Other 213 | dead to rights guide Entertainment\Games & Toys Entertainment\Movies 214 | deaf dogs Living\Pets & Animals Information\References & Libraries 215 | debt consolidate Living\Finance & Investment Information\Companies & Industries Shopping\Stores & Products 216 | default credit card debt Living\Finance & Investment Information\Companies & Industries Shopping\Stores & Products 217 | Dennis Christopher Entertainment\Celebrities 218 | derrek lee Sports\Baseball 219 | devil costume accessories Shopping\Stores & Products Entertainment\Games & Toys 220 | dianne mcguiness Information\Other Living\Book & Magazine 221 | diaphragmatic breathing Living\Health & Fitness Sports\Other 222 | diet calculator Information\Science & Technology Living\Health & Fitness 223 | dingking Living\Tools & Hardware Information\Companies & Industries Shopping\Stores & Products 224 | dirt devil vacuum Living\Furnishing & Houseware Shopping\Stores & Products 225 | discount bmw parts Living\Car & Garage Shopping\Bargains & Discounts 226 | divinity candy Living\Food & Cooking 227 | dog jokes Entertainment\Humor & Fun 228 | doll knitting Entertainment\Games & Toys Living\Family & Kids 229 | dollz maker Entertainment\Games & Toys Living\Family & Kids 230 | dollz pic Entertainment\Games & Toys Living\Family & Kids 231 | dough ornaments Living\Food & Cooking 232 | dover elevator Information\Companies & Industries 233 | download directx Computers\Software Computers\Internet & Intranet 234 | drew shoes Living\Fashion & Apparel Shopping\Stores & Products 235 | drug guide Living\Health & Fitness Information\References & Libraries Shopping\Buying Guides & Researching 236 | ducati motorcycle Living\Car & Garage Shopping\Stores & Products 237 | dvd label searcher Shopping\Stores & Products Computers\Internet & Intranet Entertainment\Movies 238 | e scooter Living\Other 239 | ear plug Living\Health & Fitness 240 | 
early simptons of hiv Information\Science & Technology Living\Health & Fitness Information\References & Libraries 241 | east texas real estate Living\Real Estate Information\Local & Regional 242 | eastmountainsouth lyrics Entertainment\Music 243 | eaton aeroquip Information\Companies & Industries 244 | ebay costumes Shopping\Stores & Products Shopping\Auctions & Bids 245 | eddie b Information\Companies & Industries Living\Fashion & Apparel Shopping\Stores & Products 246 | edina schools Information\Education Information\Local & Regional 247 | egyptains Information\Local & Regional Information\References & Libraries Information\Law & Politics 248 | el tovar hotel Living\Travel & Vacation 249 | electric log splitters Living\Tools & Hardware 250 | elementary education Information\Education 251 | elen feinberg Information\Law & Politics 252 | elf pics Entertainment\Pictures & Photos 253 | elijah house Online Community\Homepages 254 | emerica skate Sports\Outdoor Recreations Shopping\Stores & Products Information\Local & Regional 255 | entertainment hollywood california feb 2005 Entertainment\Other Information\Local & Regional 256 | environmentally friendly darkroom chemicals\ Entertainment\Pictures & Photos Living\Tools & Hardware 257 | epiglottitis Information\Science & Technology 258 | ernie ball Entertainment\Music Shopping\Other 259 | espn picks Entertainment\TV Sports\Other 260 | espnlive Entertainment\TV Computers\Internet & Intranet Sports\Other 261 | eu members Information\Law & Politics 262 | eukanuba dog food Living\Pets & Animals Shopping\Stores & Products 263 | exersaucer Living\Family & Kids Entertainment\Games & Toys Information\Companies & Industries Shopping\Stores & Products 264 | eye bags Living\Health & Fitness 265 | family circle Living\Book & Magazine Entertainment\Humor & Fun 266 | familytrust Information\Companies & Industries Living\Finance & Investment 267 | famous people costumes Living\Fashion & Apparel Entertainment\Celebrities 
Entertainment\Humor & Fun 268 | famous volcanoes Information\Other Information\References & Libraries 269 | fantasy games Entertainment\Games & Toys Computers\Internet & Intranet 270 | farside gallery Living\Book & Magazine Entertainment\Humor & Fun 271 | fastenable plastic containers Living\Tools & Hardware Shopping\Stores & Products 272 | father daughter Living\Family & Kids 273 | ferrari spyder Living\Car & Garage 274 | fertility calender Living\Health & Fitness 275 | file local taxes in harrisburg pa Living\Finance & Investment Information\Law & Politics Information\Local & Regional 276 | file taxes on line Living\Finance & Investment Computers\Internet & Intranet Information\Law & Politics 277 | final fantasy tactics jobs Living\Career & Jobs 278 | findagrave Living\Family & Kids Computers\Internet & Intranet Information\Local & Regional 279 | finger eleven Entertainment\Music 280 | Finola Hughes Entertainment\Celebrities 281 | first americans Information\Arts & Humanities Information\References & Libraries 282 | fleetwood rv Living\Car & Garage 283 | florida athletics Sports\Other Information\Local & Regional 284 | florida corporation Information\Local & Regional Information\Companies & Industries 285 | food network .com/tv Entertainment\TV Computers\Internet & Intranet 286 | foodwebs Information\Science & Technology Information\References & Libraries 287 | ford ranger forums Online Community\Forums & Groups Living\Car & Garage 288 | forever and always Information\Companies & Industries Entertainment\Games & Toys 289 | formal evening dresses Shopping\Stores & Products Living\Fashion & Apparel 290 | forum avatars Online Community\Forums & Groups Entertainment\Pictures & Photos 291 | frame oval picture Entertainment\Pictures & Photos Living\Gifts & Collectables 292 | franklin county recorder Information\Law & Politics Information\Local & Regional Living\Real Estate 293 | frannet Information\Companies & Industries Computers\Internet & Intranet Online 
Community\Forums & Groups 294 | freddy crougar Information\Other 295 | free christian chat Online Community\Chat & Instant Messaging Living\Religion & Belief 296 | free clip arts Information\Arts & Humanities Entertainment\Pictures & Photos Computers\Software Computers\Other 297 | free cursive fonts Computers\Software 298 | free cute baby photo contests Living\Family & Kids Entertainment\Pictures & Photos 299 | free full 3d massively multiplayer online game downloads Computers\Multimedia Computers\Internet & Intranet Computers\Software Entertainment\Games & Toys 300 | free game whomp word Entertainment\Games & Toys Computers\Software 301 | free nextel ringtone Entertainment\Music Information\Arts & Humanities Computers\Mobile Computing 302 | free payroll software Computers\Software Living\Finance & Investment 303 | free samples for teens Shopping\Bargains & Discounts Living\Family & Kids 304 | free scrapbooking pages Computers\Multimedia Entertainment\Pictures & Photos Living\Tools & Hardware 305 | free speed booster Computers\Networks & Telecommunication Computers\Software 306 | free stuff for your website Computers\Internet & Intranet 307 | freezing vegetables Living\Food & Cooking 308 | friends scripts Entertainment\TV 309 | frontier community college; il Information\Education Information\References & Libraries 310 | full metal jacket pics Entertainment\Movies Entertainment\Pictures & Photos 311 | funnies Entertainment\Humor & Fun 312 | g.w. 
bush's cabnete Information\Law & Politics 313 | garbage Entertainment\Music Living\Furnishing & Houseware 314 | garrett college Information\Education Living\Family & Kids Information\Local & Regional 315 | gastritis Living\Health & Fitness Information\Science & Technology 316 | gemtek Living\Furnishing & Houseware Living\Landscaping & Gardening Living\Real Estate 317 | gerontological nursing Information\Science & Technology Information\Education Living\Health & Fitness 318 | gigabyte motherboard Computers\Hardware Information\Science & Technology Shopping\Stores & Products 319 | gilda texter Entertainment\Movies 320 | gladiator soundtrack Entertainment\Movies Entertainment\Other Shopping\Stores & Products 321 | glencoe Information\Education 322 | gmac auto loan Living\Finance & Investment Living\Car & Garage 323 | gocarts Sports\Auto Racing Living\Car & Garage 324 | godsmack concert tickets Shopping\Buying Guides & Researching Entertainment\Music 325 | gohan and videl Entertainment\Games & Toys Entertainment\Pictures & Photos Information\Arts & Humanities Living\Family & Kids 326 | gold chains Shopping\Stores & Products Living\Fashion & Apparel 327 | gold shield Living\Health & Fitness Living\Car & Garage 328 | golden shrimp recipe at tsukasa Living\Food & Cooking Information\Local & Regional 329 | golf ball testing Sports\Outdoor Recreations Shopping\Other 330 | golf discount store Shopping\Bargains & Discounts Sports\Outdoor Recreations 331 | golf glen movie theatre illinois Information\Local & Regional Entertainment\Movies 332 | goverment programs Information\Law & Politics 333 | grand hyatt washington dc Information\Local & Regional Living\Travel & Vacation 334 | grandaddy lyrics Entertainment\Music Information\Arts & Humanities Living\Family & Kids 335 | great lakes aviation Sports\Outdoor Recreations Information\Local & Regional 336 | green tomato mincemeat Living\Food & Cooking 337 | greyhoundlines Information\Local & Regional Living\Travel & Vacation 338 | 
guru rinpoche practice Living\Religion & Belief 339 | habbo hotel cheats Entertainment\Games & Toys Computers\Software Computers\Security 340 | halloween pirate costume Living\Fashion & Apparel Living\Family & Kids Shopping\Stores & Products 341 | hand made bird feeder Shopping\Stores & Products Living\Tools & Hardware 342 | healthaccess Living\Health & Fitness Information\Local & Regional 343 | heartache Living\Health & Fitness Information\Science & Technology 344 | heartburn Living\Health & Fitness Information\Science & Technology Information\References & Libraries 345 | heelys Shopping\Stores & Products Living\Fashion & Apparel 346 | hilton maldives Living\Travel & Vacation 347 | hip clothing Living\Fashion & Apparel 348 | history of jerusalem Information\Arts & Humanities Information\Law & Politics Information\Local & Regional Information\References & Libraries 349 | hms capitol Living\Real Estate Living\Finance & Investment 350 | holes movie Entertainment\Movies Information\Arts & Humanities 351 | home brewing kit Living\Food & Cooking Living\Tools & Hardware 352 | home gym equipment Living\Health & Fitness Living\Tools & Hardware 353 | home office phone system Living\Tools & Hardware Computers\Networks & Telecommunication Computers\Multimedia 354 | home theater equipment Living\Tools & Hardware Living\Furnishing & Houseware Computers\Multimedia 355 | homes for sale in atlanta ga Living\Real Estate Information\Local & Regional 356 | honda cd player Living\Car & Garage Entertainment\Music Computers\Hardware 357 | honda elsinore Living\Car & Garage Information\Companies & Industries Shopping\Stores & Products 358 | hope publishing Entertainment\Music Information\Arts & Humanities 359 | house for sale in florida Living\Real Estate Information\Local & Regional 360 | how to get out of debt Living\Finance & Investment 361 | how to play cricket Entertainment\Games & Toys Sports\Other 362 | hunting knifes Living\Tools & Hardware Sports\Outdoor Recreations 363 | 
hutchinson public library Information\Arts & Humanities Information\Local & Regional 364 | idocket support Information\Law & Politics Computers\Internet & Intranet Online Community\Forums & Groups Information\Companies & Industries 365 | incorporating in texas Information\Law & Politics Information\Local & Regional Information\Companies & Industries 366 | incumbent Information\Law & Politics 367 | indianrail Information\Local & Regional 368 | insurance careers Living\Career & Jobs 369 | insurance policy Information\Arts & Humanities Living\Finance & Investment 370 | internet explorer Computers\Internet & Intranet Computers\Software 371 | intrinsic Computers\Multimedia Information\Companies & Industries Information\Science & Technology Information\Arts & Humanities 372 | irish food recipes Living\Food & Cooking Information\Local & Regional 373 | iroc wheels Living\Car & Garage Living\Tools & Hardware 374 | is ron howard in the movie Entertainment\Movies Entertainment\Celebrities 375 | islamic greetings Information\Arts & Humanities Information\Law & Politics Information\Local & Regional 376 | iwon fantasy hockey Entertainment\Games & Toys Sports\Other 377 | jacket patches Living\Fashion & Apparel 378 | janet wojcik Sports\Other 379 | jann arden Entertainment\Celebrities Entertainment\Movies 380 | jasons deli Living\Food & Cooking Information\Local & Regional 381 | jeep floor mats Living\Car & Garage 382 | jeep stroller Living\Car & Garage 383 | jeffrey and thompson Information\Law & Politics Information\Science & Technology 384 | jennifer finnigan Entertainment\Celebrities Entertainment\Movies 385 | jerry oliver Entertainment\Celebrities Entertainment\Music 386 | jerry thompson Information\Science & Technology Information\Companies & Industries Information\Local & Regional Living\Real Estate 387 | jessica mcclintock dress Shopping\Stores & Products Living\Fashion & Apparel Information\Arts & Humanities 388 | jewelry armoire Shopping\Stores & Products 
Shopping\Bargains & Discounts Information\Arts & Humanities 389 | jobs in uae Living\Career & Jobs Information\Local & Regional 390 | john wooden Entertainment\Celebrities Sports\Basketball Information\Arts & Humanities Living\Book & Magazine 391 | jokes funny stories Entertainment\Humor & Fun 392 | journal of the san juans Living\Book & Magazine Information\Local & Regional Entertainment\Other Computers\Internet & Intranet Information\Science & Technology 393 | JUMANJI Entertainment\Movies Living\Book & Magazine Entertainment\TV Shopping\Stores & Products 394 | kate spade handbags Living\Fashion & Apparel Shopping\Stores & Products Information\Companies & Industries 395 | kathi livornese Online Community\People Search 396 | keegan connor tracy Entertainment\Celebrities Entertainment\Movies 397 | kellybluebookusedcarvalues Living\Car & Garage Shopping\Bargains & Discounts Information\Companies & Industries 398 | Kids Halloween Costume Living\Fashion & Apparel Living\Family & Kids Shopping\Stores & Products 399 | kimbanet Online Community\Forums & Groups Online Community\Homepages Information\Local & Regional 400 | kimber guns Shopping\Stores & Products Sports\Other Living\Tools & Hardware 401 | kitchen cabinet handles Living\Tools & Hardware Living\Furnishing & Houseware Shopping\Stores & Products 402 | knickerbocker hotel Living\Travel & Vacation Information\Local & Regional 403 | knoxville real estate Living\Real Estate Information\Local & Regional 404 | korean song lyrics Entertainment\Music Information\Local & Regional 405 | korg d1600 Computers\Multimedia Living\Furnishing & Houseware Entertainment\Music Information\Science & Technology Information\Companies & Industries 406 | la county museum Living\Travel & Vacation Information\Arts & Humanities Information\Local & Regional 407 | lady baltimore cake Living\Food & Cooking Information\Arts & Humanities 408 | lah real estate Living\Real Estate 409 | lancia scorpion Living\Car & Garage Information\Companies & 
Industries 410 | langston hughes Online Community\People Search Information\Arts & Humanities Information\Education Information\Law & Politics 411 | larkin center Information\Science & Technology Information\Companies & Industries Information\Education 412 | larryboy Entertainment\Games & Toys Living\Family & Kids 413 | laryngitis Living\Health & Fitness Information\Science & Technology 414 | latex halloween masks Living\Furnishing & Houseware Living\Gifts & Collectables 415 | lawn tractor review Living\Furnishing & Houseware Living\Car & Garage 416 | lawncare Living\Furnishing & Houseware Living\Car & Garage 417 | laying ceramic tile Living\Furnishing & Houseware 418 | lcd tv Living\Furnishing & Houseware Information\Science & Technology 419 | learn to speak japanese Information\Education Information\Arts & Humanities Information\Local & Regional 420 | learning carter style guitar Information\Education Entertainment\Music Information\Arts & Humanities 421 | lee corso Online Community\People Search Information\Arts & Humanities 422 | library pictures Information\Arts & Humanities Information\Education 423 | lifespan of bulls Living\Tools & Hardware Living\Furnishing & Houseware Information\Science & Technology 424 | limbitrol Living\Health & Fitness Information\Science & Technology 425 | lindenhurst; long island; new york Information\Local & Regional Sports\Other 426 | list of state capitols Information\Arts & Humanities Information\Law & Politics Information\Education Information\Local & Regional 427 | little giants Entertainment\Movies 428 | little rock newspapers Living\Book & Magazine Information\Local & Regional Information\Law & Politics Information\Arts & Humanities 429 | living room Living\Furnishing & Houseware Living\Real Estate 430 | long eared owl Living\Pets & Animals Information\Science & Technology 431 | longwood gardens Living\Landscaping & Gardening 432 | lostlovers Living\Dating & Relationships Information\Arts & Humanities Entertainment\Music 433 
| lotto powerball Entertainment\Games & Toys Living\Finance & Investment 434 | louisiana state jobs Living\Career & Jobs Information\Local & Regional 435 | lourdes water rosary Living\Fashion & Apparel Information\Companies & Industries Information\Arts & Humanities Information\Local & Regional 436 | love and romance advice Living\Dating & Relationships Online Community\Forums & Groups Information\Arts & Humanities 437 | lowes home inprovement Shopping\Stores & Products Information\Companies & Industries 438 | ltd purchases Information\Companies & Industries Shopping\Other 439 | lucas babin Entertainment\Celebrities 440 | mac osx icons Computers\Other 441 | major dad Entertainment\TV Living\Family & Kids 442 | malls in huntsville; tx Shopping\Buying Guides & Researching Information\Local & Regional 443 | malone college Information\Education Information\Local & Regional 444 | manheim Living\Car & Garage Entertainment\Celebrities Online Community\Forums & Groups Online Community\People Search 445 | map directions Living\Travel & Vacation Information\Local & Regional 446 | maps florida Living\Travel & Vacation Information\Local & Regional 447 | maps of india Living\Travel & Vacation Information\Local & Regional 448 | marine corp rings Living\Fashion & Apparel Shopping\Stores & Products 449 | mario arcade games online Entertainment\Games & Toys Computers\Networks & Telecommunication 450 | mark scott Online Community\People Search 451 | martha stewart paint colors Living\Furnishing & Houseware Shopping\Stores & Products Entertainment\Celebrities 452 | massachusetts attorney Information\Law & Politics Information\Local & Regional 453 | maui Living\Travel & Vacation Information\Local & Regional 454 | max factor Living\Fashion & Apparel Information\Companies & Industries Shopping\Stores & Products 455 | max payne mod Entertainment\Games & Toys 456 | medieval astrology history Information\Arts & Humanities Information\Science & Technology Information\Education 457 | mehendi 
designs Information\Arts & Humanities 458 | melcor Information\Companies & Industries Computers\Hardware Information\Science & Technology 459 | mens hairstyle Living\Fashion & Apparel 460 | mens health Living\Health & Fitness 461 | mens wedding rings Living\Gifts & Collectables Living\Fashion & Apparel 462 | mesabi daily news Living\Book & Magazine Information\Law & Politics Information\Local & Regional 463 | messenger express Online Community\Chat & Instant Messaging Computers\Networks & Telecommunication 464 | metal swing sets Living\Furnishing & Houseware Living\Family & Kids 465 | mi term life insurance quote Living\Finance & Investment Living\Health & Fitness Information\Arts & Humanities 466 | micheals craft Shopping\Stores & Products Information\Arts & Humanities Information\Companies & Industries 467 | microgramma font Computers\Software Information\Arts & Humanities Information\References & Libraries 468 | micronet Online Community\Homepages Computers\Internet & Intranet Information\Arts & Humanities Information\Science & Technology 469 | microscope quiz Information\Science & Technology Information\Education 470 | microsoft forms Information\Science & Technology Information\Companies & Industries Computers\Other 471 | migraine medications Living\Health & Fitness Information\Science & Technology 472 | mikasa-japan Living\Food & Cooking Living\Travel & Vacation Information\Local & Regional 473 | milk jug skeleton pattern Living\Furnishing & Houseware Living\Gifts & Collectables Living\Fashion & Apparel Information\Arts & Humanities 474 | millenium development goals Information\Companies & Industries Information\Arts & Humanities Information\Local & Regional 475 | minnesota offender locator Information\Law & Politics Information\Local & Regional Computers\Software 476 | miss match Entertainment\TV Living\Dating & Relationships 477 | mitsubishi Living\Car & Garage Information\Companies & Industries Shopping\Stores & Products 478 | mix drink recipes Living\Food 
& Cooking 479 | mls orlando Living\Real Estate Information\Local & Regional 480 | mono vision Shopping\Stores & Products Information\Companies & Industries Living\Health & Fitness 481 | mopar Living\Car & Garage Shopping\Stores & Products Living\Book & Magazine 482 | motorhome magazine Living\Book & Magazine Living\Car & Garage 483 | mournful poems Information\Arts & Humanities Living\Dating & Relationships 484 | movie tshirts Living\Fashion & Apparel Entertainment\Movies 485 | movies listings Entertainment\Movies Shopping\Buying Guides & Researching 486 | msn best buy home page Online Community\Homepages Shopping\Buying Guides & Researching 487 | msn homepage Online Community\Homepages 488 | msn messinger Online Community\Chat & Instant Messaging Online Community\Forums & Groups 489 | msnbc outlook Online Community\Homepages Online Community\Forums & Groups 490 | museum storage facility london Information\Arts & Humanities Information\Local & Regional Living\Travel & Vacation 491 | music to listen to Entertainment\Music 492 | mydvd deluxe 5 Shopping\Stores & Products Entertainment\Music Computers\Hardware Information\Science & Technology 493 | mystic river movie Entertainment\Movies 494 | names of brittany spears family Entertainment\Celebrities Information\References & Libraries 495 | nancy lamott Entertainment\Music Entertainment\Celebrities 496 | nanny cams Shopping\Stores & Products Living\Tools & Hardware Living\Family & Kids 497 | nara garden beach resort Living\Travel & Vacation Information\Local & Regional 498 | natalie portman wallpaper Computers\Other Entertainment\Celebrities Information\Arts & Humanities Living\Fashion & Apparel 499 | national geographic traveler Information\Local & Regional Information\Arts & Humanities Living\Book & Magazine 500 | national health scholars Living\Health & Fitness Information\Science & Technology 501 | nba all star weekend tickets Sports\Schedules & Tickets Sports\Basketball 502 | nba draft mock Sports\News & Scores 
Sports\Basketball 503 | neon bar signs Shopping\Stores & Products Living\Furnishing & Houseware 504 | nervous system diagram Information\Science & Technology 505 | new york bankruptcy courts Information\Law & Politics Information\Local & Regional 506 | new york city bowling locations Information\Local & Regional Entertainment\Other Sports\Other 507 | new york clebrity clubs Entertainment\Celebrities Information\Local & Regional 508 | new zealand clothes Living\Fashion & Apparel Information\Local & Regional 509 | new zealand migration Information\Law & Politics Information\Local & Regional 510 | newfoundland map Information\Arts & Humanities Information\Local & Regional Information\Science & Technology 511 | newspapers in tennessee Living\Book & Magazine Information\Local & Regional 512 | nick cannon pics Entertainment\Celebrities Entertainment\Music 513 | nick jr Computers\Internet & Intranet Information\Education Living\Family & Kids 514 | nightclubs in london Living\Dating & Relationships Information\Local & Regional Entertainment\Humor & Fun 515 | nj weddings Living\Dating & Relationships Living\Family & Kids Living\Gifts & Collectables Information\Local & Regional 516 | no doc loans Living\Finance & Investment 517 | nocheatersdate Living\Dating & Relationships Online Community\Forums & Groups 518 | nokia unlock Information\Science & Technology Computers\Mobile Computing Information\Companies & Industries 519 | norcold Shopping\Stores & Products Living\Furnishing & Houseware Information\Companies & Industries Information\Science & Technology 520 | north carolina mountains Information\Local & Regional Sports\Outdoor Recreations 521 | north county toyota Living\Car & Garage Information\Companies & Industries Information\Local & Regional 522 | nsw lotto Entertainment\Games & Toys Living\Finance & Investment Information\Arts & Humanities 523 | nyjournalnews Information\Arts & Humanities Information\Local & Regional 524 | oasis lyrics Entertainment\Music Online 
Community\Forums & Groups 525 | oink baby Living\Fashion & Apparel Living\Family & Kids 526 | oklahoma better business beura Information\Local & Regional Information\Companies & Industries 527 | oklahomauniversity Information\Education Information\Local & Regional 528 | one day at a time Entertainment\Movies Living\Dating & Relationships Information\Arts & Humanities 529 | onslow county district court Information\Law & Politics Information\Local & Regional 530 | oprah producers email Entertainment\TV Entertainment\Celebrities 531 | original movie posters Entertainment\Movies Entertainment\Pictures & Photos 532 | outsourcing and employment in us Information\Companies & Industries Living\Career & Jobs 533 | Panasonic Upright Vacuum Living\Furnishing & Houseware Living\Tools & Hardware Information\Companies & Industries 534 | paris recipes Living\Food & Cooking Information\Local & Regional 535 | parking at heathrow Living\Travel & Vacation Information\Local & Regional 536 | parrot in the oven Living\Book & Magazine Information\Arts & Humanities 537 | parts source Living\Car & Garage 538 | party entertainment Living\Dating & Relationships Entertainment\Humor & Fun Entertainment\Other 539 | patrick roy pics Entertainment\Celebrities Sports\Hockey 540 | pay per click optimize Information\Science & Technology 541 | pc roms Entertainment\Games & Toys 542 | pcv valve Living\Car & Garage Shopping\Stores & Products 543 | pedal tractor parts Living\Car & Garage 544 | pen friends in india Living\Dating & Relationships Online Community\Forums & Groups 545 | permenant abdominal gas remedies Living\Health & Fitness Information\Science & Technology 546 | permission slip Information\Education Living\Family & Kids Information\Arts & Humanities 547 | personal page Online Community\Homepages 548 | pex tubing prices Living\Tools & Hardware Living\Furnishing & Houseware Shopping\Stores & Products Shopping\Buying Guides & Researching 549 | pgatour Sports\Schedules & Tickets Sports\Outdoor 
Recreations 550 | philippians Information\Arts & Humanities Information\Local & Regional 551 | photo radar Living\Travel & Vacation Information\Science & Technology Living\Car & Garage 552 | pick up lines for guys Living\Dating & Relationships Entertainment\Humor & Fun 553 | pictures of hotrods Living\Car & Garage Entertainment\Pictures & Photos 554 | pictures of winter wonderland Entertainment\Music Living\Gifts & Collectables Shopping\Stores & Products 555 | pictures siegfried roy Entertainment\Celebrities Entertainment\Games & Toys Entertainment\Humor & Fun 556 | pilates workout Living\Health & Fitness 557 | pinellas county clerk Information\Law & Politics Information\Local & Regional 558 | ping irons Sports\Outdoor Recreations Shopping\Stores & Products Information\Companies & Industries Living\Health & Fitness 559 | pita bread recipes Living\Food & Cooking 560 | platteville mining museum Information\Local & Regional Information\Arts & Humanities Information\Companies & Industries Information\References & Libraries 561 | playstation2 games Entertainment\Games & Toys Computers\Other 562 | pleasure of my company Living\Book & Magazine Information\Arts & Humanities 563 | plus size fashion Living\Fashion & Apparel Shopping\Stores & Products 564 | plus ultra Information\Arts & Humanities Information\Local & Regional 565 | pocket watch Shopping\Stores & Products Living\Gifts & Collectables 566 | polaris trail blazer Living\Car & Garage 567 | pontiac casting numbers Living\Car & Garage 568 | popgames Entertainment\Games & Toys 569 | portage county auditor Information\Law & Politics Information\Local & Regional 570 | post-tetanic depression Living\Health & Fitness Information\Science & Technology 571 | prefontaine Online Community\People Search Sports\News & Scores 572 | preschool and halloween Living\Family & Kids Information\Education 573 | present condition of comanche tribe Information\Law & Politics Information\Local & Regional 574 | president of russia 
Information\Law & Politics Information\Local & Regional 575 | presnet-presbyterian medical group Living\Religion & Belief Living\Health & Fitness Information\References & Libraries 576 | prince edward island Living\Travel & Vacation Information\Local & Regional Information\Education 577 | principal life Living\Finance & Investment 578 | pro ana websites Entertainment\Celebrities 579 | probability problems Information\Science & Technology Information\Education 580 | projectmanagement Information\Arts & Humanities Information\Education Information\Science & Technology 581 | prometheus laboratories Information\Companies & Industries Information\Science & Technology Living\Health & Fitness 582 | purpose of the national health service Information\References & Libraries Living\Health & Fitness Information\Law & Politics 583 | qingqi Living\Car & Garage 584 | quality paperback club Information\Arts & Humanities Living\Book & Magazine 585 | queens tudor realty Living\Real Estate Information\Local & Regional 586 | qwest telephone book bellevue washington Information\Local & Regional Living\Dating & Relationships 587 | qwestdex Online Community\Forums & Groups Computers\Networks & Telecommunication 588 | R.D. 
Call Entertainment\Movies Information\Arts & Humanities Online Community\Forums & Groups 589 | radio control boats Sports\Outdoor Recreations Entertainment\Games & Toys Shopping\Stores & Products Living\Tools & Hardware 590 | radio shack in-dash speakers Information\Companies & Industries Living\Furnishing & Houseware Living\Tools & Hardware 591 | rams head Living\Food & Cooking Information\Local & Regional Living\Travel & Vacation Living\Real Estate 592 | rarity bay Sports\Outdoor Recreations Living\Dating & Relationships 593 | rdanderson Entertainment\Celebrities Entertainment\TV Entertainment\Movies 594 | recipe crystal meth Information\Law & Politics Information\References & Libraries Living\Health & Fitness 595 | recipe for german potatoe salad Living\Food & Cooking 596 | red faction mods Computers\Software Computers\Multimedia Entertainment\Games & Toys 597 | Red Sox Playoff Tickets Sports\Schedules & Tickets Sports\Baseball 598 | refinancing a home Living\Finance & Investment Living\Furnishing & Houseware Living\Real Estate 599 | refurnished refridgeration displays Living\Tools & Hardware Shopping\Bargains & Discounts Shopping\Stores & Products 600 | registerd nurse Information\Science & Technology Information\References & Libraries Living\Health & Fitness 601 | relief society Information\Law & Politics Information\Local & Regional Online Community\Personal Services 602 | renassaince costumes Living\Fashion & Apparel Entertainment\Games & Toys 603 | reported speech Information\Local & Regional Information\References & Libraries 604 | republican nixon Information\Law & Politics Information\References & Libraries 605 | retirementlink Information\Local & Regional Living\Finance & Investment 606 | revivalsoy Living\Health & Fitness Living\Food & Cooking Information\References & Libraries Information\Companies & Industries 607 | roberts company Living\Finance & Investment Information\Education Information\Companies & Industries 608 | rock crawler wheels 
Living\Car & Garage Shopping\Stores & Products 609 | rods western wear ohio Information\Local & Regional Living\Fashion & Apparel Shopping\Stores & Products 610 | rolex gmt master Living\Fashion & Apparel Living\Gifts & Collectables Shopping\Stores & Products 611 | rolex wholesale Shopping\Bargains & Discounts Living\Gifts & Collectables Living\Fashion & Apparel 612 | rollingstonemagazine Living\Book & Magazine Information\Arts & Humanities Entertainment\Music 613 | roman empire christman Information\Arts & Humanities Information\Law & Politics Information\References & Libraries Living\Book & Magazine 614 | ronald and morgan and salisbury and maryland Information\Local & Regional Living\Travel & Vacation Online Community\People Search 615 | roof rack for 2005 mustang Living\Tools & Hardware Living\Car & Garage Shopping\Stores & Products 616 | root canal Living\Health & Fitness Information\References & Libraries Information\Science & Technology 617 | rosedale shopping center Information\Local & Regional Shopping\Stores & Products 618 | rosenbluth Living\Travel & Vacation Shopping\Stores & Products 619 | rotisserie chicken recipe Living\Food & Cooking 620 | roundworms in humans Living\Health & Fitness Information\References & Libraries Living\Pets & Animals 621 | rules for working in south korea Information\Law & Politics Information\Local & Regional Living\Career & Jobs 622 | run time error visual basic 6 Computers\Software Information\Science & Technology 623 | running clothes Living\Health & Fitness Living\Fashion & Apparel Shopping\Stores & Products Shopping\Buying Guides & Researching 624 | rv rentals Shopping\Lease & Rent Sports\Outdoor Recreations Shopping\Stores & Products 625 | safety technology Computers\Security Information\Science & Technology Information\Law & Politics Information\Companies & Industries 626 | saks fifth avenue Living\Fashion & Apparel Shopping\Stores & Products Living\Gifts & Collectables 627 | sally hansen Living\Fashion & Apparel 
Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 628 | sample lesson plan Information\Education Information\References & Libraries Computers\Other 629 | san francisco giants tickets Sports\Schedules & Tickets Sports\Baseball 630 | santa fe new mexico Information\Local & Regional Living\Travel & Vacation Information\References & Libraries Living\Real Estate 631 | santa rosa apartments Shopping\Lease & Rent Living\Real Estate Information\Local & Regional 632 | scandinavian food Living\Food & Cooking Shopping\Stores & Products Shopping\Buying Guides & Researching Information\Local & Regional 633 | scenes from basic instinct Entertainment\Movies Entertainment\Pictures & Photos Entertainment\Celebrities 634 | schooldude Computers\Software Information\Education Shopping\Stores & Products 635 | scientific names for leaves Information\Science & Technology Information\References & Libraries 636 | scott & todd in the morning Entertainment\Radio 637 | seememe refresh windows registry -serial -keygen -crack Computers\Security Computers\Software Information\Science & Technology Information\Companies & Industries 638 | self-storage units Shopping\Lease & Rent Living\Furnishing & Houseware Shopping\Stores & Products 639 | serenity prayer alanon Living\Religion & Belief 640 | sesame seeds Information\References & Libraries Living\Food & Cooking Living\Health & Fitness Shopping\Stores & Products 641 | shanghai frangance Living\Gifts & Collectables Shopping\Stores & Products 642 | sharereactor Online Community\Forums & Groups Computers\Software Information\Law & Politics 643 | shawnee tribe Information\Arts & Humanities Information\References & Libraries Information\Local & Regional 644 | sheraton hotel Living\Travel & Vacation Shopping\Stores & Products 645 | sheridan paintball Sports\Other Entertainment\Games & Toys Shopping\Stores & Products Shopping\Buying Guides & Researching 646 | shimla Living\Travel & Vacation Information\Local & 
Regional Information\References & Libraries Information\Arts & Humanities 647 | SHIRI Information\Law & Politics Information\Local & Regional 648 | siefred and roy Entertainment\Celebrities Entertainment\Other Living\Travel & Vacation Shopping\Stores & Products 649 | sierra madre oriental Living\Travel & Vacation Information\Local & Regional Information\References & Libraries 650 | signs of depression Living\Health & Fitness Information\References & Libraries 651 | silk clothing Living\Fashion & Apparel Shopping\Stores & Products 652 | silver necklace Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 653 | skull makeup Entertainment\Humor & Fun Shopping\Stores & Products 654 | smart and ripper Computers\Software Entertainment\Movies Shopping\Buying Guides & Researching 655 | smartmoney Living\Finance & Investment Information\Companies & Industries Shopping\Buying Guides & Researching 656 | smirnoff Living\Food & Cooking Entertainment\Other 657 | snap-on tools in west virginia Living\Tools & Hardware Shopping\Stores & Products 658 | snowflake pattern Information\Science & Technology Information\Education 659 | social security disability Information\Law & Politics Living\Health & Fitness Living\Finance & Investment 660 | sociology jobs Living\Career & Jobs Information\Law & Politics Information\References & Libraries 661 | sony and canada Information\Companies & Industries Information\Local & Regional Shopping\Stores & Products Entertainment\Other 662 | sony audio equipment Entertainment\Music Shopping\Stores & Products Living\Furnishing & Houseware 663 | south dakota public radio Information\Local & Regional Entertainment\Radio Information\References & Libraries 664 | south park mexican pictures Information\Local & Regional Entertainment\Pictures & Photos Living\Travel & Vacation 665 | southeast airlines Information\Companies & Industries Living\Travel & Vacation Shopping\Stores & Products 666 | southwest airlins 
Information\Companies & Industries Living\Travel & Vacation Shopping\Stores & Products 667 | spanish courses in jaca Information\Education Information\Local & Regional Living\Career & Jobs Shopping\Other 668 | spathic Information\Companies & Industries 669 | special pals Online Community\Homepages Living\Pets & Animals 670 | spiderman web blaster Entertainment\Games & Toys Shopping\Stores & Products 671 | spiegel ultimate outlet Living\Fashion & Apparel Living\Furnishing & Houseware Shopping\Stores & Products 672 | spooky halloween stuff Entertainment\Humor & Fun Shopping\Stores & Products 673 | sportaid Information\Companies & Industries Living\Furnishing & Houseware Shopping\Stores & Products Sports\Other Living\Health & Fitness 674 | sportcourtgolf.com Sports\Other Online Community\Homepages 675 | sports license plate Living\Car & Garage Sports\Other Living\Tools & Hardware 676 | sprint rollover minute Information\Companies & Industries Computers\Mobile Computing Shopping\Stores & Products 677 | sprintz Living\Furnishing & Houseware Information\Companies & Industries Shopping\Stores & Products 678 | spy cam Computers\Networks & Telecommunication Living\Furnishing & Houseware Living\Tools & Hardware 679 | stanford university Information\Education Information\References & Libraries Online Community\Homepages 680 | star magazine Living\Book & Magazine Entertainment\Celebrities 681 | star registry Living\Gifts & Collectables Information\Companies & Industries Online Community\Homepages 682 | starting my rap career Living\Career & Jobs Entertainment\Music Information\Arts & Humanities 683 | sterling custom cabinetry pa Living\Furnishing & Houseware Living\Tools & Hardware Shopping\Stores & Products 684 | steve newman" books Living\Book & Magazine 685 | stock futures Living\Finance & Investment Information\References & Libraries Information\Companies & Industries 686 | street wars Computers\Multimedia Computers\Software Entertainment\Games & Toys 687 | striper fishing 
Sports\Outdoor Recreations Living\Tools & Hardware 688 | strong funds Living\Finance & Investment Information\Companies & Industries Information\References & Libraries 689 | studioeonline.com Shopping\Lease & Rent Online Community\Homepages 690 | sun glasses Living\Fashion & Apparel Living\Health & Fitness Shopping\Stores & Products 691 | super saiyan brolly Entertainment\Humor & Fun Entertainment\Games & Toys 692 | supercars Living\Car & Garage Online Community\Homepages Shopping\Stores & Products 693 | supercheats Computers\Multimedia Entertainment\Games & Toys Online Community\Forums & Groups 694 | superman emblem Entertainment\Games & Toys Living\Gifts & Collectables Entertainment\Movies 695 | superstitions Information\References & Libraries 696 | surfing posters Sports\Outdoor Recreations Shopping\Stores & Products 697 | susquehanna county Information\Local & Regional Information\References & Libraries 698 | symantec removal tool Computers\Security Computers\Software Living\Tools & Hardware 699 | systray Computers\Software 700 | tama Information\Local & Regional Living\Tools & Hardware Living\Fashion & Apparel 701 | tankinis Living\Fashion & Apparel Shopping\Stores & Products 702 | tatoos Entertainment\Humor & Fun Living\Other 703 | taylor corporation Information\Companies & Industries 704 | tectonic plates Information\Science & Technology Information\References & Libraries 705 | tenchu Computers\Multimedia Entertainment\Games & Toys 706 | terri schaivo Information\Law & Politics Living\Religion & Belief Living\Health & Fitness 707 | texas wildflowers Living\Landscaping & Gardening Information\References & Libraries 708 | tgi fridays Living\Food & Cooking Information\Local & Regional 709 | the lincoln journal star Living\Book & Magazine Information\Local & Regional 710 | the media Computers\Internet & Intranet Information\Arts & Humanities 711 | the wb Entertainment\Movies Entertainment\Games & Toys Entertainment\TV 712 | themed evenings Entertainment\Humor & 
Fun Entertainment\Games & Toys 713 | themes of dracula (1931) Entertainment\Movies Information\Arts & Humanities 714 | thrombolytic therapy for thrombosis of aortic prosthesis Living\Health & Fitness Information\References & Libraries 715 | thunder tiger Entertainment\Games & Toys Living\Gifts & Collectables 716 | timberland loafers women Living\Fashion & Apparel Shopping\Stores & Products 717 | tips on tornatos Information\References & Libraries Living\Other 718 | tj maxx Living\Fashion & Apparel Information\Companies & Industries Shopping\Stores & Products 719 | todays interest rates Living\Finance & Investment 720 | toilet bowl Living\Furnishing & Houseware Shopping\Stores & Products 721 | tom clancy Living\Book & Magazine Information\Arts & Humanities Online Community\People Search 722 | toroidal transformer Living\Tools & Hardware Shopping\Stores & Products 723 | toshiba projector carrying case Computers\Hardware Computers\Multimedia Living\Furnishing & Houseware Shopping\Stores & Products Shopping\Buying Guides & Researching 724 | toy boomerang Entertainment\Games & Toys Shopping\Stores & Products 725 | toyota of el cajon Living\Car & Garage Shopping\Stores & Products 726 | trail running shoes Living\Fashion & Apparel Living\Health & Fitness Shopping\Stores & Products Shopping\Buying Guides & Researching Sports\Other 727 | travel hotels Living\Travel & Vacation 728 | travelzoo Living\Travel & Vacation Online Community\Homepages 729 | tritronic electronic training dog collars Living\Pets & Animals Information\Companies & Industries Shopping\Stores & Products 730 | tumeric Living\Food & Cooking Computers\Other 731 | turntables Living\Furnishing & Houseware Shopping\Stores & Products 732 | tv1 cable tv Entertainment\TV Information\Companies & Industries 733 | tweetybird pictures Entertainment\Pictures & Photos Living\Pets & Animals 734 | Tweeze Epil Living\Tools & Hardware Shopping\Stores & Products 735 | twisted colon Living\Health & Fitness Information\Science 
& Technology 736 | uconn football tickets Sports\American Football Sports\Schedules & Tickets 737 | ugly man Living\Dating & Relationships 738 | uk telephone directory Information\Local & Regional Online Community\People Search 739 | ultimate fitness Living\Health & Fitness Information\References & Libraries 740 | under construction gifs Computers\Software Entertainment\Pictures & Photos 741 | unix web hosting provider Computers\Internet & Intranet Information\Companies & Industries 742 | upland bird country Information\Local & Regional Sports\Outdoor Recreations Living\Travel & Vacation 743 | us free classifieds Information\Local & Regional Online Community\Personal Services Shopping\Buying Guides & Researching Shopping\Bargains & Discounts Shopping\Stores & Products 744 | usa lottery Information\Law & Politics Living\Other Information\Arts & Humanities 745 | used chopper motorcycles Living\Car & Garage Shopping\Bargains & Discounts 746 | used pianos pittsburgh; pa Shopping\Bargains & Discounts Shopping\Stores & Products Shopping\Other 747 | used tuxedos Living\Fashion & Apparel Shopping\Bargains & Discounts Shopping\Stores & Products Shopping\Buying Guides & Researching Shopping\Auctions & Bids 748 | ustanorcal Sports\Tennis 749 | vanity stool Living\Furnishing & Houseware Shopping\Stores & Products 750 | ventura mls Living\Real Estate 751 | venus ruler of taurus Living\Dating & Relationships Information\References & Libraries 752 | viagra dosage Living\Health & Fitness Living\Dating & Relationships 753 | victoria high school Information\Local & Regional Information\References & Libraries 754 | viking memory Computers\Hardware 755 | virtus online Computers\Software Computers\Internet & Intranet Information\Education 756 | voltage drop Information\Science & Technology Living\Career & Jobs 757 | vote ontario Information\Law & Politics Information\Local & Regional 758 | wade pottery Living\Gifts & Collectables Living\Furnishing & Houseware 759 | waldensians 
Living\Religion & Belief Information\Law & Politics Information\References & Libraries 760 | Walk in Coolers Living\Furnishing & Houseware Shopping\Other 761 | washington and jefferson Information\Law & Politics Information\Arts & Humanities Information\References & Libraries 762 | watersofteners Living\Food & Cooking Shopping\Stores & Products 763 | waynesville nc Information\Local & Regional Living\Travel & Vacation 764 | web shots Computers\Internet & Intranet Entertainment\Pictures & Photos Computers\Software Computers\Hardware Information\References & Libraries 765 | wedding food Living\Food & Cooking Living\Family & Kids Living\Other Shopping\Stores & Products 766 | wedding invitions Living\Family & Kids Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 767 | wedding shower invitations Living\Family & Kids Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 768 | wedgewood golf course Information\Local & Regional Sports\Outdoor Recreations Sports\Other Information\Other 769 | weight training tips Living\Health & Fitness Sports\Other Information\References & Libraries 770 | what is scandisk? 
Computers\Software Computers\Security 771 | wheaties premium giveaways Shopping\Auctions & Bids Shopping\Other 772 | where to login to john carroll university email Online Community\Forums & Groups Computers\Internet & Intranet Information\Local & Regional Information\Education 773 | wholesale office furniture Shopping\Bargains & Discounts Living\Furnishing & Houseware Shopping\Buying Guides & Researching Shopping\Stores & Products Shopping\Buying Guides & Researching 774 | wi athletic association Information\Local & Regional Sports\Other Living\Health & Fitness 775 | wild hog hunting Entertainment\Other Information\References & Libraries Living\Pets & Animals Sports\Outdoor Recreations 776 | Will Ferrell Entertainment\TV Entertainment\Humor & Fun Information\Law & Politics 777 | william-sonoma.com Living\Food & Cooking Living\Furnishing & Houseware Shopping\Stores & Products Living\Gifts & Collectables Living\Landscaping & Gardening 778 | wills helen newington Sports\Tennis Information\References & Libraries 779 | winchester amunition Sports\Other Entertainment\Other Information\Law & Politics 780 | windows xp blinds Computers\Software Computers\Security Computers\Other 781 | windvd Computers\Software Computers\Multimedia Entertainment\Movies 782 | wine and cheese baskets Living\Food & Cooking Living\Furnishing & Houseware Entertainment\Other Living\Gifts & Collectables Shopping\Stores & Products 783 | winona ryder photos Entertainment\Pictures & Photos Entertainment\Celebrities Entertainment\Movies 784 | womens suffrage in the 1920s Information\Law & Politics Information\References & Libraries 785 | wood workers Living\Career & Jobs Information\Companies & Industries 786 | words to song for you by kenny latimore Entertainment\Music 787 | worker and characteristics Information\Companies & Industries Information\Other 788 | workout clothing Living\Fashion & Apparel Living\Health & Fitness Shopping\Stores & Products Shopping\Buying Guides & Researching Sports\Other 
789 | world atlas Information\References & Libraries Information\Local & Regional Living\Book & Magazine Living\Travel & Vacation 790 | world gold council Living\Finance & Investment Living\Gifts & Collectables Information\Science & Technology Information\References & Libraries Information\Companies & Industries 791 | wrist watch Living\Fashion & Apparel Living\Furnishing & Houseware Shopping\Stores & Products Shopping\Buying Guides & Researching 792 | writing an authorisation manual Information\Arts & Humanities Information\References & Libraries Information\Education Computers\Software 793 | writs of assistance Information\Law & Politics Information\References & Libraries Information\Education Living\Book & Magazine 794 | www irs com Information\Law & Politics Information\Local & Regional Information\Arts & Humanities 795 | www.jcpenneys Shopping\Stores & Products Information\Local & Regional Shopping\Bargains & Discounts 796 | yamaha musical instruments Entertainment\Music Shopping\Stores & Products Living\Furnishing & Houseware Living\Other 797 | yoga photos Living\Health & Fitness Entertainment\Pictures & Photos Information\Arts & Humanities Information\References & Libraries Living\Religion & Belief 798 | youngs figurine Living\Gifts & Collectables Shopping\Stores & Products Shopping\Buying Guides & Researching 799 | zeromancer Entertainment\Music Entertainment\Celebrities Entertainment\Pictures & Photos 800 | zoo disny Entertainment\Pictures & Photos Living\Family & Kids Living\Travel & Vacation Shopping\Stores & Products 801 | --------------------------------------------------------------------------------