├── .gitignore ├── Poster.jpg ├── Flow Chart.png ├── dataAP ├── user_defined_tagger.pickle ├── buildTrainTestFiles.py └── test.txt ├── Grammar Checker Using Hidden Markov Models and Trained CFGs.pdf ├── README.md ├── test.py ├── master.py ├── cfg.py ├── viterbi_tagger.py └── perceptron_tagger.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /Poster.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bencmbrook/grammar-checker/HEAD/Poster.jpg -------------------------------------------------------------------------------- /Flow Chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bencmbrook/grammar-checker/HEAD/Flow Chart.png -------------------------------------------------------------------------------- /dataAP/user_defined_tagger.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bencmbrook/grammar-checker/HEAD/dataAP/user_defined_tagger.pickle -------------------------------------------------------------------------------- /Grammar Checker Using Hidden Markov Models and Trained CFGs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bencmbrook/grammar-checker/HEAD/Grammar Checker Using Hidden Markov Models and Trained CFGs.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grammar Checker 2 | 3 | ![Poster Summarizing Findings](https://raw.githubusercontent.com/bencmbrook/Grammar-Checker/master/Poster.jpg) 4 | 5 | # Paper 6 | [Grammar Checker Using Hidden Markov Models and Trained CFGs](https://github.com/bencmbrook/Grammar-Checker/blob/master/Grammar%20Checker%20Using%20Hidden%20Markov%20Models%20and%20Trained%20CFGs.pdf) 7 | 8 | # Usage 9 | Run `python master.py` to use the grammar-checker 10 | 11 | Run `python test.py` to train the AP tagger and get the information about the viterbi tagger. 12 | 13 | Run `python dataAP/buildTrainTestFiles.py` to generate the .txt for training the AP tagger for next step. 14 | -------------------------------------------------------------------------------- /dataAP/buildTrainTestFiles.py: -------------------------------------------------------------------------------- 1 | from nltk.corpus import conll2000, brown 2 | 3 | f = open('train.txt', 'w') 4 | 5 | total = 1000 6 | for sent in conll2000.tagged_sents(): 7 | for word, tag in sent: 8 | #f.write(word + '\t' + tag + '\n') 9 | f.write(word + '\t' + tag + '\n') 10 | 11 | total -= 1 12 | if total == 0: 13 | break 14 | 15 | print "generated train.txt" 16 | 17 | f = open('test.txt', 'w') 18 | total = 100 19 | for sent in conll2000.tagged_sents()[1001:1105]: 20 | for word, tag in sent: 21 | #f.write(word + '\t' + tag + '\n') 22 | f.write(word + '\t' + tag+'\n') 23 | 24 | total -= 1 25 | if total == 0: 26 | break 27 | 28 | print "generated test.txt" 29 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import viterbi_tagger, perceptron_tagger 2 | from nltk.corpus import conll2000, brown 3 | 4 | if __name__ == '__main__': 5 | 6 | # Verify the if AP tagger is ready to function 7 | taggerAP = perceptron_tagger.AP_Tagger(False) 8 | print "If the averaged perceptron tagger is not trained, train it and cache the results." 9 | try: 10 | taggerAP.APTaggerTesting() 11 | except IOError: 12 | taggerAP.APTaggerTraining() 13 | taggerAP.APTaggerTesting() 14 | 15 | 16 | viterbi_tagger = viterbi_tagger.PartOfSpeechTagger() 17 | print "The tests will take a while." 18 | print "Test of accuracy: conll2000 corpus (1/2)." 19 | viterbi_tagger.buildProbDist(conll2000) 20 | 21 | viterbi_tagger.testAgainstCorpus(conll2000) 22 | 23 | print "Test of accuracy: Brown corpus (2/2)" 24 | viterbi_tagger.buildProbDist(brown) 25 | viterbi_tagger.testAgainstCorpus(brown) 26 | -------------------------------------------------------------------------------- /master.py: -------------------------------------------------------------------------------- 1 | import viterbi_tagger, perceptron_tagger, cfg 2 | 3 | # test values for developers 4 | test_sentence = ["I", "saw", "the", "duck"] 5 | cfg_test = ["NNP","VBD","NNP"] 6 | from nltk.corpus import conll2000, brown 7 | 8 | if __name__ == '__main__': 9 | # Build HMM 10 | print "Generating Hidden Markov Model..." 11 | viterbi_tagger = viterbi_tagger.PartOfSpeechTagger() 12 | # Build probability distributions for each of the corpora we want to use 13 | print "Building POS tag probability distributions based on..." 14 | print "Corpora 1: Conll2000," 15 | viterbi_tagger.buildProbDist(conll2000) 16 | print "Corpora 2: Brown." 17 | viterbi_tagger.buildProbDist(brown) 18 | # 19 | #Train the AP Tagger weights 20 | print "Prepare Averaged Perceptron tagger based on tagged corpora" 21 | taggerAP = perceptron_tagger.AP_Tagger(False) 22 | # 23 | # Build CFG rule set based on treebank 24 | print "Generating Context Free Grammar based on Treebank..." 25 | cfg_checker = cfg.Grammar() 26 | tbank_grammar = cfg_checker.buildFromTreebank() 27 | # 28 | # Loop input to get and check sentences 29 | print "If an input word is not in the corpora, the averaged perceptron \ 30 | tagger will be used instead of the Viterbi tagger.\n" 31 | while True: 32 | # Turn sentence into part-of-speech tags 33 | tag_sequence = viterbi_tagger.inputToPOS() 34 | print "TAG SEQUENCE:", tag_sequence 35 | 36 | # Pass tag sequence to CFG checker 37 | cfg_checker.verify(tbank_grammar, tag_sequence) 38 | -------------------------------------------------------------------------------- /cfg.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.corpus import treebank 3 | from nltk.grammar import CFG, Nonterminal, Production 4 | from nltk import ChartParser 5 | 6 | class Grammar(object): 7 | 8 | def __init__(self, dev=False): 9 | super(Grammar, self).__init__() 10 | self.dev = dev 11 | 12 | grammar1 = CFG.fromstring(""" 13 | S -> NP VP 14 | NP -> "DT" Nom | "NNP" | "PRP" 15 | Nom -> "JJ" Nom | N 16 | VP -> V "JJ" | V NP | V S | V NP PP | V "RB" 17 | V -> "VBD" | "VB" | "VBG" | "VBN" | "VBP" | "VBZ" 18 | N -> "NN" | "NNP" | "NNS" | "NNPS" 19 | PP -> "IN" NP 20 | """) 21 | 22 | grammar2 = CFG.fromstring(""" 23 | S -> NP VP 24 | NP -> "DT" Nom | "NNP" | "PRP" 25 | Nom -> "JJ" Nom | N | Nom N 26 | VP -> V "JJ" | V NP | V S | V NP PP | V "RB" | V PP | V 27 | V -> "VBD" | "VB" | "VBG" | "VBN" | "VBP" | "VBZ" 28 | N -> "NN" | "NNP" | "NNS" | "NNPS" 29 | PP -> "IN" NP | "TO" NP 30 | """) 31 | 32 | 33 | def buildFromTreebank(self): 34 | """ Build a Context-Free-Grammar based on UPenn treebank """ 35 | tbank_productions = set() 36 | for sent in treebank.parsed_sents(): 37 | for production in sent.productions(): 38 | if production.is_lexical(): 39 | new_rhs = [str(production._lhs)] 40 | production = Production(production._lhs, new_rhs) 41 | tbank_productions.add(production) 42 | 43 | tbank_grammar = CFG(Nonterminal('S'), list(tbank_productions)) 44 | 45 | return tbank_grammar 46 | 47 | def verify(self, grammar, tags): 48 | """ Verify tag sequence as grammatically correct or not """ 49 | # rd_parser = RecursiveDescentParser(grammar) 50 | rd_parser = ChartParser(grammar) 51 | valid = False 52 | 53 | try: 54 | for tree in rd_parser.parse(tags): 55 | valid = True 56 | break 57 | except ValueError: 58 | print "This is a grammatical structure I don't understand yet." 59 | return 60 | 61 | if valid: 62 | print "Valid" 63 | return True 64 | else: 65 | print "Invalid" 66 | return False 67 | -------------------------------------------------------------------------------- /viterbi_tagger.py: -------------------------------------------------------------------------------- 1 | import nltk, re, sys, perceptron_tagger 2 | from nltk.corpus import conll2000, brown 3 | 4 | class PartOfSpeechTagger(object): 5 | def __init__(self): 6 | super(PartOfSpeechTagger, self).__init__() 7 | # List containing tuples: ( pd_tagwords, pd_tags, all_tags, flag ) 8 | self.corpora_prob_dists = [] 9 | # this input only prepares for AP tagger 10 | self.AP_sent = [] 11 | 12 | 13 | def buildProbDist(self, corpus): 14 | """ Build tag probability distribution for Viterbi algorithm """ 15 | 16 | corpus_tags_words = [] 17 | 18 | # Build array containing all tags and words of all sentences, in order 19 | for sent in corpus.tagged_sents(): 20 | corpus_tags_words.append( ("BEGIN","BEGIN") ) 21 | corpus_tags_words.extend( [(tag, word) for (word, tag) in sent ] ) 22 | corpus_tags_words.append( ("STOP","STOP") ) 23 | 24 | # Build a conditional frequency distribution based on all tags/words of all sentences 25 | fd_tagwords = nltk.ConditionalFreqDist(corpus_tags_words) 26 | # Build conditional probability of each tag/word based on the frequency distribution above 27 | pd_tagwords = nltk.ConditionalProbDist(fd_tagwords, nltk.MLEProbDist) 28 | 29 | # Build array containing all tags of all sentences, in order 30 | corpus_tags = [tag for (tag, word) in corpus_tags_words] 31 | 32 | # Build a frequency distribution based ONLY on bigrams tags 33 | fd_tags = nltk.ConditionalFreqDist(nltk.bigrams(corpus_tags)) 34 | # Build conditional probability of each tag based on the frequency distribution above 35 | pd_tags = nltk.ConditionalProbDist(fd_tags, nltk.MLEProbDist) 36 | all_tags = set(corpus_tags) 37 | 38 | self.corpora_prob_dists.append( (pd_tagwords, pd_tags, all_tags, corpus_tags_words) ) 39 | 40 | 41 | def sentenceToPOS(self, sentence, test=False): 42 | """ Choose and call method (Viterbi Brown, Viterbi conll2000, Averaged Perceptron) """ 43 | i = 0 44 | # Choose corpus (Are all words in input sentence in one of the corpora?) 45 | # Finds the ith corpus that contains all words in input. Flag is false if no corpora contains all words. 46 | for (_, _, _, corpus_tags_words) in self.corpora_prob_dists: 47 | flag = True 48 | for word in sentence: 49 | if word not in [w for (t, w) in corpus_tags_words[1:-2]]: 50 | flag = False 51 | break 52 | if flag: break 53 | else: i+= 1 54 | 55 | # Choose and call method 56 | if flag: 57 | # All words in a corpora. Choose Viterbi with ith corpora. 58 | if not test: print "Using Viterbi: corpora", i+1 59 | c = self.corpora_prob_dists[i] 60 | tag_sequence = self._getViterbiPath(sentence, c[0], c[1], c[2]) 61 | #print tag_sequence 62 | else: 63 | # Missing word from corpora. Choose Averaged Perceptron. 64 | print "Your input contains a never-before-seen word! Using an Average Perceptron" 65 | tag_sequence = perceptron_tagger.AP_Tagger().tag( self.AP_sent ) 66 | 67 | return tag_sequence 68 | 69 | 70 | def _getViterbiPath(self, sentence, pd_tagwords, pd_tags, all_tags): 71 | """ Hidden Markov Model using Viterbi alg """ 72 | 73 | len_sent = len(sentence) 74 | viterbi = [ ] 75 | backpointer = [ ] 76 | 77 | first_viterbi = { } 78 | first_backpointer = { } 79 | 80 | for tag in all_tags: 81 | if tag == "BEGIN": continue 82 | first_viterbi[ tag ] = pd_tags["BEGIN"].prob(tag) * pd_tagwords[tag].prob( sentence[0] ) 83 | first_backpointer[ tag ] = "BEGIN" 84 | 85 | viterbi.append(first_viterbi) 86 | backpointer.append(first_backpointer) 87 | 88 | curr_best = max(first_viterbi.keys(), key = lambda tag: first_viterbi[ tag ]) 89 | 90 | for wordindex in range(1, len_sent): 91 | temp_viterbi = { } 92 | temp_backpointer = { } 93 | pre_viterbi = viterbi[-1] 94 | 95 | for tag in all_tags: 96 | if tag == "BEGIN": continue 97 | pre_best = max(pre_viterbi.keys(), key = lambda pretag: pre_viterbi[pretag]*pd_tags[pretag].prob(tag)*pd_tagwords[tag].prob(sentence[wordindex])) 98 | 99 | temp_viterbi[tag] = pre_viterbi[pre_best]*pd_tags[pre_best].prob(tag)*pd_tagwords[tag].prob(sentence[wordindex]) 100 | temp_backpointer[tag] = pre_best 101 | 102 | curr_best = max(temp_viterbi.keys(), key=lambda tag: temp_viterbi[tag]) 103 | 104 | viterbi.append(temp_viterbi) 105 | backpointer.append(temp_backpointer) 106 | 107 | pre_viterbi = viterbi[-1] 108 | pre_best = max(pre_viterbi.keys(), key = lambda pretag: pre_viterbi[pretag]*pd_tags[pretag].prob("STOP")) 109 | prob_tag_seq = pre_viterbi [pre_best]*pd_tags[pre_best].prob("STOP") 110 | 111 | best_tag_seq = ["STOP", pre_best] 112 | backpointer.reverse() 113 | 114 | 115 | curr_best_tag = pre_best 116 | for b in backpointer: 117 | best_tag_seq.append(b[curr_best_tag]) 118 | curr_best_tag = b[curr_best_tag] 119 | 120 | best_tag_seq.reverse() 121 | 122 | # Remove BEGIN/END tags 123 | best_tag_seq.pop() 124 | best_tag_seq.pop(0) 125 | 126 | return best_tag_seq 127 | 128 | 129 | def stringToPOS(self, string): 130 | """ Convert string to array and get tag sequence """ 131 | self.AP_sent = string 132 | arr = re.findall(r"[\w']+|[.,!?;]", string) # split including commas 133 | return self.sentenceToPOS( arr ) 134 | 135 | 136 | def inputToPOS(self): 137 | """ Get input from command line and get tag sequence """ 138 | 139 | inp = raw_input("Let's check a sentence: ") 140 | return self.stringToPOS(inp) 141 | 142 | 143 | def testAgainstCorpus(self, corpus, total_runs=100): 144 | """ Test method for Viterbi method accuracy against a corpus """ 145 | 146 | print "Testing Viterbi accuracy against corpus..." 147 | num_true = 0 148 | num_runs = 0 149 | for sent in corpus.tagged_sents(): 150 | 151 | sentenceArr = [] 152 | trueTagSeq = [] 153 | for (word, tag) in sent: 154 | sentenceArr.append( word ) 155 | trueTagSeq.append( tag ) 156 | predTagSeq = self.sentenceToPOS(sentenceArr, True) 157 | 158 | if trueTagSeq == predTagSeq: 159 | num_true += 1 160 | num_runs += 1 161 | 162 | # Update percent complete output 163 | sys.stdout.write('\r') 164 | sys.stdout.write("%.1f%% " % (float(num_runs) / total_runs * 100)) 165 | sys.stdout.flush() 166 | 167 | if num_runs >= total_runs: 168 | break 169 | 170 | print "\nACCURACY: %.2f%%" % (num_true / float(num_runs) * 100) 171 | return 172 | -------------------------------------------------------------------------------- /perceptron_tagger.py: -------------------------------------------------------------------------------- 1 | '''An averaged perceptron, inspired by the nltk project and the website below: 2 | http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/ 3 | ''' 4 | 5 | from __future__ import division 6 | from __future__ import absolute_import 7 | from collections import defaultdict 8 | 9 | import os 10 | import pickle 11 | import random 12 | 13 | PICKLE = "dataAP/user_defined_tagger.pickle" 14 | 15 | class APerceptron(object): 16 | 17 | def __init__(self): 18 | # Each feature gets its own weight vector, so weights is a dict-of-dicts 19 | self.weights = {} 20 | self.classes = set() 21 | # The accumulated values, for the averaging. These will be keyed by 22 | # feature/clas tuples 23 | self._totals = defaultdict(int) 24 | # The last time the feature was changed, for the averaging. Also 25 | # keyed by feature/clas tuples 26 | # (tstamps is short for timestamps) 27 | self._tstamps = defaultdict(int) 28 | # Number of instances seen 29 | self.i = 0 30 | 31 | def predict(self, features): 32 | '''Dot-product the features and current weights and return the best label.''' 33 | scores = defaultdict(float) 34 | for feat, value in features.items(): 35 | if feat not in self.weights or value == 0: 36 | continue 37 | weights = self.weights[feat] 38 | for label, weight in weights.items(): 39 | scores[label] += value * weight 40 | # Do a secondary alphabetic sort, for stability 41 | return max(self.classes, key=lambda label: (scores[label], label)) 42 | 43 | def update(self, truth, guess, features): 44 | '''Update the feature weights.''' 45 | def upd_feat(c, f, w, v): 46 | param = (f, c) 47 | self._totals[param] += (self.i - self._tstamps[param]) * w 48 | self._tstamps[param] = self.i 49 | self.weights[f][c] = w + v 50 | 51 | self.i += 1 52 | if truth == guess: 53 | return None 54 | for feat in features: 55 | weights = self.weights.setdefault(feat, {}) 56 | upd_feat(truth, feat, weights.get(truth, 0.0), 1.0) 57 | upd_feat(guess, feat, weights.get(guess, 0.0), -1.0) 58 | return None 59 | 60 | def average_weights(self): 61 | '''Average weights from all iterations.''' 62 | for feat, weights in self.weights.items(): 63 | new_feat_weights = {} 64 | for clas, weight in weights.items(): 65 | param = (feat, clas) 66 | total = self._totals[param] 67 | total += (self.i - self._tstamps[param]) * weight 68 | averaged = round(total / float(self.i), 3) 69 | if averaged: 70 | new_feat_weights[clas] = averaged 71 | self.weights[feat] = new_feat_weights 72 | return None 73 | 74 | 75 | ####################################################################################### 76 | 77 | class AP_Tagger(): 78 | '''Greedy Averaged Perceptron tagger 79 | :param load: Load the pickled model upon instantiation. 80 | ''' 81 | 82 | BEGIN = ['-BEGIN-', '-BEGIN2-'] 83 | STOP = ['-STOP-', '-STOP2-'] 84 | AP_MODEL_LOC = os.path.join(os.path.dirname(__file__), PICKLE) 85 | 86 | def __init__(self, load=True): 87 | self.model = APerceptron() 88 | self.tagdict = {} 89 | self.classes = set() 90 | if load: 91 | self.load(self.AP_MODEL_LOC) 92 | 93 | def tag(self, sentence): 94 | '''Tags a sentence.''' 95 | # format untokenized corpus has \n between sentences and ' ' between words 96 | 97 | s_split = lambda t: t.split('\n') 98 | w_split = lambda s: s.split() 99 | 100 | def split_sents(sentence): 101 | for s in s_split(sentence): 102 | yield w_split(s) 103 | ######################################## 104 | 105 | prev, prev2 = self.BEGIN 106 | tokens = [] 107 | 108 | for words in split_sents(sentence): 109 | context = self.BEGIN + [self._normalize(w) for w in words] + self.STOP 110 | for i, word in enumerate(words): 111 | tag = self.tagdict.get(word) 112 | if not tag: 113 | features = self._get_features(i, word, context, prev, prev2) 114 | tag = self.model.predict(features) 115 | tokens.append((word, tag[:-1])) 116 | prev2 = prev 117 | prev = tag 118 | return tokens 119 | 120 | def train(self, sentences, save_loc=None, nr_iter=10): 121 | '''Train a model from sentences, and save it at ``save_loc``. ``nr_iter`` 122 | controls the number of Perceptron training iterations. 123 | :param sentences: A list of (words, tags) tuples. 124 | :param save_loc: If not ``None``, saves a pickled model in this location. 125 | :param nr_iter: Number of training iterations. 126 | ''' 127 | self._make_tagdict(sentences) 128 | self.model.classes = self.classes 129 | for iter_ in range(nr_iter): 130 | c = 0 131 | n = 0 132 | for words, tags in sentences: 133 | prev, prev2 = self.BEGIN 134 | context = self.BEGIN + [self._normalize(w) for w in words] \ 135 | + self.STOP 136 | for i, word in enumerate(words): 137 | guess = self.tagdict.get(word) 138 | if not guess: 139 | feats = self._get_features(i, word, context, prev, prev2) 140 | guess = self.model.predict(feats) 141 | self.model.update(tags[i], guess, feats) 142 | prev2 = prev 143 | prev = guess 144 | c += guess == tags[i] 145 | n += 1 146 | random.shuffle(sentences) 147 | print "Iteration {0}: {1}/{2}={3}".format(iter_, c, n, (c / n) * 100) 148 | 149 | self.model.average_weights() 150 | # Pickle as a binary file 151 | if save_loc is not None: 152 | pickle.dump((self.model.weights, self.tagdict, self.classes), 153 | open(save_loc, 'wb'), -1) 154 | return None 155 | 156 | def load(self, loc): 157 | '''Load a pickled model.''' 158 | try: 159 | weight_tagdict_class = pickle.load(open(loc, 'rb')) 160 | except IOError: 161 | msg = ("Missing user-define pickle file.") 162 | raise IOError(msg) 163 | self.model.weights, self.tagdict, self.classes = weight_tagdict_class 164 | self.model.classes = self.classes 165 | return None 166 | 167 | def _normalize(self, word): 168 | '''Normalization used in pre-processing. 169 | - All words are lower cased 170 | - Digits in the range 1800-2100 are represented as !YEAR; 171 | - Other digits are represented as !DIGITS 172 | :rtype: str 173 | ''' 174 | if '-' in word and word[0] != '-': 175 | return '!HYPHEN' 176 | elif word.isdigit() and len(word) == 4: 177 | return '!YEAR' 178 | elif word[0].isdigit(): 179 | return '!DIGITS' 180 | else: 181 | return word.lower() 182 | 183 | def _get_features(self, i, word, context, prev, prev2): 184 | '''Map tokens into a feature representation, implemented as a 185 | {hashable: float} dict. If the features change, a new model must be 186 | trained. 187 | ''' 188 | 189 | def add(name, *args): 190 | features[' '.join((name,) + tuple(args))] += 1 191 | 192 | i += len(self.BEGIN) 193 | features = defaultdict(int) 194 | # It's useful to have a constant feature, which acts sort of like a prior 195 | add('bias') 196 | add('i suffix', word[-3:]) 197 | add('i pref1', word[0]) 198 | add('i-1 tag', prev) 199 | add('i-2 tag', prev2) 200 | add('i tag+i-2 tag', prev, prev2) 201 | add('i word', context[i]) 202 | add('i-1 tag+i word', prev, context[i]) 203 | add('i-1 word', context[i - 1]) 204 | add('i-1 suffix', context[i - 1][-3:]) 205 | add('i-2 word', context[i - 2]) 206 | add('i+1 word', context[i + 1]) 207 | add('i+1 suffix', context[i + 1][-3:]) 208 | add('i+2 word', context[i + 2]) 209 | return features 210 | 211 | def _make_tagdict(self, sentences): 212 | '''Make a tag dictionary for single-tag words.''' 213 | counts = defaultdict(lambda: defaultdict(int)) 214 | for words, tags in sentences: 215 | for word, tag in zip(words, tags): 216 | counts[word][tag] += 1 217 | self.classes.add(tag) 218 | freq_thresh = 20 219 | ambiguity_thresh = 0.97 220 | for word, tag_freqs in counts.items(): 221 | tag, mode = max(tag_freqs.items(), key=lambda item: item[1]) 222 | n = sum(tag_freqs.values()) 223 | # Don't add rare words to the tag dictionary 224 | # Only add quite unambiguous words 225 | if n >= freq_thresh and (float(mode) / n) >= ambiguity_thresh: 226 | self.tagdict[word] = tag 227 | 228 | def APTaggerTraining(self): 229 | AP_tagger = AP_Tagger(False) 230 | # to train the averaged perceptron tagger 231 | print 'Loading corpus...' 232 | training_data = [] 233 | training_sentence = ([], []) 234 | 235 | # put every line into training data 236 | for sent in open('dataAP/train.txt'): 237 | w_list = sent.split('\t') 238 | training_sentence[0].append(w_list[0]) 239 | training_sentence[1].append(w_list[1]) 240 | if w_list[0] == '.': 241 | training_data.append(training_sentence) 242 | training_sentence = ([], []) 243 | print 'Training corpus size : %d', len(training_data) 244 | print 'Start training AP Tagger...' 245 | AP_tagger.train(training_data, save_loc=PICKLE) 246 | 247 | def APTaggerTesting(self): 248 | "Testing averaged perceptron tagger..." 249 | AP_tagger = AP_Tagger(False) 250 | AP_tagger.load(PICKLE) 251 | 252 | right = 0.0 253 | total = 0.0 254 | test_sentence = ([], []) 255 | for line in open('dataAP/test.txt'): 256 | params = line.split() 257 | if len(params) != 2: continue 258 | test_sentence[0].append(params[0]) 259 | test_sentence[1].append(params[1]) 260 | if params[0] == '.': 261 | text = '' 262 | words = test_sentence[0] 263 | tags = test_sentence[1] 264 | for i, word in enumerate(words): 265 | text += word 266 | if i < len(words): text += ' ' 267 | outputs = AP_tagger.tag(text) 268 | assert len(tags) == len(outputs) 269 | total += len(tags) 270 | for o, t in zip(outputs, tags): 271 | if o[1].strip() == t: right += 1 272 | test_sentence = ([], []) 273 | print "ACCURACY: %.2f%%" % (100 * right / total) 274 | -------------------------------------------------------------------------------- /dataAP/test.txt: -------------------------------------------------------------------------------- 1 | Steelmakers NNS 2 | have VBP 3 | also RB 4 | been VBN 5 | adding VBG 6 | capacity NN 7 | of IN 8 | so-called JJ 9 | electrogalvanized JJ 10 | steel NN 11 | , , 12 | which WDT 13 | is VBZ 14 | another DT 15 | way NN 16 | to TO 17 | make VB 18 | coated VBN 19 | corrosion-resistant JJ 20 | steel NN 21 | . . 22 | One CD 23 | of IN 24 | the DT 25 | advantages NNS 26 | of IN 27 | the DT 28 | hot-dipped JJ 29 | process NN 30 | is VBZ 31 | that IN 32 | it PRP 33 | allows VBZ 34 | the DT 35 | steel NN 36 | to TO 37 | be VB 38 | covered VBN 39 | with IN 40 | a DT 41 | thicker JJR 42 | coat NN 43 | of IN 44 | zinc NN 45 | more RBR 46 | quickly RB 47 | . . 48 | ONCE RB 49 | YOU PRP 50 | MAKE VB 51 | UP IN 52 | your PRP$ 53 | mind NN 54 | about IN 55 | an DT 56 | investment NN 57 | , , 58 | the DT 59 | rest NN 60 | is VBZ 61 | easy JJ 62 | , , 63 | right NN 64 | ? . 65 | You PRP 66 | just RB 67 | call VBP 68 | your PRP$ 69 | broker NN 70 | and CC 71 | say VBP 72 | `` `` 73 | buy VB 74 | '' '' 75 | or CC 76 | `` `` 77 | sell VB 78 | . . 79 | '' '' 80 | Dream NNP 81 | on NNP 82 | . . 83 | There EX 84 | are VBP 85 | all DT 86 | sorts NNS 87 | of IN 88 | ways NNS 89 | to TO 90 | give VB 91 | buy NN 92 | and CC 93 | sell VB 94 | instructions NNS 95 | to TO 96 | a DT 97 | broker NN 98 | -- : 99 | and CC 100 | just RB 101 | as IN 102 | many JJ 103 | ways NNS 104 | to TO 105 | get VB 106 | burned VBN 107 | if IN 108 | you PRP 109 | do VBP 110 | n't RB 111 | know VB 112 | what WP 113 | you PRP 114 | 're VBP 115 | doing VBG 116 | . . 117 | So RB 118 | here RB 119 | 's VBZ 120 | a DT 121 | rundown NN 122 | of IN 123 | the DT 124 | most RBS 125 | common JJ 126 | types NNS 127 | of IN 128 | market NN 129 | orders NNS 130 | permitted VBN 131 | by IN 132 | the DT 133 | stock NN 134 | and CC 135 | commodity NN 136 | exchanges NNS 137 | . . 138 | Two CD 139 | things NNS 140 | to TO 141 | keep VB 142 | in IN 143 | mind NN 144 | : : 145 | Not RB 146 | all DT 147 | exchanges NNS 148 | accept VBP 149 | every DT 150 | type NN 151 | of IN 152 | order NN 153 | . . 154 | And CC 155 | even RB 156 | when WRB 157 | a DT 158 | specific JJ 159 | order NN 160 | is VBZ 161 | acceptable JJ 162 | to TO 163 | an DT 164 | exchange NN 165 | , , 166 | a DT 167 | brokerage NN 168 | firm NN 169 | can MD 170 | refuse VB 171 | to TO 172 | enter VB 173 | it PRP 174 | for IN 175 | a DT 176 | customer NN 177 | . . 178 | Market NNP 179 | Order NNP 180 | : : 181 | This DT 182 | is VBZ 183 | probably RB 184 | the DT 185 | most RBS 186 | widely RB 187 | used VBN 188 | order NN 189 | -- : 190 | and CC 191 | the DT 192 | one CD 193 | most RBS 194 | open JJ 195 | to TO 196 | abuse VB 197 | by IN 198 | unscrupulous JJ 199 | floor NN 200 | brokers NNS 201 | , , 202 | since IN 203 | it PRP 204 | imposes VBZ 205 | no DT 206 | price NN 207 | restrictions NNS 208 | . . 209 | With IN 210 | a DT 211 | market NN 212 | order NN 213 | , , 214 | an DT 215 | investor NN 216 | tells VBZ 217 | a DT 218 | broker NN 219 | to TO 220 | buy VB 221 | or CC 222 | sell VB 223 | `` `` 224 | at IN 225 | the DT 226 | market NN 227 | . . 228 | '' '' 229 | It PRP 230 | 's VBZ 231 | like IN 232 | saying VBG 233 | , , 234 | `` `` 235 | get VB 236 | me PRP 237 | in IN 238 | now RB 239 | '' '' 240 | or CC 241 | `` `` 242 | get VB 243 | me PRP 244 | out IN 245 | now RB 246 | . . 247 | '' '' 248 | For IN 249 | example NN 250 | , , 251 | if IN 252 | wheat NN 253 | is VBZ 254 | being VBG 255 | offered VBN 256 | at IN 257 | $ $ 258 | 4.065 CD 259 | and CC 260 | bid NN 261 | at IN 262 | $ $ 263 | 4.060 CD 264 | , , 265 | a DT 266 | market NN 267 | order NN 268 | to TO 269 | buy VB 270 | would MD 271 | be VB 272 | filled VBN 273 | at IN 274 | the DT 275 | higher JJR 276 | price NN 277 | and CC 278 | a DT 279 | market NN 280 | order NN 281 | to TO 282 | sell VB 283 | at IN 284 | the DT 285 | lower JJR 286 | price NN 287 | . . 288 | A DT 289 | recent JJ 290 | indictment NN 291 | alleges VBZ 292 | that IN 293 | some DT 294 | floor NN 295 | brokers NNS 296 | at IN 297 | the DT 298 | two CD 299 | largest JJS 300 | Chicago NNP 301 | commodity NN 302 | exchanges NNS 303 | used VBD 304 | market NN 305 | orders NNS 306 | to TO 307 | fill VB 308 | customers NNS 309 | ' POS 310 | orders NNS 311 | at IN 312 | unfavorable JJ 313 | prices NNS 314 | by IN 315 | arranging VBG 316 | trades NNS 317 | with IN 318 | fellow NN 319 | brokers NNS 320 | . . 321 | Profits NNS 322 | realized VBD 323 | from IN 324 | these DT 325 | trades NNS 326 | would MD 327 | then RB 328 | be VB 329 | shared VBN 330 | by IN 331 | the DT 332 | conspiring VBG 333 | brokers NNS 334 | . . 335 | Limit VB 336 | Order NNP 337 | : : 338 | Limit VB 339 | orders NNS 340 | are VBP 341 | used VBN 342 | when WRB 343 | investors NNS 344 | want VBP 345 | to TO 346 | restrict VB 347 | the DT 348 | amount NN 349 | they PRP 350 | will MD 351 | receive VB 352 | or CC 353 | pay VB 354 | for IN 355 | an DT 356 | investment NN 357 | . . 358 | Investors NNS 359 | do VBP 360 | this DT 361 | by IN 362 | specifying VBG 363 | a DT 364 | minimum JJ 365 | price NN 366 | at IN 367 | which WDT 368 | the DT 369 | investment NN 370 | may MD 371 | be VB 372 | sold VBN 373 | or CC 374 | the DT 375 | maximum JJ 376 | price NN 377 | that WDT 378 | may MD 379 | be VB 380 | paid VBN 381 | for IN 382 | it PRP 383 | . . 384 | Suppose VB 385 | an DT 386 | investor NN 387 | wants VBZ 388 | to TO 389 | sell VB 390 | a DT 391 | stock NN 392 | , , 393 | but CC 394 | not RB 395 | for IN 396 | less JJR 397 | than IN 398 | $ $ 399 | 55 NN 400 | . . 401 | A DT 402 | limit NN 403 | order NN 404 | to TO 405 | sell VB 406 | could MD 407 | be VB 408 | entered VBN 409 | at IN 410 | that DT 411 | price NN 412 | . . 413 | One CD 414 | risk NN 415 | : : 416 | Investors NNS 417 | may MD 418 | regret VB 419 | the DT 420 | restriction NN 421 | if IN 422 | the DT 423 | stock NN 424 | reaches VBZ 425 | 54 CD 426 | and CC 427 | then RB 428 | falls VBZ 429 | . . 430 | Unless IN 431 | the DT 432 | market NN 433 | goes VBZ 434 | at IN 435 | least JJS 436 | one CD 437 | tick VB 438 | -LRB- ( 439 | the DT 440 | smallest JJS 441 | price NN 442 | increment NN 443 | permitted VBN 444 | -RRB- ) 445 | beyond IN 446 | the DT 447 | limit NN 448 | price NN 449 | , , 450 | investors NNS 451 | are VBP 452 | n't RB 453 | assured VBN 454 | of IN 455 | having VBG 456 | their PRP$ 457 | orders NNS 458 | filled VBN 459 | because IN 460 | there EX 461 | may MD 462 | not RB 463 | be VB 464 | sufficient JJ 465 | trading NN 466 | volume NN 467 | to TO 468 | permit VB 469 | filling VBG 470 | it PRP 471 | at IN 472 | the DT 473 | specified VBN 474 | price NN 475 | . . 476 | Stop VB 477 | Order NNP 478 | : : 479 | Stop VB 480 | orders NNS 481 | tell VB 482 | a DT 483 | floor NN 484 | broker NN 485 | to TO 486 | buy VB 487 | or CC 488 | sell VB 489 | an DT 490 | investment NN 491 | once RB 492 | the DT 493 | price NN 494 | reaches VBZ 495 | a DT 496 | certain JJ 497 | level NN 498 | . . 499 | Once RB 500 | the DT 501 | price NN 502 | reaches VBZ 503 | that DT 504 | level NN 505 | , , 506 | a DT 507 | stop NN 508 | order NN 509 | turns VBZ 510 | into IN 511 | a DT 512 | market NN 513 | order NN 514 | , , 515 | and CC 516 | the DT 517 | order NN 518 | is VBZ 519 | filled VBN 520 | at IN 521 | whatever WDT 522 | price VBP 523 | the DT 524 | broker NN 525 | can MD 526 | get VB 527 | . . 528 | Stop VB 529 | orders NNS 530 | are VBP 531 | sometimes RB 532 | called VBN 533 | `` `` 534 | stop-loss NN 535 | '' '' 536 | orders NNS 537 | because IN 538 | they PRP 539 | are VBP 540 | frequently RB 541 | used VBN 542 | to TO 543 | protect VB 544 | profits NNS 545 | or CC 546 | limit NN 547 | losses NNS 548 | . . 549 | While IN 550 | stop NN 551 | orders NNS 552 | sound VBP 553 | similar JJ 554 | to TO 555 | limit VB 556 | orders NNS 557 | , , 558 | there EX 559 | is VBZ 560 | a DT 561 | difference NN 562 | : : 563 | Sell VB 564 | stops VBZ 565 | must MD 566 | be VB 567 | entered VBN 568 | at IN 569 | a DT 570 | price NN 571 | below IN 572 | the DT 573 | current JJ 574 | market NN 575 | price NN 576 | and CC 577 | buy VB 578 | stops VBZ 579 | above RB 580 | . . 581 | In IN 582 | contrast NN 583 | , , 584 | sell VB 585 | limit NN 586 | orders NNS 587 | must MD 588 | be VB 589 | placed VBN 590 | above IN 591 | the DT 592 | market NN 593 | price NN 594 | and CC 595 | buy VB 596 | limit NN 597 | orders NNS 598 | are VBP 599 | placed VBN 600 | below RB 601 | . . 602 | The DT 603 | crash NN 604 | in IN 605 | October NNP 606 | 1987 CD 607 | and CC 608 | last JJ 609 | Friday NNP 610 | 's POS 611 | sell-off NN 612 | painfully RB 613 | taught VBD 614 | some DT 615 | investors NNS 616 | exactly RB 617 | what WP 618 | stop VBP 619 | orders NNS 620 | will MD 621 | and CC 622 | wo MD 623 | n't RB 624 | do VB 625 | . . 626 | An DT 627 | investor NN 628 | who WP 629 | may MD 630 | have VB 631 | placed VBN 632 | a DT 633 | stop-loss NN 634 | order NN 635 | at IN 636 | $ $ 637 | 90 CD 638 | under IN 639 | a DT 640 | stock NN 641 | that WDT 642 | was VBD 643 | trading VBG 644 | at IN 645 | $ $ 646 | 100 CD 647 | a DT 648 | share NN 649 | on IN 650 | the DT 651 | Friday NNP 652 | before IN 653 | the DT 654 | crash NN 655 | was VBD 656 | stunned VBN 657 | to TO 658 | discover VB 659 | that IN 660 | the DT 661 | order NN 662 | was VBD 663 | filled VBN 664 | at IN 665 | $ $ 666 | 75 CD 667 | when WRB 668 | the DT 669 | stock NN 670 | opened VBD 671 | at IN 672 | that DT 673 | price NN 674 | on IN 675 | Monday NNP 676 | . . 677 | Stop-Limit NNP 678 | Order NNP 679 | : : 680 | Stop-limit JJ 681 | orders NNS 682 | turn VBP 683 | into IN 684 | limit NN 685 | orders NNS 686 | when WRB 687 | an DT 688 | investment NN 689 | trades NNS 690 | at IN 691 | the DT 692 | price NN 693 | specified VBN 694 | in IN 695 | the DT 696 | order NN 697 | . . 698 | Unlike IN 699 | stop NN 700 | orders NNS 701 | -- : 702 | which WDT 703 | are VBP 704 | filled VBN 705 | at IN 706 | the DT 707 | market NN 708 | price NN 709 | when WRB 710 | the DT 711 | stop NN 712 | price NN 713 | is VBZ 714 | hit VBN 715 | -- : 716 | stop-limit JJ 717 | orders NNS 718 | demand VBP 719 | that IN 720 | the DT 721 | trades NNS 722 | be VBP 723 | made VBN 724 | only RB 725 | at IN 726 | the DT 727 | specified VBN 728 | price NN 729 | . . 730 | If IN 731 | it PRP 732 | ca MD 733 | n't RB 734 | be VB 735 | made VBN 736 | at IN 737 | that DT 738 | price NN 739 | , , 740 | it PRP 741 | does VBZ 742 | n't RB 743 | get VB 744 | filled VBN 745 | . . 746 | Investors NNS 747 | who WP 748 | wish VBP 749 | to TO 750 | be VB 751 | out IN 752 | of IN 753 | a DT 754 | position NN 755 | , , 756 | without IN 757 | the DT 758 | risk NN 759 | of IN 760 | receiving VBG 761 | a DT 762 | worse-than-expected JJ 763 | price NN 764 | from IN 765 | a DT 766 | market NN 767 | order NN 768 | , , 769 | may MD 770 | use VB 771 | this DT 772 | type NN 773 | of IN 774 | order NN 775 | to TO 776 | specify VB 777 | the DT 778 | price NN 779 | at IN 780 | which WDT 781 | the DT 782 | order NN 783 | must MD 784 | be VB 785 | filled VBN 786 | . . 787 | But CC 788 | if IN 789 | the DT 790 | market NN 791 | moves NNS 792 | quickly RB 793 | enough RB 794 | , , 795 | it PRP 796 | may MD 797 | be VB 798 | impossible JJ 799 | for IN 800 | the DT 801 | broker NN 802 | to TO 803 | carry VB 804 | out RP 805 | the IN 806 | order NN 807 | because IN 808 | the DT 809 | investment NN 810 | has VBZ 811 | passed VBN 812 | the DT 813 | specified VBN 814 | price NN 815 | . . 816 | Market-If-Touched NNP 817 | Order NNP 818 | : : 819 | Market-if-touched JJ 820 | orders NNS 821 | are VBP 822 | like IN 823 | stop NN 824 | orders NNS 825 | in IN 826 | that DT 827 | they PRP 828 | become VBP 829 | market NN 830 | orders NNS 831 | if IN 832 | a DT 833 | specified VBN 834 | price NN 835 | is VBZ 836 | reached VBN 837 | . . 838 | However RB 839 | , , 840 | unlike IN 841 | a DT 842 | buy-stop JJ 843 | order NN 844 | , , 845 | a DT 846 | buy NN 847 | market-if-touched NN 848 | order NN 849 | is VBZ 850 | entered VBN 851 | at IN 852 | a DT 853 | price NN 854 | below IN 855 | the DT 856 | current JJ 857 | price NN 858 | , , 859 | while IN 860 | a DT 861 | sell NN 862 | market-if-touched NN 863 | order NN 864 | is VBZ 865 | entered VBN 866 | at IN 867 | a DT 868 | price NN 869 | above IN 870 | it PRP 871 | . . 872 | As RB 873 | soon RB 874 | as IN 875 | the DT 876 | market NN 877 | trades NNS 878 | at IN 879 | the DT 880 | specified VBN 881 | price NN 882 | the DT 883 | floor NN 884 | broker NN 885 | will MD 886 | fill VB 887 | it PRP 888 | at IN 889 | the DT 890 | best JJS 891 | possible JJ 892 | price NN 893 | . . 894 | Fill-Or-Kill NNP 895 | Order NNP 896 | : : 897 | The DT 898 | fill-or-kill JJ 899 | order NN 900 | is VBZ 901 | one CD 902 | of IN 903 | several RB 904 | associated VBN 905 | with IN 906 | the DT 907 | timing NN 908 | of IN 909 | trades NNS 910 | . . 911 | It PRP 912 | instructs VBZ 913 | a DT 914 | broker NN 915 | to TO 916 | buy VB 917 | or CC 918 | sell VB 919 | an DT 920 | investment NN 921 | at IN 922 | the DT 923 | specified VBN 924 | price NN 925 | or CC 926 | better JJR 927 | . . 928 | But CC 929 | if IN 930 | the DT 931 | investment NN 932 | ca MD 933 | n't RB 934 | be VB 935 | bought VBN 936 | or CC 937 | sold VBN 938 | immediately RB 939 | , , 940 | the DT 941 | order NN 942 | is VBZ 943 | automatically RB 944 | canceled VBN 945 | . . 946 | Gregory NNP 947 | Bessemer NNP 948 | , , 949 | who WP 950 | came VBD 951 | in IN 952 | second JJ 953 | in IN 954 | the DT 955 | stock NN 956 | division NN 957 | of IN 958 | the DT 959 | recently RB 960 | completed VBN 961 | U.S. NNP 962 | Trading NNP 963 | Championship NNP 964 | , , 965 | says VBZ 966 | he PRP 967 | uses VBZ 968 | fill-or-kill JJ 969 | orders NNS 970 | almost RB 971 | exclusively RB 972 | when WRB 973 | trading NN 974 | options NNS 975 | . . 976 | `` `` 977 | I PRP 978 | like VB 979 | to TO 980 | use VB 981 | them PRP 982 | to TO 983 | feel VB 984 | out RP 985 | the IN 986 | market NN 987 | , , 988 | '' '' 989 | he PRP 990 | says VBZ 991 | . . 992 | `` `` 993 | If IN 994 | they PRP 995 | do VBP 996 | n't RB 997 | fill VB 998 | it PRP 999 | immediately RB 1000 | , , 1001 | then RB 1002 | I PRP 1003 | can MD 1004 | start VB 1005 | over IN 1006 | at IN 1007 | a DT 1008 | new JJ 1009 | price NN 1010 | or CC 1011 | try VB 1012 | again RB 1013 | with IN 1014 | the DT 1015 | same JJ 1016 | price NN 1017 | . . 1018 | '' '' 1019 | Not-Held NNP 1020 | Order NNP 1021 | : : 1022 | This DT 1023 | is VBZ 1024 | another DT 1025 | timing NN 1026 | order NN 1027 | . . 1028 | It PRP 1029 | is VBZ 1030 | a DT 1031 | market NN 1032 | order NN 1033 | that WDT 1034 | allows VBZ 1035 | floor NN 1036 | brokers NNS 1037 | to TO 1038 | take VB 1039 | more JJR 1040 | time NN 1041 | to TO 1042 | buy VB 1043 | or CC 1044 | sell VB 1045 | an DT 1046 | investment NN 1047 | , , 1048 | if IN 1049 | they PRP 1050 | think VBP 1051 | they PRP 1052 | can MD 1053 | get VB 1054 | a DT 1055 | better JJR 1056 | price NN 1057 | by IN 1058 | waiting VBG 1059 | . . 1060 | Not-held JJ 1061 | orders NNS 1062 | , , 1063 | which WDT 1064 | are VBP 1065 | also RB 1066 | known VBN 1067 | as IN 1068 | `` `` 1069 | disregard NN 1070 | the DT 1071 | tape NN 1072 | '' '' 1073 | orders NNS 1074 | , , 1075 | are VBP 1076 | always RB 1077 | done VBN 1078 | at IN 1079 | the DT 1080 | customer NN 1081 | 's POS 1082 | risk NN 1083 | . . 1084 | One-Cancels-The-Other NNP 1085 | Order NNP 1086 | : : 1087 | This DT 1088 | is VBZ 1089 | really RB 1090 | two CD 1091 | orders NNS 1092 | in IN 1093 | one CD 1094 | , , 1095 | generally RB 1096 | for IN 1097 | the DT 1098 | same JJ 1099 | security NN 1100 | or CC 1101 | commodity NN 1102 | , , 1103 | instructing VBG 1104 | floor NN 1105 | brokers NNS 1106 | to TO 1107 | fill VB 1108 | whichever WDT 1109 | order VBP 1110 | they PRP 1111 | can MD 1112 | first RB 1113 | and CC 1114 | then RB 1115 | cancel VB 1116 | the DT 1117 | other JJ 1118 | order NN 1119 | . . 1120 | In IN 1121 | a DT 1122 | fast-moving JJ 1123 | market NN 1124 | , , 1125 | it PRP 1126 | prevents VBZ 1127 | an DT 1128 | investor NN 1129 | from IN 1130 | getting VBG 1131 | stuck VBN 1132 | with IN 1133 | having VBG 1134 | made VBN 1135 | two CD 1136 | trades NNS 1137 | on IN 1138 | the DT 1139 | same JJ 1140 | security NN 1141 | . . 1142 | Specific-Time NNP 1143 | Order NNP 1144 | : : 1145 | This DT 1146 | type NN 1147 | of IN 1148 | order NN 1149 | couples NNS 1150 | many NN 1151 | of IN 1152 | the DT 1153 | orders NNS 1154 | described VBN 1155 | above IN 1156 | with IN 1157 | instructions NNS 1158 | that IN 1159 | the DT 1160 | order NN 1161 | must MD 1162 | be VB 1163 | carried VBN 1164 | out IN 1165 | at IN 1166 | or CC 1167 | by IN 1168 | a DT 1169 | certain JJ 1170 | time NN 1171 | . . 1172 | `` `` 1173 | On IN 1174 | the DT 1175 | close NN 1176 | '' '' 1177 | can MD 1178 | be VB 1179 | added VBN 1180 | to TO 1181 | many VB 1182 | types NNS 1183 | of IN 1184 | orders NNS 1185 | . . 1186 | For IN 1187 | example NN 1188 | , , 1189 | `` `` 1190 | market-on-close JJ 1191 | orders NNS 1192 | '' '' 1193 | must MD 1194 | be VB 1195 | filled VBN 1196 | during IN 1197 | the DT 1198 | last JJ 1199 | few JJ 1200 | minutes NNS 1201 | of IN 1202 | trading NN 1203 | for IN 1204 | the DT 1205 | day NN 1206 | at IN 1207 | a DT 1208 | price NN 1209 | that WDT 1210 | is VBZ 1211 | within IN 1212 | the DT 1213 | official NN 1214 | closing VBG 1215 | range NN 1216 | of IN 1217 | prices NNS 1218 | as IN 1219 | determined VBN 1220 | by IN 1221 | the DT 1222 | exchange NN 1223 | . . 1224 | `` `` 1225 | Stop-close-only JJ 1226 | orders NNS 1227 | '' '' 1228 | are VBP 1229 | stop VB 1230 | orders NNS 1231 | that WDT 1232 | only RB 1233 | become VB 1234 | active JJ 1235 | during IN 1236 | the DT 1237 | closing VBG 1238 | minutes NNS 1239 | of IN 1240 | trading NN 1241 | . . 1242 | `` `` 1243 | Day NNP 1244 | orders NNS 1245 | '' '' 1246 | expire VB 1247 | at IN 1248 | the DT 1249 | end NN 1250 | of IN 1251 | the DT 1252 | day NN 1253 | on IN 1254 | which WDT 1255 | they PRP 1256 | are VBP 1257 | entered VBN 1258 | , , 1259 | `` `` 1260 | good-till-canceled JJ 1261 | orders NNS 1262 | '' '' 1263 | have VBP 1264 | no DT 1265 | expiration NN 1266 | date NN 1267 | . . 1268 | Most JJS 1269 | brokers NNS 1270 | assume VBP 1271 | that IN 1272 | all DT 1273 | orders NNS 1274 | are VBP 1275 | day NN 1276 | orders NNS 1277 | unless IN 1278 | specified VBN 1279 | otherwise RB 1280 | . . 1281 | On IN 1282 | Oct. NNP 1283 | 19 CD 1284 | , , 1285 | 1987 CD 1286 | , , 1287 | some DT 1288 | investors NNS 1289 | learned VBD 1290 | the DT 1291 | consequences NNS 1292 | of IN 1293 | entering VBG 1294 | `` `` 1295 | good-til-canceled JJ 1296 | limit NN 1297 | orders NNS 1298 | '' '' 1299 | and CC 1300 | then RB 1301 | forgetting VBG 1302 | about IN 1303 | them PRP 1304 | . . 1305 | They PRP 1306 | found VBD 1307 | they PRP 1308 | had VBD 1309 | bought VBN 1310 | stock NN 1311 | from IN 1312 | limit NN 1313 | orders NNS 1314 | that IN 1315 | they PRP 1316 | might MD 1317 | have VB 1318 | entered VBN 1319 | weeks NNS 1320 | or CC 1321 | months NNS 1322 | earlier RBR 1323 | and CC 1324 | had VBD 1325 | forgotten VBN 1326 | to TO 1327 | cancel VB 1328 | . . 1329 | It PRP 1330 | is VBZ 1331 | always RB 1332 | the DT 1333 | responsibility NN 1334 | of IN 1335 | investors NNS 1336 | to TO 1337 | keep VB 1338 | track NN 1339 | of IN 1340 | the DT 1341 | orders NNS 1342 | they PRP 1343 | have VBP 1344 | placed VBN 1345 | . . 1346 | Investors NNS 1347 | who WP 1348 | change VBP 1349 | their PRP$ 1350 | mind NN 1351 | about IN 1352 | buying VBG 1353 | or CC 1354 | selling VBG 1355 | after IN 1356 | an DT 1357 | order NN 1358 | has VBZ 1359 | been VBN 1360 | filled VBN 1361 | are VBP 1362 | , , 1363 | usually RB 1364 | , , 1365 | stuck VBN 1366 | with IN 1367 | the DT 1368 | consequences NNS 1369 | . . 1370 | Mr. NNP 1371 | Angrist NNP 1372 | writes VBZ 1373 | on IN 1374 | the DT 1375 | options NNS 1376 | and CC 1377 | commodities NNS 1378 | markets NNS 1379 | for IN 1380 | The DT 1381 | Wall NNP 1382 | Street NNP 1383 | Journal NNP 1384 | . . 1385 | IN IN 1386 | SIZING NNP 1387 | UP IN 1388 | the DT 1389 | risks NNS 1390 | of IN 1391 | stock-market NN 1392 | investments NNS 1393 | , , 1394 | there EX 1395 | 's VBZ 1396 | probably RB 1397 | no DT 1398 | starting VBG 1399 | place NN 1400 | better JJR 1401 | than IN 1402 | `` `` 1403 | beta NN 1404 | . . 1405 | '' '' 1406 | But CC 1407 | investors NNS 1408 | better RBR 1409 | not RB 1410 | ignore VB 1411 | its PRP$ 1412 | limitations NNS 1413 | , , 1414 | either RB 1415 | . . 1416 | Beta NNP 1417 | is VBZ 1418 | a DT 1419 | handy JJ 1420 | gauge NN 1421 | that IN 1422 | measures VBZ 1423 | the DT 1424 | volatility NN 1425 | of IN 1426 | a DT 1427 | stock NN 1428 | or CC 1429 | stock NN 1430 | mutual JJ 1431 | fund NN 1432 | . . 1433 | For IN 1434 | any DT 1435 | given VBN 1436 | move NN 1437 | in IN 1438 | the DT 1439 | overall JJ 1440 | market NN 1441 | , , 1442 | it PRP 1443 | suggests VBZ 1444 | how WRB 1445 | steeply RB 1446 | that IN 1447 | particular JJ 1448 | issue NN 1449 | might MD 1450 | rise VB 1451 | or CC 1452 | fall NN 1453 | . . 1454 | Beta JJ 1455 | figures NNS 1456 | are VBP 1457 | widely RB 1458 | available JJ 1459 | and CC 1460 | easy JJ 1461 | to TO 1462 | interpret VB 1463 | . . 1464 | The DT 1465 | beta NN 1466 | of IN 1467 | the DT 1468 | broad JJ 1469 | market NN 1470 | , , 1471 | typically RB 1472 | defined VBN 1473 | as IN 1474 | the DT 1475 | Standard NNP 1476 | & CC 1477 | Poor NNP 1478 | 's POS 1479 | 500-stock JJ 1480 | index NN 1481 | , , 1482 | is VBZ 1483 | always RB 1484 | 1.0 CD 1485 | . . 1486 | So RB 1487 | a DT 1488 | stock NN 1489 | with IN 1490 | a DT 1491 | beta NN 1492 | of IN 1493 | 0.5 CD 1494 | is VBZ 1495 | half NN 1496 | as IN 1497 | volatile JJ 1498 | , , 1499 | one CD 1500 | at IN 1501 | 1.5 CD 1502 | is VBZ 1503 | 50 CD 1504 | % NN 1505 | more RBR 1506 | volatile JJ 1507 | , , 1508 | and CC 1509 | so RB 1510 | on RB 1511 | . . 1512 | Cautious JJ 1513 | investors NNS 1514 | should MD 1515 | generally RB 1516 | go VB 1517 | with IN 1518 | stocks NNS 1519 | that WDT 1520 | have VBP 1521 | low JJ 1522 | betas NNS 1523 | . . 1524 | Go VB 1525 | with IN 1526 | high-beta JJ 1527 | stocks NNS 1528 | to TO 1529 | get VB 1530 | the DT 1531 | biggest JJS 1532 | payoff NN 1533 | from IN 1534 | a DT 1535 | bet NN 1536 | on IN 1537 | a DT 1538 | bull NN 1539 | market NN 1540 | . . 1541 | Remember VB 1542 | , , 1543 | though RB 1544 | , , 1545 | that DT 1546 | beta NN 1547 | also RB 1548 | has VBZ 1549 | important JJ 1550 | limitations NNS 1551 | . . 1552 | `` `` 1553 | Beta NNP 1554 | is VBZ 1555 | only JJ 1556 | part NN 1557 | of IN 1558 | the DT 1559 | risk NN 1560 | in IN 1561 | a DT 1562 | stock NN 1563 | , , 1564 | '' '' 1565 | says VBZ 1566 | William NNP 1567 | F. NNP 1568 | Sharpe NNP 1569 | , , 1570 | the DT 1571 | Stanford NNP 1572 | University NNP 1573 | emeritus NN 1574 | professor NN 1575 | who WP 1576 | developed VBD 1577 | the DT 1578 | measure NN 1579 | . . 1580 | `` `` 1581 | There EX 1582 | is VBZ 1583 | risk NN 1584 | that WDT 1585 | is VBZ 1586 | not RB 1587 | associated VBN 1588 | with IN 1589 | market NN 1590 | moves NNS 1591 | , , 1592 | and CC 1593 | the DT 1594 | beta NN 1595 | does VBZ 1596 | n't RB 1597 | tell VB 1598 | you PRP 1599 | the DT 1600 | magnitude NN 1601 | of IN 1602 | that DT 1603 | . . 1604 | '' '' 1605 | In IN 1606 | particular JJ 1607 | , , 1608 | beta NN 1609 | does VBZ 1610 | n't RB 1611 | measure VB 1612 | the DT 1613 | company NN 1614 | - : 1615 | and CC 1616 | industry-specific JJ 1617 | risk NN 1618 | associated VBN 1619 | with IN 1620 | an DT 1621 | individual JJ 1622 | stock NN 1623 | . . 1624 | That DT 1625 | `` `` 1626 | business NN 1627 | '' '' 1628 | risk NN 1629 | is VBZ 1630 | very RB 1631 | significant JJ 1632 | for IN 1633 | an DT 1634 | investor NN 1635 | with IN 1636 | only RB 1637 | a DT 1638 | few JJ 1639 | stocks NNS 1640 | , , 1641 | but CC 1642 | it PRP 1643 | virtually RB 1644 | disappears VBZ 1645 | in IN 1646 | a DT 1647 | large JJ 1648 | and CC 1649 | well-diversified JJ 1650 | portfolio NN 1651 | . . 1652 | Beta NNP 1653 | is VBZ 1654 | also RB 1655 | a DT 1656 | poor JJ 1657 | indicator NN 1658 | of IN 1659 | the DT 1660 | risk NN 1661 | in IN 1662 | stock NN 1663 | groups NNS 1664 | that WDT 1665 | march VBP 1666 | to TO 1667 | their PRP$ 1668 | own JJ 1669 | drummer NN 1670 | . . 1671 | In IN 1672 | particular JJ 1673 | , , 1674 | the DT 1675 | prices NNS 1676 | of IN 1677 | gold NN 1678 | and CC 1679 | other JJ 1680 | precious-metals NNS 1681 | stocks NNS 1682 | shoot VBP 1683 | up IN 1684 | and CC 1685 | down RB 1686 | , , 1687 | but CC 1688 | the DT 1689 | stocks NNS 1690 | tend VBP 1691 | to TO 1692 | have VB 1693 | low JJ 1694 | betas NNS 1695 | because IN 1696 | their PRP$ 1697 | moves NNS 1698 | are VBP 1699 | not RB 1700 | market-inspired JJ 1701 | . . 1702 | Concern NN 1703 | that IN 1704 | investors NNS 1705 | could MD 1706 | misinterpret VB 1707 | such JJ 1708 | readings NNS 1709 | led VBD 1710 | the DT 1711 | American NNP 1712 | Association NNP 1713 | of IN 1714 | Individual JJ 1715 | Investors NNS 1716 | to TO 1717 | eliminate VB 1718 | beta NN 1719 | figures NNS 1720 | for IN 1721 | precious-metals NNS 1722 | funds NNS 1723 | in IN 1724 | the DT 1725 | 1989 CD 1726 | edition NN 1727 | of IN 1728 | its PRP$ 1729 | mutual-fund JJ 1730 | guide NN 1731 | . . 1732 | `` `` 1733 | Our PRP$ 1734 | fear NN 1735 | was VBD 1736 | people NNS 1737 | would MD 1738 | look VB 1739 | just RB 1740 | at IN 1741 | the DT 1742 | beta NN 1743 | -LCB- ( 1744 | of IN 1745 | a DT 1746 | gold NN 1747 | fund NN 1748 | -RCB- ) 1749 | and CC 1750 | say VBP 1751 | here RB 1752 | is VBZ 1753 | an DT 1754 | investment NN 1755 | with IN 1756 | very RB 1757 | low JJ 1758 | risk NN 1759 | , , 1760 | '' '' 1761 | says VBZ 1762 | John NNP 1763 | Markese NNP 1764 | , , 1765 | director NN 1766 | of IN 1767 | research NN 1768 | for IN 1769 | the DT 1770 | Chicago-based JJ 1771 | group NN 1772 | . . 1773 | `` `` 1774 | In IN 1775 | reality NN 1776 | it PRP 1777 | 's VBZ 1778 | very RB 1779 | volatile JJ 1780 | , , 1781 | but CC 1782 | the DT 1783 | movements NNS 1784 | are VBP 1785 | not RB 1786 | because IN 1787 | of IN 1788 | market NN 1789 | movements NNS 1790 | . . 1791 | READY NNP 1792 | TO TO 1793 | REVIEW NNP 1794 | the DT 1795 | riskiness NN 1796 | of IN 1797 | your PRP$ 1798 | investment NN 1799 | portfolio NN 1800 | ? . 1801 | First RB 1802 | , , 1803 | a DT 1804 | pop NN 1805 | quiz NN 1806 | . . 1807 | When WRB 1808 | you PRP 1809 | think VBP 1810 | of IN 1811 | the DT 1812 | words NNS 1813 | `` `` 1814 | risk NN 1815 | '' '' 1816 | and CC 1817 | `` `` 1818 | investment NN 1819 | , , 1820 | '' '' 1821 | what WP 1822 | 's VBZ 1823 | the DT 1824 | specific JJ 1825 | peril NN 1826 | that WDT 1827 | comes VBZ 1828 | to TO 1829 | mind VB 1830 | ? . 1831 | Pencils NNS 1832 | down RB 1833 | . . 1834 | If IN 1835 | you PRP 1836 | 're VBP 1837 | like IN 1838 | most RBS 1839 | people NNS 1840 | , , 1841 | you PRP 1842 | said VBD 1843 | it PRP 1844 | 's VBZ 1845 | a DT 1846 | holding VBG 1847 | that WDT 1848 | goes VBZ 1849 | completely RB 1850 | sour JJ 1851 | -- : 1852 | maybe RB 1853 | a DT 1854 | bond NN 1855 | that IN 1856 | defaults NNS 1857 | or CC 1858 | a DT 1859 | stock NN 1860 | whose WP$ 1861 | value NN 1862 | disappears VBZ 1863 | in IN 1864 | a DT 1865 | bankruptcy NN 1866 | proceeding NN 1867 | . . 1868 | `` `` 1869 | People NNS 1870 | tend VBP 1871 | to TO 1872 | see VB 1873 | risk NN 1874 | primarily RB 1875 | on IN 1876 | that DT 1877 | one CD 1878 | dimension NN 1879 | , , 1880 | '' '' 1881 | says VBZ 1882 | Timothy NNP 1883 | Kochis NNP 1884 | , , 1885 | national JJ 1886 | director NN 1887 | of IN 1888 | personal JJ 1889 | financial JJ 1890 | planning NN 1891 | for IN 1892 | accountants NNS 1893 | Deloitte NNP 1894 | , , 1895 | Haskins NNP 1896 | & CC 1897 | Sells NNP 1898 | . . 1899 | But CC 1900 | therein RB 1901 | lies VBZ 1902 | another DT 1903 | aspect NN 1904 | of IN 1905 | investment NN 1906 | risk NN 1907 | : : 1908 | the DT 1909 | hazard NN 1910 | of IN 1911 | shaping VBG 1912 | your PRP$ 1913 | portfolio NN 1914 | to TO 1915 | avoid VB 1916 | one CD 1917 | or CC 1918 | more JJR 1919 | types NNS 1920 | of IN 1921 | risk NN 1922 | and CC 1923 | being VBG 1924 | blind-sided JJ 1925 | by IN 1926 | others NNS 1927 | . . 1928 | This DT 1929 | is VBZ 1930 | clearly RB 1931 | not RB 1932 | good JJ 1933 | news NN 1934 | to TO 1935 | all DT 1936 | you PRP 1937 | people NNS 1938 | who WP 1939 | sleep VBP 1940 | like IN 1941 | babies NNS 1942 | every DT 1943 | night NN 1944 | , , 1945 | lulled VBN 1946 | by IN 1947 | visions NNS 1948 | of IN 1949 | your PRP$ 1950 | money NN 1951 | sitting VBG 1952 | risk-free JJ 1953 | in IN 1954 | six-month JJ 1955 | CDs NNS 1956 | . . 1957 | Risk NN 1958 | wears VBZ 1959 | many JJ 1960 | disguises NNS 1961 | , , 1962 | and CC 1963 | investments NNS 1964 | that WDT 1965 | are VBP 1966 | low JJ 1967 | in IN 1968 | one CD 1969 | type NN 1970 | of IN 1971 | obvious JJ 1972 | risk NN 1973 | can MD 1974 | be VB 1975 | distressingly RB 1976 | high JJ 1977 | in IN 1978 | other JJ 1979 | , , 1980 | less RBR 1981 | obvious JJ 1982 | kinds NNS 1983 | . . 1984 | U.S. NNP 1985 | Treasury NNP 1986 | bonds NNS 1987 | , , 1988 | for IN 1989 | example NN 1990 | , , 1991 | are VBP 1992 | supersafe NN 1993 | when WRB 1994 | it PRP 1995 | comes VBZ 1996 | to TO 1997 | returning VBG 1998 | money NN 1999 | at IN 2000 | maturity NN 2001 | . . 2002 | But CC 2003 | their PRP$ 2004 | value NN 2005 | as IN 2006 | investments NNS 2007 | can MD 2008 | be VB 2009 | decimated VBN 2010 | by IN 2011 | inflation NN 2012 | , , 2013 | which WDT 2014 | erodes VBZ 2015 | the DT 2016 | purchasing VBG 2017 | power NN 2018 | of IN 2019 | bonds NNS 2020 | ' POS 2021 | fixed-dollar JJ 2022 | interest NN 2023 | payments NNS 2024 | . . 2025 | Risk NN 2026 | is VBZ 2027 | also RB 2028 | a DT 2029 | function NN 2030 | of IN 2031 | time NN 2032 | . . 2033 | When WRB 2034 | financial JJ 2035 | professionals NNS 2036 | measure VBP 2037 | risk NN 2038 | mathematically RB 2039 | , , 2040 | they PRP 2041 | usually RB 2042 | focus VBP 2043 | on IN 2044 | the DT 2045 | volatility NN 2046 | of IN 2047 | short-term JJ 2048 | returns NNS 2049 | . . 2050 | Stocks NNS 2051 | are VBP 2052 | much RB 2053 | riskier JJR 2054 | than IN 2055 | Treasury NNP 2056 | bills NNS 2057 | , , 2058 | for IN 2059 | example NN 2060 | , , 2061 | because IN 2062 | the DT 2063 | range NN 2064 | in IN 2065 | performance NN 2066 | from IN 2067 | the DT 2068 | best JJS 2069 | years NNS 2070 | to TO 2071 | the DT 2072 | worst JJS 2073 | is VBZ 2074 | much RB 2075 | wider JJR 2076 | . . 2077 | That DT 2078 | is VBZ 2079 | usually RB 2080 | measured VBN 2081 | by IN 2082 | the DT 2083 | standard JJ 2084 | deviation NN 2085 | , , 2086 | or CC 2087 | divergence NN 2088 | , , 2089 | of IN 2090 | annual JJ 2091 | results NNS 2092 | from IN 2093 | the DT 2094 | average JJ 2095 | return NN 2096 | over IN 2097 | time NN 2098 | . . 2099 | But CC 2100 | investors NNS 2101 | who WP 2102 | are VBP 2103 | preoccupied VBN 2104 | with IN 2105 | short-term JJ 2106 | fluctuations NNS 2107 | may MD 2108 | be VB 2109 | paying VBG 2110 | too RB 2111 | little JJ 2112 | attention NN 2113 | to TO 2114 | another DT 2115 | big JJ 2116 | risk NN 2117 | -- : 2118 | not RB 2119 | generating VBG 2120 | enough JJ 2121 | money NN 2122 | to TO 2123 | meet VB 2124 | long-term JJ 2125 | financial JJ 2126 | and CC 2127 | life-style NN 2128 | goals NNS 2129 | . . 2130 | For IN 2131 | instance NN 2132 | , , 2133 | some DT 2134 | investors NNS 2135 | have VBP 2136 | sworn VBN 2137 | off IN 2138 | stocks NNS 2139 | since IN 2140 | the DT 2141 | 1987 CD 2142 | market NN 2143 | crash NN 2144 | ; : 2145 | last JJ 2146 | Friday NNP 2147 | 's POS 2148 | debacle NN 2149 | only RB 2150 | reinforced VBD 2151 | those DT 2152 | feelings NNS 2153 | . . 2154 | But CC 2155 | the DT 2156 | stock NN 2157 | market NN 2158 | , , 2159 | despite IN 2160 | some DT 2161 | stomach-churning JJ 2162 | declines NNS 2163 | , , 2164 | has VBZ 2165 | far RB 2166 | outperformed VBN 2167 | other JJ 2168 | securities NNS 2169 | over IN 2170 | extended VBN 2171 | periods NNS 2172 | . . 2173 | By IN 2174 | retreating VBG 2175 | to TO 2176 | the DT 2177 | apparent JJ 2178 | security NN 2179 | of IN 2180 | , , 2181 | say VBP 2182 | , , 2183 | money-market JJ 2184 | funds NNS 2185 | , , 2186 | investors NNS 2187 | may MD 2188 | not RB 2189 | be VB 2190 | earning VBG 2191 | enough JJ 2192 | investment NN 2193 | return NN 2194 | to TO 2195 | pay VB 2196 | for IN 2197 | a DT 2198 | comfortable JJ 2199 | retirement NN 2200 | . . 2201 | `` `` 2202 | That DT 2203 | 's VBZ 2204 | the DT 2205 | biggest JJS 2206 | risk NN 2207 | of IN 2208 | all DT 2209 | -- : 2210 | the DT 2211 | risk NN 2212 | of IN 2213 | not RB 2214 | meeting VBG 2215 | your PRP$ 2216 | objectives NNS 2217 | , , 2218 | '' '' 2219 | says VBZ 2220 | Steven NNP 2221 | B. NNP 2222 | Enright NNP 2223 | , , 2224 | a DT 2225 | New NNP 2226 | York NNP 2227 | financial JJ 2228 | planner NN 2229 | with IN 2230 | Seidman NNP 2231 | Financial NNP 2232 | Services NNPS 2233 | . . 2234 | As IN 2235 | a DT 2236 | result NN 2237 | , , 2238 | financial JJ 2239 | advisers NNS 2240 | say VBP 2241 | they PRP 2242 | take VBP 2243 | several JJ 2244 | steps NNS 2245 | when WRB 2246 | evaluating VBG 2247 | the DT 2248 | riskiness NN 2249 | of IN 2250 | clients NNS 2251 | ' POS 2252 | portfolios NNS 2253 | . . 2254 | They PRP 2255 | estimate VBP 2256 | the DT 2257 | return NN 2258 | a DT 2259 | person NN 2260 | 's POS 2261 | current JJ 2262 | portfolio NN 2263 | is VBZ 2264 | likely JJ 2265 | to TO 2266 | generate VB 2267 | over IN 2268 | time NN 2269 | , , 2270 | along IN 2271 | with IN 2272 | a DT 2273 | standard JJ 2274 | deviation NN 2275 | that WDT 2276 | suggests VBZ 2277 | how WRB 2278 | much JJ 2279 | the DT 2280 | return NN 2281 | will MD 2282 | vary VB 2283 | year NN 2284 | by IN 2285 | year NN 2286 | . . 2287 | They PRP 2288 | try VBP 2289 | to TO 2290 | figure VB 2291 | out RP 2292 | the IN 2293 | long-term JJ 2294 | results NNS 2295 | the DT 2296 | person NN 2297 | needs VBZ 2298 | to TO 2299 | meet VB 2300 | major JJ 2301 | goals NNS 2302 | . . 2303 | --------------------------------------------------------------------------------