├── .gitignore
├── .travis.yml
├── LICENSE.txt
├── README.md
├── __init__.py
├── email_segmentation_example.py
├── requirements.txt
├── setup.py
├── signature_extractor
│   ├── .DS_Store
│   ├── __init__.py
│   ├── configs
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   └── configs.py
│   ├── datasets
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   ├── dataset_loader.py
│   │   ├── signatures
│   │   │   ├── .DS_Store
│   │   │   ├── 106_body
│   │   │   ├── 112_body
│   │   │   ├── 125_body
│   │   │   ├── 126_body
│   │   │   ├── 127_body
│   │   │   ├── 131_body
│   │   │   ├── 134_body
│   │   │   ├── 135_body
│   │   │   ├── 137_body
│   │   │   ├── 138_body
│   │   │   ├── 13_body
│   │   │   ├── 140_body
│   │   │   ├── 141_body
│   │   │   ├── 150_body
│   │   │   ├── 151_body
│   │   │   ├── 152_body
│   │   │   ├── 153_body
│   │   │   ├── 154_body
│   │   │   ├── 155_body
│   │   │   ├── 156_body
│   │   │   ├── 157_body
│   │   │   ├── 158_body
│   │   │   ├── 159_body
│   │   │   ├── 15_body
│   │   │   ├── 160_body
│   │   │   ├── 161_body
│   │   │   ├── 162_body
│   │   │   ├── 163_body
│   │   │   ├── 164_body
│   │   │   ├── 165_body
│   │   │   ├── 166_body
│   │   │   ├── 167_body
│   │   │   ├── 168_body
│   │   │   ├── 169_body
│   │   │   ├── 16_body
│   │   │   ├── 171_body
│   │   │   ├── 173_body
│   │   │   ├── 175_body
│   │   │   ├── 177_body
│   │   │   ├── 181_body
│   │   │   ├── 184_body
│   │   │   ├── 185_body
│   │   │   ├── 189_body
│   │   │   ├── 190_body
│   │   │   ├── 192_body
│   │   │   ├── 19_body
│   │   │   ├── 1_body
│   │   │   ├── 20_body
│   │   │   ├── 21_body
│   │   │   ├── 22_body
│   │   │   ├── 23_body
│   │   │   ├── 24_body
│   │   │   ├── 25_body
│   │   │   ├── 26_body
│   │   │   ├── 27_body
│   │   │   ├── 28_body
│   │   │   ├── 29_body
│   │   │   ├── 2_body
│   │   │   ├── 30_body
│   │   │   ├── 31_body
│   │   │   ├── 32_body
│   │   │   ├── 33_body
│   │   │   ├── 34_body
│   │   │   ├── 35_body
│   │   │   ├── 36_body
│   │   │   ├── 38_body
│   │   │   ├── 39_body
│   │   │   ├── 3_body
│   │   │   ├── 40_body
│   │   │   ├── 41_body
│   │   │   ├── 43_body
│   │   │   ├── 44_body
│   │   │   ├── 45_body
│   │   │   ├── 47_body
│   │   │   ├── 48_body
│   │   │   ├── 49_body
│   │   │   ├── 52_body
│   │   │   ├── 55_body
│   │   │   ├── 56_body
│   │   │   ├── 58_body
│   │   │   ├── 59_body
│   │   │   ├── 5_body
│   │   │   ├── 61_body
│   │   │   ├── 62_body
│   │   │   ├── 63_body
│   │   │   ├── 64_body
│   │   │   ├── 65_body
│   │   │   ├── 67_body
│   │   │   ├── 72_body
│   │   │   ├── 73_body
│   │   │   ├── 78_body
│   │   │   ├── 8_body
│   │   │   ├── 92_body
│   │   │   ├── 94_body
│   │   │   ├── 96_body
│   │   │   ├── 97_body
│   │   │   └── 98_body
│   │   └── test_emails
│   │       ├── .DS_Store
│   │       ├── email_1
│   │       └── email_2
│   ├── feature.py
│   ├── models
│   │   ├── .DS_Store
│   │   └── signature_model
│   ├── persister.py
│   ├── preprocessing
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   ├── feature_parser.py
│   │   └── mail_parser.py
│   └── segmentation.py
├── tests
│   ├── .DS_Store
│   ├── __init__.py
│   ├── test_feature_parser.py
│   ├── test_segmentation.py
│   ├── test_signature_feature_extractor.py
│   └── test_suit.py
└── train_signature_classifier.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .coverage
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | sudo: false
4 |
5 | python:
6 | - 3.4
7 | - 3.5
8 | - 3.6
9 |
10 | install:
11 | - pip install -r requirements.txt
12 | - pip install coverage
13 | - pip install codecov
14 |
15 | script:
16 | - python -c 'import nltk; nltk.download("punkt"); nltk.download("averaged_perceptron_tagger"); nltk.download("maxent_ne_chunker"); nltk.download("words")'
17 | - coverage run -m unittest tests.test_suit
18 | - coverage report -m
19 |
20 | after_success:
21 | - codecov
22 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
676 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Email Signature Extraction
2 |
3 | [Build Status](https://travis-ci.org/harmening/signature_extraction)
4 | [Code Coverage](https://codecov.io/gh/harmening/signature_extraction)
5 |
6 | Email Signature Extraction is a library for splitting email content into two parts: a human-written body and an automatically appended signature.
7 |
8 | ### Installation
9 | Install required dependencies in a virtual environment:
10 |
11 | ```sh
12 | $ pip install -r requirements.txt
13 | ```
14 |
15 |
16 |
17 | ## Approach
18 | A combination of standard algorithms and machine learning techniques is used to detect and extract the signature part. This detection problem is converted into a binary classification task with 2 possible outcomes: `signature` (1) or `body` (0).
19 | After preprocessing the email, its text is split into lines and each line is classified while taking the previous and next lines into account as well. A subsequent algorithm then groups consecutive lines classified as 1 (i.e. signature lines) and extracts them from the email body.
20 |
21 | This repo is organized as follows:
22 |
23 |
24 | ### Text preprocessing
25 | Text preprocessing is the first step of the email segmentation process. Each input email is tokenized on sentence level, using the NLTK SentenceTokenizer. The F1-score significantly increased when applying this preprocessing step.
26 |
27 | ### Feature Extraction
28 | SignatureFeatureExtractor is a custom scikit-learn transformer, which is applied for feature extraction.
29 | It converts each line of text into a vector, also taking the previous and next line into account.
30 | For each line, the algorithm looks for several features that are considered important for a successful classification. These are, for example: the count of named entities and whether the line contains typical signature words, email addresses, phone numbers, URLs, etc.
31 | The NLTK named entity recognizer is used to count entities in the text.
32 |
33 | ### Classification
34 | k-Nearest Neighbors (kNN) is a simple, effective and quite popular classification algorithm. After several experiments with different ML algorithms and tuning hyperparameters with GridSearchCV, kNN is chosen because of simplicity and effectiveness. kNN produced an F1 score of 0.95. However, other classification techniques such as LinearSVM also turned out to show good results.
35 |
36 | ### Evaluation
37 | The evaluation is done based on the f1 metric, which is widely used for evaluating binary classification models:
38 | `F1 = 2 * (precision * recall) / (precision + recall)`, where precision is a measure of result relevancy and recall is a measure of how many truly relevant results are returned. See also https://en.wikipedia.org/wiki/F1_score for more details.
39 |
40 |
41 | ### To Do
42 | * Find a better solution for grouping lines of continuous signature. It is assumed that the longest-repeating-sequence-of-ones algorithm is going to fail for some edge cases.
43 | * Increase size of training data
44 | * Improve accuracy of classifier model
45 | * Experiment with Long short-term memory (LSTM) Recurrent Neural Networks (RNN).
46 |
47 |
48 | ### Support :gift_heart:
49 | I love open-source! Meaning, you are of course free to integrate my project in your applications. However, **if you get some profit from this** or just want to support and encourage me to continue creating stuff, there are a few ways you can do so:
50 | - Starring and sharing projects you like
51 | - :stew: [Share your next meal][sharemeal] with those less fortunate, because there is no reason not to do so!
52 | - :book: [Buy me a book][amazon]: I love books and I will always remember you :wink:
53 | - **Bitcoin**: You can send me bitcoins at this address:
54 | `xpub6DUNko8GTPePPgtbK1qfpiLCoujQXUBTi1qtfw7V2oBCdnk1H9d3if3pazmCy9QgENKSNPpHAXRZp8HLSG7pWwba5HRcHLC3TjbXYXXZh57`
55 |
56 | Thanks! :heart:
57 |
58 |
59 | [amazon]: http://a.co/4CZC8iN
60 | [sharemeal]: https://sharethemeal.org/en/index.html
61 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/__init__.py
--------------------------------------------------------------------------------
/email_segmentation_example.py:
--------------------------------------------------------------------------------
1 | import os
2 | from signature_extractor.segmentation import EmailSegmenter
3 | from signature_extractor.preprocessing import mail_parser as m_parser
4 |
5 |
def print_segments(email_body, signature):
    """Print the email body and the signature as two titled sections.

    Each section consists of a dashed rule followed by its title, an
    underline, and the section content. A single blank line separates the
    two sections.

    Args:
        email_body: Content shown under the "EMAIL BODY" title.
        signature: Content shown under the "SIGNATURE" title.
    """
    dashes = "-" * 35
    # The body title is underlined with underscores, the signature title
    # with dashes.
    sections = (
        ("EMAIL BODY", "_" * 35, email_body),
        ("SIGNATURE", dashes, signature),
    )
    for position, (title, underline, content) in enumerate(sections):
        if position:
            # Blank separator line before every section except the first.
            print()
        print(dashes, "\n", title)
        print(underline)
        print(content)
15 |
def main():
    """Segment the bundled sample email and print both resulting parts.

    The sample is read from ``signature_extractor/datasets/test_emails/email_1``
    (a relative path, so it resolves against the current working directory)
    via ``m_parser.get_from_file``. The text is then passed to
    ``EmailSegmenter.segment_mail`` and the returned pair is printed with
    ``print_segments``.
    """
    sample_path = os.path.join(
        'signature_extractor', 'datasets', 'test_emails', 'email_1'
    )
    raw_text = m_parser.get_from_file(sample_path)

    body_part, signature_part = EmailSegmenter().segment_mail(raw_text)
    print_segments(body_part, signature_part)
23 |
24 |
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
27 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn==0.19.1
2 | numpy==1.14.5
3 | scipy==1.1.0
4 | nltk==3.3
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for the signature_extraction project."""
import os

from setuptools import setup, find_packages


def _read_requirements(filename='requirements.txt'):
    """Return the requirement specifiers listed in *filename*.

    The file is looked up next to this script, so the result does not depend
    on the current working directory. Blank lines and ``#`` comment lines are
    skipped. A missing file yields an empty list.
    """
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
    if not os.path.isfile(path):
        return []
    with open(path) as req_file:
        stripped = (line.strip() for line in req_file)
        return [line for line in stripped if line and not line.startswith('#')]


setup(
    name='signature_extraction',
    version='1.0.0',
    url='https://github.com/harmening/signature_extraction.git',
    packages=find_packages(),
    # ``install_requires`` is the keyword setuptools recognises. The earlier
    # ``install_reqs`` keyword was not, so no dependencies were declared.
    # Reading the file directly also avoids pip's private, version-dependent
    # ``parse_requirements`` helper.
    install_requires=_read_requirements(),
)
14 |
--------------------------------------------------------------------------------
/signature_extractor/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/__init__.py
--------------------------------------------------------------------------------
/signature_extractor/configs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/configs/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/configs/__init__.py
--------------------------------------------------------------------------------
/signature_extractor/configs/configs.py:
--------------------------------------------------------------------------------
# Name of the model configuration considered active.
# NOTE(review): presumably the identifier of the persisted classifier that the
# package loads; confirm against the code that reads this setting.
ACTIVE_MODEL = 'signature_model'
--------------------------------------------------------------------------------
/signature_extractor/datasets/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/datasets/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/datasets/__init__.py
--------------------------------------------------------------------------------
/signature_extractor/datasets/dataset_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import nltk
3 | import numpy as np
4 | from os import listdir
5 | from os.path import isfile, join
6 |
7 |
# Directory holding the annotated training emails. The path is relative, so it
# resolves against the current working directory at import time.
signatures_dataset_path = os.path.join("signature_extractor", "datasets", "signatures")

# Names of the regular files in that directory, skipping the '.DS_Store' entry.
data_files = [
    entry
    for entry in listdir(signatures_dataset_path)
    if isfile(join(signatures_dataset_path, entry)) and entry != '.DS_Store'
]
10 |
11 |
def label_email_text(fname):
    """Build per-sentence samples and labels from one annotated email file.

    The file is read in full and split into sentences with
    ``nltk.sent_tokenize``. A sentence containing the ``#sig#`` marker is
    labelled ``'sig'``; every other sentence is labelled ``'other'``.

    Args:
        fname: Path of the annotated email file.

    Returns:
        A pair ``(X, y)`` of equally long lists. Each item of ``X`` is a tuple
        ``(full_text, sentence, previous_sentence, next_sentence)`` with every
        ``#sig#`` marker removed; the neighbouring sentences are ``""`` at the
        first and last position. ``y`` holds the matching labels.
    """
    # TODO (carried over from the original): add line position in message.
    marker = '#sig#'
    with open(fname, 'r') as annotated_file:
        sentences = nltk.sent_tokenize(annotated_file.read())

    y = ['sig' if marker in sentence else 'other' for sentence in sentences]

    # Strip the annotation marker from everything that becomes a feature.
    # Previously the previous/next sentences kept their marker, which leaked
    # the label into the samples.
    cleaned = [sentence.replace(marker, '') for sentence in sentences]
    full_text = '\n'.join(cleaned)
    last_idx = len(cleaned) - 1

    X = [
        (
            full_text,
            sentence,
            cleaned[idx - 1] if idx > 0 else "",
            cleaned[idx + 1] if idx < last_idx else "",
        )
        for idx, sentence in enumerate(cleaned)
    ]
    return X, y
28 |
29 |
def load_signatures_dataset():
    """Load every annotated email and return all samples and labels.

    Each file name in ``data_files`` is joined onto
    ``signatures_dataset_path`` and passed to ``label_email_text``. The
    per-file results are concatenated in iteration order.

    Returns:
        A pair of NumPy arrays ``(samples, labels)`` built from the
        concatenated lists.
    """
    samples, labels = [], []
    for name in data_files:
        file_samples, file_labels = label_email_text(
            os.path.join(signatures_dataset_path, name)
        )
        samples.extend(file_samples)
        labels.extend(file_labels)
    return np.array(samples), np.array(labels)
38 |
39 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/datasets/signatures/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/106_body:
--------------------------------------------------------------------------------
1 | Here is a schedule that I received from Phillip.
2 |
3 | I'm not sure if I'm reading the schedule correctly, but here are some items to think about:
4 |
5 | 1) The price to Trigen is a 3 month average of NGW and IF for Z3 and Z4 times a premium.
6 | 2) Because only one month at a time rolls off the books, there will always be an OA variance.
7 | 3) The OA variance due to the 3 month rolling average will eventually flatten out over time.
8 | 4) This OA variance, however, doesn't address the fact that Andy is getting 50% Z3 and 50% Z4 and having to deliver Z4.
9 | 5) For November 2001, there was some gas that was delivered at Z3, but the price was not adjusted for fuel and transport, so if
10 | you deliver gas at Station 65, you do not get the same price as you would for Station 85.
11 |
12 | Hopefully this makes sense.
13 |
14 | #sig#Diane
15 | #sig#x-37059
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/112_body:
--------------------------------------------------------------------------------
1 | Andy, I have completed negotiation with TBG on the Settlement. TBG has executed the documents and Louise and John have approved the deal. In short, I want to make sure that you understand the effect on you. Attached is a more detailed summary but generally I removed all swing flexibility from the deal. They have baseload obligations of 12,500 MMBtu per day. We can deliver at any of the delivery points identified in the Agreement ( I will give you a copy of the Agreements) however the primary delivery point is Station 65. That is the location for all price settlements. If we delivery at an alternate location we receive additional basis differentials for moving downstream ( Station 85 ) and we pay the basis differential for moving upstream ( Sta 45, 30 ). TBG has the right to move our deliveries back to Sta 65 at anytime, if we are delivering at alternate locations. I want to check with you one more time before I execute the final documents. I think that this is a great trade for ENA if I do say so myself. My next effort is to clear up any issues with Trigen.
2 |
3 | #sig#Frank
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/125_body:
--------------------------------------------------------------------------------
1 | Davette,
2 |
3 | We need to credit Barry Tycholiz (P # 564185) with 40 hrs extra vacation time. His offer letter said that he would receive an additional 40 hrs vacation. He has not yet been credited with the extra hrs. He should be receiving a total of 160 hrs vacation. Please let me know when this has been completed.
4 |
5 | #sig#Thanks,
6 | #sig#Ben
7 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/126_body:
--------------------------------------------------------------------------------
1 | Sean,
2 |
3 | Attached is our response to your request for a description of the steps Enron is taking to improve flows to the Griffith power plant. Please let me know if you have any questions.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Kim
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/127_body:
--------------------------------------------------------------------------------
1 | I have reviewed the master physical gas agreements for the 10 counterparties (excluding trading companies) at the top of the exposure list to determine whether the contracts contemplate one-way or two-way payments.
2 |
3 | 1. Petro Canada Oil & Gas - One Way
4 | 2. Canadian Natural Resources - Two Way
5 | 3. Marathon Canada Limited - One Way
6 | 4. Husky Oil Operations Limited - One Way
7 | 5. Murphy Canada Exploration Company - One Way
8 | 6. Encal Energy Ltd. - One Way
9 | 7. Talisman Energy Inc. - One Way
10 | 8. Premstar Energy Canada Ltd. - Two Way
11 | 9. Rio Alto Exploration Ltd. - One Way
12 | 10. Sunoco Inc. - Two Way
13 |
14 | The basic operation of a one way payment provision is that, if the defaulting party has a net in-the-money position based upon the Early Termination Damages calculation, the Early Termination Damages are deemed to be zero and the non-defaulting party is not required to pay that amount to the defaulting party (the defaulting party's position is wiped out). However, we would still be entitled, prior to default, to call for collateral where a counterparty's out-of-the money position is in excess of its collateral threshold, which would provide us with security to ensure the counterparty continues to pay. In addition, if the counterparty failed to post such collateral, they would be in default, we could terminate the contract and, as the non-defaulting party, we would be in a position to realize on our in-the-money position. If, after we received the collateral, we were then to become insolvent or otherwise default under the master, the counterparty could terminate the contract, our mark-to-market position would be wiped out and we would be required to return any unused collateral.
15 |
16 | The operation of a two way payment provision is that the early termination damages are calculated by the non-defaulting party and whoever has a net in-the-money position based upon such calculation is owed the termination payment, regardless of whether such party is the defaulting or non-defaulting party. In other words, unlike a one way payment, if the defaulting party has a net in-the-money position upon termination of the contract, the non-defaulting party will be required to pay that amount to the defaulting party. Therefore, if we were to become insolvent or otherwise default under a master and we had a net in-the-money position under that master, the counterparty could terminate the contract but we would still be owed our mark-to-market position. The other point to consider in the situation where there is a two way payment provision is that, of the three counterparty masters above that contain two way payments, all three also contain cherry picking language that, upon the occurrence of an event of default other than insolvency, allows the non-defaulting counterparty to pick and choose which transactions to terminate. In other words, the counterparty need not terminate all transactions, which could effect the amount otherwise owed to the defaulting party. Even with cherry-picking, upon the occurrence of an insolvency, all transactions are deemed to terminate, so cherry picking would not be a factor.
17 |
18 | You had also asked whether the failure by a party to call for collateral when it is entitled to under a contract would have an impact on the ability of such party to collect on a termination payment owed to it upon termination of the contract. Other than the fact that the failure to call for collateral may mean that we do not hold credit support that would be usable to offset a termination payment owing if the party obligated to pay fails to pay, the right to call for collateral is just that, a right and not an obligation, and the failure to call for collateral does not colour the right of the party owed the termination payment to require it be paid.
19 |
20 | Let me know if you need anything further with respect to this matter.
21 |
22 | #sig#Greg
23 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/131_body:
--------------------------------------------------------------------------------
1 | Pls confirm that you have received your new WCom calling card. Thank you.
2 |
3 | #sig#Carolyn Graham
4 | #sig#Enron NetWorks LLC
5 | #sig#Vendor Management
6 | #sig#713-345-8008
7 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/134_body:
--------------------------------------------------------------------------------
1 | Barry,
2 | Here are our financial products for EOL that we would like to appear on the screen on Monday, as requested by John Zufferli.
3 | Call me if you have any questions
4 |
5 |
6 | #sig#Stephane
7 | #sig#x6721
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/135_body:
--------------------------------------------------------------------------------
1 | Here is the list of gas and power products we would like to see on the screen.
2 |
3 |
4 |
5 | Call me if you have any questions
6 | #sig#Stephane Brodeur
7 | #sig#(403) 974-6721
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/137_body:
--------------------------------------------------------------------------------
1 | Is exclusively gas.
2 |
3 | Can you add to your list? ($13 mm)
4 |
5 | #sig#Thanks,
6 |
7 | #sig#David
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/138_body:
--------------------------------------------------------------------------------
1 | Following is an email I have sent to my group, outlining a process to ensure everyone is on board and nothing falls through the cracks.
2 |
3 | #sig#David
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/13_body:
--------------------------------------------------------------------------------
1 | **For New UBS employees***
2 |
3 | I am going to send all of our File Migration compliance forms in one batch, so give them to me when you have signed the Document Migration Certificate. I will be sending this the evening of the 8th.
4 |
5 | If you don't know who I am or what this is about, ask Dave Steiner or any of the Admins.
6 |
7 | #sig#Thanks,
8 |
9 | #sig#Cara Freeland
10 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/140_body:
--------------------------------------------------------------------------------
1 | Barry,
2 |
3 | How are you? Just want to say hi. It must be a very hectic week and
4 | stressful. With the talents of people at Enron such as yourself being the
5 | major asset of the company, the industry recognizes that fact. I am sure
6 | that you will do well. In the mean time, hold tight, pray, and have faith.
7 |
8 | Keep in touch.
9 |
10 |
11 | #sig#Joseph Cheung
12 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/141_body:
--------------------------------------------------------------------------------
1 | Barry,
2 | Call Melissa Murphy X.31886 ...she is my lead on Power and can create the confirm for you.
3 |
4 | #sig#Kim
5 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/150_body:
--------------------------------------------------------------------------------
1 | Kim, can you call me upon receipt of this e-mail.. I require a confirm to be raised as part of a complicated three way transaction.
2 |
3 | #sig#Barry Tycholiz - 713-853-1587.
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/151_body:
--------------------------------------------------------------------------------
1 | Barry,
2 | Call Melissa Murphy X.31886 ...she is my lead on Power and can create the confirm for you.
3 |
4 | #sig#Kim
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/152_body:
--------------------------------------------------------------------------------
1 | Mark and Scott,
2 |
3 | Attached is a letter that covers the inventory transfer.
4 |
5 | If it's a go please revise as necessary sign and fax.
6 |
7 | #sig#Thanks.
8 |
9 | #sig#Jim Griffin
10 | #sig#Manager, Gas Transmission Marketing
11 | #sig#Montana Power Company
12 | #sig#40 East Broadway
13 | #sig#Butte, MT 59701
14 | #sig#Phone: 406-497-2806
15 | #sig#Fax: 406-497-2054
16 | #sig#Email: jgriffin@mtpower.com
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/153_body:
--------------------------------------------------------------------------------
1 | This may be too early, but just in case....I will save the good-byes until they become absolutely necessary.
2 |
3 | Could I have your outside e-mails and possibly your phone #'s? If you feel uncomfortable giving these to me, that is o.k. The following is my information.
4 |
5 | #sig#Carole C. Frank
6 | #sig#carole_frank@excite.com
7 | #sig#713.446.9307 cell
8 | #sig#713-467-3860 home
9 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/154_body:
--------------------------------------------------------------------------------
1 | Please find attached a draft agreement for the purchase of EPMI's position
2 | with Santa Clara.
3 |
4 | #sig#Peter Meier
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/155_body:
--------------------------------------------------------------------------------
1 | Hi.
2 | Just thought I'd see how you were holding up in light of the circumstances.
3 | Planning on coming back to the great white north for Xmas?
4 | Give us a call sometime.
5 | #sig#Bryn
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/156_body:
--------------------------------------------------------------------------------
1 | Barry, what did you do to the company!? Listen, sorry to hear about the
2 | problems at ENRON. What a strange turn of events. It is pretty bizarre
3 | after 10 years of ENRON being the model for other companies on how to
4 | succeed in the energy industry. What's your take on why it happened? We get
5 | a little bit of news down here but we're a little out of touch.
6 |
7 | How are you taking it? Are you pissed? Do you feel like you squeezed pretty
8 | much what you wanted out of the job? What's the next thing for Barry Tic?
9 | Well, like all of life's curve-balls there is always something sweet in the
10 | next pitch you get (jeez, how sappy is that!). Anyway, hope you're taking
11 | it all with the good nature you always do.
12 |
13 | Hey, maybe this will give you the chance to take a holiday in Ecuador
14 | (Christmas turkey at our house this year?). Say hi to Kim for us, drop me a
15 | line as soon as you can (a better address to reach me is craigh@ph.com.ec).
16 | Sorry I haven't sent a note before this but you know we are always rooting
17 | for you.
18 |
19 | #sig#Your pal, Craig.
20 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/157_body:
--------------------------------------------------------------------------------
1 | Mike, please ensure that as part of the dealer price quote process, we secure a long term quote to establish a MTM on this transaction. The particulars of the structure are.
2 |
3 | Est Volume: 5,000 - 50,000 MMbtu/day
4 | Start Date: June 1, 2003 - May 30, 2020
5 | Delivery Point: El Paso Keystone Pool
6 | Pricing Reference Point: Gas Dailiy Daily Index Electronic Index Midpoint or the Gas Daily Daily Index Midpoint.
7 |
8 |
9 | We need the index two way if that is possible.
10 |
11 | #sig#Thanks, BT
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/158_body:
--------------------------------------------------------------------------------
1 | Greetings Steve:
2 |
3 | We have had many inquiries regarding our transportation agreements, specifically PGT, Kern River and El Paso.
4 |
5 | Most pressing is PGT's request to market capacity on our behalf. Although I do not believe we need to have PGT perform this function, I do believe we could mitigate some demand charge exposure for "the estate".
6 |
7 | Are you available to meet with Barry and I first thing tomorrow morning? We need to have an answer for PGT's request. We need to know what the contractual rights are for 1) the pipeline when a shipper declares bankruptcy and 2) our ability to mitigate demand charge exposure by releasing capacity on a short term basis (month at a time?).
8 |
9 | Better yet, can we unilaterally turn capacity back to any of these pipes or do we need to reach some type of court approved settlement?
10 |
11 | Obviously, we have many questions. Please advise of your ability to meet ASAP.
12 |
13 | #sig#Thanks,
14 |
15 | #sig#Stephanie
16 |
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/159_body:
--------------------------------------------------------------------------------
1 | Thanks to each of you who help in the review process yesterday. As many of you know, we found that several of the contracts that we reviewed did reconcile with the Total Exposure (in other words, the agreements were not the right ones). Eric Moon and Russell Diamond are working with IT, Credit and Risk to access deal specific information in a way that will let us link deal numbers to contract numbers. Thereafter we can get the right agreements and finish the process. I will update you later today on the status. If you have any questions, please ask.
2 | #sig#Ed
3 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/15_body:
--------------------------------------------------------------------------------
1 | Welcome to UBS Warburg Energy, LLC
2 |
3 | All UBS Warburg Energy, LLC employees are invited to join us for breakfast and happy hour on
4 | Friday, February 8, 2002.
5 |
6 | Breakfast, 8:15 a.m., here on the 3rd floor.
7 |
8 | Happy Hour, 3:00 p.m., appetizers and drinks (2),
9 | Fernando's Hideaway, 824 SW First Avenue (upstairs)
10 |
11 |
12 | We hope you will join us!
13 | Chris and Tim
14 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/160_body:
--------------------------------------------------------------------------------
1 | Here's the draft that we discussed on Friday morning. Please take a look at
2 | it and let me know what you think (Jim is on vacation in New York this
3 | week). We've kept it brief. We put the upstream capacity in here in case you
4 | had achieved any further clarity on the disposition of those contracts, but
5 | it should be easy to remove if needed. We also changed from a two-month term
6 | to a three-month term because in the event that neither of us terminates the
7 | contracts by Feb. 15, we could then extend this agreement if we desired and
8 | not end up trying to sell the awkward block of March/April. Please let me
9 | know if you have any questions, and I look forward to discussing the draft.
10 | Thanks, and talk to you soon.
11 |
12 | <>
13 |
14 | #sig#Leslie.Ferron-Jones@neg.pge.com
15 | #sig#503-833-4350
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/161_body:
--------------------------------------------------------------------------------
1 | Please attend the subject meeting in at 9:00 AM on Wednesday, 12/19 in ECS 06716 (next to the credit war room). Invite anyone you think needs to be there. If you have any questions, please ask.
2 | #sig#Regards,
3 | #sig#Ed
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/162_body:
--------------------------------------------------------------------------------
1 | As some of you might already know, I resigned today. I feel fortunate and blessed to have had this experience here at Enron. Not only have I learned a tremendous amount, but I also have met some very talented and unique people. I am not 100% sure what I will do at this point, but I will probably be heading up to NYC in January. I feel sad to end this chapter, yet excited as I begin a new one in my life.
2 |
3 | As always, it has been a pleasure working with you! Keep in touch. I will be interested to see what happens.
4 |
5 | #sig#Carole C. Frank
6 | #sig#Enron Net Works
7 |
8 | #sig#713.345.3960 work
9 | #sig#713.446.9307 cell
10 | #sig#713.467.3860 home
11 | #sig#carole_frank@excite.com
12 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/163_body:
--------------------------------------------------------------------------------
1 | Mary,
2 |
3 | Attached is a letter providing notification of our cancellation as authorized market for the city of Pasadena. Please let me know if you have any quesitons.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Kim Ward
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/164_body:
--------------------------------------------------------------------------------
1 | Mary,
2 |
3 | Attached is a letter notifying Socal Gas that Enron Administrative Services is no longer the authorized marketer for Jefferson Smurfit Corp & Stone Container Corp. Please let me know if you have any questions.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Kim Ward
8 | #sig#Manager, West Gas Origination
9 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/165_body:
--------------------------------------------------------------------------------
1 | I hope you had a Merry Christmas. We are having a great time. Lucci and I are working with Ed Mc Michael on selling of the storage today. If you need me call me on my cellphone or email me.
2 |
3 | #sig#Take care,
4 |
5 | #sig#Mark
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/166_body:
--------------------------------------------------------------------------------
1 | For those of you that will be in the office after New Year's, we will begin an informal fundies meeting each day to cover what has happened in the market since we filed for bankruptcy. We will go over pipeline flows, storage balances, weather, monthly and daily prices, etc. With a few of the Banks showing interest in showing a bid, I thought it would be good for us to begin our preparation to trade the market.
2 |
3 | Chris Gaskill will be providing the daily packet and Patti Sullivan will provide the daily operations report.
4 |
5 | I was thinking that we should start around 9:30 each morning.
6 |
7 | #sig#Grigsby
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/167_body:
--------------------------------------------------------------------------------
1 | Will we be acting as agent for Pasadena in January? Should we turn this contract back? Let me know what your thoughts are.
2 |
3 | #sig#Mike
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/168_body:
--------------------------------------------------------------------------------
1 | Barry, hope that you had a great Christmas. Just wanted to touch base. I am continuing to handle some issues from home on KeySpan and some Florida contracts. I am not scheduled back until Jan 3. Let me know if I can do anything.
2 |
3 | #sig#Frank
4 | #sig#--------------------------
5 | #sig#Sent from my BlackBerry Wireless Hafrank vickers
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/169_body:
--------------------------------------------------------------------------------
1 | We have for the last couple of weeks started to compile the Re-start/Integration Plans for Netco. So far, we have primarily focussed on the mid/back plans where the technology requirements have been the driving factors. Several plans are in the final stages of completion including:-
2 |
3 | + Infrastructure Jenny Rub
4 | + Development Jay Webb
5 | + EnronOnline Webb / Forster
6 | + HR David Oxley
7 | + Cash Management Tom Myers
8 | + Credit Debbie Brackett
9 |
10 | The rest will be completed shortly.
11 |
12 | We now need to focus on the commercial plans which have a slightly different focus. John and I would like to receive the plans "Re-start/Integration" plans by January 7th, 2002 in order to go through them individually with each of you or in groups. The focus should be to ensure that we have as much of the business up and running in the shortest time possible. I have a suggested outline which you do not have to use but I thought it might help. Please decide within yourselves the areas you will cover together or individually.
13 |
14 | Customer Side
15 | + Customers Phase 1 - First Week (eg top 10)
16 | Phase 2 - First Month (eg top 50)
17 | Phase 3 - First Quarter (eg top 100)
18 | + Action Plan Phase 1 Customers
19 | Phase 2 Customers
20 | Phase 3 Customers
21 | + Contracts by customers (pre-prepared with credit terms etc)
22 | + Customer visit schedule
23 |
24 | Product Side
25 | + List of Products Phase 1 - First Week
26 | Phase 2 - First Month
27 | Phase 3 - First Quarter
28 |
29 | Target Number of Transactions
30 | + Phase 1
31 | + Phase 2
32 | + Phase 3
33 |
34 | IT transfer
35 |
36 | #sig#Louise
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/16_body:
--------------------------------------------------------------------------------
1 | Group,
2 |
3 | There are two new paths for EPE information.
4 |
5 | The first is for EPE schedules:
6 | This path is P:/Trading/Real Time/El Paso Schedules/Admin/El Paso Schedules/2002/Feb02/EPE_FEB_02
7 |
8 | The second is for the EPE models:
9 | M:/Electric/EPE/EPE 2002/Models/February02/...
10 |
11 | The estate will be settling CAPS each day (creating DA and HA finals) beginning on Friday.
12 |
13 | Please work closely with John and Dave to get all problems resolved as quickly as possible.
14 |
15 | #sig#Thanks,
16 | #sig#Bill
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/171_body:
--------------------------------------------------------------------------------
1 | The New Year has arrived and we really to finalize a lot of the work with regards to moving into NETCO. Obviously we still do not have a deal but the deadline is approaching and preparations need to be finalized.
2 |
3 | The main areas to focus on over the next week are:-
4 |
5 | (i) Re-start/Integration Plans (due on Jan 7) To be forwarded to Louise
6 | These plans need to be detailed and show clear detailed timelines and detailed responsibilities for getting us up and running as soon as possible.
7 | The current restart date is January 21, 2001 but may be pushed forward to January 14, 2002.
8 | (ii) Budget (due Jan 3, 2002) To be forwarded to Faith Killen
9 | First year budget to include all start up costs (some of which can be amortized)
10 | (iii) Seating Plans Tammy Shepperd to co-ordinate
11 | We need to start the planning process for seating as we will be living on floors 5 & 6 of the Enron South building.
12 | I have asked Tammy Shepperd to commence the seating plan and we would look to start the moves as soon as possible but with a large number occuring around January 11,2002.
13 | (iv) Due Diligence
14 | We continue the process with two new companies this week (Wednesday and Thursday). Andy Zipper is taking the lead for the company arriving on Wednesday, please help him with his requirements.
15 |
16 | I would ask that both John and I are notified of any changes to the Netco personnel list on a timely fashion and that the list is maintained on a continual basis. Please forward all alterations to Jeanie Slone who has responsibility for the master list.
17 |
18 | Communication - I believe that the New Year combined with a internal communication issues may be a good time to review what we want to say on Netco and what our policies are. I am asking David Oxley to co-ordinate with all of you on this. I know a lot of you believe that we need to only communicate once we have retention programme in place for the estate which may be a good idea but we we need to make sure that we lose as few people as possible.
19 |
20 | If you are unavailable this week, please ensure you delegate this work out.
21 |
22 | Happy New Year
23 |
24 | #sig#Louise
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/173_body:
--------------------------------------------------------------------------------
1 | Dave, I don't know if you received this original message or not.... we will cancel today's meeting.
2 | If you need to reach me, call in...
3 |
4 | #sig#Thanks. BT
5 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/175_body:
--------------------------------------------------------------------------------
1 | I have completed the organizer... I have also sent to you today my information for filing purposes. Call and let me know what else you need.
2 |
3 | #sig#Barry
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/177_body:
--------------------------------------------------------------------------------
1 | Mark, further to our conversation... Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been or will be settled financially, however these transactions could have been set up as physical. The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement, and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100% load factor and settlement would be on 100% of the volume regardless as to what physically flowed.
2 |
3 | Please call me if there are any questions regarding this note.
4 |
5 | #sig#Barry Tycholiz
6 | #sig#Vice President, Enron North America
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/181_body:
--------------------------------------------------------------------------------
1 | Barry, attached is a copy of one of our latest fundamentals packages.
2 |
3 | #sig#Dean
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/184_body:
--------------------------------------------------------------------------------
1 | Dorie, is there a different charge for one day vs the next? ( you us Canadians, always looking for the deal ).
2 |
3 | #sig#Barry
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/185_body:
--------------------------------------------------------------------------------
1 | Sorrry I missed your call.... I am in all day today. Call me back if you need to speak with me.
2 |
3 | #sig#BT 3-1587.
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/189_body:
--------------------------------------------------------------------------------
1 | Judy, what is the proper mailing address in Calgary that my supporting documents should be sent to. Also, should I include a blank cheque to you for payment to the Canadian Taxes or do you want me to wait until we know what the amount is.
2 |
3 | #sig#Thanks. BT
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/190_body:
--------------------------------------------------------------------------------
1 | Barry,
2 |
3 | Wuold you please send an email to Diana Willigerod (cc:'ed above) authorizing her to provide me with a secon screen for EOL.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Dave
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/192_body:
--------------------------------------------------------------------------------
1 | Mark and Eric,
2 |
3 | John Malowney here in the Portland office is trying to assist Louisiana-Pacific in financially hedging their overall gas exposure. The attached e-mail and spreadsheet outline the basics. Essentially, he would like to group their plants into 5 or 6 general pricing regions for parsimony. At this point, we are looking for assistance roughly grouping these plants together. Obvioulsy, If L-P is interested in this type of hedge, we will be contacting you for further assistance in obtaining quotes. L-P has made it clear that they wish to deal with only one point-man at Enron and John has that role. He is currently out of the office, so you should get back to me with any questions or needed clarifications.
4 |
5 | Thanks for your assistance. Please contact me by phone (503.464.8122) or e-mail.
6 |
7 | #sig#Todd A. Perry
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/19_body:
--------------------------------------------------------------------------------
1 | Bill:
2 |
3 | Please note the following due to the past two days schedules have been wrong in the EPE Schedules in Excel:
4 |
5 | Tag number 6181 has been cancelled (50mw to the CISO).
6 |
7 | Lending is wrong in the EPE schedules (it is 75mw from PSCO instead of 50mw).
8 |
9 | SPS is wrong for HE 08 (it is 130mw instead of 100mw).
10 |
11 | I thought you might like to since this is the only income we have currently for real-time and a major screw-up could hurt our relationship.
12 |
13 | #sig#Regards,
14 |
15 | #sig#Bert Meyers
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/1_body:
--------------------------------------------------------------------------------
1 | I had a problem with CAPS again (profile error). Last time you said that is was a problem with the mapping of the H: drive. Login: janders3
2 |
3 | #sig#JohnAnderson
4 | #sig#Realtime
5 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/20_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | Just wanted to drop you a line to let you know that I am again an employee of UBS. As I am sure you are aware, we are in process of transition from Enron to UBS. I recently filled out all the appropriate paperwork to complete the transition. From what I understand we are going to be a part of the trading arm of UBS Warbug. This should be very exciting. Maybe one day I will have the opportunity to trade stocks, bonds, or some other financial product if power ever becomes old hat.
4 |
5 | Furthermore, I have will soon receive a signing bonus and a raise, so I intend on funding my IRA and my wife's IRA for 2001 and maybe even 2002. If it wouldn't be too much trouble, I would like to know what the contribution levels are for 2002. I think I saw something that changed them and the income levels one can have for a Roth.
6 |
7 | As for Molly, the advertising business seems to be picking up for 2002. She opened enough new business in the fourth quarter of 2001 to meet her goals and is on her way to making it the first quarter as well. I think she is really beginning to understand her business.
8 |
9 | Anyway, I currently have lots of time to myself since we are not trading, so maybe we can have lunch sometime. If that is not possible, no big deal, I know that you are extremely busy.
10 |
11 | I hope all is well with you and your family.
12 |
13 | #sig#Sincerely,
14 |
15 | #sig#Bert
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/21_body:
--------------------------------------------------------------------------------
1 | Bill:
2 |
3 | Last night as I was trying to log onto the ADS machine I kept getting a server error. You will probably want to let one of the IT guys know of this problem
4 |
5 | #sig#Bert
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/22_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | I would be delighted to meet with you over breakfast. I am available for both of the times you suggested, just name the time and the place.
4 |
5 | #sig#Regards,
6 |
7 | #sig#Bert
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/23_body:
--------------------------------------------------------------------------------
1 | Hi Bertmeister!
2 | Good to have you back in the UBS fold. It is also good to hear all is going
3 | well for you and Molly. I would look forward to getting together sometime,
4 | perhaps for breakfast next Tuesday or Wednesday (2/5 or 2/6). Let me know
5 | which works for you.
6 |
7 | #sig#Tom
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/24_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | Just wanted to drop you a line to let you know that I am again an employee
4 | of UBS. As I am sure you are aware, we are in process of transition from
5 | Enron to UBS. I recently filled out all the appropriate paperwork to
6 | complete the transition. From what I understand we are going to be a part
7 | of the trading arm of UBS Warbug. This should be very exciting. Maybe one
8 | day I will have the opportunity to trade stocks, bonds, or some other
9 | financial product if power ever becomes old hat.
10 |
11 | Furthermore, I have will soon receive a signing bonus and a raise, so I
12 | intend on funding my IRA and my wife's IRA for 2001 and maybe even 2002. If
13 | it wouldn't be too much trouble, I would like to know what the contribution
14 | levels are for 2002. I think I saw something that changed them and the
15 | income levels one can have for a Roth.
16 |
17 | As for Molly, the advertising business seems to be picking up for 2002. She
18 | opened enough new business in the fourth quarter of 2001 to meet her goals
19 | and is on her way to making it the first quarter as well. I think she is
20 | really beginning to understand her business.
21 |
22 | Anyway, I currently have lots of time to myself since we are not trading, so
23 | maybe we can have lunch sometime. If that is not possible, no big deal, I
24 | know that you are extremely busy.
25 |
26 | I hope all is well with you and your family.
27 |
28 | #sig#Sincerely,
29 |
30 | #sig#Bert
31 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/25_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | If it is not too much trouble, I would prefer to meet downtown. I now live downtown and currently do not have a car. If that is not possible, its no big deal, I will just take a cab over to the buffalo gap. Just let me know...
4 |
5 | #sig#Bert
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/26_body:
--------------------------------------------------------------------------------
1 | Kathy:
2 |
3 | Please find the enclosed epe model for 01/25/02:
4 |
5 |
6 |
7 |
8 | #sig#Regards,
9 |
10 | #sig#Bert Meyers
11 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/27_body:
--------------------------------------------------------------------------------
1 | Bill:
2 |
3 | For yet another day we seem to be having problems including all the schedules in our EPE schedule sheet. Tag 52159 was not included (25mw schedule to LADWP). I think part of the problem is some counterparties are not including EPMIWE as a PSE. Fortunately, the dispatchers had the schedule and informed me of the problem. I then called LADWP and WESCO to checkout with them. If this did not happen we wouldn't have known what the deal was.
4 |
5 | If you have any questions, please give me a call.
6 |
7 | #sig#Bert
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/28_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | Earlier I wrote that it would be easier for me to meet downtown because I don't have my car. However, Molly has made an unexpected trip to PA so I have her car. Meeting @ the Buffalo Gap is not a problem anymore.
4 |
5 | #sig#Bert
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/29_body:
--------------------------------------------------------------------------------
1 | Tom:
2 |
3 | Buffalo Gap would be just fine.
4 |
5 | #sig#Bert
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/2_body:
--------------------------------------------------------------------------------
1 | Realtime group-
2 | This occurs when your H drive is not mapped. To fix the mapping, follow the instructions at:
3 |
4 | http://172.17.172.62/rt/tips/mapHdrive.html
5 |
6 | Hope this helps!
7 |
8 | #sig#John Oh
9 | #sig#Enron North America 503.464.5066
10 | #sig#121 SW Salmon Street 503.701.1160 (cell)
11 | #sig#3WTC 0306 503.464.3740 (fax)
12 | #sig#Portland, OR 97204 John.Oh@Enron.com
13 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/30_body:
--------------------------------------------------------------------------------
1 | Amy,
2 |
3 | Please note that I would like to interview for the trader assistant positions for both the cash desk and the term desk.
4 |
5 | #sig#Bert Meyers
6 | #sig#Portland WSCC Real Time Desk
7 | #sig#503-880-5315
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/31_body:
--------------------------------------------------------------------------------
1 | I checke our Historical Caps for these deals and i am certain that we bought from and sold to Reliant (NES1) for hour ending 11. If this were not the case
2 | we would have failed phase 2 with the ISO. I am searching my notes for the price and whom I spoke with at Reliant.
3 |
4 | #sig#Regards,
5 |
6 | #sig#Bert
7 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/32_body:
--------------------------------------------------------------------------------
1 | Courtney:
2 |
3 | Thought you might like to know that there were numerous cuts last night and into the morning hours.
4 |
5 | They are as follows:
6 |
7 | 05/06/01
8 | ISO cut import @ Four corners 9MW for HE 19 and Export @ Summit 9MW for HE 19.
9 | The same schedule was cut for by 21MW for HE 21 thru 22.
10 |
11 | For HE 23 the ISO cut a 21MW import @ PV and the corresponding export @ Summit.
12 |
13 | For all these hours, the ISO kept our buy resale whole with NCPA SP15 to NP15.
14 |
15 | PAC Cut tag 14987 PSCO_AEPMWE_EPMIWE_EPMEWE_AVWP00 25 MW for HE 19-24. PSCO booked out with Avista Water Power (WWP) 05/06/01.
16 |
17 |
18 | 05/07/01
19 | PAC was cutting tag 14988 PSCO_EPMI_MECOLB_AVSTWE_CHPD 25MW for HE 01. PSCO booked out with Avista Energy Trading (CHPD) 05/07/01.
20 |
21 | CISO cut schdule EPMI_CISO_5000 and EPMI_CISO_5001 by 1MW each at PV. We resold the power to SRP for $5.00. Not a great price but everyone in the SW was
22 | getting cut at the same time. The new deals in Enpower are as follows: 602984 and 602985. If you have any questions please see cut note or lotus notes.
23 |
24 | Also this morning the ISO was cutting and SC trade with NES1 in SP15 and dec'd load in PGE 1, 2, and 3. Must be Path 15 Congestion.
25 |
26 | #sig#Regards,
27 |
28 | #sig#Bert Meyers
29 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/33_body:
--------------------------------------------------------------------------------
1 | Courtney:
2 |
3 | Thought you might like to know that there were numerous cuts last night and into the morning hours.
4 |
5 | They are as follows:
6 |
7 | 05/06/01
8 | ISO cut import @ Four corners 9MW for HE 19 and Export @ Summit 9MW for HE 19.
9 | The same schedule was cut for by 21MW for HE 21 thru 22.
10 |
11 | For HE 23 the ISO cut a 21MW import @ PV and the corresponding export @ Summit.
12 |
13 | For all these hours, the ISO kept our buy resale whole with NCPA SP15 to NP15.
14 |
15 | PAC Cut tag 14987 PSCO_AEPMWE_EPMIWE_EPMEWE_AVWP00 25 MW for HE 19-24. PSCO booked out with Avista Water Power (WWP) 05/06/01.
16 |
17 |
18 | 05/07/01
19 | PAC was cutting tag 14988 PSCO_EPMI_MECOLB_AVSTWE_CHPD 25MW for HE 01. PSCO booked out with Avista Energy Trading (CHPD) 05/07/01.
20 |
21 | CISO cut schdule EPMI_CISO_5000 and EPMI_CISO_5001 by 1MW each at PV. We resold the power to SRP for $5.00. Not a great price but everyone in the SW was
22 | getting cut at the same time. The new deals in Enpower are as follows: 602984 and 602985. If you have any questions please see cut note or lotus notes.
23 |
24 | Also this morning the ISO was cutting and SC trade with NES1 in SP15 and dec'd load in PGE 1, 2, and 3. Must be Path 15 Congestion.
25 |
26 | #sig#Regards,
27 |
28 | #sig#Bert Meyers
29 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/34_body:
--------------------------------------------------------------------------------
1 | Donald,
2 |
3 | Ralph from the Alberta Power Pool called to review tag #22872. He is wondering if this tag should run both this sunday and the next sunday. If you could please check it out when you have some time on Monday for the following week he would certainly appreciate it. If there is no problem with the tag then he said not to bother. The main issue is that the schedule will flow for this weekend.
4 |
5 | Any questions please give me a call.
6 |
7 | #sig#Regards,
8 |
9 | #sig#Bert Meyers
10 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/35_body:
--------------------------------------------------------------------------------
1 | PAC NW 503-813-5389
2 | PAC SW 503-813-5374
3 | Old number get busy tones
4 | Enjoy
5 | #sig#Leaf
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/36_body:
--------------------------------------------------------------------------------
1 | Ryan:
2 |
3 | Please be aware that on the days of the 14, 15, and 16 of July. I need to help my wife pack and unpack for our move to downtown Portland. Any consideration you can arrange will be greatly appreciated.
4 |
5 | Give me a call with any questions or concerns
6 |
7 | #sig#Bert
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/38_body:
--------------------------------------------------------------------------------
1 | Tag number 25430 was cut for HE 19 by 3 MW. This was a buy from LV Cogen and a sell to SRP. I resupplied the power from EPEC in the hour from El Paso Electric for $50 and gave it to SRP @ Palo Verde. I spoke to Mike @ SRP for this deal and he agreed. In enpower I have made deals for a buy from EPE and a sell to LV Cogen under the ST West Services Book. The deals are 666355 and 66356.
2 |
3 | If you have any question please give me a call on the West short term services desk.
4 |
5 | #sig#Regards,
6 |
7 | #sig#Bert Meyers
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/39_body:
--------------------------------------------------------------------------------
1 | To Whom it may concern:
2 |
3 | Tag 23841 was cut by 50 MW for HE 01 and HE 02. Wally @ NEVP said he heard of the cut @ 23:30 on 06/27/01 but he neglected to call us and tell us about the cut. The reason for the cut was PAC non-firm tranny being cut that PSCO bought. However they sold us a firm product. PNM didn't know about the cut either until their reliability person told them they were pushing power on the grid (Approx. time of notification 01:20). We booked out with PSCO and PNM for HE 01 and HE 02 on the 4C345 part of the transaction (deals 666385 and 666387). For HE 03 25 MW was cut and we booked out with PSCO and PNM on the 4C345 piece of the transaction (deals 666385 and 666387). PSCO resupplied the power for HE 03 from SRP @ Mead 230. The same thing happened for HE 06 as HE 03. SRP resupplied the power for HE 06. I bought back for HE 06 from PSCO @ 4C345 and resold the power to SRP. The deals are in the RT Inc. sheet for the 28th of June. After the fact, I spoke with Ryan @ PSCO and he agreed to be billed liquidated damages for HE 01 and HE 02 on the MEAD 230 part of the transaction (06:00 on Kate Symes Phone).
4 |
5 |
6 | Please Call with question,
7 |
8 | #sig#Bert Meyers
9 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/3_body:
--------------------------------------------------------------------------------
1 | We are continuing work to ensure our new online trading system, UBSWenergy, is ready for launch on the week of Feb. 11, which is the week following the anticipated Enron/UBS Closing Date of Friday, Feb. 8.
2 |
3 | To assist in this process, we are planning an online trading simulation for Thursday, February 7 from 0900 to 1100 (CST). It is planned to include all traders, originators, mid/back-office staff and IT teams who would normally be involved with electronic trading.
4 |
5 | Purpose of the simulation - The simulation will test our "live" trading environment (restricted to internal access only) and will:
6 |
7 | Test the integrity and functionality of the system and related processes (including credit, risk, legal, operations, etc.)
8 | Provide an opportunity for traders to verify their products and product setup
9 | Demonstrate how the trading system and processes work to UBS staff visiting next week
10 |
11 | Nature of the simulation - The simulation will include testing of all processes related to trading and mid/back-office functions:
12 |
13 | Credit, Legal, Risk, Operations, and other groups:
14 | Profile tests for "external" customers (role-played by Originators)
15 | Profile tests for internal users (traders, back-office, etc.)
16 | Data and Process checking as completed transactions flow through Mid/back-Office systems
17 |
18 | Traders and Originators:
19 | Traders will be managing their individual books and associated products. Although we may be restricted to certain financial products on launch day, both physical and financial will be available during the simulation. Each trader will be asked to manage their individual position and profitability goals for the simulation.
20 | Originators and others will be asked to play the roles of counterparties. Credit limits, etc. will be tested during the simulation.
21 |
22 | Information Technology:
23 | Will assist in preparing data on the system for the simulation.
24 | After the simulation is over, will work with the relevant groups to test the trade-data in various systems and risk books
25 | Will ensure all simulation data is removed from the system after the simulation is complete.
26 |
27 |
28 | Next steps
29 |
30 | Traders and Originators will be contacted prior to simulation day to ensure they have appropriate access to the system.
31 | Originators will be assigned specific roles. You should expect to receive an email and/or handout on or before Wednesday, Feb. 6.
32 | UBSWenergy personnel will be meeting with back office personnel over the next several days to ensure everything is in place for the simulation.
33 |
34 |
35 | Summary
36 |
37 | This simulation is an important step towards re-launching our trading business. We appreciate the participation of everyone involved as we prepare for the public launch of UBS Warburg Energy and the UBSWenergy trading platform.
38 |
39 | If you have any questions, comments, suggestions, or ideas regarding this simulation, please feel free to contact Bob Shults (30397) or myself (31861).
40 |
41 |
42 | #sig#Regards,
43 | #sig#Dave Forster
44 | #sig#713.853.1861
45 | #sig#E-Mail: David.Forster@enron.com
46 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/40_body:
--------------------------------------------------------------------------------
1 | Leaf,
2 |
3 | Lisa Gang wanted me to tell the night shift that tag # 25883 is not a good tag. We tried to cancel it but for some reason WALC denied our cancel request. Cara already spoke to Leon @ PAC regarding this issue. The reason this tag is not a good tag is the tranny request was duplicated in another tag (25825) which is a good tag. If you have any questions please call Cara on her cell phone.
4 |
5 | #sig#Bert Meyers
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/41_body:
--------------------------------------------------------------------------------
1 | Les,
2 |
3 | FYI, LV COGEN went off line @ 15:00. We called SRP to inform them of the cut and Vanessa said she sees the schedule as unit contingent so she agreed to go to zero on the schedule.
4 |
5 | #sig#Bert
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/43_body:
--------------------------------------------------------------------------------
1 | To Whom this may concern:
2 |
3 | Please note that Colstrip tripped @ approx. 13:20 today. I picked up tranny from BPA (Mid-C to MPC SYS) for 62 MW. The first hour of the cut was an integrated number of 41MW. After Starting HE 15 on the cut was for the full 62 MW each hour. The deal numbers affected by this cut were reduced by the appropriate amount (711291 and 709803) and new deals were made to correct enpower. These new deals are 714148 and 714150.
4 |
5 | If you have any questions please see me. My number @ the real time desk is 1-800-684-1336.
6 |
7 | #sig#Regards,
8 |
9 | #sig#Bert Meyers
10 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/44_body:
--------------------------------------------------------------------------------
1 | Chris,
2 |
3 | I put a buy resale deal in Enpower under your book for 274 MW on peak today. If for some reason it does not relate to you please inform us so we can research it a little bit further. The deal number is 724362.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Bert Meyers
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/45_body:
--------------------------------------------------------------------------------
1 | Please make sure to check the new schedule for real time for the rest of the month of August. Due to Ryan becomming the EOL trader for real time...we had to make some adjustments to the schedule. I tried to make as fair for all as possible. If you have any concerns please see me to see if there is anything I can do to help.
2 |
3 | Also please give me any requests for Sept as I am going to be making it soon.
4 |
5 | #sig#Bert Meyers
6 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/47_body:
--------------------------------------------------------------------------------
1 | Guys,
2 |
3 | I have prepared the schedule for September already. If there are any conflicts that you have please see me on an individual basis.
4 |
5 | #sig#Thanks,
6 |
7 | #sig#Bert
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/48_body:
--------------------------------------------------------------------------------
1 | I HAD TO MAKE SOME MINOR ALTERATIONS TO THE SCHEDULES FOR AUGUST AND SEPTEMBER BECAUSE MEIR IS OUT OF TOWN ON SOME OF THE DAYS. I KNEW OF THIS ONLY AFTER I HAD MADE THE SCHEDULES. IT AFFECTS ONLY A COUPLE OF MEMBERS IN THE GROUP. SORRY FOR THE INCONVENIENCE.
2 |
3 | #sig#BERT
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/49_body:
--------------------------------------------------------------------------------
1 | FYI -
2 |
3 | The Southwest OASIS website (for PNM, SRP, NEVP, EPE, SMUD, SPP, and LADWP transmission reservations) has given us a new login ID and password. Don Norman is no longer our site administrator; Les Rawson has taken that title. I registered us with two user names and passwords, which are as follows:
4 |
5 | Login: rxtime12
6 | Password: epmipower
7 |
8 | Login: kxsymes1
9 | Password: realtime
10 |
11 | Please let me know if you have any questions regarding this new procedure.
12 |
13 | #sig#Thanks,
14 | #sig#Kate
15 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/52_body:
--------------------------------------------------------------------------------
1 | To whom it may concern:
2 |
3 | Tags numbered 33079 (5mw) and 33086 (20mw) were cut for a number of hours.
4 |
5 | HE 08 and HE 09:
6 | Both tags were cut and nobody informed any of the counter parties that the tags were cut until about 25minutes into the last hour of the cuts. I initially heard of the cuts from Larry @ MEANMN and immediately called each person in the path to inform them of the cut situation. No action was taken for these two hours.
7 |
8 | HE 13:
9 | Both tags were cut and we purchased 25mw from APS to cover the cut and had it delivered to four corners 345 @ a price of $25.00.
10 |
11 | HE 17:
12 | Both tags were cut and we purchased 25mw from LADWP to cover the cut and had it delivered to Palo Verde @ a price of $44.00. Will @ PNM agreed to having the power delivered @ a different location.
13 |
14 | HE 18 and HE 19:
15 | Both tags were cut and we purchased 25mw from SRP to cover the cut and had it delivered to four corners 345 @ a price of $45.00.
16 |
17 | HE 20:
18 | Tag 33086 was cut to 0 (total of 20mw) steve merriss bought power from El Paso for $45.00.
19 |
20 | Steve Merriss input the cut note into the lotus database.
21 |
22 | #sig#Regards,
23 |
24 | #sig#Bert Meyers
25 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/55_body:
--------------------------------------------------------------------------------
1 | TO WHOM IT MAY CONCERN:
2 |
3 | TODAY I BOUGHT TRANNY FROM APS FOR THE FOLLOWING HOURS AND IN THE FOLLOWING AMOUNTS FROM PALO VERDE TO FOUR CORNERS 345:
4 |
5 | HE 01: 15MW ON OASIS NUMBER 26810
6 |
7 | HE 02: 47MW ON OASIS NUMBER 26812
8 |
9 | HE 03: 20MW ON OASIS NUMBER 26813
10 | 50MW ON OASIS NUMBER 26814--PLEASE NOTE THAT THIS WAS A MISTAKE BUT I REALIZED IT TOO LATE TO TAKE THE SCHEDULE TO ZERO.
11 |
12 | HE 04: 50MW ON OASIS NUMBER 26816
13 |
14 | HE 06: 50MW ON OASIS NUMBER 26817
15 |
16 |
17 | PLEASE CALL ME WITH ANY QUESTION--503-880-5315
18 |
19 | #sig#REGARDS,
20 |
21 | #sig#BERT MEYERS
22 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/56_body:
--------------------------------------------------------------------------------
1 | God Bless America. If anyone uses one of these moves please report back to me with your results. I will attempt the Shanghai Shampoo on my date tonight.
2 |
3 | HOW TO IMPRESS YOUR GIRLFRIEND
4 | These are techniques that have been sent to me by various Bunthaus heads. I want to again stress that in no way do we condone the techniques listed here or practice these. Except maybe the Turkey Shoot and the Rear Admiral. And definitely the Spicey Stanley.
5 | Here is a fairly extensive compilation of some of the extraordinary sexual activities that can be performed by men:
6 | 1. Tea bag - As you are sitting on a girl's face, repeatedly dip your scrotum in and out of her mouth, similar to a tea bag in a cup of hot water. An old favorite.
7 | 2. Hot Lunch - While receiving head from a woman, you shit on her chest. (a.k.a. the Cleveland Steamer)
8 | 3. The Stranger - Sitting on your hand until it falls asleep and then jerking off, eliciting the feeling of a hand job from someone else.
9 | 4. Donkey Punch - Banging a girl doggy style and then moments before you cum, sticking your dick in her ass, and then punching her in the back of the head. This gives a tremendous sensation, but for it to work correctly, the girl must be knocked out so that her asshole tightens up.
10 | 5. Golden Shower - Any form of peeing on a girl. (aka: watersports)
11 | 6. Pearl Necklace - Well known. Whenever you cum on the neck/cleavage area of a girl, it takes on the look of beautiful jewelry.
12 | 7. Coyote - This occurs when you wake up in the room of a nasty skank and you know you've got to give her the slip. However, you realize that your arm is wrapped around her. Therefore, you must gnaw off your own arm to get out of this situation. Can be very painful.
13 | 8. Purple Mushroom - This occurs when a woman is giving you oral sex and you withdraw your penis in order to poke it back into her cheek. It should leave a lasting impression similar to a purple mushroom.
14 | 9. The Flying Camel - A personal favorite. As she is lying on her back and you are hammering her from your knees, you carefully balance yourself without using your arms to prop yourself up. You then to flap your arms and let out a long, shrieking howl. Strictly a class move.
15 | 10. Double Fishhook - From the doggy-style position, you hook your pinky fingers in her mouth and pull back to achieve deeper penetration.
16 | 11. The Ram - Again, you're attacking from behind, when you start ramming her head against the wall in a rhythmic motion. The force of the wall should allow for deeper penetration. Very handy for those lulls in penile sensitivity.
17 | 12. Dog in a Bathtub - This is the proper name for when you attempt to insert your nuts into a girl's ass. It is so named because it can be just as hard as keeping a dog in the tub while giving it a bath.
18 | 13. The Bronco - Back to reality with this classic. You start by going doggy style and then just when she is really enjoying it, you grab onto her tits as tightly as possible and yell another girl's name. This gives you the feeling of riding a bronco as she tries to buck you off.
19 | 14. Pink Glove - This frequently happens during sex when a girl is not wet enough. When you pull out to give her the money, the inside of her twat sticks to your hog. Thus, the pink glove.
20 | 15. The Fountain of You - While sitting on her face and having her eat your ass, jerk off like a madman. Build up as much pressure as possible before releasing, spewing like a venerable geyser all over her face, neck and tits. (Better in her bed)
21 | 16. New York Style Taco - Anytime when you are so drunk that when you go down on her, you puke on her box. Happy trails!
22 | 17. Dirty Sanchez - While banging a girl doggy style, quickly stick 2 fingers deep into her starfish, then reach around and wipe the residue on her upper lip, providing her a mustache.
23 | 18. Western Grip - When jerking off, turn your hand around, so that your thumb is facing towards you. It is the same grip that rodeo folks use; hence, western.
24 | 19. The Blumpkin - You need to find a real tramp to do this right. It involves having her suck you off while you're on the shitter.
25 | 20. The Bismark - Another one involving oral sex. Right before you are about to spew, pull out and shoot all over her face. Follow that with a punch and smear the blood and jism together.
26 | 21. Jelly Doughnut - A derivation of the Bismark. All you have to do is punch her in the nose while you are getting head.
27 | 22. Woody Woodpecker - While a chick is sucking on your balls, repeatedly tap the head of your cock on her forehead.
28 | 23. Tossing salad - Well known by now. A prison act where one person is forced to chow starfish with the help of whatever condiments are available, i.e. Jello, jism, etc
29 | 24. The Fish Eye - Working from behind, you shove your finger in her pooper. Thereupon, she turns around in a one-eyed winking motion to see what the hell you are doing.
30 | 25. Tuna Melt - You're down on a chick, lapping away, and you discover that it's her time of the month. By no means do you stop though. When the whale spews, tartar sauce with a hint of raspberry smothers your face.
31 | 26. The Fur Ball - You're chomping away at some mighty Zena who has a mane between her legs the size of Lionel Richie's afro, when a mammoth fur ball gets lodged in your throat. You punch her.
32 | 27. The Chili Dog - You take a dump on the girl's chest and then titty fuck her.
33 | 28. Gaylord Perry - Going to only one knuckle during an anal probe is for wimps. Make this famous knuckle-ball pitcher proud and use multiple digits on that virgin corn hole. A minimum of 2 knuckles required (either on one finger or on multiple).
34 | 29. The Rear Admiral - An absolute blast. When getting a chick from behind (with both partners standing), make sure you don't let her grab onto anything when she is bent over. Then, drive your hips into her backside so that the momentum pushes her forward. The goal is to push her into a wall or table, or have her trip and fall on her face. You attain the status of Admiral when you can push her around the room without crashing into anything and not using your hands to grab onto her hips.
35 | 30. Glass Bottom Boat - Putting saran wrap over the skank's face and taking a dump.
36 | 31. Ray Bans - Put your nuts over her eye sockets while getting head. You're can is on her forehead. Yes, it may be anatomically impossible, but it is definitely worth a try.
37 | 32. The Snowmobile - When plugging a girl while she's on all fours, reach around and sweep out her arms so she falls on her face.
38 | 33. The Dutch Oven - Also well known. Whenever you fart while humping, pull the covers over her head. Don't let her out until all movement ceases.
39 | 34. Smoking Pole - Self Explanatory. Don't use fire.
40 | 35. Rusty Trombone - Getting the reacharound while getting your salad tossed. Also known as milking the prostate.
41 | 36. Turkey Shoot - When you're coming, come on her face and let it drip off her chin so it looks like that red shit on the turkey's chin.
42 | 37. Stovepiping - Taking it in the Tush.
43 | 38. Rusty Anchor - After a healthy term of the Stovepiping, the recipient gets to enjoy a good fudgesicle.
44 | 39. Sandpiper - A stovepiping on the local beach, desert, or playground sandbox. Also known as the Sandblast.
45 | 40. Lucky Pierre - the middle man in a three way buttfuck. Also known as the french sandwich.
46 | UPDATED!!!! NEW LISTINGS!!!!
47 | 41.Divortex- A mystical place into which old friends are sucked when a married couple splits up.
48 | 42.Blump- To suck someone's dick while they are taking a dump.
49 | 43.Bustard- A very rude bus driver.
50 | 44.Cold Faithful- Blowing your visibly-steaming load outside in the winter-time, like when you get your cock sucked on a ski-lift.
51 | 45.Grand pappy smash- To beat your meat so hardcore that it starts to chafe and bleed.
52 | 46.Esplanade- To attempt an explanation while drunk.
53 | 47.Flatulence- The emergency vehicle that picks you up after you are run over by a steamroller.
54 | 48.Butt Rodeo- When you're going at it with a girl, you flip her over real fast, start ramming her in the ass and yell as loud as possible "BUTT RODEO!" You then see how long you can ride her till she tosses ya off!
55 | 49.Bargoyle- The hideous old hair-spray hag who seems to live at your local watering hole. She usually smokes endlessly, spends hundreds of dollars a night on video-poker, and makes sexually threatening comments to frightened college freshmen.
56 | 50.Pasteurize- Once you get her hairy bush pasteurize, you got it licked!
57 | 51.Beerelevant- A point which does not seem to be particularly important, given enough beer
58 | 52.Mangry- Describing the anger of women who are angry at men, specifically. "She's such a bitch, she's just plain mangry."
59 | 53.Clitourist- A man who won't stop and ask for directions in bed. ie: "Because of his fouled foreplay, Suzy realized that her new boyfriend was no experienced bedroom traveler, but merely a clitourist."
60 | 54.Stuffucking- The act of "stuffing in" your limp, helpless member in hopes of getting it up. Potential causes: you're too drunk or she's too ugly. (see also; Fugly)
61 | 55.Antlers- Wide, flat, flapjack titties that come to a sharp point at the nipples.
62 | 56.The Kangmin - while a girl is reciting bad poetry, you take her from behind.
63 | 57.The Flaming Amazon- This one's for all you pyromaniacs out there. When you're screwing some chick, right when your about to cum, pull out and quickly grab the nearest lighter and set her pubes on fire, then extinguish the flames with your jizz!
64 | 58.The Screwnicorn -When a dyke puts her strap-on dildo on her forehead and proceeds to go at her partner like a crazed unicorn.
65 | 59.Split pissonality -When you're taking a leak and you get two streams out of the one hole!
66 | 60.A Short in the Cord- A "code" phrase used by the common man to refer to Testicular Tendon Tangle Syndrome. Ex. "Oh fuck! My nuts are killing me... I think I've got a short in the cord."
67 | 61.Old Jism Trail -The stream of semen oozing down the chin and chest of someone who has just finished fellating a senior citizen.
68 | 62.Abdicate -To give up all hope of ever having a flat stomach.
69 | 63.Lymph -To walk with a lisp.
70 | 64.Anal Boot- An anal boot is when you take a pitcher of beer, everyone spits in it, someone stirs it with their cock and then the mixture is poured through the crack of a man ass into the waiting mouth of the loser of a bet or drinking game.
71 | 65.Australian Death Grip- The act of grabbing a woman by the haunches/crotch and staring deeply into her eyes until you're slapped or kissed. A recommended tactic for very crowded bars. Another great opportunity for wagering among friends.
72 | 66.Fumilingus -When a man (or woman) performs cunnilingus on a woman and she farts directly in his/her face.
73 | 67.Intoxicourse- Having sexual intercourse whilst piss-drunk.
74 | 68.Valsalva -The act of pinching shut (with thumb and forefinger) a woman's nose while receiving fellatio; most effective when employed just prior to the release point due to the gag reflex and ensuing swallow that the woman is forced to do to continue breathing. A great first date ploy, as it sets the stage for what the rules of engagement will be going forward.
75 | 69.Insta-gasm -Pre-mature ejaculation at the sight of a beautiful woman. ie: "She was so fine, I had an insta-gasm before I could get her clothes off!"
76 | 70.Manual Deconstipation -This is where you get out the hand cream and go in manually for the hammerhead by breaking it into smaller chunks and pulling it out a piece at a time.
77 | 71.Post Poodum Syndrome -The feeling of depression felt after successful removal of a hammerhead. The excitement has passed, and you must now find something else to occupy your time.
78 | 72.The Homolic Maneuver -Using your penis to dislodge an object blocking a choking victim's windpipe.
79 | 73.Pegging - having a female take you in the rear with a strap on.
80 | 74. The UnderDog - after a hard session at the gym, your armpit muscle begins to twitch; thus giving you the ability to jerk a guy off with your armpit muscle.
81 | 75. The Twinkler - when you are 69ing a girl and you shove your dick into mouth hard, and you watch her a-hole "twinkle" as she gags.
82 | 76. Angry dragon - This involves the girl giving the guy head and as he is about to cum slapping the girl on the back of the head causing the cum to come out her nose. Great care should be used to not slap her mouth shut.
83 | 77. Tony Danza - a takeoff of the donkey punch is called the Tony Danza. When you are about to cum while doing a girl from behind, you say "who's the boss?" and stick it in her ass. Before she says anything you shout "TONY DANZA!" and punch her in the back of the head.
84 | 78. Alaskan firedragon - another good take off is one of the angry dragon that is called the alaskan firedragon. When a girl is giving you a blowjob, cum in her mouth unexpectedly and plug up her mouth at the same time. Then whisper in her ear "i have syphilis" so she spews it out her nose.
85 | 79. The Walrus - when she's giving u a blowjob and u cum in her mouth unexpectadly, cover up her mouth and punch her in the stomach.
86 | 80. The Fat Lip - If you get poison ivy and finger a girl, her labia lips will swell. A la, the fat lip.
87 | 81. Sleeping Bag - If you're going down on a really fat girl, you pull her enormous stomach roll of fat over your head.
88 | 82. Hummer Bird - when a girl is giving a guy a hummer, and he's enjoying it, she bites on his bird.
89 | 83. Bloody Mary - when a drunk guy is going down on a girl and without even realizing it after he's done, he realizes Mary was very Bloody
90 | 84. The Houdini - this maneuver is accomplished while going at it doggy style. As you feel you are about to cum, you pull out and spit on the small of her back (making her think you've finished...). It's at the point when she turns around when *BAM!* You bust your load in her face (in the eye if you've got proper aiming techniques down.) Also known as the Doug Hennings and the David Copperfield.
91 | 85. Upperdecking - This one takes practice. This maneuver requires a toilet with a tank above it, like the ones in most homes. Instead of crapping in the bowl, you shit in the tank (i.e. upperdecking). Now don't flush. When the following victim flushes, the rancid waste fills the bowl. If you play your cards right, it may ferment
92 | 86. Journey into darkness - This is the most disturbing of all. It entails shitting into another person's asshole. Not for beginners.
93 | 87. Rocky Balboa - dont shower for 2 weeks, then diarrhea down her throat at any point during sexual contact.
94 | 88. Rocky Balboa Title Punch - same as the Rocky Balboa, but in that non-showering 2 weeks all you eat is corn.
95 | I don't usually do this, but I have to give credit to Sascha and Zach for the next 2. I'm only doing this because they are my bosses kids...
96 | 89. The McDonald's Quick Draw - Get your girlfriend to talk dirty into the intercom, making the order guy start to beat off. Then while pulling up to the window, have her give you falatio till you are about to blow your beefy chunk-load. Upon pulling up to the window, tell your girl friend to yell "Draw!". Then on "three", both you and the guy blow your loads either on her or eachother.
97 | 90. Uncle Jemima - the typical dirty chef at your local Denny's or other low-class food establishment who occasionally becomes disgruntled, and takes out his frustration on your meal, via "the ass wipe" or the "French Toast Strut" seen in Road Trip.
98 | 91. Airtight - this is where a girl has a cock in each of her three holes, hence, airtight.
99 | 92. The Throne of Lightning - This is done by fucking a girl while you shit in a toilet. When you're going to blow your load, turn her over and dunk her head in the toilet, while she's bobbing for your turd plummet a river of semen in her ass. Not to be confused with "Ride the Lightning," a Metallica album
100 | 93. Abe Lincoln - You're getting a girl up the ass and give her a swift donkey punch to the back of her head, knocking her unconscious. You then turn her around and jerk off and blow your load all over her face. Then you shave her beaver and take the clippings and spread it where you jizzed on her, making a beard that looks like good ol Honest Abe's.
101 | 94. Thanksgiving - Just like the holiday, Thanksgiving is when you do a girl and then she puts her two big butt cheeks on your face like holiday hams. An overcooked thanksgiving is similar to this but instead of just putting the cheeks on your head she farts on it too.
102 | 95. PEUM - An acronym coined by a group of drunk assholes that defines the annoying (and uncontrollable) tendency to piss in multiple directions after a raucous fuck: Post-Ejaculatory Urinary Misfire.
103 | 96. The Beverly Hills Whiffer -This move is restricted to those women who think they're God's gift to the world. Find a woman of the above description. Take her home and start doggie styling her. When you're about to blow, corkscrew two fingers into her ass, scraping as much shit as you can from her. Pull out your fingers, reach around her head to stick one finger in each nostril. Pull her head back so she can see you while you yell "So, you think your shit don't stink now ?!"
104 | 97. Shanghai Shampoo - Fuck a chick until you've built up a load large enough to paint a room. Blow it all in her hair, rub it in thoroughly. When it dries it will resemble the crunchy noodles often served with chop suey.
105 | 98 . Frosting the Cake - When you are about to cum, blow a load all over her chest. Then take your dick and evenly spread the Jism around the breasts and over the nipples. Then stick some candles on it and start singing "Happy Birthday." Then blow out the candles
106 | 99. Spicey Stanley - When a girl takes hot sauce and pours it on your cock. She then proceeds to give you a blowjob, making sure all of the hot sauce is gone.
107 |
108 |
109 | #sig#Pete Mehok
110 | #sig#NaviStaff, Inc.
111 | #sig#Technical Recruiter
112 | #sig#332 Springfield Ave Suite 210
113 | #sig#Summit, NJ 07901
114 | #sig#pm@navistaff.com
115 | #sig#p:908.273.6960 Ext. 104
116 | #sig#f:908.273.6940
117 | #sig#www.navistaff.com
118 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/58_body:
--------------------------------------------------------------------------------
1 | Bert,
2 |
3 | EnronOnline Production Cluster - Use Corp Log In
4 |
5 | EnronOnline Stack Manager
6 | User ID: bmeyers
7 | Password: bmeyers
8 |
9 | Please let me know if you have any questions.
10 |
11 |
12 | #sig#Thank you,
13 |
14 | #sig#Stephanie Sever
15 | #sig#EnronOnline
16 | #sig#713-853-3465
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/59_body:
--------------------------------------------------------------------------------
1 | Your EnronOnline user ID and password is noted below. To reach the EnronOnline web site, please use: www.enrononline.com .
2 |
3 |
4 | User ID: EOL59876
5 | Password: WELCOME!
6 |
7 |
8 | Please note that the User ID and password will need to be in upper case.
9 |
10 | Please contact the EnronOnline helpdesk if you have any questions or problems getting into the system.
11 |
12 | Helpdesk# 713-853-4357
13 |
14 | #sig#Thanks,
15 | #sig#Cecil John
16 | #sig#X3-6259
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/5_body:
--------------------------------------------------------------------------------
1 | Thanks for your interest in the Assistant Trader positions. We have had sixteen people express interest in this role. Based on the criteria listed below, you have been selected to be interviewed for this role.
2 |
3 | Been in a trading or scheduling position for at least one year.
4 | Consistently received high performance ratings through the PRC process.
5 | Has consistently pursued a trading role as a career objective.
6 |
7 | We are hoping to hold interviews during the next two days and make our decision by the end of the week. The interviews will be conducted by members of the trading team. Please send me an e-mail to let me know when you WON'T be available during the next few days. You will be going through three separate half hour interviews with Crandall/Scholtes, Richter/Mallory, and Belden/Swerzbin. Thanks.
8 |
9 | #sig#Regards,
10 | #sig#Tim
11 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/61_body:
--------------------------------------------------------------------------------
1 | FYI -
2 |
3 | All trades done on behalf of El Paso will now be directly billed by El Paso. This means three things for us:
4 |
5 | 1. Format of the model stays the same.
6 | 2. Deals do not go into Enpower.
7 | 3. "Counterparty" in the model must show the billing counterparty only (to simplify Kathy's settlement process).
8 |
9 | *Per El Paso, please try to market their power through SRP, PNM, PAC, and LADWP only.* These are the counterparties El Paso has contacted regarding our new arrangement.
10 |
11 | Please let me know if you have questions.
12 |
13 | #sig#Thanks,
14 | #sig#Kate
15 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/62_body:
--------------------------------------------------------------------------------
1 | Group,
2 |
3 | As of HE2 this morning, our contract with MPC was ended. For the time being, we will no longer be balancing MPC's load.
4 |
5 | Also, we can only purchase up to 100 mws from APS transmission. Please do not put wheels in to relieve congestion for over 100 mws. But do check the transmission availability on the website and maximize our ability to relieve congestion with APS in the Southwest. These should be the only congestion wheels that we run.
6 |
7 | Also, please use PNM not AEP for any sleeving for EPE.
8 |
9 | #sig#Thanks,
10 | #sig#Bill
11 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/63_body:
--------------------------------------------------------------------------------
1 | Mr. Meyers-
2 |
3 | I received your request from eHR.com this morning. Please let me know what
4 | kinds of information you would like so I can best direct your request.
5 | Thank you very much.
6 |
7 | #sig#________________________________________
8 | #sig#Gary Hewitt
9 | #sig#Web Content Manager
10 | #sig#Watson Wyatt Worldwide
11 | #sig#1717 H Street NW * Washington, DC 20006
12 | #sig#tel: 202-715-7098 * fax: 202-715-4953
13 | #sig#gary.hewitt@watsonwyatt.com * http://www.watsonwyatt.com
14 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/64_body:
--------------------------------------------------------------------------------
1 | Please be advised that:
2 |
3 | A counterparty that wants to terminate a contract needs to send a letter of termination via fax to (713) 646-3227
4 | The termination letter needs to include:
5 | 1.) List of contracts being terminated
6 | 2.) Calculation of termination payment
7 | 3.) Expectation of payments if applicable
8 |
9 | A counterparty that unwinds deals does not mean a contract its null and void; the counterparty is still liable for the existing deals governed by the contract
10 |
11 | Please forward this on to anyone who needs this information.
12 |
13 | #sig#Thanks,
14 |
15 | #sig#Susan
16 | #sig#*7086
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/65_body:
--------------------------------------------------------------------------------
1 | For all cuts the following procedures apply...
2 |
3 | 1.) Notes as usual
4 |
5 | 2.) In the Enpower Path Confirmation, we need to make cuts as Path Confirms. Enter comments in the confirmed section. We will be zeroing Enpower deals out on Monday (hint: NOTES are VERY important--All CUTS must be notated and the notes must be put in the accordion folders).
6 |
7 | 3.) Try to have the counterparties continue any paths around us. Just "bow out" gracefully if you have no other options with the words "L - D's."
8 |
9 | 4.) If we are cut by a generator/utility going into California, we must call the HA desk at California and tell them the increment that we will be underscheduling our load by. For example, EPE will be cutting all schedules going into California on light load (100 mws). If we cannot resupply the schedules we need to tell the state that we are underscheduling our load by 100 mws. THIS IS EXTREMELY IMPORTANT. We have posted an LC with Cali for this reason.
10 |
11 | Also, EPE will be long RT 100 mws off-peak as they are no longer selling to us. 25 mws should be sold to TEP to cover a forward schedule. Stacy has set this up at PV. Please do this as a direct bill with EPE. As of Jan 01, 2002 we will no longer be marketing for EPE. Also, PV#3 will be coming off this weekend, and FC#4 will be down from HE 22 on Saturday through HE 06 on Sunday.
12 |
13 | This will probably not be a fun weekend.
14 | I appreciate your help.
15 |
16 | #sig#Bill
17 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/67_body:
--------------------------------------------------------------------------------
1 | Good Afternoon FSP users,
2 |
3 | We have just finished a comprehensive FSP rebuild & testing phase for the next release of FSP. The rebuild should address all the issues raised in the previous go-live as well as some additional capabilities for robustness. Please note that we have already executed several hundred different test case scenarios & performed some stress testing using data before the Enron collapse.
4 |
5 | What we need now is help from you to continue to identify enhancements and/or bugs with FSP so that we can go-live with FSP once the new company starts trading. Susan Amador will be available to help in any testing. You should probably also coordinate with her when you are about to start testing so that our continuing testing doesn't interfere with your testing. PWRTESTN & PWRTESTP should be all setup for your use.
6 |
7 | Here are some of the bug fixes & enhancements we have rolled-out with this latest release:
8 | 1. Bid Curve - Ability to bid on zero volumes (for import, export, and wheels)
9 | 2. Rebuild of all FSP application utilizing Direct Oracle Access components for faster database access & higher reliability.
10 | 3. Bug Fixes on database commits to eliminate database "deadlocks".
11 | 4. New aggregation scout to "stream-line" the template writing process & improve template writing performance.
12 | 5. SQL tuned "Hour Ahead" template writing. Confirmed performance of 1min 30 seconds on heavy transaction volume days.
13 | 6. And many other small previously unidentified bugs.
14 |
15 | We are very excited about this newest release and are looking forward to your feedback over the next couple of days. Susan will probably be your best contact but please feel free to contact me at anytime.
16 |
17 | #sig#Thanks, David Poston
18 | #sig## 713-345-7834
19 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/72_body:
--------------------------------------------------------------------------------
1 | Friends and Co-Workers:
2 |
3 | You are hereby cordially invited to a pre-holiday office lunch this Thursday at approximately 11:30 a.m, courtesy of the Trading Attorneys.
4 |
5 | LEGAL NOTICE: Christian Yoder and Steve Hall (hereinafter "we" or "our" or "us" ) intend to pay for the food (which will probably, but not necessarily, be pizza and salad) with post-petition Chapter 11 salary dollars that have already cleared our respective individual bank accounts and, therefore, by joining us in the meal ("Lunch") you will not be exposed to any retroactive claw-back risk by way of the Cash Account Committee. We are also investigating the possibility of providing a limited quantity of a certain kind of beverage ("Beer") under a provisional waiver of the defunct pre-petition Company's drinking policies, subject to approval by the Estate's Liquor Control Board which is being sought on an expedited basis.
6 |
7 | We feel like making this modest holiday gesture for several reasons:
8 |
9 | 1. It has been a pleasure working with you all during the past year.
10 | 2. We are optimistic about this office's prospects in the upcoming year.
11 | 3. American Express forgot to cancel our Corporate Cards.
12 |
13 | We hope you will join us for a free lunch as we look forward to a prosperous New Year.
14 |
15 | #sig#Christian and Steve
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/73_body:
--------------------------------------------------------------------------------
1 | We have reached resolution regarding the parking and mass transit issue. Until notified otherwise, the parking and mass transit subsidy will continue as an employee benefit.
2 |
3 | For those who have a payroll parking deduction, your deduction has been stopped. No deduction should occur on 12/31/01 payroll. In addition, you should receive a credit for the deduction taken for the 12/15 payroll.
4 |
5 | For those who incurred any out-of-pocket parking costs during the month of December, you may expense up to $89 for reimbursement.
6 |
7 | Beginning 1/1/02, parking and mass transit will continue as follows:
8 |
9 | WTC Parking: You should receive your parking pass in the mail. Payroll deductions will begin on the 1/15/02 paycheck. (No Action Req'd)
10 |
11 | Outside Parking Lots: Park at lot of your choice and pay out of pocket. Request reimbursement of up to $89 per month. (No payroll parking deduction will occur)
12 |
13 | Tri-Met and MAX employees: Passes are available for January. If you would like to pick up your pass, please see Debra Davidson.
14 |
15 | NOTE: Extra Tri-Met passes are available. If you would like to select mass transit as your parking option, please notify either Grace or myself before Friday, 12/21/01 EOB and we will insure a monthly pass is available.
16 |
17 | Validations: We will no longer validate parking passes, however, you are able to reimburse up to $89 per month for daily parking.
18 |
19 | If you have any questions, please let us know!
20 |
21 | #sig#ENA Human Resources
22 | #sig#Julie Kearney
23 | #sig#503) 464-7404
24 |
25 | #sig#Grace Rodriguez
26 | #sig#503) 464-8321
27 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/78_body:
--------------------------------------------------------------------------------
1 | Here is a schedule that I received from Phillip.
2 |
3 | I'm not sure if I'm reading the schedule correctly, but here are some items to think about:
4 |
5 | 1) The price to Trigen is a 3 month average of NGW and IF for Z3 and Z4 times a premium.
6 | 2) Because only one month at a time rolls off the books, there will always be an OA variance.
7 | 3) The OA variance due to the 3 month rolling average will eventually flatten out over time.
8 | 4) This OA variance, however, doesn't address the fact that Andy is getting 50% Z3 and 50% Z4 and having to deliver Z4.
9 | 5) For November 2001, there was some gas that was delivered at Z3, but the price was not adjusted for fuel and transport, so if
10 | you deliver gas at Station 65, you do not get the same price as you would for Station 85.
11 |
12 | Hopefully this makes sense.
13 |
14 | #sig#Diane
15 | #sig#x-37059
16 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/8_body:
--------------------------------------------------------------------------------
1 | This message is only relevant for those going to UBS.
2 |
3 | When exporting your EMAIL messages listed in the instructions or via the Outlook Export utility, attachments WILL NOT BE SAVED!
4 | There will be a better solution to exporting your Email messages and all your attachments emailed later today.
5 |
6 | #sig#Thank you,
7 | #sig#John Oh
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/92_body:
--------------------------------------------------------------------------------
1 | The available interruptible transportation service (IT and
2 | secondary FT) on Transco for pipeline day Monday, December
3 | 24, 2001 and continuing until further notice is summarized
4 | as follows:
5 |
6 | Tier I and Tier II (only Sta. 90 through
7 | Sta. 180 affected) 0 MDt/day
8 |
9 | South Virginia Lateral Compressor Station 167 144 MDt/day
10 |
11 | Tier III (Station 180 through East) 0 MDt/day
12 |
13 | Leidy at Centerville Regulator Station
14 | (Only deliveries downstream of Centerville
15 | regulator affected) 100 MDt/day
16 |
17 | Linden Regulator Station
18 | (Located in Union County, New Jersey) 100 MDt/day
19 |
20 | Mobile Bay Lateral 162 MDt/day
21 |
22 | The above available interruptible transportation service
23 | refers to deliveries into the affected area that have
24 | receipts upstream of the bottleneck or affected area.
25 |
26 | If you have any questions, please call John McGrath at (713)
27 | 215-2514 or Donna C. Long at (713) 215-4061.
28 |
29 | #sig#J. E. McGrath
30 | #sig#Manager
31 | #sig#Operations Planning
32 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/94_body:
--------------------------------------------------------------------------------
1 | Enron employees not participating in contract parking are eligible to receive Metro Bus Passes or Woodlands Express Passes. You may pick up a bus pass from the Parking & Transportation Desk, on Level 3 of the Enron Building, from 8:30 AM to 4:30 PM.
2 |
3 |
4 | All Metro passes offered through the Enron Parking & Transportation desk will be the Metro 30 Day Zone Pass. When you use a 30-day zone pass, bus service is divided into four zones w/ unlimited rides. The zone pass is time activated, which means that it will not become active until the first time it is used and will not expire until 30 days after it was used for the first time. New passes will be available to Enron employees upon expiration of the 30 day time period. Each zone does have a different fare values based on the distance the bus travels. This is important to any buser who wants to transfer to or travel in a higher-cost zone than the zone pass they have, in this case the difference must be paid in cash. If they are traveling in a lower cost zone, then there is no extra cost.
5 |
6 | #sig#Parking & Transportation Desk
7 | #sig#3-7060
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/96_body:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | FT AVG. BUY SELL
5 | ------ ------ ------ ------
6 | Zone 1 2.2930 1.8650 2.6200
7 | Zone 2 2.3285 1.9000 2.6700
8 | Zone 3 2.3640 1.9350 2.7200
9 | Zone 4 2.3771 1.9463 2.7347
10 | Zone 5 2.4353 1.9961 2.7998
11 | Zone 6 2.4823 2.0363 2.8524
12 |
13 | IT AVG. BUY SELL
14 | ------ ------ ------ ------
15 | Zone 1 2.2930 1.8650 2.6200
16 | Zone 2 2.3285 1.9000 2.6700
17 | Zone 3 2.3640 1.9350 2.7200
18 | Zone 4 2.4178 1.9870 2.7754
19 | Zone 5 2.6210 2.1818 2.9855
20 | Zone 6 2.7765 2.3305 3.1466
21 |
22 | FTG AVG. BUY SELL
23 | ------ ------ ------ ------
24 | Zone 1 2.2930 1.8650 2.6200
25 | Zone 2 2.3285 1.9000 2.6700
26 | Zone 3 2.3640 1.9350 2.7200
27 | Zone 4 2.4449 2.0141 2.8025
28 | Zone 5 2.7448 2.3056 3.1093
29 |
30 | AVG. BUY SELL
31 | ------ ------ ------
32 | FTNT 2.3820 1.9600 2.7200
33 |
34 |
35 | For information concerning cash out transactions, please contact
36 | Casilda Vasquez at (713)215-4504.
37 |
38 | #sig#Stella Yu
39 | #sig#Manager - Rate Systems
40 | #sig#(713)215-3381
41 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/97_body:
--------------------------------------------------------------------------------
1 | Andy, I have completed negotiation with TBG on the Settlement. TBG has executed the documents and Louise and John have approved the deal. In short, I want to make sure that you understand the effect on you. Attached is a more detailed summary but generally I removed all swing flexibility from the deal. They have baseload obligations of 12,500 MMBtu per day. We can deliver at any of the delivery points identified in the Agreement ( I will give you a copy of the Agreements) however the primary delivery point is Station 65. That is the location for all price settlements. If we delivery at an alternate location we receive additional basis differentials for moving downstream ( Station 85 ) and we pay the basis differential for moving upstream ( Sta 45, 30 ). TBG has the right to move our deliveries back to Sta 65 at anytime, if we are delivering at alternate locations. I want to check with you one more time before I execute the final documents. I think that this is a great trade for ENA if I do say so myself. My next effort is to clear up any issues with Trigen.
2 |
3 | #sig#Frank
4 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/signatures/98_body:
--------------------------------------------------------------------------------
1 | Pls confirm that you have received your new WCom calling card. Thank you.
2 |
3 | #sig#Carolyn Graham
4 | #sig#Enron NetWorks LLC
5 | #sig#Vendor Management
6 | #sig#713-345-8008
7 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/test_emails/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/datasets/test_emails/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/datasets/test_emails/email_1:
--------------------------------------------------------------------------------
1 | Andy, I have completed negotiation with TBG on the Settlement. TBG has executed the documents and Louise and John have approved the deal. In short, I want to make sure that you understand the effect on you. Attached is a more detailed summary but generally I removed all swing flexibility from the deal. They have baseload obligations of 12,500 MMBtu per day. We can deliver at any of the delivery points identified in the Agreement ( I will give you a copy of the Agreements) however the primary delivery point is Station 65. That is the location for all price settlements. If we delivery at an alternate location we receive additional basis differentials for moving downstream ( Station 85 ) and we pay the basis differential for moving upstream ( Sta 45, 30 ). TBG has the right to move our deliveries back to Sta 65 at anytime, if we are delivering at alternate locations. I want to check with you one more time before I execute the final documents. I think that this is a great trade for ENA if I do say so myself. My next effort is to clear up any issues with Trigen.
2 |
3 |
4 |
5 | Thanks,
6 | Frank
7 | E.:frank@gmail.com | T.:+995 (599)607-066
8 |
--------------------------------------------------------------------------------
/signature_extractor/datasets/test_emails/email_2:
--------------------------------------------------------------------------------
1 | Pls confirm that you have received your new WCom calling card.
2 |
3 | Thank you,
4 |
5 | Carolyn Graham
6 | Enron NetWorks LLC
7 | Vendor Management
8 | 713-345-8008
9 |
--------------------------------------------------------------------------------
/signature_extractor/feature.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.base import TransformerMixin
3 |
4 | from .preprocessing import feature_parser as txt_prep
5 |
6 |
class SignatureFeatureExtractor(TransformerMixin):
    """Convert ``(text, line, previous line, next line)`` samples to feature rows.

    Each line is described by nine integer features computed by the
    ``feature_parser`` helpers; a sample row concatenates the vectors of the
    next, previous and current line, in that order.
    """

    def line_to_vec(self, l, t):
        """Return the nine integer features of line ``l`` within full text ``t``."""
        raw_features = (
            # Features that only look at the line itself.
            txt_prep.contains_phone(l),
            txt_prep.contains_signature_word(l),
            txt_prep.has_only_quotes(l),
            txt_prep.contains_email(l),
            txt_prep.has_url(l),
            txt_prep.count_named_entities(l),
            # Features that locate the line inside the whole text.
            txt_prep.is_under_closing_phrase(t, l),
            txt_prep.is_in_second_part(t, l),
            txt_prep.is_in_last_five_lines(t, l),
        )
        # Booleans become 0/1; the named-entity count stays a plain integer.
        return [int(value) for value in raw_features]

    def transform(self, X, **kwargs):
        """Build a NumPy array with one feature row per sample in ``X``."""
        rows = []
        for text, line, prev_line, next_line in X:
            current, before, after = (
                self.line_to_vec(candidate, text)
                for candidate in (line, prev_line, next_line)
            )
            # Column order is next + previous + current.
            rows.append(after + before + current)
        return np.array(rows)

    def fit(self, X, y=None, **kwargs):
        """Do nothing and return ``self``; there is no state to learn."""
        return self

    def get_params(self, **kwargs):
        """Return an empty parameter mapping."""
        return {}
40 |
--------------------------------------------------------------------------------
/signature_extractor/models/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/models/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/models/signature_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/models/signature_model
--------------------------------------------------------------------------------
/signature_extractor/persister.py:
--------------------------------------------------------------------------------
1 | import os
2 | from sklearn.externals import joblib
3 |
4 |
def save_model(model, filename):
    """Dump ``model`` with joblib to ``signature_extractor/models/<filename>``.

    The path is relative, so it resolves against the current working
    directory.
    """
    target_path = os.path.join("signature_extractor", "models", filename)
    joblib.dump(model, target_path)
7 |
8 |
def load_model(filename):
    """Load and return the joblib object at ``signature_extractor/models/<filename>``.

    The path is relative, so it resolves against the current working
    directory.
    """
    source_path = os.path.join("signature_extractor", "models", filename)
    return joblib.load(source_path)
12 |
--------------------------------------------------------------------------------
/signature_extractor/preprocessing/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/preprocessing/.DS_Store
--------------------------------------------------------------------------------
/signature_extractor/preprocessing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/signature_extractor/preprocessing/__init__.py
--------------------------------------------------------------------------------
/signature_extractor/preprocessing/feature_parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import re
3 | import nltk
4 |
5 |
def contains_email(text):
    """Return True when ``text`` has an ``@`` with word/dot/dash characters on both sides."""
    return re.search(r'[\w\.-]+@[\w\.-]+', text) is not None
9 |
10 |
def has_only_quotes(text):
    """Return True when ``text`` is just a run of two or more dashes.

    Surrounding whitespace is allowed; anything else on the line makes the
    result False.
    """
    separator_match = re.search(r'^[\s]*---*[\s]*$', text)
    return separator_match is not None
14 |
15 |
def contains_phone(text):
    """Return True when ``text`` contains something shaped like a phone number.

    The pattern wants an optional ``+`` or ``(``, a non-zero digit, at least
    eight digits/spaces/dots/dashes/parentheses, and a closing digit.
    """
    phone_match = re.search(r'[\+\(]?[1-9][0-9 .\-\(\)]{8,}[0-9]', text)
    return phone_match is not None
19 |
20 |
def count_named_entities(text):
    """Count the labelled chunks NLTK's ``ne_chunk`` finds in ``text``.

    Returns the count when it is positive, otherwise ``False``.
    """
    total = sum(
        1
        for sentence in nltk.sent_tokenize(text)
        for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence)))
        # Only chunks exposing a ``label`` attribute are counted.
        if hasattr(chunk, 'label')
    )
    return total if total > 0 else False
31 |
32 |
def is_in_last_five_lines(text, line):
    """Return True when ``line`` occurs in one of the last five lines of ``text``.

    ``text`` is split on newlines and empty lines are dropped before
    counting. The filter runs before stripping, so whitespace-only lines are
    kept (as empty strings). ``line`` is matched as a substring, and when it
    occurs several times the last occurrence decides.

    Returns False when ``line`` is not found, or when the arguments cannot be
    processed as strings (for example ``None``).
    """
    try:
        lines = [l.strip() for l in text.split('\n') if len(l) > 0]
        matching_indexes = [i for i, l in enumerate(lines) if line in l]
    except (AttributeError, TypeError):
        # Best effort for non-string input, without also swallowing
        # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
        return False

    if not matching_indexes:
        return False
    # Indexes are ascending, so the final entry is the last occurrence.
    return len(lines) - 6 < matching_indexes[-1]
42 |
43 |
def is_in_second_part(text, line):
    """Return True when the last occurrence of ``line`` starts in the second half of ``text``.

    Positions are character offsets; a ``line`` that does not occur gives
    False.
    """
    total_chars = len(text)
    half = (50 * total_chars) // 100
    last_position = text.rfind(line)
    return last_position > -1 and total_chars - half <= last_position
52 |
53 |
def contains_signature_word(text):
    """Return True when ``text`` contains a word typical of signature blocks.

    The vocabulary covers organisation names, job titles and street-address
    terms. Matching is case-sensitive except for "Disclaimer"/"disclaimer".
    " Division" and " Address" only match when preceded by a space.
    """
    # Raw strings keep ``\.`` as a regex escape without triggering Python's
    # invalid-escape-sequence warning. ``[Dd]`` replaces ``[D|d]``, which
    # also accepted a literal "|" as the first character.
    signature_words = (
        r"Dept\.|University|Corp\.|Corporations?|College|Ave\.|Laboratory|[Dd]isclaimer| Division|"
        r"Professor|Laboratories|Institutes?|Services|Engineering|Director|Doctor|President|Sciences?| Address|"
        r"Manager|Street|St\.|Avenue"
    )
    return re.search(signature_words, text) is not None
61 |
62 |
# Closing phrases, in their original order. Regex alternation takes the first
# alternative that matches, so the order is kept unchanged.
_CLOSING_PHRASES = (
    "Best", "Cordially yours", "Fond regards", "In appreciation",
    "In sympathy", "Kind regards", "Kind thanks", "Kind wishes",
    "Many thanks", "Regards", "Respectfully", "Respectfully yours",
    "Sincerely", "Sincerely yours", "Thanks", "Thank you",
    "Thank you for your assistance in this matter",
    "Thank you for your consideration",
    "Thank you for your recommendation",
    "Thank you for your time", "Warm regards", "Warm wishes", "Warmly",
    "With appreciation", "With deepest sympathy", "With gratitude",
    "With sincere thanks", "With sympathy",
    "Your help is greatly appreciated", "Yours cordially",
    "Yours faithfully", "Yours sincerely", "Yours truly",
)
# Joined from a tuple rather than a multi-line string literal, so no newline
# or indentation leaks into any alternative.
_CLOSING_PHRASE_RE = re.compile("|".join(_CLOSING_PHRASES))


def is_under_closing_phrase(text, line):
    """Return True when ``line`` starts after the last closing phrase in ``text``.

    The position of ``line`` is its first occurrence in ``text``. When no
    closing phrase is present, any ``line`` that occurs in ``text`` yields
    True; a ``line`` that does not occur always yields False.
    """
    phrase_ends = [match.end(0) for match in _CLOSING_PHRASE_RE.finditer(text)]
    last_closing_end = max(phrase_ends, default=-1)
    return last_closing_end < text.find(line)
76 |
77 |
78 | def has_url(text):
79 | url_regex = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop
80 | |info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|
81 | be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|
82 | do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|
83 | hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|
84 | me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|
85 | pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|
86 | tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|
87 | \([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?
88 | «»“”‘’])|(?:(? 0
98 |
--------------------------------------------------------------------------------
/signature_extractor/preprocessing/mail_parser.py:
--------------------------------------------------------------------------------
1 | import nltk
2 |
3 |
def get_from_file(fname):
    """Read a text file and return its sentences, one per line.

    The file content is split with ``nltk.sent_tokenize`` and the resulting
    sentences are joined back together with newline characters.

    :param fname: path of the text file to read.
    :return: a single string holding one tokenized sentence per line.
    """
    with open(fname, 'r') as handle:
        raw_text = handle.read()

    sentences = nltk.sent_tokenize(raw_text)
    return '\n'.join(sentences)
9 |
10 |
def get_from_json():
    """Placeholder for a JSON mail loader; not implemented yet.

    Takes no arguments and always returns ``None``.
    """
    return None
13 |
14 |
def get_from_html():
    """Placeholder for an HTML mail loader; not implemented yet.

    Takes no arguments and always returns ``None``.
    """
    return None
--------------------------------------------------------------------------------
/signature_extractor/segmentation.py:
--------------------------------------------------------------------------------
1 | from .configs import configs
2 | from .persister import load_model
3 |
4 |
class EmailSegmenter:
    """Split an e-mail into body and signature with a per-line classifier.

    The classifier is loaded once, at construction time, from the model named
    by ``configs.ACTIVE_MODEL``.
    """

    def __init__(self):
        self.model = load_model(configs.ACTIVE_MODEL)

    def prepare_text_for_classification(self, text):
        """Return ``[current, previous, next]`` triples for the non-blank lines.

        Every line is stripped and blank lines are dropped.  The first kept
        line gets ``""`` as its previous line and the last kept line gets
        ``""`` as its next line.

        :param text: the raw e-mail text.
        :return: list of three-element lists, one per kept line.
        """
        stripped = (raw.strip() for raw in text.split('\n'))
        kept = [entry for entry in stripped if entry]

        # Shifted copies of the kept lines give each line its neighbours.
        preceding = [""] + kept[:-1]
        following = kept[1:] + [""]
        return [[cur, prv, nxt]
                for cur, prv, nxt in zip(kept, preceding, following)]

    def classify_lines(self, text):
        """Run the model on every non-blank line of *text*.

        :param text: the raw e-mail text.
        :return: ``(results, sequence)`` where ``results`` is a list of
            ``(prediction, line)`` tuples and ``sequence`` is the string made
            of ``str(prediction[0])`` for each line, in order.
        """
        results, flags = [], []
        for current, previous, following in self.prepare_text_for_classification(text):
            prediction = self.model.predict([(text, current, previous, following)])
            flags.append(str(prediction[0]))
            results.append((prediction, current))
        return results, "".join(flags)

    def get_longest_signature_segment(self, seq):
        """Locate the longest run of ``'1'`` characters in *seq*.

        The sequence is scanned from its end towards its start, so when
        several runs share the maximum length the one closest to the end wins.

        :param seq: string of per-line predictions.
        :return: ``(start, stop)`` slice bounds of the run, or ``(-1, -1)``
            when *seq* contains no ``'1'``.
        """
        best_bounds = (-1, -1)
        best_length, run_length = 0, 0
        for position in reversed(range(len(seq))):
            if seq[position] != '1':
                run_length = 0
                continue
            run_length += 1
            # Only a strictly longer run replaces the current best one.
            if run_length > best_length:
                best_length = run_length
                best_bounds = (position, position + run_length)
        return best_bounds

    def segment_mail(self, text):
        """Separate *text* into its body and its signature block.

        :param text: the raw e-mail text.
        :return: ``(email_body, signature_segment)``, both newline-joined
            strings built from the stripped non-blank lines.
        """
        classified, flags = self.classify_lines(text)
        start, stop = self.get_longest_signature_segment(flags)

        signature_lines = [entry[1] for entry in classified[start:stop]]
        body_lines = [entry[1] for entry in classified[:start] + classified[stop:]]
        return '\n'.join(body_lines), '\n'.join(signature_lines)
58 |
--------------------------------------------------------------------------------
/tests/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harmening/signature_extraction/d897e9d5c7eb05c2cffdf5e01bafa5b0d94f229f/tests/.DS_Store
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 |
def get_signature_extractor_folder_path(path=None):
    """Return the absolute path of the ``signature_extractor`` folder.

    The folder is the ``signature_extractor`` directory that sits next to the
    directory containing *path*.

    :param path: a file inside the tests directory, used as the anchor.
        Defaults to this module's ``__file__``.
    :return: ``<parent of the anchor's directory>/signature_extractor``.
    """
    anchor = os.path.abspath(__file__ if path is None else path)
    tests_dir = os.path.dirname(anchor)
    # Step up exactly one level.  Splitting on '/' and removing the first
    # component named 'tests' picked the wrong directory whenever an ancestor
    # folder was also called 'tests', and only worked with '/' separators.
    project_root = os.path.dirname(tests_dir)
    return os.path.join(project_root, 'signature_extractor')


# Put the computed folder on the module search path for the test modules.
sys.path.append(get_signature_extractor_folder_path())
--------------------------------------------------------------------------------
/tests/test_feature_parser.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from signature_extractor.preprocessing import feature_parser as f_parser
3 |
4 |
class TestFeatureParser(TestCase):
    """Unit tests for the line-level helpers of ``feature_parser``."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def _assert_cases(self, func, inputs, expected):
        """Assert that ``func(value)`` equals its paired expectation.

        The two lists are compared by length first, so a malformed fixture
        (for example two string literals fused by a missing comma) fails
        loudly instead of silently leaving an expectation unchecked.
        """
        self.assertEqual(len(inputs), len(expected))
        for value, wanted in zip(inputs, expected):
            self.assertEqual(func(value), wanted)

    def test_contains_phone(self):
        phone_text_tests = ["Matches +4917612345678 test first", "No phone number here",
                            "My name is Fabienne", "This is phone 333.444.5555", "My mobile num 123-444-5555",
                            "T.: 333 601 5435"]
        phone_text_tests_output = [True, False, False, True, True, True]

        self._assert_cases(f_parser.contains_phone, phone_text_tests, phone_text_tests_output)

    def test_contains_email(self):
        email_tests = ["E.: butch@gmail.com", "Our email is yolanda@becool.eu", "no email here", "this is a string"]
        email_tests_output = [True, True, False, False]

        self._assert_cases(f_parser.contains_email, email_tests, email_tests_output)

    def test_contains_signature_word(self):
        signature_words_tests = ["Manager Vincent Vega", "Quarter Pounder with Cheese in Paris", "President Marcellus Wallace",
                                 "Director Q. Tarantino", "A Royale with cheese"]
        signature_words_tests_output = [True, False, True, True, False]

        self._assert_cases(f_parser.contains_signature_word, signature_words_tests,
                           signature_words_tests_output)

    def test_has_only_quotes(self):
        quotes_tests = ["---", "-----", "", "this is just a line"]
        quotes_tests_output = [True, True, False, False]

        self._assert_cases(f_parser.has_only_quotes, quotes_tests, quotes_tests_output)

    def test_has_url(self):
        url_tests = ["WEB.: mail.com", "Our url is http://www.mia-wallace.com", "no email here",
                     "My webpage is www.ringo.com"]
        url_tests_output = [True, True, False, True]

        self._assert_cases(f_parser.has_url, url_tests, url_tests_output)

    def test_count_named_entities(self):
        named_entity_count_tests = ["My name is Winston the Wolf", "no email or names here",
                                    "I think fast, I talk fast and I need you guys to act fast if you wanna get out of this. My name is Winston.",
                                    "I solve problems"]
        named_entity_count_tests_output = [2, 0, 1, 0]

        self._assert_cases(f_parser.count_named_entities, named_entity_count_tests,
                           named_entity_count_tests_output)

    def test_is_under_closing_phrase(self):
        # The comma after the second literal was missing, which fused the last
        # two strings into one and left the third expectation unchecked.
        closing_phrase_tests = ["Hello friend, thanks here, One more time Thanks, Jules",
                                "Hello Jules! Thanks here, One more time Thanks",
                                "Hello friend Thanks Jules, One more time Thanks"]
        closing_phrase_tests_output = [True, False, False]

        self._assert_cases(lambda sample: f_parser.is_under_closing_phrase(sample, 'Jules'),
                           closing_phrase_tests, closing_phrase_tests_output)

    def test_is_in_second_part(self):
        text = """Mmmm Goddamn, Jimmie! This is some serious gourmet shit!\n
Usually, me and Vince would be happy with some freeze-dried Taster's
Choice right, but he springs this serious GOURMET shit on us! What
flavor is this?\n Thanks Jimmie, Jules."""
        line_tests = ["Jimmie", "serious gourmet shit"]
        line_tests_output = [True, False]

        self._assert_cases(lambda line: f_parser.is_in_second_part(text, line),
                           line_tests, line_tests_output)

    def test_is_in_last_five_lines(self):
        text = """
I ain't saying it's right. But you're saying a foot massage don't mean nothing, and I'm saying it does.\n
Now look, I've given a million ladies a million foot massages, and they all meant something. We act like they don't, but they do, and that's what's so fucking cool about them.\n
There's a sensuous thing going on where you don't talk about it, but
you know it, she knows it, fucking Marsellus knew it, and Antwone
should have fucking better known better.\n
I mean, that's his fucking wife, man.\n
He can't be expected to have a sense of humor about that shit.\n
You know what I'm saying?\n
That's an interesting point.\n
"""

        # The trailing 0 is a non-string input that is expected to yield False.
        line_tests = ["sense of humor", "wife", "foot massage", 0]
        line_tests_output = [True, True, False, False]

        self._assert_cases(lambda line: f_parser.is_in_last_five_lines(text, line),
                           line_tests, line_tests_output)
98 |
99 |
if __name__ == '__main__':
    # Only ``TestCase`` is imported at module level, so ``unittest`` has to be
    # imported here; without it, running this file as a script raised
    # NameError on the call below.
    import unittest

    unittest.main()
102 |
--------------------------------------------------------------------------------
/tests/test_segmentation.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from signature_extractor.segmentation import EmailSegmenter
3 |
4 |
class TestEmailSegmenter(TestCase):
    """Unit tests for the pure helper methods of ``EmailSegmenter``."""

    def setUp(self):
        # Constructing the segmenter loads the configured model.
        self.segmenter_obj = EmailSegmenter()

    def tearDown(self):
        pass

    def test_get_longest_signature_segment(self):
        tests = ["11001110", "11111", "101", "000000", "00110101110"]
        output = ["111", "11111", "1", "", "111"]
        # Guard against a fixture whose inputs and expectations drift apart.
        self.assertEqual(len(tests), len(output))
        for sequence, wanted in zip(tests, output):
            start, stop = self.segmenter_obj.get_longest_signature_segment(sequence)
            self.assertEqual(sequence[start:stop], wanted)

    def test_prepare_text_for_classification(self):
        text = "This is first line.\n This is second line.\n This one is third line.\n And this one is fourth line."
        expected = [['This is first line.', '', 'This is second line.'],
                    ['This is second line.', 'This is first line.', 'This one is third line.'],
                    ['This one is third line.', 'This is second line.', 'And this one is fourth line.'],
                    ['And this one is fourth line.', 'This one is third line.', '']]

        output = self.segmenter_obj.prepare_text_for_classification(text)
        # assertEqual reports the differing elements on failure, whereas
        # assertTrue(expected == output) only reported "False is not true".
        self.assertEqual(expected, output)
29 |
30 |
if __name__ == '__main__':
    # Only ``TestCase`` is imported at module level, so ``unittest`` has to be
    # imported here; without it, running this file as a script raised
    # NameError on the call below.
    import unittest

    unittest.main()
33 |
--------------------------------------------------------------------------------
/tests/test_signature_feature_extractor.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from signature_extractor.feature import SignatureFeatureExtractor
3 |
4 |
class TestSignatureFeatureExtractor(TestCase):
    """Unit tests for ``SignatureFeatureExtractor`` (``line_to_vec`` and ``transform``)."""

    def setUp(self):
        # Fresh extractor plus a fixed sample e-mail for every test.
        self.feature_extractor_obj = SignatureFeatureExtractor()
        # NOTE(review): the whitespace inside these literals is significant for
        # the expected vectors below - confirm before reformatting them.
        self.text = """Mark, further to our conversation...\n
Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n
or will be settled financially, however these transactions could have been set up as physical.\n
The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n
and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n
load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n
Please call me if there are any questions regarding this note.\n
Barry Tycholiz\n
Vice President, Enron North America"""
        # One row per line of the e-mail; each row is
        # [whole e-mail text, the line, the previous line, the next line],
        # with '' standing in for a missing neighbour.
        self.input_x = [[
            'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
            'Mark, further to our conversation...', '',
            'Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been'],
            [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been',
                'Mark, further to our conversation...',
                'or will be settled financially, however these transactions could have been set up as physical.'],
            [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'or will be settled financially, however these transactions could have been set up as physical.',
                'Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been',
                'The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,'],
            [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,',
                'or will be settled financially, however these transactions could have been set up as physical.',
                'and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%'],
            [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%',
                'The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,',
                'load factor and settlement would be on 100% of the volume regardless as to what physically flowed.'],
            [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'load factor and settlement would be on 100% of the volume regardless as to what physically flowed.',
                'and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%',
                'Please call me if there are any questions regarding this note.'], [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'Please call me if there are any questions regarding this note.',
                'load factor and settlement would be on 100% of the volume regardless as to what physically flowed.',
                'Barry Tycholiz'], [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'Barry Tycholiz', 'Please call me if there are any questions regarding this note.',
                'Vice President, Enron North America'], [
                'Mark, further to our conversation...\n\n Mercado transacted with Enron North America under the terms and conditions of a financial transaction and these transactions have been\n\n or will be settled financially, however these transactions could have been set up as physical.\n\n The structure of the transaction changes as a result of a few variables, including the price,day of cash settlement,\n\n and curtailment issues on San Jan deliveries which would then automatically convert the fixed price portion to 100%\n\n load factor and settlement would be on 100% of the volume regardless as to what physically flowed.\n\n Please call me if there are any questions regarding this note.\n\n Barry Tycholiz\n\n Vice President, Enron North America',
                'Vice President, Enron North America', 'Barry Tycholiz', '']]

    def tearDown(self):
        pass

    def test_line_to_vec(self):
        """``line_to_vec`` must return the expected vector for one signature line."""
        line = "Barry Tycholiz"
        output = [0, 0, 0, 0, 0, 2, 0, 0, 0]
        self.assertEqual(self.feature_extractor_obj.line_to_vec(line, self.text), output)

    def test_transform(self):
        """``transform`` must map every row of ``self.input_x`` to its expected vector."""
        # One expected row per entry of self.input_x, in the same order.
        # Presumably each row concatenates the vectors of the line and of its
        # two neighbours - TODO confirm against SignatureFeatureExtractor.
        output = [[0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0],
                  [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0],
                  [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                  [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                  [0, 0, 0, 0, 0, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                  [0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 1],
                  [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1]]

        # The comparison result must support ``.all()``, i.e. transform is
        # expected to return an array-like object rather than a plain list.
        self.assertTrue((self.feature_extractor_obj.transform(self.input_x) == output).all())
77 |
--------------------------------------------------------------------------------
/tests/test_suit.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from .test_feature_parser import TestFeatureParser
3 | from .test_signature_feature_extractor import TestSignatureFeatureExtractor
4 | from .test_segmentation import TestEmailSegmenter
5 |
6 |
def create_suite():
    """Build one suite holding every test method of the project's test cases.

    :return: a ``unittest.TestSuite`` with the tests of ``TestEmailSegmenter``,
        ``TestFeatureParser`` and ``TestSignatureFeatureExtractor``, in that
        order.
    """
    loader = unittest.defaultTestLoader
    test_suite = unittest.TestSuite()
    # Adding ``SomeTestCase()`` directly creates a single case bound to the
    # default ``runTest`` method, which these classes do not define, so none
    # of their ``test_*`` methods ever ran.  Let the loader collect them.
    for case_class in (TestEmailSegmenter, TestFeatureParser, TestSignatureFeatureExtractor):
        test_suite.addTests(loader.loadTestsFromTestCase(case_class))
    return test_suite
13 |
if __name__ == '__main__':
    # Script entry point: build the combined suite, then run it with the
    # plain-text runner.
    all_tests = create_suite()
    unittest.TextTestRunner().run(all_tests)
19 |
--------------------------------------------------------------------------------
/train_signature_classifier.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from signature_extractor.persister import save_model
4 | from signature_extractor.datasets import dataset_loader as d_loader
5 | from signature_extractor.feature import SignatureFeatureExtractor
6 |
7 | from sklearn.utils import shuffle
8 | from sklearn.neighbors import KNeighborsClassifier
9 | from sklearn.preprocessing import LabelBinarizer
10 | from sklearn.model_selection import train_test_split
11 | from sklearn.pipeline import Pipeline
12 | from sklearn.model_selection import GridSearchCV
13 | from sklearn.metrics import classification_report
14 |
15 |
def build_pipeline():
    """Assemble the untrained signature classification pipeline.

    :return: a scikit-learn ``Pipeline`` whose ``'features'`` step is a
        ``SignatureFeatureExtractor`` and whose ``'clf'`` step is a
        distance-weighted ``KNeighborsClassifier``.
    """
    steps = [
        ('features', SignatureFeatureExtractor()),
        ('clf', KNeighborsClassifier(weights='distance')),
    ]
    return Pipeline(steps)
23 |
24 |
def get_gridsearch_params():
    """Return the hyper-parameter grid explored by the grid search.

    :return: dict mapping the pipeline parameter ``clf__n_neighbors`` to the
        list of candidate neighbour counts.
    """
    return {'clf__n_neighbors': [2, 3, 5, 7, 10]}
29 |
30 |
def train_model(data, pipeline, parameters):
    """Grid-search *pipeline* over *parameters* and return the best estimator.

    The winning value of every searched parameter is printed to stdout.

    :param data: ``(X_train, y_train_bin)`` pair handed to ``GridSearchCV.fit``.
    :param pipeline: the estimator to tune.
    :param parameters: parameter grid; its keys are the names that get printed.
    :return: ``best_estimator_`` of the fitted grid search.
    """
    features, targets = data
    search = GridSearchCV(pipeline, parameters, scoring='f1', verbose=1, n_jobs=-1)
    search.fit(features, targets)

    print("Best parameters set:")
    winning_params = search.best_estimator_.get_params()
    searched_names = sorted(parameters.keys())
    for name in searched_names:
        print("\t%s: %r" % (name, winning_params[name]))
    return search.best_estimator_
41 |
42 |
def evaluate_model(model, data):
    """Print and return the classification report of *model* on *data*.

    :param model: fitted estimator exposing ``predict``.
    :param data: ``(X_test, y_test)`` pair.
    :return: the value returned by ``classification_report`` for the true
        labels and the model's predictions.
    """
    features, truth = data
    report = classification_report(truth, model.predict(features))

    divider = "-" * 25
    print(divider)
    print("Model Evaluation:")
    print("Accuracy Score:", report)
    print(divider)
    return report
52 |
53 |
def downsample_majority_class(X, y):
    """Trim the "other" samples down to the number of "sig" samples.

    The "other" rows are shuffled and cut to the "sig" count (all of them are
    kept when there are fewer), appended after the "sig" rows, and the
    combined data is shuffled once more.

    :param X: samples; assumed to be a NumPy array aligned with *y* - TODO confirm.
    :param y: labels; assumed to be a NumPy array of "sig" / "other" - TODO confirm.
    :return: ``(X_new, y_new)`` as returned by ``shuffle``.
    """
    sig_mask = y == "sig"
    other_mask = y == "other"
    sig_X, sig_y = X[sig_mask], y[sig_mask]

    # Shuffle before truncating so the retained "other" rows are a random subset.
    other_X, other_y = shuffle(X[other_mask], y[other_mask])
    other_X = other_X[:len(sig_X)]
    other_y = other_y[:len(sig_y)]

    balanced_X = np.concatenate([sig_X, other_X], axis=0)
    balanced_y = np.concatenate([sig_y, other_y], axis=0)
    return shuffle(balanced_X, balanced_y)
67 |
68 |
def main():
    """Train, evaluate and save the signature line classifier."""
    print("Loading signatures dataset")
    samples, labels = d_loader.load_signatures_dataset()

    print("Downsampling majority class")
    samples, labels = downsample_majority_class(samples, labels)

    print("Preparing data for training")
    X_train, X_test, y_train, y_test = train_test_split(
        samples, labels, test_size=0.2, random_state=124)
    # The binarizer is fitted on all labels, before the split is encoded.
    label_binarizer = LabelBinarizer().fit(labels)
    print("X_train size:", len(X_train), "X_test size:", len(X_test))

    print("Binarizing target label")
    y_train_bin = label_binarizer.transform(y_train)
    y_test_bin = label_binarizer.transform(y_test)
    print(y_train[0], y_train_bin[0])

    print("Building pipeline")
    pipeline = build_pipeline()

    print("Training classification model")
    model = train_model((X_train, y_train_bin), pipeline, get_gridsearch_params())

    print("Evaluating trained model")
    evaluate_model(model, (X_test, y_test_bin))

    print("Saving model")
    save_model(model, "signature_model")


if __name__ == '__main__':
    main()
97 |
--------------------------------------------------------------------------------