├── .dockerignore
├── .envrc
├── .gitignore
├── .travis.yml
├── HACKING.md
├── LICENSE.txt
├── MANIFEST.in
├── Makefile
├── PKGBUILD_template
├── README.md
├── __init__.py
├── brew
├── .gitignore
└── email2pdf_template.rb
├── debian
└── DEBIAN
│ ├── .gitignore
│ └── control_template
├── docker
└── email2pdf
│ └── getmail
├── email2pdf
├── email2pdf.py
├── getmailrc.sample
├── performance
└── printstats.py
├── requirements.txt
├── requirements_hacking.txt
├── setup.py
└── tests
├── BaseTestClasses.py
├── Direct
├── __init__.py
├── test_Direct_Arguments.py
├── test_Direct_AttachmentDetection.py
├── test_Direct_Basic.py
├── test_Direct_BasicPlain.py
├── test_Direct_CID.py
├── test_Direct_Errors.py
├── test_Direct_FrozenTime.py
├── test_Direct_Metadata.py
└── test_Direct_Module.py
├── Subprocess
├── __init__.py
├── test_Subprocess_Basic.py
└── test_Subprocess_MIME.py
├── UPPERCASE.png
├── __init__.py
├── basi2c16.png
└── jpeg444.jpg
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | *.deb
3 |
--------------------------------------------------------------------------------
/.envrc:
--------------------------------------------------------------------------------
1 | layout python3
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .direnv
2 | PKGBUILD
3 | *.deb
4 | cover/
5 | pkg/
6 | src/
7 | *.pkg.tar.xz
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 |
58 | # Sphinx documentation
59 | docs/_build/
60 |
61 | # PyBuilder
62 | target/
63 |
64 | .email2pdf.profile
65 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | services:
3 | - docker
4 | language: python
5 | install:
6 | - "echo 'Skip'"
7 | script: make rundocker_testing
8 | notifications:
9 | email:
10 | recipients:
11 | - secure: "Tmt2vtBW60X9digOMKdKM8NigEZX/X1wp8mbffbHaiSUkI32Y873b9ILmqF7roDhnMxJsiGQuQ/QMHsK6D5cTOs4pYwrshRySkU3TmbuHB6n5RlV5RDElFTFjs+uc0iHesQYgcIrUSqzinr6toAStAJ00Aa2RuN+137r6Et1QkY="
12 |
--------------------------------------------------------------------------------
/HACKING.md:
--------------------------------------------------------------------------------
1 | # email2pdf - Hacking
2 |
3 | This document talks about hacking/developing on email2pdf - for more
4 | information on email2pdf and how to use it, please see
5 | [README.md](https://github.com/andrewferrier/email2pdf/blob/master/README.md).
6 |
7 | In general, [bug reports/enhancement
8 | requests](https://github.com/andrewferrier/email2pdf/issues) as well as [pull
9 | requests](https://github.com/andrewferrier/email2pdf/pulls) are welcome;
10 | please note the [license
11 | conditions](https://github.com/andrewferrier/email2pdf/blob/master/LICENSE.txt).
12 | If you are trying to report an issue, please try running email2pdf with the
13 | `-vv` option to maximise the debugging output first.
14 |
15 | ## Building & Packaging
16 |
17 | All the supplied build and packaging is based on a
18 | [Makefile](https://github.com/andrewferrier/email2pdf/blob/master/Makefile).
19 | You'll need `make` if you don't have it (`sudo apt-get install make` on
20 | Ubuntu/Debian, `brew install make` on OS X).
21 |
22 | ## Design & Coding Principles
23 |
24 | * Follow [PEP-8](https://www.python.org/dev/peps/pep-0008/). Running `make
25 | analysis` will check against this and run other static code analysis checks
26 | also.
27 |
28 | * Try to keep `email2pdf` as "safe" as possible by default. Without supplying
29 | any potentially harmful command-line options, `email2pdf` will not ignore
30 | parts of the email it shouldn't, and will fail in the standard UNIX way with
31 | an error code if it has any significant doubts about the integrity of the
32 | email it's reading, or any other serious error occurs.
33 |
34 | ## Unit Tests
35 |
36 | All the unit tests are in the `tests/` directory. You can run them from the
37 | Makefile using the `unittest` or `unittest_test` targets (the second is more
38 | verbose, and stops on failing tests).
39 |
40 | All new code should be covered by a test. There is a code coverage checker
41 | target in the Makefile - run `make coverage`. You'll need to have the
42 | `coverage` and `nose` Python modules installed (`pip3 install coverage nose`)
43 | to run them.
44 |
45 | In addition to the standard dependencies from the [standard install
46 | documentation](https://github.com/andrewferrier/email2pdf/blob/master/README.md),
47 | there are some additional dependencies which will be needed to make the tests
48 | work:
49 |
50 | ### OS X
51 |
52 | Just run `pip3 install -r requirements_hacking.txt`.
53 |
54 | ### Debian/Ubuntu
55 |
56 | * `python3-freezegun` - only available in Ubuntu 14.10 onwards - see
57 | \n" + payload + "\n
"
358 |
359 | return payload
360 |
361 |
def handle_html_message_body(input_email, part):
    """Decode an HTML body part and inline its ``cid:`` image references.

    The part's payload is decoded using its declared charset (UTF-8 when none
    is declared). If that charset is unknown to Python or cannot decode the
    bytes, chardet is asked to guess; if chardet cannot guess either, the
    payload is decoded as UTF-8 with replacement characters and a warning is
    logged instead of crashing.

    Returns a tuple ``(html, cid_parts_used)`` where ``html`` is the decoded
    body with every ``cid:`` URL replaced by a ``data:`` URI (or the marker
    ``broken`` when the image cannot be found), and ``cid_parts_used`` is the
    set of message parts that were inlined.
    """
    logger = logging.getLogger("email2pdf")

    cid_parts_used = set()

    payload = part.get_payload(decode=True)
    charset = part.get_content_charset()
    if not charset:
        charset = 'utf-8'
    logger.info("Determined email is HTML with charset " + str(charset))

    try:
        payload_unicode = str(payload, charset)
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a declared charset name Python does not know.
        detection = chardet.detect(payload)
        charset = detection["encoding"]
        if charset is None:
            # chardet gives None when it cannot make a guess; concatenating
            # that into the log message used to raise TypeError.
            logger.warning("Could not determine charset of HTML body; decoding as utf-8 with replacement characters.")
            payload_unicode = str(payload, 'utf-8', 'replace')
        else:
            logger.info("Detected charset can't decode body; trying again with charset " + charset)
            payload_unicode = str(payload, charset)

    def cid_replace(cid_parts_used, matchobj):
        cid = matchobj.group(1)

        logger.debug("Looking for image for cid " + cid)
        image_part = find_part_by_content_id(input_email, cid)

        if image_part is None:
            image_part = find_part_by_content_type_name(input_email, cid)

        if image_part is not None:
            # Transfer-encoding names are case-insensitive (RFC 2045).
            transfer_encoding = str(image_part['Content-Transfer-Encoding']).strip().lower()
            assert transfer_encoding == 'base64'
            image_base64 = image_part.get_payload(decode=False)
            image_base64 = re.sub("[\r\n\t]", "", image_base64)
            image_decoded = image_part.get_payload(decode=True)
            mime_type = get_mime_type(image_decoded)
            cid_parts_used.add(image_part)
            return "data:" + mime_type + ";base64," + image_base64
        else:
            logger.warning("Could not find image cid " + cid + " in email content.")
            return "broken"

    payload = re.sub(r'cid:([\w_@.-]+)', functools.partial(cid_replace, cid_parts_used),
                     payload_unicode)

    return (payload, cid_parts_used)
406 |
407 |
def output_body_pdf(input_email, payload, output_file_name):
    """Render the HTML payload to a PDF with wkhtmltopdf and stamp metadata.

    ``payload`` is fed to wkhtmltopdf on stdin and the PDF is written to
    ``output_file_name``. Stderr output matching any pattern in
    WKHTMLTOPDF_ERRORS_IGNORE is discarded; anything left over, or a non-zero
    exit status, raises FatalException. Afterwards the headers listed in
    HEADER_MAPPING are copied from ``input_email`` into the PDF metadata.
    """
    global WKHTMLTOPDF_ERRORS_IGNORE

    logger = logging.getLogger("email2pdf")

    wkh2p_process = Popen([WKHTMLTOPDF_EXTERNAL_COMMAND, '-q', '--load-error-handling', 'ignore',
                           '--load-media-error-handling', 'ignore', '--encoding', 'utf-8', '-',
                           output_file_name], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    output, error = wkh2p_process.communicate(input=payload)
    assert output == b''

    stripped_error = str(error, 'utf-8')
    # XDG_SESSION_TYPE is often unset (cron, MDA invocation, OS X); a plain
    # os.environ[...] lookup would raise KeyError there.
    if os.environ.get('XDG_SESSION_TYPE') == 'wayland':
        w_err = r'Warning: Ignoring XDG_SESSION_TYPE=wayland on Gnome. Use QT_QPA_PLATFORM=wayland to run on ' \
                r'Wayland anyway.'
        WKHTMLTOPDF_ERRORS_IGNORE = WKHTMLTOPDF_ERRORS_IGNORE.union({w_err})

    for error_pattern in WKHTMLTOPDF_ERRORS_IGNORE:
        (stripped_error, number_of_subs_made) = re.subn(error_pattern, '', stripped_error)
        if number_of_subs_made > 0:
            logger.debug("Made " + str(number_of_subs_made) + " subs with pattern " + error_pattern)

    original_error = str(error, 'utf-8').rstrip()
    stripped_error = stripped_error.rstrip()

    # A negative return code means the process was killed by a signal; treat
    # it as a failure too rather than only checking for "> 0".
    failed = wkh2p_process.returncode != 0

    if failed and original_error == '':
        raise FatalException("wkhtmltopdf failed with exit code " + str(wkh2p_process.returncode) + ", no error output.")
    elif failed and stripped_error != '':
        raise FatalException("wkhtmltopdf failed with exit code " + str(wkh2p_process.returncode) + ", stripped error: " +
                             stripped_error)
    elif stripped_error != '':
        raise FatalException("wkhtmltopdf exited with rc = 0 but produced unknown stripped error output " + stripped_error)

    add_metadata_obj = {}

    for key in HEADER_MAPPING:
        if HEADER_MAPPING[key] in input_email:
            add_metadata_obj[key] = get_utf8_header(input_email[HEADER_MAPPING[key]])

    add_metadata_obj['Producer'] = 'email2pdf'

    add_update_pdf_metadata(output_file_name, add_metadata_obj)
449 |
450 |
def remove_invalid_urls(payload):
    """Strip ``src`` from <img> tags that are broken, blacklisted or unreachable.

    ``data:`` sources are left untouched. Returns the re-serialised HTML.
    """
    logger = logging.getLogger("email2pdf")

    document = BeautifulSoup(payload, "html5lib")

    for image_tag in document.find_all('img'):
        if not image_tag.has_attr('src'):
            continue

        source = image_tag['src']
        source_lowered = source.lower()

        if source_lowered == 'broken':
            del image_tag['src']
            continue

        if source_lowered.startswith('data'):
            logger.debug("Ignoring URL " + source)
            continue

        is_blacklisted = any(entry in source_lowered for entry in IMAGE_LOAD_BLACKLIST)

        if is_blacklisted:
            logger.debug("Removing URL that was found in blacklist " + source)
            del image_tag['src']
            continue

        logger.debug("Getting img URL " + source)

        if not can_url_fetch(source):
            logger.warning("Could not retrieve img URL " + source + ", replacing with blank.")
            del image_tag['src']

    return str(document)
482 |
483 |
def can_url_fetch(src):
    """Return True if ``src`` can be opened with urlopen, False otherwise.

    Spaces in the URL are percent-encoded before the request is made. The
    response is closed straight away; only reachability matters here.
    """
    try:
        encoded_src = src.replace(" ", "%20")
        req = Request(encoded_src)
        # Close the response so the connection is not left dangling.
        with urlopen(req):
            pass
    except (URLError, ValueError):
        # HTTPError is a subclass of URLError; ValueError covers malformed
        # or scheme-less URLs.
        return False
    else:
        return True
497 |
498 |
def handle_attachments(input_email, output_directory, add_prefix_date, ignore_floating_attachments, parts_to_ignore):
    """Write every attachment of ``input_email`` into ``output_directory``.

    Parts in ``parts_to_ignore`` are skipped. Parts without a filename are
    skipped when ``ignore_floating_attachments`` is set; otherwise a name is
    derived from the Content-ID (or AUTOGENERATED_ATTACHMENT_PREFIX) plus an
    extension guessed from the content type. When ``add_prefix_date`` is set,
    names that do not already contain a date get today's date prepended.

    Returns the number of attachment parts found (including skipped ones).
    """
    logger = logging.getLogger("email2pdf")

    parts = find_all_attachments(input_email, parts_to_ignore)
    logger.debug("Attachments found by handle_attachments: " + str(len(parts)))

    for part in parts:
        filename = extract_part_filename(part)
        if not filename:
            if ignore_floating_attachments:
                continue

            filename = get_content_id(part)
            if not filename:
                filename = AUTOGENERATED_ATTACHMENT_PREFIX

            extension = get_type_extension(part.get_content_type())
            if extension:
                filename = filename + extension

        assert filename is not None

        # The name comes from the (untrusted) email. Keep only its final
        # path component so an absolute path or "../" sequence cannot make
        # us write outside output_directory.
        filename = os.path.basename(filename)
        if filename in ('', '.', '..'):
            filename = AUTOGENERATED_ATTACHMENT_PREFIX

        if add_prefix_date:
            if not re.search(r"\d\d\d\d[-_]\d\d[-_]\d\d", filename):
                filename = datetime.now().strftime("%Y-%m-%d-") + filename

        logger.info("Extracting attachment " + filename)

        full_filename = os.path.join(output_directory, filename)
        full_filename = get_unique_version(full_filename)

        payload = part.get_payload(decode=True)
        with open(full_filename, 'wb') as output_file:
            output_file.write(payload)

    return len(parts)
535 |
536 |
def add_update_pdf_metadata(filename, update_dictionary):
    """Rewrite the PDF at ``filename`` with extra document-info entries.

    Keys of ``update_dictionary`` are given without the leading ``/``; values
    must not be None. Existing info entries are kept unless overridden. The
    new PDF is written to a temporary file and then moved over the original.
    """
    # This seems to be the only way to modify the existing PDF metadata.
    #
    # pylint: disable=protected-access, no-member

    full_update_dictionary = {'/' + k: v for k, v in update_dictionary.items()}

    with open(filename, 'rb') as input_file:
        pdf_input = PdfFileReader(input_file)
        pdf_output = PdfFileWriter()

        for page in range(pdf_input.getNumPages()):
            pdf_output.addPage(pdf_input.getPage(page))

        info_dict = pdf_output._info.getObject()

        # documentInfo is None for a PDF without an info dictionary.
        info = pdf_input.documentInfo or {}

        full_update_dictionary = dict(chain(info.items(), full_update_dictionary.items()))

        for key in full_update_dictionary:
            assert full_update_dictionary[key] is not None
            info_dict.update({NameObject(key): createStringObject(full_update_dictionary[key])})

        os_file_out, temp_file_name = tempfile.mkstemp(prefix="email2pdf_add_update_pdf_metadata", suffix=".pdf")
        # Immediately close the file as created to work around issue on
        # Windows where file cannot be opened twice.
        os.close(os_file_out)

        try:
            with open(temp_file_name, 'wb') as file_out:
                pdf_output.write(file_out)
        except Exception:
            # Don't leave a half-written temporary file behind.
            os.unlink(temp_file_name)
            raise

    shutil.move(temp_file_name, filename)
573 |
574 |
def extract_part_filename(part):
    """Return the attachment filename of ``part`` as text, or None.

    RFC 2047 encoded-words in the filename are decoded. All chunks returned
    by ``decode_header`` are joined, so a name made of several encoded or
    mixed chunks is no longer truncated to its first chunk.
    """
    logger = logging.getLogger("email2pdf")
    filename = part.get_filename()
    if filename is None:
        return None

    logger.debug("Pre-decoded filename: " + filename)
    chunks = decode_header(filename)
    if any(chunk_charset is not None for _, chunk_charset in chunks):
        logger.debug("Encoding: " + str(chunks[0][1]))
        logger.debug("Filename in bytes: " + str(chunks[0][0]))
        # Chunks without a charset are the unencoded parts of the header;
        # decode_header hands them back as bytes, and latin-1 maps every
        # byte back to a character without ever failing.
        filename = "".join(
            str(raw, chunk_charset or 'latin-1') if isinstance(raw, bytes) else raw
            for raw, chunk_charset in chunks)
        logger.debug("Post-decoded filename: " + filename)
    return filename
588 |
589 |
def get_unique_version(filename):
    """Return ``filename``, or a ``_N``-suffixed variant that does not exist yet.

    The counter is inserted before the extension and starts at 1.
    """
    # From here: http://stackoverflow.com/q/183480/27641
    stem, extension = os.path.splitext(filename)
    candidate = filename
    attempt = 0
    while os.path.isfile(candidate):
        attempt += 1
        candidate = stem + '_' + str(attempt) + extension
    return candidate
598 |
599 |
def find_part_by_content_type_name(message, content_type_name):
    """Return the first part whose Content-Type ``name`` parameter matches, else None."""
    matching_parts = (
        candidate for candidate in message.walk()
        if candidate.get_param('name', header="Content-Type") == content_type_name
    )
    return next(matching_parts, None)
605 |
606 |
def find_part_by_content_type(message, content_type):
    """Return the first part of ``message`` with the given content type, else None."""
    matching_parts = (
        candidate for candidate in message.walk()
        if candidate.get_content_type() == content_type
    )
    return next(matching_parts, None)
612 |
613 |
def find_part_by_content_id(message, content_id):
    """Return the first part whose Content-ID equals ``content_id``, bare or in angle brackets."""
    accepted_ids = (content_id, '<' + content_id + '>')
    matching_parts = (
        candidate for candidate in message.walk()
        if candidate['Content-ID'] in accepted_ids
    )
    return next(matching_parts, None)
619 |
620 |
def get_content_id(part):
    """Return the part's Content-ID without surrounding angle brackets.

    A missing or empty header is returned unchanged.
    """
    raw_id = part['Content-ID']
    if not raw_id:
        return raw_id
    return raw_id.lstrip('<').rstrip('>')
627 |
628 | # part.get_content_disposition() is only available in Python 3.5+, so this is effectively a backport so we can continue to support
629 | # earlier versions of Python 3. It uses an internal API so is a bit unstable and should be replaced with something stable when we
630 | # upgrade to a minimum of Python 3.5. See http://bit.ly/2bHzXtz.
631 |
632 |
def get_content_disposition(part):
    """Return the lower-cased disposition type of ``part`` or None if the header is absent."""
    header_value = part.get('content-disposition')
    if header_value is not None:
        # Relies on a private helper of email.message to split off parameters.
        disposition_type = email.message._splitparam(header_value)[0]
        return disposition_type.lower()
    return None
639 |
640 |
def get_type_extension(content_type):
    """Return the alphabetically first non-blacklisted extension for a MIME type, or None."""
    known_extensions = set(mimetypes.guess_all_extensions(content_type))
    usable_extensions = known_extensions - AUTOCALCULATED_FILENAME_EXTENSION_BLACKLIST

    if not usable_extensions:
        return None
    return min(usable_extensions)
648 |
649 |
def find_all_attachments(message, parts_to_ignore):
    """Collect the non-multipart parts that are neither ignored nor of a blacklisted MIME type."""
    return {
        candidate
        for candidate in message.walk()
        if candidate not in parts_to_ignore
        and not candidate.is_multipart()
        and candidate.get_content_type() not in MIME_TYPES_BLACKLIST
    }
659 |
660 |
def filter_filenamed_parts(parts):
    """Return the subset of ``parts`` that carry a filename."""
    return {candidate for candidate in parts if candidate.get_filename() is not None}
669 |
670 |
671 | def get_formatted_header_info(input_email):
672 | header_info = ""
673 |
674 | for header in FORMATTED_HEADERS_TO_INCLUDE:
675 | if input_email[header]:
676 | decoded_string = get_utf8_header(input_email[header])
677 | header_info = header_info + '' + header + ': ' + \
678 | html.escape(decoded_string) + '
'
679 |
680 | return header_info + '
'
681 |
682 | # There are various different magic libraries floating around for Python, and
683 | # this function abstracts that out. The first clause is for `pip3 install
684 | # python-magic`, and the second is for the Ubuntu package python3-magic.
685 |
686 |
def get_mime_type(buffer_data):
    """Return the MIME type of ``buffer_data`` as a str, using whichever magic binding is installed."""
    # pylint: disable=no-member
    if hasattr(magic, 'from_buffer'):
        mime_type = magic.from_buffer(buffer_data, mime=True)
        if isinstance(mime_type, bytes):
            # Older versions of python-magic seem to output bytes for the
            # mime_type name. As of Python 3.6+, it seems to be outputting
            # strings directly. Decode the value we already have rather
            # than running the detection a second time.
            mime_type = str(mime_type, 'utf-8')
    else:
        m_handle = magic.open(magic.MAGIC_MIME_TYPE)
        m_handle.load()
        mime_type = m_handle.buffer(buffer_data)

    return mime_type
702 |
703 |
def get_utf8_header(header):
    """Decode an RFC 2047 header value into a single text string.

    Byte chunks are decoded with their declared charset, or ASCII when none
    is given; text chunks are used as they are.
    """
    # There is a simpler way of doing this here:
    # http://stackoverflow.com/a/21715870/27641. However, it doesn't seem to
    # work, as it inserts a space between certain elements in the string
    # that's not warranted/correct.

    logger = logging.getLogger("email2pdf")

    chunks = decode_header(header)
    logger.debug("Decoded header: " + str(chunks))

    pieces = []
    for content, charset in chunks:
        if isinstance(content, bytes):
            pieces.append(str(content, charset or 'ASCII'))
        else:
            pieces.append(content)
    return "".join(pieces)
721 |
722 |
class WarningCountFilter(logging.Filter):
    """Pass-through logging filter that remembers whether a WARNING record was seen."""
    # pylint: disable=too-few-public-methods

    # Class-level default; flipped on the instance once a warning passes through.
    warning_pending = False

    def filter(self, record):
        """Note WARNING-level records and let every record through."""
        is_warning = record.levelno == logging.WARNING
        if is_warning:
            self.warning_pending = True
        return True
731 |
732 |
class FatalException(Exception):
    """Error that aborts the conversion; ``value`` holds the message."""

    def __init__(self, value):
        super().__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
741 |
742 |
def call_main(argv, syslog_handler, syserr_handler):
    """Run ``main`` and translate its outcome into a process exit status.

    Exit codes: 2 for a FatalException (its message is logged as an error),
    3 for any other unexpected exception (the traceback is printed), 1 when
    warnings were raised and are not being hidden. Otherwise the function
    returns normally.
    """
    logger = logging.getLogger("email2pdf")

    try:
        (warning_pending, mostly_hide_warnings) = main(argv, syslog_handler, syserr_handler)
    except FatalException as exception:
        logger.error(exception.value)
        sys.exit(2)
    except Exception:  # pylint: disable=broad-except
        # Deliberately broad: this is the top-level boundary. Exception (not
        # a bare except) lets SystemExit and KeyboardInterrupt propagate
        # instead of being reported as an internal error with exit code 3.
        traceback.print_exc()
        sys.exit(3)

    if warning_pending and not mostly_hide_warnings:
        sys.exit(1)
758 |
759 |
if __name__ == "__main__":
    # Script entry point: wire up stderr and (where available) syslog
    # logging, then hand over to call_main.
    logger_setup = logging.getLogger("email2pdf")
    logger_setup.setLevel(logging.DEBUG)
    logger_setup.propagate = False

    # Warnings and above go to stderr.
    syserr_formatter = logging.Formatter('%(levelname)s: %(message)s')
    syserr_handler_setup = logging.StreamHandler(stream=sys.stderr)
    syserr_handler_setup.setFormatter(syserr_formatter)
    syserr_handler_setup.setLevel(logging.WARNING)
    logger_setup.addHandler(syserr_handler_setup)

    # Location of the syslog socket on the platforms we know about.
    if _platform in ("linux", "linux2"):
        SYSLOG_ADDRESS = '/dev/log'
    elif _platform == "darwin":
        SYSLOG_ADDRESS = '/var/run/syslog'
    else:
        SYSLOG_ADDRESS = None
        logger_setup.warning("I don't know this platform (" + _platform + "); cannot log to syslog.")

    syslog_handler_setup = None
    if SYSLOG_ADDRESS and os.path.exists(SYSLOG_ADDRESS):
        SYSLOG_FORMATTER = logging.Formatter('%(pathname)s[%(process)d] %(levelname)s %(lineno)d %(message)s')
        syslog_handler_setup = logging.handlers.SysLogHandler(address=SYSLOG_ADDRESS)
        syslog_handler_setup.setFormatter(SYSLOG_FORMATTER)
        syslog_handler_setup.setLevel(logging.INFO)
        logger_setup.addHandler(syslog_handler_setup)

    call_main(sys.argv, syslog_handler_setup, syserr_handler_setup)
789 |
--------------------------------------------------------------------------------
/email2pdf.py:
--------------------------------------------------------------------------------
1 | email2pdf
--------------------------------------------------------------------------------
/getmailrc.sample:
--------------------------------------------------------------------------------
1 | [retriever]
2 | type = SimpleIMAPSSLRetriever
3 | server = mail.example.com
4 | username = pdf@example.com
5 | password = mypassword
6 |
7 | [options]
8 | delete = true
9 |
10 | [destination]
11 | type = MDA_external
12 | path = /where/I/installed/email2pdf
13 | arguments = ("-d", "/where/I/want/PDFs/to/go", "--enforce-syslog", "--mostly-hide-warnings")
14 |
--------------------------------------------------------------------------------
/performance/printstats.py:
--------------------------------------------------------------------------------
1 | import pstats
2 |
# Load the profile written by email2pdf and list the callers of the 30 most
# time-consuming functions.
profile_stats = pstats.Stats('.email2pdf.profile')
profile_stats.strip_dirs()
profile_stats.sort_stats('time')
profile_stats.print_callers(30)
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4>=4.6.3
2 | html5lib
3 | lxml
4 | pypdf2
5 | python-magic
6 | reportlab
7 |
--------------------------------------------------------------------------------
/requirements_hacking.txt:
--------------------------------------------------------------------------------
1 | flake8
2 | freezegun
3 | nose
4 | pdfminer.six
5 | reportlab
6 | requests
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
# Package metadata for setuptools.
setup(
    name='email2pdf',
    # NOTE(review): the version string is empty here - presumably filled in
    # by the build; confirm before publishing.
    version='',
    # NOTE(review): only the test packages are listed; the email2pdf script
    # itself is not declared here - confirm this is intended.
    packages=['tests', 'tests.Direct', 'tests.Subprocess'],
    url='https://github.com/andrewferrier/email2pdf',
    license='MIT',
    author='Andrew Ferrier',
    description='email2pdf is a Python script to convert emails to PDF.',
    # Runtime dependencies; the same list appears in requirements.txt.
    install_requires=[
        'beautifulsoup4>=4.6.3',
        'html5lib',
        'lxml',
        'pypdf2',
        'python-magic',
        'reportlab',
    ],
)
20 |
--------------------------------------------------------------------------------
/tests/BaseTestClasses.py:
--------------------------------------------------------------------------------
1 | from PyPDF2 import PdfFileReader
2 | from datetime import datetime
3 | from datetime import timedelta
4 | from email import encoders
5 | from email.header import Header
6 | from email.mime.base import MIMEBase
7 | from email.mime.image import MIMEImage
8 | from email.mime.multipart import MIMEMultipart
9 | from email.mime.text import MIMEText
10 | from email.utils import formatdate
11 | from reportlab.pdfgen import canvas
12 | from requests.exceptions import RequestException
13 | from subprocess import Popen, PIPE
14 |
15 | import io
16 | import imghdr
17 | import logging
18 | import inspect
19 | import os
20 | import os.path
21 | import pdfminer.high_level
22 | import requests
23 | import shutil
24 | import sys
25 | import tempfile
26 | import unittest
27 |
28 |
29 | class Email2PDFTestCase(unittest.TestCase):
30 | isOnline = None
31 | examineDir = None
32 |
33 | time_invoked = None
34 | time_completed = None
35 |
36 | NONEXIST_IMG = 'http://www.andrewferrier.com/nonexist.jpg'
37 | NONEXIST_IMG_BLACKLIST = 'http://www.emltrk.com/nonexist.jpg'
38 | EXIST_IMG = 'https://raw.githubusercontent.com/andrewferrier/email2pdf/master/tests/basi2c16.png'
39 | EXIST_IMG_UPPERCASE = 'https://raw.githubusercontent.com/andrewferrier/email2pdf/master/tests/UPPERCASE.png'
40 | COMMAND = os.path.normpath(os.path.join(os.getcwd(), 'email2pdf'))
41 |
42 | DEFAULT_FROM = "from@example.org"
43 | DEFAULT_TO = "to@example.org"
44 | DEFAULT_SUBJECT = "Subject of the email"
45 |
46 | JPG_FILENAME = 'tests/jpeg444.jpg'
47 | PNG_FILENAME = 'tests/basi2c16.png'
48 |
49 | JPG_SIZE = os.path.getsize(JPG_FILENAME)
50 | PNG_SIZE = os.path.getsize(PNG_FILENAME)
51 |
52 | WARNINGS_AND_ERRORS_POSTFIX = "_warnings_and_errors.txt"
53 | ORIGINAL_EMAIL_POSTFIX = "_original.eml"
54 |
def setUp(self):
    """Create a scratch directory under /tmp and run the online/examine-dir checks."""
    scratch_directory = tempfile.mkdtemp(dir='/tmp')
    self.workingDir = scratch_directory
    self._check_online()
    self._check_examine_dir()
59 |
def getTimeStamp(self, my_time):
    """Format ``my_time`` the way email2pdf names its output files."""
    return "{:%Y-%m-%dT%H-%M-%S}".format(my_time)
62 |
def existsByTime(self, path=None):
    """Return True if a timestamp-named PDF exists in ``path`` (default: the working directory)."""
    return bool(self.getTimedFilename(path))
68 |
def existsByTimeWarning(self):
    """Return True if a timestamp-named warnings-and-errors file exists."""
    return bool(self.getTimedFilename(postfix=self.WARNINGS_AND_ERRORS_POSTFIX))
74 |
def existsByTimeOriginal(self):
    """Return True if a timestamp-named copy of the original email exists."""
    return bool(self.getTimedFilename(postfix=self.ORIGINAL_EMAIL_POSTFIX))
80 |
def getWarningFileContents(self):
    """Return the text of the timestamp-named warnings-and-errors file."""
    warning_path = self.getTimedFilename(postfix=self.WARNINGS_AND_ERRORS_POSTFIX)
    with open(warning_path) as warning_file:
        contents = warning_file.read()
    return contents
85 |
def assertValidOriginalFileContents(self, filename=None):
    """Assert that the saved original email matches ``self.msg`` byte for byte.

    ``filename`` defaults to the timestamp-named original-email file. Any
    failure (file missing, unreadable, or different contents) is reported as
    an AssertionError.
    """
    contents = None
    try:
        if not filename:
            filename = self.getTimedFilename(postfix=self.ORIGINAL_EMAIL_POSTFIX)
        with open(filename, 'rb') as f:
            contents = f.read()

        assert contents == self.msg.as_bytes()
    except Exception as error:
        # contents is bytes (or None if the read itself failed), so use its
        # repr; concatenating it directly to a str raised TypeError and hid
        # the real failure.
        raise AssertionError("General error validating email, contents=" + repr(contents) +
                             "\n, self.msg.as_string=" + self.msg.as_string()) from error
97 |
def getTimedFilename(self, path=None, postfix=".pdf"):
    """Return the first existing timestamp-named file in ``path``, or None.

    Candidate names are built from every time ``_timerange`` yields between
    invocation and completion, with ``postfix`` appended. ``path`` defaults
    to the working directory.
    """
    directory = self.workingDir if path is None else path

    candidates = (
        os.path.join(directory, self.getTimeStamp(moment) + postfix)
        for moment in self._timerange(self.time_invoked, self.time_completed)
    )
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)
108 |
def addHeaders(self, frm=DEFAULT_FROM, to=DEFAULT_TO, subject=DEFAULT_SUBJECT, subject_encoding=None):
    """Set Subject, From, To and Date on ``self.msg``.

    Falsy ``frm``/``to``/``subject`` values leave that header unset. When
    ``subject_encoding`` is given, ``subject`` must be bytes and is wrapped
    in an encoded Header; otherwise it must be a str.
    """
    if subject and subject_encoding:
        assert isinstance(subject, bytes)
        self.msg['Subject'] = Header(subject, subject_encoding)
    elif subject:
        assert isinstance(subject, str)
        self.msg['Subject'] = subject

    for header_name, header_value in (('From', frm), ('To', to)):
        if header_value:
            self.msg[header_name] = header_value

    self.msg['Date'] = formatdate()
126 |
def invokeAsSubprocess(self, inputFile=False, outputDirectory=None, outputFile=None, extraParams=None,
                       expectOutput=False, okToExist=False):
    """Run email2pdf as a child process and return ``(returncode, stdout, stderr)``.

    ``inputFile`` may be a str (used as the raw email, passed via ``-i``), a
    truthy non-str (``self.msg`` is passed via ``-i``), or falsy (``self.msg``
    is piped to stdin). Without ``outputDirectory`` the process runs in the
    working directory. Asserts that stdout is empty unless ``expectOutput``.
    """
    if isinstance(inputFile, str):
        input_content = bytes(inputFile, 'utf-8')
    else:
        input_content = self.msg.as_bytes()

    options = [Email2PDFTestCase.COMMAND]
    input_file_handle = None

    try:
        if inputFile:
            input_file_handle = tempfile.NamedTemporaryFile()
            options.extend(['-i', input_file_handle.name])
            input_file_handle.write(input_content)
            input_file_handle.flush()
            my_stdin = None
            my_input = None
        else:
            my_stdin = PIPE
            my_input = input_content

        if outputDirectory:
            options.extend(['-d', outputDirectory])

        if outputFile:
            options.extend(['-o', outputFile])
            if not okToExist:
                assert not os.path.exists(outputFile)

        if extraParams is None:
            extraParams = []

        options.extend(extraParams)

        self.time_invoked = datetime.now()
        if outputDirectory is None:
            my_cwd = self.workingDir
        else:
            my_cwd = None

        email2pdf_process = Popen(options, stdin=my_stdin, stdout=PIPE, stderr=PIPE, cwd=my_cwd)

        # communicate() already waits for the process to finish.
        output, error = email2pdf_process.communicate(my_input)
        self.time_completed = datetime.now()
    finally:
        # Remove the temporary input file even if something above failed.
        if input_file_handle is not None:
            input_file_handle.close()

    output = str(output, "utf-8")
    error = str(error, "utf-8")

    if expectOutput:
        self.assertNotEqual("", output)
    else:
        self.assertEqual("", output)

    return (email2pdf_process.returncode, output, error)
184 |
def invokeDirectly(self, outputDirectory=None, outputFile=None, extraParams=None, completeMessage=None, okToExist=False):
    """Call email2pdf's ``main`` in-process and return everything it logged.

    The email (``completeMessage`` if given, otherwise ``self.msg``) is
    written to a temporary file passed via ``-i``. Output goes to
    ``outputDirectory`` or the working directory. All handlers and filters
    are detached from the ``email2pdf`` logger afterwards.
    """
    module_path = self._get_original_script_path()
    email2pdf = self._get_email2pdf_object(module_path)

    if completeMessage:
        bytes_message = bytes(completeMessage, 'utf-8')
    else:
        bytes_message = self.msg.as_bytes()

    with tempfile.NamedTemporaryFile() as input_file_handle:
        options = [module_path, '-i', input_file_handle.name]
        input_file_handle.write(bytes_message)
        input_file_handle.flush()

        options.extend(['-d', outputDirectory if outputDirectory else self.workingDir])

        if outputFile:
            options.extend(['-o', outputFile])
            if not okToExist:
                assert not os.path.exists(outputFile)

        if extraParams is None:
            extraParams = []

        options.extend(extraParams)

        stream = io.StringIO()
        stream_handler = logging.StreamHandler(stream)
        log = logging.getLogger('email2pdf')
        log.propagate = False
        log.setLevel(logging.DEBUG)
        log.addHandler(stream_handler)

        self.time_invoked = datetime.now()

        try:
            email2pdf.main(options, None, stream_handler)
        finally:
            self.time_completed = datetime.now()
            # Iterate over copies: removing from the list being iterated
            # skips every other entry and leaves handlers/filters attached.
            for handler in list(log.handlers):
                handler.close()
                log.removeHandler(handler)
            for log_filter in list(log.filters):
                log.removeFilter(log_filter)
            stream_handler.close()

        error = stream.getvalue()

    return error
234 |
def setPlainContent(self, content, charset='UTF-8'):
    """Make ``self.msg`` a text/plain message with the given body and charset.

    Raises Exception if ``self.msg`` is a multipart message.
    """
    message = self.msg
    if isinstance(message, MIMEMultipart):
        raise Exception("Cannot call setPlainContent() on a MIME-based message.")

    message.set_default_type("text/plain")
    message.set_payload(content)
    message.set_charset(charset)
242 |
def attachHTML(self, content, charset=None):
    """Attach ``content`` to the multipart ``self.msg`` as a text/html part."""
    assert isinstance(self.msg, MIMEMultipart)

    # According to the docs
    # (https://docs.python.org/3.3/library/email.mime.html), setting
    # charset explicitly to None is different from not setting it. Not
    # sure how that works. But for the moment, sticking with this
    # style of invocation to be safe.
    html_part = MIMEText(content, 'html', charset) if charset else MIMEText(content, 'html')
    self.msg.attach(html_part)
255 |
def attachText(self, content, charset=None):
    """Attach ``content`` to the multipart ``self.msg`` as a text/plain part."""
    assert isinstance(self.msg, MIMEMultipart)

    # Only pass the charset through when one was actually requested.
    text_part = MIMEText(content, 'plain', charset) if charset else MIMEText(content, 'plain')
    self.msg.attach(text_part)
263 |
def attachPDF(self, string, filePrefix="email2pdf_unittest_file",
              extension="pdf", mainContentType="application", subContentType="pdf", no_filename=False):
    """Generate a one-line PDF containing ``string`` and attach it to ``self.msg``.

    The PDF is built in a temporary file that is deleted afterwards. With
    ``no_filename`` the attachment is added without a filename. Returns the
    base name of the temporary file.
    """
    file_descriptor, file_name = tempfile.mkstemp(prefix=filePrefix, suffix="." + extension)
    # mkstemp hands back an open descriptor; close it so it is not leaked -
    # the file is reopened by name below.
    os.close(file_descriptor)

    try:
        pdf_canvas = canvas.Canvas(file_name)
        pdf_canvas.drawString(0, 500, string)
        pdf_canvas.save()

        with open(file_name, "rb") as open_handle:
            if no_filename:
                self.attachAttachment(mainContentType, subContentType, open_handle.read(), None)
            else:
                self.attachAttachment(mainContentType, subContentType, open_handle.read(), file_name)

        return os.path.basename(file_name)
    finally:
        os.unlink(file_name)
282 |
def attachImage(self, content_id=None, jpeg=True, content_type=None, content_type_add_filename=False, inline=False, force_filename=False, extension=None):
    """Attach one of the fixture images to the multipart message.

    Args:
        content_id: value for a ``Content-ID`` header, added when truthy.
        jpeg: read ``self.JPG_FILENAME`` when true, else ``self.PNG_FILENAME``.
        content_type: replacement ``Content-Type`` header value, when truthy.
        content_type_add_filename: add the generated name as the ``name``
            parameter of ``Content-Type``.
        inline: use an ``inline`` disposition instead of ``attachment``.
        force_filename: with ``inline``, still include the file name.
        extension: file extension for the generated name (no leading dot);
            ``None`` selects ``jpg``/``png``, ``''`` selects no extension.

    Returns:
        The generated file name, or ``None`` for an inline image whose
        disposition carries no file name.
    """
    source_path = self.JPG_FILENAME if jpeg else self.PNG_FILENAME
    default_suffix = 'jpg' if jpeg else 'png'
    file_suffix = default_suffix if extension is None else extension
    suffix = "." + file_suffix if file_suffix != '' else file_suffix

    # The temporary file only serves to obtain a unique file name.
    with tempfile.NamedTemporaryFile(prefix="email2pdf_unittest_image", suffix=suffix) as temp_file:
        basic_file_name = os.path.basename(temp_file.name)

    with open(source_path, 'rb') as image_file:
        image = MIMEImage(image_file.read())

    if content_id:
        image.add_header('Content-ID', content_id)
    if content_type:
        self._replace_header(image, 'Content-Type', content_type)
    if content_type_add_filename:
        image.set_param('name', basic_file_name, header='Content-Type')

    name_in_disposition = not inline or force_filename
    if not inline:
        disposition = 'attachment; filename="%s"' % basic_file_name
    elif force_filename:
        disposition = 'inline; filename="%s"' % basic_file_name
    else:
        disposition = 'inline'
    self._replace_header(image, 'Content-Disposition', disposition)
    self.msg.attach(image)

    return basic_file_name if name_in_disposition else None
323 |
def attachAttachment(self, mainContentType, subContentType, data, file_name=None, file_name_encoding=None):
    """Attach ``data`` as a base64-encoded part of the multipart message.

    Args:
        mainContentType: MIME main type of the part.
        subContentType: MIME sub type of the part.
        data: payload of the part.
        file_name: when falsy the part is marked ``inline``; otherwise its
            base name is advertised as the attachment's file name.
        file_name_encoding: when truthy, the file name is RFC 2047-encoded
            with this charset and placed in the ``name`` parameter of
            ``Content-Type`` instead of in ``Content-Disposition``.
    """
    assert isinstance(self.msg, MIMEMultipart)

    part = MIMEBase(mainContentType, subContentType)
    part.set_payload(data)
    encoders.encode_base64(part)

    if not file_name:
        part.add_header('Content-Disposition', 'inline')
    elif not file_name_encoding:
        part.add_header('Content-Disposition', 'attachment', filename=os.path.basename(file_name))
    else:
        # part.add_header would be simpler, but its encoding mechanism puts
        # the file name in Content-Disposition, encoded subtly differently.
        # That does not reproduce a real-world problematic email, which
        # looked like this:
        #
        # Content-Type: APPLICATION/pdf; NAME="=?UTF-8?Q?123.pdf?="
        # Content-Transfer-Encoding: QUOTED-PRINTABLE
        # Content-Disposition: attachment
        content_type_value = mainContentType + '/' + subContentType
        encoded_name = Header(os.path.basename(file_name), file_name_encoding).encode()
        content_type_value = content_type_value + '; name="' + encoded_name + '"'
        del part['Content-Type']
        part['Content-Type'] = content_type_value
        part.add_header('Content-Disposition', 'attachment')

    self.msg.attach(part)
355 |
def assertIsJPG(self, filename):
    """Assert that ``imghdr`` identifies the file at ``filename`` as a JPEG."""
    detected_type = imghdr.what(filename)
    self.assertEqual(detected_type, 'jpeg')
358 |
def getMetadataField(self, pdf_filename, field_name):
    """Return a document-information field of a PDF, or ``None`` if absent.

    ``field_name`` is given without the leading ``/`` used by PDF keys.
    """
    key = '/' + field_name
    with open(pdf_filename, 'rb') as file_input:
        document_info = PdfFileReader(file_input).getDocumentInfo()
        if key not in document_info.keys():
            return None
        return document_info[key]
368 |
def getPDFText(self, filename):
    """Extract the text of the PDF at ``filename`` with tabs turned into spaces.

    Returns ``None`` when the file does not exist or pdfminer reports a
    ``PDFSyntaxError`` while reading it.
    """
    if not os.path.exists(filename):
        return None

    try:
        extracted = pdfminer.high_level.extract_text(filename)
    except pdfminer.pdfparser.PDFSyntaxError:
        return None

    return extracted.replace("\t", " ")
380 |
def touch(self, fname):
    """Create ``fname`` as an empty file, truncating it if it already exists."""
    with open(fname, 'w'):
        pass
383 |
def find_mount_point(self, path):
    """Return the mount point of the file system that contains ``path``.

    ``path`` may be relative and does not have to exist; it is made
    absolute first and then walked upwards until ``os.path.ismount``
    accepts a directory.
    """
    # A relative path would shrink to '' through dirname(), which is never a
    # mount point, so the walk would never end. Anchor it to the cwd first.
    path = os.path.abspath(path)
    while not os.path.ismount(path):
        parent = os.path.dirname(path)
        if parent == path:
            # Reached the top of the hierarchy; nothing further to climb.
            break
        path = parent
    return path
388 |
389 | def _timerange(self, start_time, end_time):
390 | start_time = start_time.replace(microsecond=0)
391 | end_time = end_time.replace(microsecond=0)
392 | for step in range(int((end_time - start_time).seconds) + 1):
393 | yield start_time + timedelta(0, step)
394 |
395 | def _replace_header(self, mime_base, header, value):
396 | mime_base.__delitem__(header)
397 | mime_base.add_header(header, value)
398 |
399 | @classmethod
400 | def _get_original_script_path(cls):
401 | module_path = inspect.getfile(inspect.currentframe())
402 | module_path = os.path.join(os.path.dirname(os.path.dirname(module_path)), 'email2pdf')
403 |
404 | return module_path
405 |
406 | @classmethod
407 | def _get_email2pdf_object(cls, module_path):
408 | import importlib.machinery
409 | loader = importlib.machinery.SourceFileLoader('email2pdf', module_path)
410 | return loader.load_module()
411 |
@classmethod
def _check_examine_dir(cls):
    """Create the shared output examination directory on first use.

    When ``Email2PDFTestCase.examineDir`` is still ``None`` a fresh temporary
    directory is created under ``/tmp``, stored there and announced on
    stdout. Later calls do nothing.
    """
    if Email2PDFTestCase.examineDir is not None:
        return

    Email2PDFTestCase.examineDir = '/tmp'
    Email2PDFTestCase.examineDir = tempfile.mkdtemp(dir=Email2PDFTestCase.examineDir)
    print("Output examination directory: " + Email2PDFTestCase.examineDir)
418 |
@classmethod
def _check_online(cls):
    """Determine once whether the network is reachable.

    When ``Email2PDFTestCase.isOnline`` is still ``None``, an HTTP GET for
    ``Email2PDFTestCase.EXIST_IMG`` is attempted and the flag is set to
    ``True`` on success or ``False`` on any ``RequestException``. Progress
    is printed to stdout. Later calls do nothing.
    """
    if Email2PDFTestCase.isOnline is not None:
        return

    print("Checking if online... ", end="")
    sys.stdout.flush()
    try:
        # Without a timeout an unresponsive network would block the whole
        # test run; a timeout surfaces as a RequestException, i.e. offline.
        request = requests.get(Email2PDFTestCase.EXIST_IMG, headers={'Connection': 'close'}, timeout=30)
        request.raise_for_status()
    except RequestException as exception:
        Email2PDFTestCase.isOnline = False
        print("No (" + str(exception) + ")")
    else:
        Email2PDFTestCase.isOnline = True
        print("Yes.")
432 |
def tearDown(self):
    """Remove the test's working directory and everything inside it."""
    working_dir = self.workingDir
    shutil.rmtree(working_dir)
435 |
--------------------------------------------------------------------------------
/tests/Direct/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewferrier/email2pdf/c3b20226bc255a75f52c762aece66c58fb76b2c4/tests/Direct/__init__.py
--------------------------------------------------------------------------------
/tests/Direct/test_Direct_Arguments.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from email.mime.multipart import MIMEMultipart
3 |
4 | import os
5 |
6 | from tests import BaseTestClasses
7 |
8 |
class Direct_Arguments(BaseTestClasses.Email2PDFTestCase):
    """Command-line argument tests that invoke email2pdf directly."""

    def setUp(self):
        """Start every test with an empty multipart message."""
        super(Direct_Arguments, self).setUp()
        self.msg = MIMEMultipart()

    def _working_path(self, filename):
        """Return ``filename`` joined onto the test's working directory."""
        return os.path.join(self.workingDir, filename)

    def _date_prefixed(self, filename):
        """Return ``filename`` prefixed with today's date as ``YYYY-MM-DD-``."""
        return datetime.now().strftime("%Y-%m-%d-") + filename

    def _assert_no_warning_or_original_files(self):
        """Assert that neither the warning file nor the original-email file exists."""
        self.assertFalse(self.existsByTimeWarning())
        self.assertFalse(self.existsByTimeOriginal())

    def _assert_warning_and_original_files(self, warning_pattern):
        """Assert both side files exist and the warning file matches ``warning_pattern``."""
        self.assertTrue(self.existsByTimeWarning())
        self.assertTrue(self.existsByTimeOriginal())
        self.assertRegex(self.getWarningFileContents(), warning_pattern)
        self.assertValidOriginalFileContents()

    def _check_no_attachments(self, extra_params):
        """Shared body of the --no-attachments tests, run with ``extra_params``."""
        self.addHeaders()
        self.attachText("Some basic textual content")
        attached_names = [
            self.attachPDF("Some PDF content", mainContentType="application", subContentType="octet-stream"),
            self.attachPDF("Some PDF content"),
            self.attachImage(),
        ]
        error = self.invokeDirectly(extraParams=extra_params)
        self.assertEqual('', error)
        self.assertTrue(self.existsByTime())
        for attached_name in attached_names:
            self.assertFalse(os.path.exists(self._working_path(attached_name)))
        self.assertRegex(self.getPDFText(self.getTimedFilename()), "Some basic textual content")
        self._assert_no_warning_or_original_files()

    def _check_verbose(self, verbosity_flag):
        """Shared body of the verbosity tests, run with ``verbosity_flag``."""
        self.attachText("Hello!")
        error = self.invokeDirectly(extraParams=[verbosity_flag])
        self.assertNotEqual('', error)
        self.assertTrue(self.existsByTime())
        self.assertRegex(self.getPDFText(self.getTimedFilename()), "Hello!")
        self._assert_no_warning_or_original_files()

    def test_no_body(self):
        # --no-body with nothing attached: no PDF, but a warning and the original email.
        error = self.invokeDirectly(extraParams=['--no-body'])
        self.assertFalse(self.existsByTime())
        self.assertRegex(error, "body.*any.*attachments")
        self._assert_warning_and_original_files("body.*any.*attachments")

    def test_no_body_but_some_attachments(self):
        # --no-body with an attachment: only the attachment is written out.
        filename = self.attachPDF("Some PDF content", mainContentType="application", subContentType="octet-stream")
        self.invokeDirectly(extraParams=['--no-body'])
        self.assertFalse(self.existsByTime())
        self._assert_no_warning_or_original_files()
        detached_path = self._working_path(filename)
        self.assertTrue(os.path.exists(detached_path))
        self.assertRegex(self.getPDFText(self._working_path(filename)), "Some PDF content")

    def test_no_body_mostly_hide_warnings(self):
        # Same as test_no_body, but the warning must not reach the log output.
        error = self.invokeDirectly(extraParams=['--no-body', '--mostly-hide-warnings'])
        self.assertFalse(self.existsByTime())
        self.assertEqual("", error)
        self._assert_warning_and_original_files("body.*any.*attachments")

    def test_no_attachments(self):
        self._check_no_attachments(['--no-attachments'])

    def test_no_attachments_mostly_hide_warnings(self):
        self._check_no_attachments(['--no-attachments', '--mostly-hide-warnings'])

    def test_no_body_and_no_attachments(self):
        # The two options are mutually exclusive and must be rejected.
        self.addHeaders()
        self.attachText("Some basic textual content")
        self.attachPDF("Some PDF content", mainContentType="application", subContentType="octet-stream")
        self.attachPDF("Some PDF content")
        self.attachImage()
        with self.assertRaisesRegex(Exception, "attachments.*not allowed with.*body"):
            self.invokeDirectly(extraParams=['--no-body', '--no-attachments'])
        self.assertFalse(self.existsByTime())
        self._assert_no_warning_or_original_files()

    def test_verbose_and_mostly_hide_warnings(self):
        # The two options are mutually exclusive and must be rejected.
        with self.assertRaisesRegex(Exception, "mostly-hide.*not allowed with.*verbose"):
            self.invokeDirectly(extraParams=['--verbose', '--mostly-hide-warnings'])
        self.assertFalse(self.existsByTime())
        self._assert_no_warning_or_original_files()

    def test_headers(self):
        # --headers: header names and the body text all appear in the PDF.
        path = os.path.join(self.examineDir, "headers.pdf")
        self.addHeaders()
        self.attachText("Hello!")
        error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
        self.assertEqual('', error)
        self.assertTrue(os.path.exists(path))
        pdf_text = self.getPDFText(path)
        for expected in ("Subject", "From", "To", "Hello"):
            self.assertRegex(pdf_text, expected)
        self._assert_no_warning_or_original_files()

    def test_add_prefix_date(self):
        # --add-prefix-date: only the attachment without a date in its name gets one.
        self.addHeaders()
        self.attachText("Some basic textual content")
        undated = self.attachPDF("Some PDF content")
        already_dated = [
            self.attachPDF("Some PDF content", filePrefix="unittest_file_2014-01-01"),
            self.attachPDF("Some PDF content", filePrefix="unittest_2014-01-01_file"),
            self.attachPDF("Some PDF content", filePrefix="2014-01-01_unittest_file"),
        ]
        for name in [undated] + already_dated:
            self.assertFalse(os.path.exists(self._working_path(name)))
        error = self.invokeDirectly(extraParams=['--add-prefix-date'])
        self.assertEqual('', error)
        self.assertTrue(self.existsByTime())
        self.assertFalse(os.path.exists(self._working_path(undated)))
        for name in already_dated:
            self.assertTrue(os.path.exists(self._working_path(name)))
        self.assertTrue(os.path.exists(self._working_path(self._date_prefixed(undated))))
        for name in already_dated:
            self.assertRegex(self.getPDFText(self._working_path(name)), "Some PDF content")
        self.assertRegex(self.getPDFText(self._working_path(self._date_prefixed(undated))), "Some PDF content")
        self._assert_no_warning_or_original_files()

    def test_verbose(self):
        self._check_verbose('-v')

    def test_veryverbose(self):
        self._check_verbose('-vv')
151 |
--------------------------------------------------------------------------------
/tests/Direct/test_Direct_AttachmentDetection.py:
--------------------------------------------------------------------------------
1 | from email.mime.multipart import MIMEMultipart
2 |
3 | import os
4 |
5 | from tests.BaseTestClasses import Email2PDFTestCase
6 |
7 |
class AttachmentDetection(Email2PDFTestCase):
    """Tests for how attachments of various declared and actual types are handled."""

    BODY_TEXT = "Some basic textual content"
    PDF_TEXT = "Some PDF content"

    def setUp(self):
        """Start every test with an empty multipart message."""
        super(AttachmentDetection, self).setUp()
        self.msg = MIMEMultipart()

    def _working_path(self, filename):
        """Return ``filename`` joined onto the test's working directory."""
        return os.path.join(self.workingDir, filename)

    def _add_headers_and_body(self):
        """Add the default headers and the standard plain-text body part."""
        self.addHeaders()
        self.attachText(self.BODY_TEXT)

    def _assert_no_warning_or_original_files(self):
        """Assert that neither the warning file nor the original-email file exists."""
        self.assertFalse(self.existsByTimeWarning())
        self.assertFalse(self.existsByTimeOriginal())

    def _assert_body_pdf_text(self):
        """Assert that the timed output PDF contains the standard body text."""
        self.assertRegex(self.getPDFText(self.getTimedFilename()), self.BODY_TEXT)

    def _assert_clean_run_with_file(self, error, filename):
        """Assert a silent run produced the timed PDF and detached ``filename``."""
        self.assertEqual('', error)
        self.assertTrue(self.existsByTime())
        self.assertTrue(os.path.exists(self._working_path(filename)))

    def _check_detached_pdf(self, **attach_pdf_options):
        """Shared body of the PDF tests; options are forwarded to ``attachPDF``."""
        self._add_headers_and_body()
        filename = self.attachPDF(self.PDF_TEXT, **attach_pdf_options)
        error = self.invokeDirectly()
        self._assert_clean_run_with_file(error, filename)
        self._assert_body_pdf_text()
        self.assertRegex(self.getPDFText(self._working_path(filename)), self.PDF_TEXT)
        self._assert_no_warning_or_original_files()

    def _check_detached_jpeg(self, **attach_image_options):
        """Shared body of the JPEG tests; options are forwarded to ``attachImage``."""
        self._add_headers_and_body()
        image_filename = self.attachImage(jpeg=True, **attach_image_options)
        error = self.invokeDirectly()
        self._assert_clean_run_with_file(error, image_filename)
        self.assertIsJPG(self._working_path(image_filename))
        self._assert_body_pdf_text()
        self._assert_no_warning_or_original_files()

    def _check_detached_generic(self, sub_content_type, data, file_name):
        """Shared body of the tests attaching an arbitrary ``application/*`` file."""
        self._add_headers_and_body()
        self.attachAttachment("application", sub_content_type, data, file_name)
        error = self.invokeDirectly()
        self._assert_clean_run_with_file(error, file_name)
        self._assert_body_pdf_text()
        self._assert_no_warning_or_original_files()

    def test_pdf_as_octet_stream(self):
        self._check_detached_pdf(mainContentType="application", subContentType="octet-stream")

    def test_pdf_with_invalid_extension(self):
        self._check_detached_pdf(extension="pdf")

    def test_pdf_as_octet_stream_with_invalid_extension(self):
        self._check_detached_pdf(extension="xyz", mainContentType="application", subContentType="octet-stream")

    def test_pdf_as_octet_stream_no_body(self):
        # With --no-body only the attachment is written, not the timed PDF.
        self._add_headers_and_body()
        filename = self.attachPDF(self.PDF_TEXT, mainContentType="application", subContentType="octet-stream")
        error = self.invokeDirectly(extraParams=['--no-body'])
        self.assertEqual('', error)
        self.assertFalse(self.existsByTime())
        self.assertTrue(os.path.exists(self._working_path(filename)))
        self.assertRegex(self.getPDFText(self._working_path(filename)), self.PDF_TEXT)
        self._assert_no_warning_or_original_files()

    def test_jpeg_as_octet_stream(self):
        self._check_detached_jpeg(content_type="application/octet-stream")

    def test_jpeg_with_invalid_extension(self):
        self._check_detached_jpeg(extension="blah")

    def test_jpeg_as_octet_stream_with_invalid_extension(self):
        self._check_detached_jpeg(content_type="application/octet-stream", extension="xyz")

    def test_word_document(self):
        self._check_detached_generic("vnd.openxmlformats-officedocument.wordprocessingml.document",
                                     "Word document content", "somefile.docx")

    def test_unidentified_file(self):
        self._check_detached_generic("data", "some data in some format", "somefile.xyz")

    def test_attachment_filename_has_encoding(self):
        # The file name is carried RFC 2047-encoded in Content-Type; it must still be detached.
        path = self._working_path("somefile.xyz")
        self.attachAttachment("application", "data", "some data in some format", "somefile.xyz", file_name_encoding="utf-8")
        (_return_code, _output, error) = self.invokeAsSubprocess(extraParams=['--no-body'])
        self.assertTrue(os.path.exists(path))
        self.assertEqual('', error)
        self._assert_no_warning_or_original_files()
136 |
--------------------------------------------------------------------------------
/tests/Direct/test_Direct_Basic.py:
--------------------------------------------------------------------------------
1 | from email.header import Header
2 | from email.mime.multipart import MIMEMultipart
3 |
4 | import os
5 |
6 | from tests import BaseTestClasses
7 |
8 |
9 | class Direct_Basic(BaseTestClasses.Email2PDFTestCase):
10 | def setUp(self):
11 | super(Direct_Basic, self).setUp()
12 | self.msg = MIMEMultipart()
13 |
14 | def test_simple(self):
15 | self.addHeaders()
16 | error = self.invokeDirectly()
17 | self.assertTrue(self.existsByTime())
18 | self.assertEqual('', error)
19 | self.assertFalse(self.existsByTimeWarning())
20 | self.assertFalse(self.existsByTimeOriginal())
21 |
22 | def test_missing_from_to(self):
23 | path = os.path.join(self.examineDir, "missing_from_to.pdf")
24 | self.addHeaders(frm=None, to=None)
25 | error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
26 | self.assertTrue(os.path.exists(path))
27 | self.assertEqual('', error)
28 | self.assertFalse(self.existsByTimeWarning())
29 | self.assertFalse(self.existsByTimeOriginal())
30 |
31 | def test_internationalised_subject(self):
32 | path = os.path.join(self.examineDir, "internationalised_subject.pdf")
33 | self.addHeaders(subject=bytes("Hello!", 'iso-8859-1'), subject_encoding='iso-8859-1')
34 | error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
35 | self.assertTrue(os.path.exists(path))
36 | self.assertEqual('', error)
37 | self.assertFalse(self.existsByTimeWarning())
38 | self.assertFalse(self.existsByTimeOriginal())
39 |
40 | def test_internationalised_subject2(self):
41 | path = os.path.join(self.examineDir, "internationalised_subject_jp.pdf")
42 | self.addHeaders(subject='=?iso-2022-jp?B?GyRCOiNHLyRiSSwkOiRkJGo/ayQyJGsbKEIhIRskQkcvS3ZBMCRO?=')
43 | error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
44 | self.assertTrue(os.path.exists(path))
45 | self.assertEqual('', error)
46 | self.assertFalse(self.existsByTimeWarning())
47 | self.assertFalse(self.existsByTimeOriginal())
48 |
49 | def test_internationalised_subject3(self):
50 | path = os.path.join(self.examineDir, "internationalised_subject_de.pdf")
51 | self.addHeaders(subject='Ihre Anfrage, Giesestra=?utf-8?B?w58=?=e 5')
52 | error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
53 | self.assertTrue(os.path.exists(path))
54 | self.assertEqual('', error)
55 | self.assertFalse(self.existsByTimeWarning())
56 | self.assertFalse(self.existsByTimeOriginal())
57 |
58 | def test_internationalised_subject4(self):
59 | path = os.path.join(self.examineDir, "internationalised_subject_complex.pdf")
60 | header = Header()
61 | header.append(bytes('£100', 'iso-8859-1'), 'iso-8859-1')
62 | header.append(bytes(' is != how much ', 'utf-8'), 'utf-8')
63 | header.append(bytes('I have to spend!', 'iso-8859-15'), 'iso-8859-15')
64 | self.addHeaders(subject=header.encode())
65 | error = self.invokeDirectly(outputFile=path, extraParams=['--headers'])
66 | self.assertTrue(os.path.exists(path))
67 | self.assertEqual('', error)
68 | self.assertFalse(self.existsByTimeWarning())
69 | self.assertFalse(self.existsByTimeOriginal())
70 |
71 | def test_contains_left_angle_bracket_mime(self):
72 | path = os.path.join(self.examineDir, "left_angle_bracket_mime.pdf")
73 | self.attachText("')
18 | error = self.invokeDirectly(extraParams=['--no-body'])
19 | self.assertFalse(self.existsByTime())
20 | self.assertRegex(error, "body.*any.*attachments")
21 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, 'myid.jpg')))
22 | self.assertTrue(self.existsByTimeWarning())
23 | self.assertRegex(self.getWarningFileContents(), "body.*any.*attachments")
24 | self.assertTrue(self.existsByTimeOriginal())
25 | self.assertValidOriginalFileContents()
26 |
27 | def test_inline_image_with_filename_no_body(self):
28 | self.addHeaders()
29 | image_filename = self.attachImage('myid', inline=True, force_filename=True)
30 | self.attachHTML('
')
31 | error = self.invokeDirectly(extraParams=['--no-body'])
32 | self.assertEqual('', error)
33 | self.assertFalse(self.existsByTime())
34 | self.assertTrue(os.path.exists(os.path.join(self.workingDir, image_filename)))
35 | self.assertFalse(self.existsByTimeWarning())
36 | self.assertFalse(self.existsByTimeOriginal())
37 |
38 | def test_inline_image_and_pdf(self):
39 | self.addHeaders()
40 | self.attachImage('myid', inline=True)
41 | self.attachHTML('
')
42 | pdf_file_name = self.attachPDF("Some PDF content")
43 | error = self.invokeDirectly(extraParams=['--no-body'])
44 | self.assertEqual('', error)
45 | self.assertFalse(self.existsByTime())
46 | self.assertTrue(os.path.exists(os.path.join(self.workingDir, pdf_file_name)))
47 | self.assertRegex(self.getPDFText(os.path.join(self.workingDir, pdf_file_name)), "Some PDF content")
48 | self.assertFalse(self.existsByTimeWarning())
49 | self.assertFalse(self.existsByTimeOriginal())
50 |
51 | def test_embedded_image(self):
52 | path = os.path.join(self.examineDir, "embeddedImage.pdf")
53 | self.addHeaders()
54 | image_filename = self.attachImage('myid')
55 | self.attachHTML('
')
56 | error = self.invokeDirectly(outputFile=path)
57 | self.assertEqual('', error)
58 | self.assertTrue(os.path.exists(path))
59 | self.assertLess(Email2PDFTestCase.JPG_SIZE, os.path.getsize(path))
60 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, image_filename)))
61 | self.assertFalse(self.existsByTimeWarning())
62 | self.assertFalse(self.existsByTimeOriginal())
63 |
64 | # This test is an attempt to recreate a real-world failing email where the image attachment looked like:
65 | #
66 | # Content-Type: image/png; name=map_8dff3523-1a2d-4fc8-926f-d18e93964f3d
67 | # Content-Disposition: inline; filename=map_8dff3523-1a2d-4fc8-926f-d18e93964f3d
68 | # Content-Transfer-Encoding: base64
69 | # Content-ID: <>
70 | #
71 | # And the HTML looked like:
72 | #
73 | #
74 |
75 | def test_embedded_image_cid_empty(self):
76 | path = os.path.join(self.examineDir, "embeddedImageCIDEmpty.pdf")
77 | self.addHeaders()
78 | image_filename = self.attachImage('<>', jpeg=False, inline=True, force_filename=True, content_type_add_filename=True, extension="")
79 | self.attachHTML('
')
80 | error = self.invokeDirectly(outputFile=path)
81 | self.assertEqual('', error)
82 | self.assertTrue(os.path.exists(path))
83 | self.assertLess(Email2PDFTestCase.PNG_SIZE, os.path.getsize(path))
84 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, image_filename)))
85 | self.assertFalse(self.existsByTimeWarning())
86 | self.assertFalse(self.existsByTimeOriginal())
87 |
88 | def test_embedded_image_with_complex_name(self):
89 | path = os.path.join(self.examineDir, "embeddedImageWithComplexName.pdf")
90 | self.addHeaders()
91 | image_filename = self.attachImage('myid@A34A.1A23E', jpeg=False)
92 | self.attachHTML('
')
93 | error = self.invokeDirectly(outputFile=path)
94 | self.assertEqual('', error)
95 | self.assertTrue(os.path.exists(path))
96 | self.assertLess(Email2PDFTestCase.PNG_SIZE, os.path.getsize(path))
97 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, image_filename)))
98 | self.assertFalse(self.existsByTimeWarning())
99 | self.assertFalse(self.existsByTimeOriginal())
100 |
101 | def test_embedded_image_invalid_cid(self):
102 | self.addHeaders()
103 | image_filename = self.attachImage('myid')
104 | self.attachHTML('
')
105 | error = self.invokeDirectly()
106 | self.assertRegex(error, "(?i)could not find image")
107 | self.assertTrue(self.existsByTime())
108 | self.assertGreater(Email2PDFTestCase.JPG_SIZE, os.path.getsize(self.getTimedFilename()))
109 | self.assertTrue(os.path.exists(os.path.join(self.workingDir, image_filename)))
110 | self.assertTrue(self.existsByTimeWarning())
111 | self.assertRegex(self.getWarningFileContents(), "(?i)could not find image")
112 | self.assertTrue(self.existsByTimeOriginal())
113 | self.assertValidOriginalFileContents()
114 |
115 | def test_embedded_image_invalid_cid_output_file(self):
116 | path = os.path.join(self.workingDir, "test_embedded_image_invalid_cid_output_file.pdf")
117 | self.addHeaders()
118 | image_filename = self.attachImage('myid')
119 | self.attachHTML('
')
120 | error = self.invokeDirectly(outputFile=path)
121 | self.assertRegex(error, "(?i)could not find image")
122 | self.assertTrue(os.path.exists(path))
123 | self.assertGreater(Email2PDFTestCase.JPG_SIZE, os.path.getsize(path))
124 | self.assertTrue(os.path.exists(os.path.join(self.workingDir, image_filename)))
125 | warning_filename = os.path.join(self.workingDir, "test_embedded_image_invalid_cid_output_file_warnings_and_errors.txt")
126 | self.assertTrue(os.path.exists(warning_filename))
127 | with open(warning_filename) as f:
128 | warning_file_contents = f.read()
129 | self.assertRegex(warning_file_contents, "(?i)could not find image")
130 | original_email_filename = os.path.join(self.workingDir, "test_embedded_image_invalid_cid_output_file_original.eml")
131 | self.assertTrue(os.path.exists(original_email_filename))
132 | self.assertValidOriginalFileContents(filename=original_email_filename)
133 |
134 | def test_embedded_image_png(self):
135 | path = os.path.join(self.examineDir, "embeddedImagePNG.pdf")
136 | self.addHeaders()
137 | image_filename = self.attachImage('myid', jpeg=False)
138 | self.attachHTML('
')
139 | error = self.invokeDirectly(outputFile=path)
140 | self.assertEqual('', error)
141 | self.assertTrue(os.path.exists(path))
142 | self.assertLess(Email2PDFTestCase.PNG_SIZE, os.path.getsize(path))
143 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, image_filename)))
144 | self.assertFalse(self.existsByTimeWarning())
145 | self.assertFalse(self.existsByTimeOriginal())
146 |
147 | def test_embedded_image_cid_underscore(self):
148 | self.addHeaders()
149 | image_filename = self.attachImage('
')
151 | error = self.invokeDirectly()
152 | self.assertEqual('', error)
153 | self.assertTrue(self.existsByTime())
154 | self.assertLess(Email2PDFTestCase.JPG_SIZE, os.path.getsize(self.getTimedFilename()))
155 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, image_filename)))
156 | self.assertFalse(self.existsByTimeWarning())
157 | self.assertFalse(self.existsByTimeOriginal())
158 |
159 | def test_embedded_image_extra_html_content(self):
160 | if self.isOnline:
161 | self.addHeaders()
162 | image_filename = self.attachImage('myid')
163 | self.attachHTML('
' +
164 | '
ɯɐɹƃoɹd ɟpdᄅlᴉɐɯǝ ǝɥʇ ɟo ʇsǝʇ ɐ sᴉ sᴉɥʇ ollǝH
") 66 | (rc, output, error) = self.invokeAsSubprocess() 67 | self.assertEqual(0, rc) 68 | self.assertTrue(self.existsByTime()) 69 | self.assertEqual('', error) 70 | self.assertRegex(self.getPDFText(self.getTimedFilename()), "ɯɐɹƃoɹd\sɟpdᄅlᴉɐɯǝ\sǝɥʇ\sɟo\sʇsǝʇ\sɐ\ssᴉ\ssᴉɥʇ\sollǝH") 71 | self.assertFalse(self.existsByTimeWarning()) 72 | self.assertFalse(self.existsByTimeOriginal()) 73 | 74 | def test_html_entities_currency(self): 75 | path = os.path.join(self.examineDir, "htmlEntitiesCurrency.pdf") 76 | self.addHeaders() 77 | self.attachHTML(b'Pounds: \xc2\xa37.14, Another Pounds: £7.14'.decode('utf-8')) 78 | (rc, output, error) = self.invokeAsSubprocess(outputFile=path) 79 | self.assertEqual(0, rc) 80 | self.assertEqual('', error) 81 | self.assertTrue(os.path.exists(path)) 82 | self.assertRegex(self.getPDFText(path), "Pounds:\s£7.14,\sAnother\sPounds:\s£7.14") 83 | self.assertFalse(self.existsByTimeWarning()) 84 | self.assertFalse(self.existsByTimeOriginal()) 85 | 86 | def test_html_poundsign_iso88591(self): 87 | self.addHeaders() 88 | path = os.path.join(self.examineDir, "html_poundsign_iso88591.pdf") 89 | self.attachHTML("Hello - this email costs \xa35!", charset="ISO-8859-1") 90 | (rc, output, error) = self.invokeAsSubprocess(outputFile=path) 91 | self.assertEqual(0, rc) 92 | self.assertEqual('', error) 93 | self.assertTrue(os.path.exists(path)) 94 | self.assertRegex(self.getPDFText(path), "Hello\s-\sthis\semail\scosts\s\xa35!") 95 | self.assertFalse(self.existsByTimeWarning()) 96 | self.assertFalse(self.existsByTimeOriginal()) 97 | 98 | def test_text_poundsign_iso88591(self): 99 | self.addHeaders() 100 | path = os.path.join(self.examineDir, "text_poundsign_iso88591.pdf") 101 | self.attachText("Hello - this email costs \xa35!", charset="ISO-8859-1") 102 | (rc, output, error) = self.invokeAsSubprocess(outputFile=path) 103 | self.assertEqual(0, rc) 104 | self.assertEqual('', error) 105 | self.assertTrue(os.path.exists(path)) 106 | 
self.assertRegex(self.getPDFText(path), "Hello\s-\sthis\semail\scosts\s\xa35!") 107 | self.assertFalse(self.existsByTimeWarning()) 108 | self.assertFalse(self.existsByTimeOriginal()) 109 | 110 | def test_plaincontent_poundsign_utf8_8bit(self): 111 | input_email = ("From: \"XYZ\"Some basic HTML content
") 136 | (rc, output, error) = self.invokeAsSubprocess() 137 | self.assertEqual(0, rc) 138 | self.assertEqual('', error) 139 | self.assertTrue(self.existsByTime()) 140 | self.assertRegex(self.getPDFText(self.getTimedFilename()), "Some\sbasic\sHTML\scontent") 141 | self.assertFalse(self.existsByTimeWarning()) 142 | self.assertFalse(self.existsByTimeOriginal()) 143 | 144 | def test_wrong_charset_html(self): 145 | self.addHeaders() 146 | broken_body = b"Something with raw accents: \xe9
" 147 | self.attachHTML(broken_body, charset="utf-8") 148 | (rc, output, error) = self.invokeAsSubprocess() 149 | self.assertEqual(0, rc) 150 | self.assertEqual('', error) 151 | self.assertTrue(self.existsByTime()) 152 | self.assertRegex(self.getPDFText(self.getTimedFilename()), "Something\swith\sraw\saccents:\s\é") 153 | self.assertFalse(self.existsByTimeWarning()) 154 | self.assertFalse(self.existsByTimeOriginal()) 155 | 156 | def test_pdf(self): 157 | self.addHeaders() 158 | self.attachText("Some basic textual content") 159 | filename = self.attachPDF("Some PDF content") 160 | (rc, output, error) = self.invokeAsSubprocess() 161 | self.assertEqual(0, rc) 162 | self.assertEqual('', error) 163 | self.assertTrue(self.existsByTime()) 164 | self.assertTrue(os.path.exists(os.path.join(self.workingDir, filename))) 165 | self.assertRegex(self.getPDFText(self.getTimedFilename()), "Some\sbasic\stextual\scontent") 166 | self.assertRegex(self.getPDFText(os.path.join(self.workingDir, filename)), "Some\sPDF\scontent") 167 | self.assertFalse(self.existsByTimeWarning()) 168 | self.assertFalse(self.existsByTimeOriginal()) 169 | 170 | def test_plaincontent_outputfileoverrides_with_attachments(self): 171 | mainFilename = os.path.join(self.examineDir, "outputFileOverridesWithAttachments.pdf") 172 | self.attachText("Hello!") 173 | attachmentFilename = self.attachPDF("Some PDF content") 174 | with tempfile.TemporaryDirectory() as tempdir: 175 | (rc, output, error) = self.invokeAsSubprocess(outputDirectory=tempdir, outputFile=mainFilename) 176 | self.assertEqual(0, rc) 177 | self.assertEqual('', error) 178 | self.assertFalse(self.existsByTime()) 179 | self.assertFalse(self.existsByTime(tempdir)) 180 | self.assertFalse(os.path.exists(os.path.join(tempdir, "outputFileOverrides.pdf"))) 181 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, "outputFileOverrides.pdf"))) 182 | self.assertTrue(os.path.exists(mainFilename)) 183 | 
self.assertFalse(os.path.exists(os.path.join(self.examineDir, attachmentFilename))) 184 | self.assertFalse(os.path.exists(os.path.join(self.workingDir, attachmentFilename))) 185 | self.assertTrue(os.path.exists(os.path.join(tempdir, attachmentFilename))) 186 | self.assertRegex(self.getPDFText(mainFilename), "Hello!") 187 | self.assertRegex(self.getPDFText(os.path.join(tempdir, attachmentFilename)), "Some\sPDF\scontent") 188 | self.assertFalse(self.existsByTimeWarning()) 189 | self.assertFalse(self.existsByTimeOriginal()) 190 | 191 | def test_remote_image_does_exist(self): 192 | if self.isOnline: 193 | path = os.path.join(self.examineDir, "remoteImageDoesExist.pdf") 194 | self.addHeaders() 195 | self.attachHTML('