├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── pdfparser
    ├── __init__.py
    ├── __main__.py
    ├── custom_error.py
    ├── main_operations.py
    ├── user_choice.py
    ├── user_inputs.py
    └── utilities.py
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | bin/
 28 | include/
 29 | 
 30 | 
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # SageMath parsed files
 86 | *.sage.py
 87 | 
 88 | # Environments
 89 | .env
 90 | .venv
 91 | env/
 92 | venv/
 93 | ENV/
 94 | env.bak/
 95 | venv.bak/
 96 | 
 97 | # Spyder project settings
 98 | .spyderproject
 99 | .spyproject
100 | 
101 | # Rope project settings
102 | .ropeproject
103 | 
104 | # mkdocs documentation
105 | /site
106 | 
107 | # mypy
108 | .mypy_cache/
109 | 
110 | ##my files
111 | check.py
112 | *.pdf
113 | deletes/
114 | sorted/
115 | splits/
116 | merged/
117 | pyvenv.cfg
118 | .DS_Store
119 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Udit Vashisht
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uditvashisht/sg-pdfparser/4c1f55e221212d54aa4a29e4a5334557f6dc66ac/MANIFEST.in


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SaralGyaan PDF Parser
 2 | SaralGyaan PDF Parser is a command-line PDF parsing tool which allows you to:-
 3 | 1. Delete Pages from a PDF
 4 | 2. Merge PDFs
 5 | 3. Sort Pages of a PDF
 6 | 4. Split a PDF
 7 | ## Installation
 8 | You can install SaralGyaan PDF Parser from [PyPI](https://pypi.org/project/sg-pdfparser/):
 9 | ```pip install sg-pdfparser```
10 | The PDF Parser supports Python 3.6 and above.
11 | ## How to use?
12 | The SaralGyaan PDF Parser is a command line application, named pdfparser. To start it you can simply open the terminal, go to the folder containing the PDF file(s) to be parsed and call the program:-
13 | ```
14 | $ pdfparser
15 | Welcome to PDF Parser
16 | 
17 | What do you want to do?
18 | 
19 | 1. Delete Pages from a PDF
20 | 2. Merge PDFs
21 | 3. Sort Pages of a PDF
22 | 4. Split a PDF
23 | 
24 | Press ctrl + C to exit.
25 | Enter your choice (1-4):
26 | ```
27 | ## Delete Pages from a PDF
28 | If you select the option to delete pages, it will ask for the filename followed by '.pdf'
29 | ```
30 | Enter your choice (1-4): 1
31 | Delete Pages from a PDF
32 | Enter the name of file with extension(.pdf)
33 | ```
34 | Make sure the PDF file exists at that location. Once you provide a valid PDF file, you will be given two options:
35 | ```
36 | What do you want to do?
37 |  1. Delete specific pages
38 |  2. Keep Specific pages
39 |  ```
40 |  Both the options accept comma separated values of pages or page ranges or both e.g. 1, 2, 3-5 or 1-2 or 2-3, 4-6.
41 |  Note that if you use "Keep specific pages" and enter the pages in a different order, e.g. 1-2, 6-4, the pages will be re-arranged in that order too.
42 | 
43 |  ## Merge PDFs
44 |  This option accepts comma-separated file names and merges the files in the order in which they are provided.
45 | 
46 |  ## Sort pages of a PDF
47 |  Sort pages gives you three options:
48 |  ```
49 |  What do you want to do?
50 |  1. Reverse order of all the pages
51 |  2. Swap Pages
52 |  3. Move certain pages to a specific index
53 | ```
54 | 1. The first one, will simply reverse the order of the pages.
55 | 2. The second one will swap two pages, you can input multiple or single swaps e.g. 1-3, 2-7, 8-9, etc.
56 | 3. The third one will move a page to a certain page number (not index). It also accepts comma-separated values. So 21-2 will move page number 21 to position 2, shifting the rest of the pages to the right.
57 | 
58 | ## Split a PDF
59 | This gives you two options:-
60 | ```
61 | What do you want to do?
62 |  1. Split all the pages
63 |  2. Split specific pages
64 | ```
65 | 1. The first option will make n splits for n-paged PDF file.
66 | 2. The second one will split the PDF into the pages or ranges provided, e.g. 1, 3, 9-22 will give three split files: the first page, the third page, and pages nine to twenty-two.
67 | 
68 | ## Video Usage Guide
69 | You can also check out our [Usage Guide](https://youtu.be/BenY3DeEaf4) on Youtube.
70 | 
71 | ## License
72 | 
73 | © 2020 Udit Vashisht
74 | 
75 | This repository is licensed under the MIT license. See LICENSE for details.
76 | 


--------------------------------------------------------------------------------
/pdfparser/__init__.py:
--------------------------------------------------------------------------------
1 | # Version of sg-pdfparser
2 | 
3 | __version__ ="1.0.2"
4 | 


--------------------------------------------------------------------------------
/pdfparser/__main__.py:
--------------------------------------------------------------------------------
  1 | """SaralGyaan PDF Parser
  2 | Installation:
  3 | ------------
  4 | $ pip install sg-pdfparser
  5 | 
  6 | Usage:
  7 | ------
  8 | $ pdfparser
  9 | 
 10 | Available Options:
 11 | 1. Delete Pages from a PDF
 12 | 2. Merge PDFs
 13 | 3. Sort Pages of a PDF
 14 | 4. Split a PDF
 15 | 
 16 | Inputs:
 17 | 
 18 | Top Level:
 19 | 
 20 | Single PDF file with file extension (.pdf) for options 1, 3 & 4 and at least two PDF files (separated by comma) with file extension (.pdf) for option 2.
 21 | 
 22 | Each Function:
 23 | 
 24 | Delete Pages from a PDF:
 25 | 
 26 | Page numbers or page ranges separated by comma or both e.g. 1, 3, 4 or 1-2, 4 or 1-7, 9-11
 27 | 
 28 | Merge PDFs:
 29 | 
 30 | At least two PDF files (separated by comma) with file extension (.pdf). The PDFs will be merged in the order of input.
 31 | 
 32 | Sort Pages of PDF:
 33 | 
 34 | Reverse the order will reverse the order of the pages.
 35 | 
 36 | A hyphen separated combination of page numbers. Can add multiple separated by comma e.g. 1-9, 2-6
 37 | 
 38 | Swap:- Will swap page number 1 & 9 and 2 & 6.
 39 | Move:- Will move page number 1 to Page 9 and 2 to 6 moving the rest of the pages to the right.
 40 | 
 41 | Split a PDF:
 42 | 
 43 | Split all will split n-paged files into n split files one page in each.
 44 | 
 45 | Page numbers or page ranges separated by comma or both e.g. 1, 3, 4 or 1-2, 4 or 1-7, 9-11
 46 | 
 47 | 1-2, 4 will make two PDF files.
 48 | 
 49 | 
 50 | Contact:
 51 | --------
 52 | - admin@saralgyaan.com
 53 | 
 54 | More information is available at:
 55 | - https://pypi.org/project/sg-pdfparser/
 56 | - https://github.com/uditvashisht/sg-pdfparser
 57 | 
 58 | Version:
 59 | ---------
 60 | pdfparser v1.0.2
 61 | """
 62 | # Inbuilt library imports
 63 | import os
 64 | import sys
 65 | # Import top level functions
 66 | from pdfparser.main_operations import pdf_delete, pdf_merge, pdf_sort, pdf_split
 67 | from pdfparser.user_choice import UserChoice
 68 | # Import colorama for color effects
 69 | from colorama import Fore, Back, Style, init
 70 | init(autoreset=True)
 71 | 
 72 | 
# Top-level operations in the same order as the numbered welcome menu;
# menu choice N runs the entry at index N - 1.
ALL_OPERATIONS = [pdf_delete, pdf_merge, pdf_sort, pdf_split]

# Prompt helper for the four-option welcome menu.
welcome_choices = UserChoice(4)
 76 | 
 77 | 
 78 | def display_welcome_message():
 79 |     """ Prints the welcome screen with all the top level options of PDF Parsing
 80 | 
 81 |     Parameters:
 82 |     Input : int (1-4)
 83 |     """
 84 |     while True:
 85 |         try:
 86 |             print("\nWelcome to PDF Parser\n")
 87 |             print("What do you want to do?\n")
 88 |             print("""1. Delete Pages from a PDF\n2. Merge PDFs\n3. Sort Pages of a PDF\n4. Split a PDF\n""")
 89 |             print("Press ctrl + c to exit.")
 90 | 
 91 |             welcome_choices.run_block(ALL_OPERATIONS
 92 |                                       )
 93 |         # Keyboard interrupt
 94 |         except KeyboardInterrupt:
 95 |             print(Fore.RED + "\nExiting...")
 96 |             sys.exit()
 97 | 
 98 | 
def main():
    """Entry point: start the interactive menu loop."""
    display_welcome_message()


# Run the menu when this module is executed directly (e.g. ``python -m pdfparser``).
if __name__ == '__main__':
    main()
105 | 


--------------------------------------------------------------------------------
/pdfparser/custom_error.py:
--------------------------------------------------------------------------------
 1 | class Error(Exception):
 2 |     """Base class for other exceptions"""
 3 |     pass
 4 | 
 5 | 
 6 | class ChoiceNotInOptions(Error):
 7 |     """If the choice is not in option"""
 8 |     pass
 9 | 
10 | 
class InvalidSelection(Error):
    """Raised when the entered page selection is not valid."""
14 | 
15 | 
class ChoiceNotInRange(Error):
    """Raised when a page number lies outside the document's page range."""
19 | 
20 | 
class EnterCombination(Error):
    """Raised when only a single value is entered where a swap pair is expected."""
24 | 
25 | 
class InputMultipleFilesError(Error):
    """Raised when only one file is passed to the merge function."""
30 | 


--------------------------------------------------------------------------------
/pdfparser/main_operations.py:
--------------------------------------------------------------------------------
  1 | from PyPDF2 import PdfFileReader, PdfFileWriter
  2 | from pdfparser.user_inputs import file_input
  3 | from pdfparser.user_choice import UserChoice
  4 | from pdfparser.custom_error import *
  5 | from pdfparser.utilities import *
  6 | import os
  7 | import re
  8 | from colorama import Fore, Back, Style, init
  9 | import datetime
 10 | 
# Regex the page-selection input must match (after spaces and edge commas are
# removed): it starts with a digit and contains only digits, commas and hyphens.
INPUT_MATCH = '^[0-9][0-9,-]*[0-9]*$'
# Working directory captured once at import time; output folders are created under it.
CURRENT_DIRECTORY = os.getcwd()

# Computed once at import time, so every output file written during one run
# carries the same timestamp suffix.
# NOTE(review): repeating the same operation on the same pages within one run
# therefore produces the same output path - confirm overwriting is acceptable.
timestamp = datetime.datetime.now().strftime("%d%m%Y_%H%M%S")
 16 | 
 17 | 
 18 | def user_range_input(message, number_of_pages, func, final_list, others=None, sorts=False):
 19 |     """ This function takes the user's range input and then runs the next function
 20 | 
 21 |     Parameters:
 22 |     -----------
 23 |     message: string
 24 |         Adds necessary message in Input Prompt Text
 25 |     number_of_pages : int
 26 |         No. of pages in the PDF file.
 27 |     func : function
 28 |         Next function to be run
 29 |     final_list: list
 30 |         Final list of user input after processing
 31 |     others: list
 32 |         Handles the kind of the function we are running e.g. deleting or keeping, swapping or moving, etc.
 33 |     sort : Boolean
 34 |         Tells which operation is we are doing
 35 | 
 36 |     Runs:
 37 |         Function func with given parameters
 38 |     """
 39 | 
 40 |     while True:
 41 |         try:
 42 |             if sorts:
 43 |                 user_input = input(f'Please enter combination of pages (separated by hyphen[-]) [{message}]\n')
 44 |             else:
 45 |                 user_input = input(f'Please enter the page numbers or range [{message}]\n')
 46 |             # Remove extra white spaces
 47 |             user_input = str(user_input).replace(' ', '')
 48 |             # Remove comma at the beginning or end
 49 |             user_input = user_input.strip(',')
 50 |             # Run RE match to check input
 51 |             if not re.match(INPUT_MATCH, user_input):
 52 |                 raise InvalidSelection
 53 |             # Create final list on the basis of user input
 54 |             final_list = process_selections(user_input, sorts=sorts)
 55 |             # Check if pages are in range
 56 |             check_pages_in_range(final_list, number_of_pages)
 57 | 
 58 |             if others is None:
 59 |                 exec(f'{func}({final_list})')
 60 |             else:
 61 |                 exec(f'{func}({final_list}, "{others[0]}", {others[1]})')
 62 |             break
 63 | 
 64 |         except InvalidSelection:
 65 |             print(Fore.RED + "\nInvalid Selection- Allowed options are 1,2,3 or 1-2, 3 or 1, or 1-3\n")
 66 |         except ChoiceNotInRange:
 67 |             print(Fore.RED + f'\nInvalid Page Number- Choose from pages 1 to {number_of_pages}\n')
 68 |         except EnterCombination:
 69 |             print(Fore.RED + f'\nEnter the combination of page numbers separated by hyphen(-)\n')
 70 | 
 71 | 
 72 | def write_pdf(page_number, pdf, to_directory, file_name):
 73 |     """This function writes a single page to the pdf
 74 | 
 75 |     Parameters:
 76 |     -----------
 77 |     page_number: int
 78 |         Single page number to be written
 79 |     pdf : PyPDFs PDF element
 80 |     to_directory: str
 81 |         The directory where the output file will be saved.
 82 |     file_name : str
 83 |         Name of the file being processed
 84 | 
 85 |     Returns:
 86 |     --------
 87 |     A PDF file.
 88 |     """
 89 |     pdf_writer = PdfFileWriter()
 90 |     pdf_writer.addPage(pdf.getPage(page_number - 1))
 91 |     output = f'{to_directory}/{os.path.splitext(file_name)[0].replace(" ","")}_{page_number}_{timestamp}.pdf'
 92 |     with open(output, 'wb') as output_pdf:
 93 |         pdf_writer.write(output_pdf)
 94 | 
 95 | 
 96 | def write_multiple_pages_pdf(list_of_pages, pdf, to_directory, file_name):
 97 |     """This function writes a range of pages to pdf file like 1-5 (1 to 5) and 3-1 (3 to 1)
 98 | 
 99 |     Parameters:
100 |     -----------
101 |     list_of_pages: list
102 |         A list containing range of pages e.g. [1-5]
103 |     pdf : PyPDFs PDF element
104 |     to_directory: str
105 |         The directory where the output file will be saved.
106 |     file_name : str
107 |         Name of the file being processed
108 | 
109 |     Returns:
110 |     --------
111 |     A PDF file.
112 |     """
113 |     # if list is [5-1]
114 |     if list_of_pages[0] > list_of_pages[-1]:
115 |         page_range = range(list_of_pages[0], list_of_pages[-1] - 1, -1)
116 |     # if list is [1-5]
117 |     else:
118 |         page_range = range(list_of_pages[0], list_of_pages[-1] + 1)
119 | 
120 |     pdf_writer = PdfFileWriter()
121 |     output = f'{to_directory}/{os.path.splitext(file_name)[0].replace(" ","")}_{"_".join([str(i) for i in list_of_pages])}_{timestamp}.pdf'
122 |     for item in page_range:
123 |         pdf_writer.addPage(pdf.getPage(item - 1))
124 |         with open(output, 'wb') as output_pdf:
125 |             pdf_writer.write(output_pdf)
126 | 
127 | 
def do_the_splits(final_list):
    """Create one split pdf file per entry of the processed selection.

    Parameters
    ----------
    final_list : list
        Processed selection; plain entries are single page numbers and
        nested lists are page ranges.

    Writes:
        The split PDF files into the ``splits/`` folder, using the document
        stored in the module globals by ``pdf_split``.
    """
    print("Splitting...\n")
    to_directory = os.path.join(CURRENT_DIRECTORY, 'splits/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    for selection in final_list:
        # A nested list is a range, anything else is a single page
        writer = write_multiple_pages_pdf if isinstance(selection, list) else write_pdf
        writer(selection, single_pdf_element_split, to_directory, file_to_be_split)
    print(Fore.GREEN + f'{len(final_list)} split files saved in directory {to_directory}')
151 | 
152 | 
def write_pdf_for_pages(list_of_pages, pdf, output):
    """Write the listed pages, in the given order, to a single pdf file.

    Unlike write_multiple_pages_pdf, which takes the end points of a range,
    this takes every page explicitly, e.g. [1, 3, 8]. The file is written
    once after all pages are collected rather than once per page.

    Parameters:
    -----------
    list_of_pages : list
        1-based page numbers, e.g. [1, 3, 8].
    pdf : PyPDF2 reader element
        Source document the pages are taken from.
    output : str
        Path of the output file.

    Writes:
    -------
    The file ``output``. Nothing is written when ``list_of_pages`` is empty,
    which matches the earlier behaviour.
    """
    pages = list(list_of_pages)
    if not pages:
        return

    pdf_writer = PdfFileWriter()
    for page_number in pages:
        pdf_writer.addPage(pdf.getPage(page_number - 1))
    with open(output, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)
172 | 
173 | 
def delete_pages(list_of_pages, file, to_directory, deleting):
    """Build the new pdf after deleting or keeping the selected pages.

    Parameters:
    ----------
    list_of_pages : list
        Pages to be deleted or kept.
    file : str
        Name of the PDF file.
    to_directory : str
        Directory where the new pdf file will be saved.
    deleting : boolean
        If True, every page except the listed ones is written;
        else only the listed pages are written.

    Writes:
    --------
    ``<name>_deleted_<first>_<last>_<timestamp>.pdf`` inside ``to_directory``
    (the ``_deleted_`` label is used in both modes).
    """
    every_page = list(range(1, number_of_pages_delete + 1))
    if deleting:
        pages_to_write = [page for page in every_page if page not in list_of_pages]
    else:
        pages_to_write = list_of_pages

    stem = os.path.splitext(file)[0].replace(" ", "")
    output = f'{to_directory}/{stem}_deleted_{list_of_pages[0]}_{list_of_pages[-1]}_{timestamp}.pdf'
    write_pdf_for_pages(pages_to_write, single_pdf_element_delete, output)
200 | 
201 | 
def do_the_deletes(final_list, msg, deleting):
    """Flatten the (possibly nested) selection and run delete_pages on it.

    Parameters:
    ----------
    final_list : list
        Selection entered by the user; nested lists are ranges given by
        their end points and may be descending (e.g. [6, 4]).
    msg : str
        "Deleting" or "Keeping"; used in the printed messages.
    deleting : boolean
        If True the pages are deleted, else they are kept.

    Runs:
        delete_pages

    When deleting, order is irrelevant, so the pages are sorted and
    de-duplicated. When keeping, the pages stay in the order they were
    entered (duplicates dropped, first occurrence wins), so a selection
    like 1-2, 6-4 re-arranges the output as the README describes. The
    previous ``sorted(set(...))`` discarded that order in both modes.
    """
    print(f'{msg} specific pages...\n')
    to_directory = os.path.join(CURRENT_DIRECTORY, 'deletes/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    list_of_user_choices = []
    for item in final_list:
        if not isinstance(item, list):
            list_of_user_choices.append(item)
            continue
        first, last = item[0], item[-1]
        # Ascending for 3-5, descending for 6-4 (and for a 4-4 range); ends included.
        step = 1 if first < last else -1
        list_of_user_choices.extend(range(first, last + step, step))

    if deleting:
        list_of_user_choices = sorted(set(list_of_user_choices))
    else:
        # dict keys keep insertion order, so this de-duplicates without sorting
        list_of_user_choices = list(dict.fromkeys(list_of_user_choices))

    delete_pages(list_of_user_choices, file_to_be_deleted, to_directory, deleting)
    print(Fore.GREEN + f'File after {msg.lower()} the pages have been saved in {to_directory}')
232 | 
233 | 
def split_options(all=True):
    """Run the chosen split option on the file loaded by ``pdf_split``.

    Parameters:
    ----------
    all : boolean
        If True, every page is written to its own file;
        else the user is asked which pages or ranges to split.

    Writes:
        If all = True, n split files for an n paged pdf in ``splits/``.

    Runs:
        If all = False, user_range_input, which goes on to call do_the_splits.
    """
    to_directory = os.path.join(CURRENT_DIRECTORY, 'splits/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    if not all:
        print("Enter the pages number(s) separated by comma or range of pages (1-2) or both\n")
        user_range_input("To be Split", number_of_pages_split, func="do_the_splits", final_list=None)
        return

    for page in range(1, number_of_pages_split + 1):
        write_pdf(page, single_pdf_element_split, to_directory, file_to_be_split)
    print(Fore.GREEN + f'{number_of_pages_split} split files saved in directory {to_directory}')
260 | 
261 | 
def delete_options(deleting=True):
    """Ask which pages to delete or keep and pass them on.

    Parameters:
    ----------
    deleting : boolean
        If True, the entered pages are deleted;
        else the entered pages are kept.

    Runs:
        user_range_input, which goes on to call do_the_deletes with the
        matching message and ``deleting`` flag.
    """
    if deleting:
        msg, others = "To be Deleted", ["Deleting", "deleting = True"]
    else:
        msg, others = "To be Kept", ["Keeping", "deleting = False"]

    print(f'\nEnter the pages number(s)[{msg}] separated by comma or range of pages(1-2) or both\n')

    user_range_input(msg, number_of_pages_delete, func="do_the_deletes", others=others, final_list=None)
286 | 
287 | 
def swap_move(final_list, msg, swap):
    """Swap or move pages of the file loaded by ``pdf_sort`` and save the result.

    Parameters:
    ----------
    final_list : list
        Processed input; each entry is presumed to be a two item list
        [a, b] of 1-based page numbers.
    msg : str
        Passed by the caller; not used here.
    swap : boolean
        If True, the pages at positions a and b are exchanged;
        else page a is removed and re-inserted at position b.

    Writes:
        ``<name>_swapped_<timestamp>.pdf`` or ``<name>_moved_<timestamp>.pdf``
        inside the ``sorted/`` folder.

    Swaps are applied one after another to the running page order. The
    previous version copied values from the untouched original order, so
    overlapping swaps such as 1-2, 2-3 duplicated one page and dropped
    another; separate (non-overlapping) swaps give the same result as before.
    """
    to_directory = os.path.join(CURRENT_DIRECTORY, 'sorted/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    page_order = list(range(1, number_of_pages_sort + 1))
    if swap:
        for item in final_list:
            left, right = item[0] - 1, item[1] - 1
            page_order[left], page_order[right] = page_order[right], page_order[left]
        message = "swapped"
    else:
        for item in final_list:
            page_number, position = item[0], item[1]
            # Take the page out by its number, then put it back at the 1-based position
            page_order.remove(page_number)
            page_order.insert(position - 1, page_number)
        message = "moved"

    stem = os.path.splitext(file_to_be_sorted)[0].replace(" ", "")
    output = f'{to_directory}/{stem}_{message}_{timestamp}.pdf'
    write_pdf_for_pages(page_order, single_pdf_element_sort, output)
    print(Fore.GREEN + f'File with {message} pages have been saved in {to_directory}')
320 | 
321 | 
def swap_options(reverse=False, swap=False, move=False):
    """Run the chosen sort option on the file loaded by ``pdf_sort``.

    Parameters:
    ----------
    reverse : boolean
        If true, the pages are written in reversed order right away.
    swap : boolean
        If true (and reverse is false), the user is asked for pages to swap.
    move : boolean
        Not read by the code; moving is what happens whenever neither
        reverse nor swap is true.

    Runs:
        user_range_input for swap and move, which goes on to call swap_move.
    """
    to_directory = os.path.join(CURRENT_DIRECTORY, 'sorted/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    if reverse:
        print("Reversing the order of pages...\n")
        stem = os.path.splitext(file_to_be_sorted)[0].replace(" ", "")
        output = f'{to_directory}/{stem}_reversed_{timestamp}.pdf'
        # Last page first, down to page 1
        descending_pages = list(range(number_of_pages_sort, 0, -1))
        write_pdf_for_pages(descending_pages, single_pdf_element_sort, output)
        print(Fore.GREEN + f'File with pages in the reversed order has been saved in {to_directory}')
        return

    if swap:
        verb, label, flag = "swap", "To be swapped", "swap = True"
    else:
        verb, label, flag = "move", "To be moved", "swap = False"

    print(f'Enter the pages number(s) separated by - to {verb} their positions. For mulitple {verb}s, separate them with comma\n')
    user_range_input(label, number_of_pages_sort, func="swap_move", others=[label, flag], final_list=None, sorts=True)
359 | 
360 | 
# One UserChoice prompt per top level function; the argument is the number
# of options in that function's sub-menu.
split_choices = UserChoice(2)
delete_choices = UserChoice(2)
sort_choices = UserChoice(3)
# Second level functions handed to run_block; each list holds the single
# function that run_block calls with a keyword flag derived from the choice.
split_functions = [split_options]
delete_functions = [delete_options]
sort_functions = [swap_options]
369 | 
370 | 
def pdf_split():
    """Ask for a PDF file, show its page count and offer the split options.

    The file name, the reader and the page count are stored in module
    globals so that the split helpers can use them.

    Runs
    split_choices
    """
    global file_to_be_split, single_pdf_element_split, number_of_pages_split

    print("\nSplit a PDF\n")
    file_to_be_split = file_input()
    single_pdf_element_split = PdfFileReader(file_to_be_split)
    number_of_pages_split = single_pdf_element_split.getNumPages()
    print(Fore.GREEN + f'\nYour file {file_to_be_split} has {number_of_pages_split} page(s).\n')

    menu = (
        "What do you want to do?\n\n"
        " 1. Split all the pages\n"
        " 2. Split specific pages\n"
    )
    print(menu)
    split_choices.run_block(split_functions, splits=True)
387 | 
388 | 
def pdf_delete():
    """Ask for a PDF file, show its page count and offer the delete options.

    The file name, the reader and the page count are stored in module
    globals so that the delete helpers can use them.

    Runs
    delete_choices
    """
    global file_to_be_deleted, single_pdf_element_delete, number_of_pages_delete

    print("\nDelete Pages from a PDF\n")
    file_to_be_deleted = file_input()
    single_pdf_element_delete = PdfFileReader(file_to_be_deleted)
    number_of_pages_delete = single_pdf_element_delete.getNumPages()
    print(f'\nYour file {file_to_be_deleted} has {number_of_pages_delete} page(s).\n')

    menu = (
        "What do you want to do?\n\n"
        " 1. Delete specific pages\n"
        " 2. Keep Specific pages\n"
    )
    print(menu)
    delete_choices.run_block(delete_functions, deletes=True)
405 | 
406 | 
def pdf_merge():
    """Ask for several PDF files and merge them into one file.

    Every page of every file is appended in the order the files were
    entered. The result is saved in the ``merged/`` folder under a name
    built from the input names (extension and spaces removed) joined by
    underscores, followed by the timestamp.
    """
    print("\nMerge PDFs\n")
    print("PDFs will be merged in the same order as in input.")
    files = file_input(single_file=False)

    to_directory = os.path.join(CURRENT_DIRECTORY, 'merged/')
    if not os.path.isdir(to_directory):
        os.mkdir(to_directory)

    stems = [os.path.splitext(name)[0].replace(" ", "") for name in files]
    joined_stems = "_".join(stems)
    output = f'{to_directory}/{joined_stems}_{timestamp}.pdf'

    merged = PdfFileWriter()
    for name in files:
        reader = PdfFileReader(name)
        for index in range(reader.getNumPages()):
            merged.addPage(reader.getPage(index))

    with open(output, 'wb') as output_file:
        merged.write(output_file)

    print(Fore.GREEN + f'The merged file has been saved in {to_directory}')
428 | 
429 | 
def pdf_sort():
    """Ask for a PDF file, show its page count and offer the sort options.

    The file name, the reader and the page count are stored in module
    globals so that the sort helpers can use them.

    Runs
    sort_choices
    """
    global file_to_be_sorted, single_pdf_element_sort, number_of_pages_sort

    print("Sort Pages of a PDF\n")
    file_to_be_sorted = file_input()
    single_pdf_element_sort = PdfFileReader(file_to_be_sorted)
    number_of_pages_sort = single_pdf_element_sort.getNumPages()
    print(f'\nYour file {file_to_be_sorted} has {number_of_pages_sort} page(s).\n')

    menu = (
        "What do you want to do?\n\n"
        " 1. Reverse order of all the pages\n"
        " 2. Swap Pages\n"
        " 3. Move certain pages to a specific index\n"
    )
    print(menu)
    sort_choices.run_block(sort_functions, sorts=True)
447 | 


--------------------------------------------------------------------------------
/pdfparser/user_choice.py:
--------------------------------------------------------------------------------
 1 | from pdfparser.custom_error import *
 2 | from colorama import Fore, Back, Style, init
 3 | init(autoreset=True)
 4 | 
 5 | 
 6 | class UserChoice:
 7 |     """ This class creates a UserChoice for each function
 8 |     """
 9 | 
10 |     def __init__(self, number_of_choices):
11 |         self.number_of_choices = number_of_choices
12 | 
13 |     def run_block(self, custom_functions, deletes=False, splits=False, sorts=False):
14 |         """
15 |         This one runs the prompt for taking user's input for each choice and then runs the necessary function."""
16 |         while True:
17 |             try:
18 |                 user_choice = int(input(f'Enter your choice (1-{self.number_of_choices}): '))
19 | 
20 |                 if user_choice not in list(range(1, self.number_of_choices + 1)):
21 |                     raise ChoiceNotInOptions
22 | 
23 |                 if deletes:
24 |                     if user_choice == 1:
25 |                         custom_functions[0](deleting=True)
26 |                     elif user_choice == 2:
27 |                         custom_functions[0](deleting=False)
28 |                 elif splits:
29 |                     if user_choice == 1:
30 |                         custom_functions[0](all=True)
31 |                     elif user_choice == 2:
32 |                         custom_functions[0](all=False)
33 |                 elif sorts:
34 |                     if user_choice == 1:
35 |                         custom_functions[0](reverse=True)
36 |                     elif user_choice == 2:
37 |                         custom_functions[0](swap=True)
38 |                     else:
39 |                         custom_functions[0](move=True)
40 | 
41 |                 else:
42 |                     custom_functions[user_choice - 1]()
43 | 
44 |                 break
45 | 
46 |             except ValueError:
47 |                 print(Fore.RED + f'Not an Integer: Enter a digit from 1 to {self.number_of_choices}')
48 | 
49 |             except ChoiceNotInOptions:
50 |                 print(Fore.RED + f'Option not in choices: Enter a digit from 1 to {self.number_of_choices}')
51 | 


--------------------------------------------------------------------------------
/pdfparser/user_inputs.py:
--------------------------------------------------------------------------------
 1 | from pdfparser.utilities import *
 2 | from pdfparser.custom_error import *
 3 | from colorama import Fore, Back, Style, init
 4 | init(autoreset=True)
 5 | 
 6 | 
 7 | def file_input(single_file=True):
 8 |     """
 9 |     This function takes the file input from the user
10 | 
11 |     Parameters:
12 |     single_file : boolean
13 |         if True, single input is allowed (used for delete, sort and split)
14 |         else, multiple file inputs separated by comma ( used for merge )
15 |     Returns:
16 |     file : str
17 |         In case of delete, sort and split
18 |     list of files : list
19 |         In case of merge
20 |     """
21 |     while True:
22 |         try:
23 |             if single_file:
24 |                 file_name = str(input('Enter the name of file with extension(.pdf)\n'))
25 |                 """ Throw I/O error, if file is not present"""
26 |                 file_check(file_name)
27 |                 return file_name
28 |             else:
29 |                 file_names = str(input('Enter the names of files with extension(.pdf) separated by a comma\n'))
30 |                 files = file_names.split(',')
31 |                 files = [item.strip() for item in files]
32 |                 # If only one input is given for merge through error.
33 |                 if len(files) == 1:
34 |                     raise InputMultipleFilesError
35 |                 for file in files:
36 |                     file_check(file)
37 |                 return files
38 |             break
39 |         except ValueError:
40 |             print(Fore.RED + "Please enter the correct filename")
41 |         except IOError as e:
42 |             err_no, err_msg = e.args
43 |             print(Fore.RED + f'IO Error-({err_no}) : {err_msg}')
44 |         except InputMultipleFilesError:
45 |             print(Fore.RED + "\nInput at least two PDF files.\n")
46 | 


--------------------------------------------------------------------------------
/pdfparser/utilities.py:
--------------------------------------------------------------------------------
 1 | from PyPDF2 import PdfFileReader
 2 | from pdfparser.custom_error import ChoiceNotInRange, EnterCombination
 3 | 
 4 | 
 5 | def flatten_list(input_list):
 6 |     """ Flattens a nested list
 7 |     Parameters:
 8 |     -----------
 9 |     input_list : list
10 |         Nested list of user input
11 |     Returns:
12 |     flat_list : list
13 |         Flattened list
14 |     """
15 | 
16 |     for item in input_list:
17 |         if type(item) == list:
18 |             flatten_list(item)
19 |         else:
20 |             flat_list.append(item)
21 |     return flat_list
22 | 
23 | 
def process_selections(selection_string, sorts=False):
    """
    Turn a comma separated page selection string into a list of page numbers and page ranges.
    Parameters:
    selection_string: str
        Comma separated entries; an entry containing a hyphen is a range ("1-3"),
        any other entry is a single page ("4")
    sorts: boolean
        If true, only ranges are accepted and any single page raises EnterCombination
    Returns:
    list
        When sorts is true: the ranges only, each as a list of ints, e.g. [[1,3],[2,5]]
        Otherwise: all single pages first, followed by the ranges, e.g. [1,[1,3],[2,5]]
    Raises:
    EnterCombination if sorts is true and at least one single page was entered.
    ValueError (from int) if an entry is not numeric.
    """
    plain_tokens, ranged_tokens = [], []
    for token in selection_string.split(','):
        (ranged_tokens if '-' in token else plain_tokens).append(token)

    # Singles are converted before ranges, so a bad single page is reported first.
    singles = list(map(int, plain_tokens))
    ranges = [[int(bound) for bound in token.split('-')] for token in ranged_tokens]

    if not sorts:
        return singles + ranges
    if singles:
        raise EnterCombination
    return ranges
51 | 
52 | 
def check_pages_in_range(selection_list_nested, total_pages):
    """ Validates that every selected page number lies between 1 and total_pages (inclusive)
    Parameters:
    selection_list_nested:list
        Possibly nested list of page numbers, as returned by process_selections
    total_pages:int
        Total number of pages in the PDF to be parsed
    Raises:
    ChoiceNotInRange Error if any page number is outside 1..total_pages.
    """
    # flatten_list appends into this module-level list, so reset it before each use.
    global flat_list
    flat_list = []
    pages = flatten_list(selection_list_nested)
    if any(page not in range(1, total_pages + 1) for page in pages):
        raise ChoiceNotInRange
69 | 
70 | 
def file_check(pdf_path):
    """ Checks, whether the input file is pdf file, takes use of PyPDF2 to throw the error if the file can't be read.

    Parameters:
    pdf_path : str
        Path of the file to open

    Returns:
    pdf : PyPDF2's pdf element
        Reader built over an in-memory copy of the file

    Raises:
    Whatever open() raises when the path cannot be opened, and whatever
    PdfFileReader raises when the content cannot be parsed.
    """
    import io  # local import: keeps this block independent of the module's import list

    with open(pdf_path, 'rb') as f:
        # Copy the bytes into memory first: the file handle is closed when this
        # block exits, so a reader built directly on it would be returned with
        # a closed stream.
        buffered = io.BytesIO(f.read())
    return PdfFileReader(buffered)
80 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | from setuptools import setup
 3 | 
 4 | # The directory containing this file
 5 | HERE = pathlib.Path(__file__).parent
 6 | 
 7 | # The text of the README file
 8 | README = (HERE / "README.md").read_text()
 9 | 
10 | # This call to setup() does all the work
11 | setup(
12 |     name="sg-pdfparser",
13 |     version="1.0.2",
14 |     description="A Command Line tool for parsing PDFs.",
15 |     long_description=README,
16 |     long_description_content_type="text/markdown",
17 |     url="https://github.com/uditvashisht/sg-pdfparser",
18 |     author="Udit Vashisht",
19 |     author_email="admin@saralgyaan.com",
20 |     license="MIT",
21 |     classifiers=[
22 |         "License :: OSI Approved :: MIT License",
23 |         "Programming Language :: Python :: 3.6",
24 |         "Programming Language :: Python :: 3.7",
25 |         "Programming Language :: Python :: 3.8",
26 |     ],
27 |     packages=["pdfparser"],
28 |     include_package_data=True,
29 |     install_requires=["pypdf2", "colorama"],
30 |     entry_points={
31 |         "console_scripts": [
32 |             "pdfparser=pdfparser.__main__:main",
33 |         ]
34 |     },
35 | )
36 | 


--------------------------------------------------------------------------------