├── .devcontainer
    └── devcontainer.json
├── .github
    └── FUNDING.yml
├── .gitignore
├── .streamlit
    └── config.toml
├── LICENSE
├── README.md
├── app.py
├── assets
    └── sidebar.html
├── packages.txt
├── requirements.txt
└── utils
    ├── helpers.py
    ├── init_session_states.py
    ├── page_config.py
    ├── render_sidebar.py
    └── version.py


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Python 3",
 3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
 5 |   "customizations": {
 6 |     "codespaces": {
 7 |       "openFiles": [
 8 |         "README.md",
 9 |         "app.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     }
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21 |   "postAttachCommand": {
22 |     "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
23 |   },
24 |   "portsAttributes": {
25 |     "8501": {
26 |       "label": "Application",
27 |       "onAutoForward": "openPreview"
28 |     }
29 |   },
30 |   "forwardPorts": [
31 |     8501
32 |   ]
33 | }


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: SiddhantSadangi
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.idea
 2 | *.ipynb
 3 | *.yaml
 4 | /final_image.png
 5 | *.pyc
 6 | *.pdf
 7 | secrets.toml
 8 | *.txt
 9 | doc_converter.py
10 | 


--------------------------------------------------------------------------------
/.streamlit/config.toml:
--------------------------------------------------------------------------------
 1 | [server]
 2 | enableXsrfProtection = false
 3 | enableCORS = false
 4 | 
 5 | [theme]
 6 | base="dark"
 7 | 
 8 | [client]
 9 | toolbarMode = "minimal"
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2024, Siddhant Sadangi
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PDF WorkDesk 📄✨
 2 | 
 3 | ![Screenshot_10-8-2024_234038_pdfworkdesk streamlit app](https://github.com/user-attachments/assets/174a1d98-55d2-4f4c-b990-4512199e491c)
 4 | 
 5 | Welcome to **PDF WorkDesk**, the ultimate open-source Streamlit app for all your PDF processing needs!
 6 | 
 7 | ## Try it now
 8 | 
 9 | <p align="center">
10 |     <a href="https://pdfworkdesk.streamlit.app/" target="_blank"><img src="https://static.streamlit.io/badges/streamlit_badge_black_white.svg" alt="Open in Streamlit" style="height: 60px !important;width: 217px !important;">
11 |     </a>
12 | </p>
13 | 
14 | ## Features 🚀
15 | 
16 | PDF WorkDesk is designed to be a user-friendly, lightweight tool that empowers you to handle PDFs with ease and efficiency. Here's what you can do:
17 | 
18 | - **Upload PDFs**: Choose files from your disk or load them directly via URL.
19 | - **Preview PDFs**: Instantly view your PDF's contents and metadata.
20 | - **Extract Data**: Seamlessly extract text, images, and tables from your documents.
21 | - **Secure PDFs**: Encrypt or decrypt your PDFs for added security.
22 | - **Modify PDFs**: Rotate, resize, merge, or convert PDFs to Word with simple clicks.
23 | - **Watermark PDFs**: Add a label to watermark your PDFs.
24 | - **Reduce Size**: Reduce your PDF's file size without compromising quality.
25 | 
26 | And the best part? It's completely **free**, **open-source**, and respects your privacy by not requiring any personal information! 🛡️
27 | 
28 | ## Get Involved! 🤝
29 | 
30 | Your feedback and contributions can help shape the future of PDF WorkDesk. If you have ideas or features you'd like to see, let's collaborate!
31 | 
32 | - **Contribute**: Submit PRs or open issues on GitHub.
33 | - **Connect**: Have questions or suggestions? Reach out to me on [LinkedIn](https://linkedin.com/in/siddhantsadangi).
34 | 
35 | ## Acknowledgements 🤗
36 | 
37 | This app is based on [pypdf](https://github.com/py-pdf/pypdf) and basically provides a UI for non-technical users to use supported features of pypdf. Thanks to the pypdf team 🫶  
38 | Also, thanks as usual to the team behind and the community surrounding [Streamlit](https://streamlit.io/) 🎈
39 | 
40 | Special thanks to the following individuals for their contributions:
41 | 
42 | - [Pierre-Louis BESCOND](https://github.com/pierrelouisbescond) for contributing the PDF to Word conversion code
43 | - [Tomasz Hasiów](https://discuss.streamlit.io/u/TomJohn/summary) for his help with forcing dark-mode and introducing me to CSS injection
44 | - [Émilien Foissotte](https://github.com/Emilien-Foissotte) for implementing the watermark functionality
45 | 
46 | Finally, to all open-source projects out there used either directly or indirectly 🙇
47 | 
48 | ## Support PDF WorkDesk 💖
49 | 
50 | Love PDF WorkDesk? Here's how you can show your support:
51 | 
52 | - **Star**: Give us a star on GitHub and help spread the word!
53 | - **Share**: Tell your friends and colleagues about us on social media.
54 | - **Donate**: Buy me a coffee or sponsor me on GitHub!
55 | 
56 | <p align="center">
57 |     <a href="https://www.buymeacoffee.com/siddhantsadangi" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 60px !important;width: 217px !important;">
58 |     </a>
59 |     <a href="https://github.com/sponsors/SiddhantSadangi" target="_blank"><img
60 |         src="https://img.shields.io/badge/Sponsor%20me%20on-GitHub-f34b7d?logo=github&style=flat"
61 |         alt="Sponsor me on GitHub" style="height: 26px !important;"></a>
62 | </p>
63 | 
64 | Thank you for supporting PDF WorkDesk! 🤗
65 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     import os
  3 |     import sys
  4 |     import traceback
  5 |     from io import BytesIO
  6 | 
  7 |     import streamlit as st
  8 |     from pypdf import PaperSize, PdfReader, PdfWriter, Transformation
  9 |     from pypdf.errors import FileNotDecryptedError
 10 |     from streamlit import session_state
 11 |     from streamlit_pdf_viewer import pdf_viewer
 12 | 
 13 |     from utils import helpers, init_session_states, page_config, render_sidebar
 14 | 
 15 |     page_config.set()
 16 | 
 17 |     # ---------- HEADER ----------
 18 |     st.title("📄 PDF WorkDesk!")
 19 |     st.write(
 20 |         "User-friendly, lightweight, and open-source tool to preview and extract content and metadata from PDFs, add or remove passwords, modify, merge, convert and compress PDFs."
 21 |     )
 22 | 
 23 |     init_session_states.init()
 24 | 
 25 |     render_sidebar.render()
 26 | 
 27 |     # ---------- OPERATIONS ----------
 28 |     # TODO: Extract attachments (https://pypdf.readthedocs.io/en/stable/user/extract-attachments.html)
 29 |     # TODO: Undo last operation
 30 |     # TODO: Update metadata (https://pypdf.readthedocs.io/en/stable/user/metadata.html)
 31 | 
 32 |     try:
 33 |         (
 34 |             pdf,
 35 |             reader,
 36 |             session_state["password"],
 37 |             session_state["is_encrypted"],
 38 |         ) = helpers.load_pdf(key="main")
 39 | 
 40 |     except FileNotDecryptedError:
 41 |         pdf = "password_required"
 42 | 
 43 |     if pdf == "password_required":
 44 |         st.error("PDF is password protected. Please enter the password to proceed.")
 45 |     elif pdf:
 46 |         lcol, rcol = st.columns(2)
 47 |         with lcol.expander(label="🔍 Extract text"):
 48 |             extract_text_lcol, extract_text_rcol = st.columns(2)
 49 | 
 50 |             page_numbers_str = helpers.select_pages(
 51 |                 container=extract_text_lcol,
 52 |                 key="extract_text_pages",
 53 |             )
 54 | 
 55 |             mode = extract_text_rcol.radio(
 56 |                 "Extraction mode",
 57 |                 options=["plain", "layout"],
 58 |                 horizontal=True,
 59 |                 help="Layout mode extracts text in a format resembling the layout of the source PDF",
 60 |             )
 61 | 
 62 |             if page_numbers_str:
 63 |                 try:
 64 |                     text = helpers.extract_text(reader, page_numbers_str, mode)
 65 |                 except (IndexError, ValueError):
 66 |                     st.error("Specified pages don't exist. Check the format.", icon="⚠️")
 67 |                 else:
 68 |                     st.text(text)
 69 | 
 70 |                     with open("text.txt", "w", encoding="utf-8") as f:
 71 |                         f.write(text)
 72 | 
 73 |                     with open("text.txt") as f:
 74 |                         st.download_button(
 75 |                             "💾 Download extracted text",
 76 |                             data=f,
 77 |                             use_container_width=True,
 78 |                         )
 79 | 
 80 |         with rcol.expander(label="️🖼️ Extract images"):
 81 |             if page_numbers_str := helpers.select_pages(
 82 |                 container=st,
 83 |                 key="extract_image_pages",
 84 |             ):
 85 |                 try:
 86 |                     images = helpers.extract_images(reader, page_numbers_str)
 87 |                 except (IndexError, ValueError):
 88 |                     st.error("Specified pages don't exist. Check the format.", icon="⚠️")
 89 |                 else:
 90 |                     if images:
 91 |                         for data, name in images.items():
 92 |                             st.image(data, caption=name)
 93 |                     else:
 94 |                         st.info("No images found")
 95 | 
 96 |         with lcol.expander("📊 Extract table"):
 97 |             if page_numbers_str := helpers.select_pages(
 98 |                 container=st,
 99 |                 key="extract_table_pages",
100 |             ):
101 |                 helpers.extract_tables(
102 |                     session_state["file"],
103 |                     page_numbers_str,
104 |                 )
105 | 
106 |         with rcol.expander("🔄️ Convert to Word"):
107 |             st.caption("Takes ~1 second/page. Will remove password if present")
108 | 
109 |             if st.button("Convert PDF to Word", use_container_width=True):
110 |                 st.download_button(
111 |                     "📥 Download Word document",
112 |                     data=helpers.convert_pdf_to_word(pdf),
113 |                     file_name=f"{session_state['name'][:-4]}.docx",
114 |                     mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
115 |                     use_container_width=True,
116 |                 )
117 | 
118 |         with lcol.expander(
119 |             f"🔐 {'Change' if session_state['is_encrypted'] else 'Add'} password"
120 |         ):
121 |             new_password = st.text_input(
122 |                 "Enter password",
123 |                 type="password",
124 |             )
125 | 
126 |             algorithm = st.selectbox(
127 |                 "Algorithm",
128 |                 options=["RC4-40", "RC4-128", "AES-128", "AES-256-R5", "AES-256"],
129 |                 index=3,
130 |                 help="Use `RC4` for compatibility and `AES` for security",
131 |             )
132 | 
133 |             filename = f"protected_{session_state['name']}"
134 | 
135 |             if st.button(
136 |                 "🔒 Submit",
137 |                 use_container_width=True,
138 |                 disabled=(len(new_password) == 0),
139 |             ):
140 |                 with PdfWriter() as writer:
141 |                     # Add all pages to the writer
142 |                     for page in reader.pages:
143 |                         writer.add_page(page)
144 | 
145 |                     # Add a password to the new PDF
146 |                     writer.encrypt(new_password, algorithm=algorithm)
147 | 
148 |                     # Save the new PDF to a file
149 |                     with open(filename, "wb") as f:
150 |                         writer.write(f)
151 | 
152 |             if os.path.exists(filename):
153 |                 st.download_button(
154 |                     "📥 Download protected PDF",
155 |                     data=open(filename, "rb"),
156 |                     mime="application/pdf",
157 |                     file_name=filename,
158 |                     use_container_width=True,
159 |                 )
160 | 
161 |         with rcol.expander("🔓 Remove password"):
162 |             if reader.is_encrypted:
163 |                 st.download_button(
164 |                     "📥 Download unprotected PDF",
165 |                     data=open(session_state["decrypted_filename"], "rb"),
166 |                     mime="application/pdf",
167 |                     file_name=session_state["decrypted_filename"],
168 |                     use_container_width=True,
169 |                 )
170 |             else:
171 |                 st.info("PDF does not have a password")
172 | 
173 |         with lcol.expander("🔃 Rotate PDF"):
174 |             # TODO: Add password back to converted PDF if original was protected
175 |             st.caption("Will remove password if present")
176 |             angle = st.slider(
177 |                 "Clockwise angle",
178 |                 min_value=0,
179 |                 max_value=270,
180 |                 step=90,
181 |                 format="%d°",
182 |             )
183 | 
184 |             with PdfWriter() as writer:
185 |                 for page in reader.pages:
186 |                     writer.add_page(page)
187 |                     writer.pages[-1].rotate(angle)
188 | 
189 |                 # TODO: Write to byte_stream
190 |                 writer.write("rotated.pdf")
191 | 
192 |                 with open("rotated.pdf", "rb") as f:
193 |                     pdf_viewer(f.read(), height=250, width=300)
194 |                     st.download_button(
195 |                         "📥 Download rotated PDF",
196 |                         data=f,
197 |                         mime="application/pdf",
198 |                         file_name=f"{session_state['name'].rsplit('.')[0]}_rotated_{angle}.pdf",
199 |                         use_container_width=True,
200 |                     )
201 | 
202 |         with rcol.expander("↔ Resize/Scale PDF"):
203 |             # TODO: Add password back to converted PDF if original was protected
204 |             st.caption("Will remove password if present")
205 |             new_size = st.selectbox(
206 |                 "New size",
207 |                 options={
208 |                     attr: getattr(PaperSize, attr)
209 |                     for attr in dir(PaperSize)
210 |                     if not attr.startswith("__")
211 |                     and not callable(getattr(PaperSize, attr))
212 |                 },
213 |                 index=4,
214 |                 help="Changes will be apparant only on printing the PDF",
215 |             )
216 | 
217 |             scale_content = st.slider(
218 |                 "Scale content",
219 |                 min_value=0.1,
220 |                 max_value=2.0,
221 |                 step=0.1,
222 |                 value=1.0,
223 |                 help="Scale content independently of the page size",
224 |                 format="%fx",
225 |             )
226 | 
227 |             with PdfWriter() as writer:
228 |                 for page in reader.pages:
229 |                     page.scale_to(
230 |                         width=getattr(PaperSize, new_size).width,
231 |                         height=getattr(PaperSize, new_size).height,
232 |                     )
233 |                     op = Transformation().scale(sx=scale_content, sy=scale_content)
234 |                     page.add_transformation(op)
235 |                     writer.add_page(page)
236 | 
237 |                 # TODO: Write to byte_stream
238 |                 writer.write("scaled.pdf")
239 | 
240 |                 with open("scaled.pdf", "rb") as f:
241 |                     st.caption("Content scaling preview")
242 |                     pdf_viewer(f.read(), height=250, width=300)
243 |                     st.download_button(
244 |                         "📥 Download scaled PDF",
245 |                         data=f,
246 |                         mime="application/pdf",
247 |                         file_name=f"{session_state['name'].rsplit('.')[0]}_scaled_{new_size}_{scale_content}x.pdf",
248 |                         use_container_width=True,
249 |                     )
250 | 
251 |         with lcol.expander("➕ Merge PDFs"):
252 |             # TODO: Add password back to converted PDF if original was protected
253 |             st.caption(
254 |                 "Second PDF will be appended to the first. Passwords will be removed from both."
255 |             )
256 |             # TODO: Add more merge options (https://pypdf.readthedocs.io/en/stable/user/merging-pdfs.html#showing-more-merging-options)
257 |             pdf_to_merge, reader_to_merge, *_ = helpers.load_pdf(key="merge")
258 | 
259 |             if st.button(
260 |                 "➕ Merge PDFs", disabled=(not pdf_to_merge), use_container_width=True
261 |             ):
262 |                 with PdfWriter() as merger:
263 |                     for file in (reader, reader_to_merge):
264 |                         merger.append(file)
265 | 
266 |                     # TODO: Write to byte_stream
267 |                     merger.write("merged.pdf")
268 | 
269 |                     pdf_viewer(
270 |                         open("merged.pdf", "rb").read(),
271 |                         height=250,
272 |                         width=300,
273 |                     )
274 |                     st.download_button(
275 |                         "📥 Download merged PDF",
276 |                         data=open("merged.pdf", "rb"),
277 |                         mime="application/pdf",
278 |                         file_name="merged.pdf",
279 |                         use_container_width=True,
280 |                     )
281 |         # create a watermark
282 |         with rcol.expander("©️ Add watermark"):
283 |             if text_watermark := st.text_input(
284 |                 "Enter watermark text",
285 |                 placeholder="PDF-Workdesk Watermark",
286 |             ):
287 |                 size_watermark = st.slider(
288 |                     "Font size", min_value=6, max_value=30, value=12
289 |                 )
290 |                 lcol, rcol_inner = st.columns([1, 3])
291 |                 color = lcol.color_picker("Color", "#F90004")
292 |                 transparency = rcol_inner.slider(
293 |                     "Opacity", min_value=0.0, max_value=1.0, value=0.8
294 |                 )
295 | 
296 |                 watermarked_pdf = helpers.watermark_pdf(
297 |                     pdf=pdf,
298 |                     stamp_label=text_watermark,
299 |                     stamp_size=size_watermark,
300 |                     stamp_color=color,
301 |                     stamp_transparency=transparency,
302 |                 )
303 |                 pdf_viewer(watermarked_pdf, height=400, width=500)
304 | 
305 |                 st.download_button(
306 |                     "📥 Download watermarked PDF",
307 |                     data=watermarked_pdf,
308 |                     mime="application/pdf",
309 |                     file_name="watermarked.pdf",
310 |                     use_container_width=True,
311 |                 )
312 | 
313 |         with st.expander("🤏 Reduce PDF size"):
314 |             # TODO: Add password back to converted PDF if original was protected
315 |             st.caption("Will remove password if present")
316 | 
317 |             pdf_small = pdf
318 | 
319 |             lcol, mcol, rcol = st.columns(3)
320 | 
321 |             with lcol:
322 |                 remove_duplication = st.toggle(
323 |                     "Remove duplication",
324 |                     help="""
325 |                     Some PDF documents contain the same object multiple times.  
326 |                     For example, if an image appears three times in a PDF it could be embedded three times. 
327 |                     Or it can be embedded once and referenced twice.  
328 |                     **Note:** This option will not remove objects, rather it will use a reference to the original object for subsequent uses.
329 |                     """,
330 |                 )
331 | 
332 |                 remove_images = st.toggle(
333 |                     "Remove images",
334 |                     help="Remove images from the PDF. Will also remove duplication.",
335 |                 )
336 | 
337 |                 if remove_images or remove_duplication:
338 |                     pdf_small = helpers.remove_images(
339 |                         pdf,
340 |                         remove_images=remove_images,
341 |                         password=session_state.password,
342 |                     )
343 | 
344 |                 if st.toggle(
345 |                     "Reduce image quality",
346 |                     help="""
347 |                     Reduce the quality of images in the PDF. Will also remove duplication.  
348 |                     May not work for all cases.
349 |                     """,
350 |                     disabled=remove_images,
351 |                 ):
352 |                     quality = st.slider(
353 |                         "Quality",
354 |                         min_value=0,
355 |                         max_value=100,
356 |                         value=50,
357 |                         disabled=remove_images,
358 |                     )
359 |                     pdf_small = helpers.reduce_image_quality(
360 |                         pdf_small,
361 |                         quality,
362 |                         password=session_state.password,
363 |                     )
364 | 
365 |                 if st.toggle(
366 |                     "Lossless compression",
367 |                     help="Compress PDF without losing quality",
368 |                 ):
369 |                     pdf_small = helpers.compress_pdf(
370 |                         pdf_small, password=session_state.password
371 |                     )
372 | 
373 |                 original_size = sys.getsizeof(pdf)
374 |                 reduced_size = sys.getsizeof(pdf_small)
375 |                 st.caption(
376 |                     f"Reduction: {100 - (reduced_size / original_size) * 100:.2f}%"
377 |                 )
378 | 
379 |             with mcol:
380 |                 st.caption(f"Original size: {original_size / 1024:.2f} KB")
381 |                 helpers.preview_pdf(
382 |                     reader,
383 |                     pdf,
384 |                     key="other",
385 |                     password=session_state.password,
386 |                 )
387 |             with rcol:
388 |                 st.caption(f"Reduced size: {reduced_size / 1024:.2f} KB")
389 |                 helpers.preview_pdf(
390 |                     PdfReader(BytesIO(pdf_small)),
391 |                     pdf_small,
392 |                     key="other",
393 |                     password=session_state.password,
394 |                 )
395 |             st.download_button(
396 |                 "📥 Download smaller PDF",
397 |                 data=pdf_small,
398 |                 mime="application/pdf",
399 |                 file_name=f"{filename}_reduced.pdf",
400 |                 use_container_width=True,
401 |             )
402 | 
403 | except Exception as e:
404 |     st.error(
405 |         f"""The app has encountered an error:  
406 |         `{e}`  
407 |         Please create an issue [here](https://github.com/SiddhantSadangi/pdf-workdesk/issues/new) 
408 |         with the below traceback""",
409 |         icon="🥺",
410 |     )
411 |     st.code(traceback.format_exc())
412 | 
413 | st.success(
414 |     "[Star the repo](https://github.com/SiddhantSadangi/pdf-workdesk) to show your :heart:",
415 |     icon="⭐",
416 | )
417 | 


--------------------------------------------------------------------------------
/assets/sidebar.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 |     <head>
 5 |         <title>Sidebar content</title>
 6 |         <meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1">
 7 |         <link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro" rel="stylesheet"
 8 |             type="text/css">
 9 |         <script src="https://code.iconify.design/2/2.1.2/iconify.min.js"></script>
10 |     </head>
11 | 
12 |     <body style="background-color: transparent;">
13 | 
14 |         <div
15 |             style="text-align:center; font-size:14px; color: lightgray;font-family: 'Source Sans Pro', sans-serif;">
16 |             <hr style="margin-bottom: 0%; margin-top: 0%;"><br>
17 |             <b>v{VERSION}</b><br>
18 |             ⭐ the repo to be notified of updates<br><br>
19 |             <iframe
20 |                 src="https://ghbtns.com/github-btn.html?user=SiddhantSadangi&repo=pdf-workdesk&type=star&size=large"
21 |                 frameborder="0" width="70" height="30" title="GitHub"></iframe><br>
22 |             <br>
23 |             <hr style="margin-bottom: 0%; margin-top: 0%;"><br>
24 |             Made with ❤️ by <b>Siddhant Sadangi</b>
25 |             <a href="https://linkedin.com/in/siddhantsadangi">
26 |                 <span class="iconify-inline" data-icon="ion:logo-linkedin"></span></a>
27 |             <a href="mailto:siddhantsadangi@gmail.com">
28 |                 <span class="iconify-inline" data-icon="carbon:email"></span></a><br>
29 |             <br>
30 |             <script type="text/javascript"
31 |                 src="https://cdnjs.buymeacoffee.com/1.0.0/button.prod.min.js" data-name="bmc-button"
32 |                 data-slug="siddhantsadangi" data-color="#000000" data-emoji="" data-font="Cookie"
33 |                 data-text="Buy me a coffee" data-outline-color="#ffffff" data-font-color="#ffffff"
34 |                 data-coffee-color="#FFDD00"></script><br>
35 | 
36 |             <a href="https://github.com/sponsors/SiddhantSadangi" target="_blank"><img
37 |                     src="https://img.shields.io/badge/Sponsor%20me%20on-GitHub-f34b7d?logo=github&style=flat"
38 |                     alt="Sponsor me on GitHub" style="height: 26px !important;"><br>
39 |             </a>
40 |         </div>
41 |     </body>
42 | 
43 | </html>
44 | 


--------------------------------------------------------------------------------
/packages.txt:
--------------------------------------------------------------------------------
1 | libjpeg-dev
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by uv via the following command:
 2 | #    uv export --no-hashes --format requirements-txt
 3 | altair==5.5.0
 4 | attrs==25.3.0
 5 | beautifulsoup4==4.13.3
 6 | blinker==1.9.0
 7 | cachetools==5.5.2
 8 | certifi==2025.1.31
 9 | cffi==1.17.1 ; platform_python_implementation != 'PyPy'
10 | chardet==5.2.0
11 | charset-normalizer==3.4.1
12 | click==8.1.8
13 | colorama==0.4.6 ; sys_platform == 'win32'
14 | cryptography==44.0.2
15 | fire==0.7.0
16 | fonttools==4.56.0
17 | gitdb==4.0.12
18 | gitpython==3.1.44
19 | idna==3.10
20 | jinja2==3.1.6
21 | jsonschema==4.23.0
22 | jsonschema-specifications==2024.10.1
23 | lxml==5.3.1
24 | markupsafe==3.0.2
25 | narwhals==1.32.0
26 | numpy==2.2.4
27 | opencv-python-headless==4.11.0.86
28 | packaging==24.2
29 | pandas==2.2.3
30 | pdf2docx==0.5.8
31 | pdfminer-six==20250327
32 | pdfplumber==0.11.6
33 | pillow==11.1.0
34 | protobuf==5.29.4
35 | pyarrow==19.0.1
36 | pycparser==2.22 ; platform_python_implementation != 'PyPy'
37 | pydeck==0.9.1
38 | pymupdf==1.25.4
39 | pypdf==5.4.0
40 | pypdfium2==4.30.1
41 | python-dateutil==2.9.0.post0
42 | python-docx==1.1.2
43 | pytz==2025.2
44 | referencing==0.36.2
45 | reportlab==4.3.1
46 | requests==2.32.3
47 | rpds-py==0.24.0
48 | six==1.17.0
49 | smmap==5.0.2
50 | soupsieve==2.6
51 | st-social-media-links==0.1.4
52 | streamlit==1.44.0
53 | streamlit-pdf-viewer==0.0.21
54 | tenacity==9.0.0
55 | termcolor==2.5.0
56 | toml==0.10.2
57 | tornado==6.5.1
58 | typing-extensions==4.13.0
59 | tzdata==2025.2
60 | urllib3==2.3.0
61 | watchdog==6.0.0 ; sys_platform != 'darwin'
62 | 


--------------------------------------------------------------------------------
/utils/helpers.py:
--------------------------------------------------------------------------------
  1 | import contextlib
  2 | import re
  3 | from datetime import datetime
  4 | from io import BytesIO
  5 | from pathlib import Path
  6 | from random import random
  7 | from typing import Callable, Dict, Literal, Optional, Tuple, Union
  8 | 
  9 | import pandas as pd
 10 | import pdfplumber
 11 | import requests
 12 | import streamlit as st
 13 | from pdf2docx import Converter
 14 | from PIL import Image
 15 | from pypdf import PdfReader, PdfWriter, Transformation
 16 | from pypdf.errors import PdfReadError, PdfStreamError
 17 | from reportlab.lib.pagesizes import letter
 18 | from reportlab.pdfgen import canvas
 19 | from streamlit import session_state
 20 | from streamlit.runtime.uploaded_file_manager import UploadedFile
 21 | from streamlit_pdf_viewer import pdf_viewer
 22 | 
 23 | 
 24 | def select_pages(container, key: str):
 25 |     return container.text_input(
 26 |         "Pages to extract from?",
 27 |         placeholder="all",
 28 |         help="""
 29 |     Format
 30 |     ------
 31 |     **all:** all pages  
 32 |     **2:** 2nd page  
 33 |     **1-3:** pages 1 to 3  
 34 |     **2,4:** pages 2 and 4  
 35 |     **1-3,5:** pages 1 to 3 and 5""",
 36 |         key=key,
 37 |     ).lower()
 38 | 
 39 | 
 40 | @st.cache_data
 41 | def image_to_pdf(stamp_img: Union[Path, str]) -> PdfReader:
 42 |     img = Image.open(stamp_img)
 43 |     img_as_pdf = BytesIO()
 44 |     img.save(img_as_pdf, "pdf")
 45 |     return PdfReader(img_as_pdf)
 46 | 
 47 | 
 48 | @st.cache_data
 49 | def watermark_img(
 50 |     reader: PdfReader,
 51 |     stamp_img: UploadedFile,
 52 | ) -> None:
 53 |     # Convert the image to a PDF
 54 |     stamp_pdf = image_to_pdf(stamp_img)
 55 | 
 56 |     # Then use the same stamp code from above
 57 |     stamp_page = stamp_pdf.pages[0]
 58 | 
 59 |     writer = PdfWriter()
 60 | 
 61 |     writer.append(reader)
 62 | 
 63 |     for content_page in writer.pages:
 64 |         content_page.merge_transformed_page(
 65 |             stamp_page, Transformation(), expand=True, over=False
 66 |         )
 67 | 
 68 |     # TODO: Write to byte_stream
 69 |     with open("watermarked.pdf", "wb") as fp:
 70 |         writer.write(fp)
 71 | 
 72 | 
 73 | def get_option(key: Literal["main", "merge"]) -> str:
 74 |     return st.radio(
 75 |         label="Upload a PDF, or load PDF from a URL",
 76 |         options=(
 77 |             "Upload a PDF ⬆️",
 78 |             "Load PDF from a URL 🌐",
 79 |         ),
 80 |         horizontal=True,
 81 |         help="PDFs are deleted from the server when you\n"
 82 |         "* upload another PDF, or\n"
 83 |         "* clear the file uploader, or\n"
 84 |         "* close the browser tab.",
 85 |         key=f"upload_{key}",
 86 |     )
 87 | 
 88 | 
 89 | def get_password(key: Literal["main", "merge"]) -> Optional[str]:
 90 |     password = st.text_input(
 91 |         "PDF Password",
 92 |         type="password",
 93 |         placeholder="Required if PDF is protected",
 94 |         key=f"password_{key}",
 95 |     )
 96 |     return password if password != "" else None
 97 | 
 98 | 
 99 | def upload_pdf(
100 |     key: Literal["main", "merge"], password: Optional[str]
101 | ) -> Optional[Tuple[bytes, PdfReader]]:
102 |     if file := st.file_uploader(
103 |         label="Upload a PDF",
104 |         type=["pdf"],
105 |         key=f"file_{key}",
106 |     ):
107 |         session_state["file"] = file
108 |         session_state["name"] = file.name
109 |         pdf = file.getvalue()
110 |         try:
111 |             reader = PdfReader(BytesIO(pdf), password=password)
112 |         except PdfReadError:
113 |             reader = PdfReader(BytesIO(pdf))
114 |         return pdf, reader
115 |     return None, None
116 | 
117 | 
def load_pdf_from_url(
    key: Literal["main", "merge"], password: Optional[str]
) -> Optional[Tuple[bytes, PdfReader]]:
    """
    Show a URL input, download the PDF it points to and open it.

    The downloaded bytes and the last path segment of the URL are stored in
    ``session_state`` under ``"file"`` and ``"name"``.

    Args:
        key (Literal["main", "merge"]): Suffix used to build the widget key.
        password (Optional[str]): Password to try when opening the PDF.

    Returns:
        A ``(pdf_bytes, reader)`` pair, or ``(None, None)`` when the URL is
        empty, the download fails, or the content is not a readable PDF.
    """
    url = st.text_input(
        "PDF URL",
        key=f"url_{key}",
        value="https://getsamplefiles.com/download/pdf/sample-1.pdf",
    )

    @st.cache_data
    def _cached_get_url(url: str) -> requests.Response:
        # Without a timeout an unresponsive host would block the script run indefinitely
        return requests.get(url, timeout=30)

    if url == "":
        return None, None

    try:
        response = _cached_get_url(url)
    except requests.RequestException:
        # Covers malformed URLs, connection failures and timeouts
        st.error("The PDF could not be downloaded from the URL.", icon="❌")
        return None, None

    session_state["file"] = pdf = response.content
    session_state["name"] = url.split("/")[-1]
    try:
        try:
            reader = PdfReader(BytesIO(pdf), password=password)
        except PdfReadError:
            # Opening with the password failed; fall back to opening without one
            reader = PdfReader(BytesIO(pdf))
    except PdfStreamError:
        st.error("The URL does not seem to be a valid PDF file.", icon="❌")
        return None, None
    return pdf, reader
144 | 
145 | 
def load_pdf(
    key: Literal["main", "merge"] = "main",
) -> Optional[Tuple[bytes, PdfReader, str, bool]]:
    """
    Ask for a PDF source and password, load the PDF and preview it.

    Args:
        key (Literal["main", "merge"]): Suffix used for the widget keys.

    Returns:
        ``(pdf_bytes, reader, password, is_encrypted)`` when a PDF was loaded,
        otherwise ``(None, None, "", False)``.
    """
    nothing_loaded = (None, None, "", False)

    option = get_option(key)
    password = get_password(key)

    # Dispatch table: radio label -> loader function
    loaders: Dict[str, Callable[[str, str], Tuple[bytes, PdfReader]]] = {
        "Upload a PDF ⬆️": upload_pdf,
        "Load PDF from a URL 🌐": load_pdf_from_url,
    }

    loader = loaders.get(option)
    if not loader:
        return nothing_loaded

    pdf, reader = loader(key, password)
    if not pdf:
        return nothing_loaded

    preview_pdf(reader, pdf, key, password)
    return pdf, reader, password, reader.is_encrypted
171 | 
172 | 
def handle_encrypted_pdf(reader: PdfReader, password: str, key: str) -> None:
    """
    Decrypt a protected PDF to a file and display it, or ask for a password.

    Args:
        reader (PdfReader): Reader over the encrypted PDF.
        password (str): Password used for decryption; empty means none was given.
        key (str): ``"main"`` selects the tall viewer, anything else the short one.

    Returns:
        None
    """
    if not password:
        st.error("Password required", icon="🔒")
        return

    decrypted_name = f"unprotected_{session_state['name']}"
    session_state["decrypted_filename"] = decrypted_name
    decrypt_pdf(
        reader,
        password,
        filename=decrypted_name,
    )

    if key == "main":
        viewer_height = 600
    else:
        viewer_height = 250
    # A random key is generated for the viewer on every call
    pdf_viewer(
        decrypted_name,
        height=viewer_height,
        key=str(random()),
    )
188 | 
189 | 
def handle_unencrypted_pdf(pdf: bytes, key: str) -> None:
    """
    Display an unprotected PDF in the viewer.

    Args:
        pdf (bytes): The PDF content to show.
        key (str): ``"main"`` selects the tall viewer, anything else the short one.

    Returns:
        None
    """
    if key == "main":
        viewer_height = 600
    else:
        viewer_height = 250
    # A random key is generated for the viewer on every call
    pdf_viewer(pdf, height=viewer_height, key=str(random()))
196 | 
197 | 
def display_metadata(reader: PdfReader) -> None:
    """
    Show the page count and document information of a PDF as a table.

    Args:
        reader (PdfReader): Reader over the PDF to describe.

    Returns:
        None
    """
    metadata = {"Number of pages": len(reader.pages)}

    # pypdf returns None when the PDF has no document information dictionary
    info = reader.metadata
    if info is not None:
        for k in info:
            value = info[k]
            # Only strings can be PDF date strings; other value types are shown as-is
            if isinstance(value, str) and is_pdf_datetime(value):
                value = convert_pdf_datetime(value)

            # Keys carry a leading "/" (e.g. "/Author"); drop it for display
            metadata[k.replace("/", "")] = value

    metadata = pd.DataFrame.from_dict(metadata, orient="index", columns=["Value"])
    metadata.index.name = "Metadata"

    st.dataframe(metadata)
211 | 
212 | 
def preview_pdf(
    reader: PdfReader,
    pdf: bytes = None,
    key: Literal["main", "other"] = "main",
    password: str = "",
) -> None:
    """
    Render a preview of the PDF and, for the main view, its metadata.

    Args:
        reader (PdfReader): Reader over the PDF.
        pdf (bytes): Raw PDF content shown when the document is not encrypted.
        key (Literal["main", "other"]): ``"main"`` renders the two-column layout
            with expanders; any other value renders only the viewer.
        password (str): Password forwarded to the encrypted-PDF handler.

    Returns:
        None
    """

    def _show_document() -> None:
        # Pick the viewer path that matches the document's encryption state
        if reader.is_encrypted:
            handle_encrypted_pdf(reader, password, key)
        else:
            handle_unencrypted_pdf(pdf, key)

    # NameError raised while rendering is deliberately ignored
    with contextlib.suppress(NameError):
        if key != "main":
            _show_document()
        else:
            preview_col, metadata_col = st.columns([2, 1])
            with preview_col.expander("📄 **Preview**", expanded=bool(pdf)):
                _show_document()

            with metadata_col.expander("🗄️ **Metadata**"):
                display_metadata(reader)
234 | 
235 | 
@st.cache_data
def is_pdf_datetime(s: str) -> bool:
    """
    Tell whether a string is a full PDF date string (``D:YYYYMMDDHHmmSS`` + timezone).

    Accepted timezone suffixes are none at all, a bare ``Z``, or ``+``/``-``/``Z``
    followed by ``HH'mm`` with an optional trailing apostrophe.

    Args:
        s (str): The candidate string.

    Returns:
        bool: True when ``s`` has the PDF date layout, False otherwise
        (including when ``s`` is not a string).
    """
    if not isinstance(s, str):
        return False
    # 14 digits of date/time, then an optional timezone designator
    pattern = r"^D:\d{14}(?:Z|[+\-Z]\d{2}'\d{2}'?)?$"
    return bool(re.match(pattern, s))
240 | 
241 | 
@st.cache_data
def convert_pdf_datetime(pdf_datetime: str) -> str:
    """
    Reformat a PDF date string as ``YYYY-MM-DD HH:MM:SS <timezone>``.

    Args:
        pdf_datetime (str): Date string starting with ``D:`` followed by 14
            digits of date and time and then the timezone part.

    Returns:
        str: The formatted timestamp, a space, and the timezone part unchanged.
    """
    # Skip the two-character "D:" prefix
    body = pdf_datetime[2:]

    # First 14 characters are YYYYMMDDHHMMSS, the rest is the timezone
    stamp, tz_suffix = body[:14], body[14:]

    parsed = datetime.strptime(stamp, "%Y%m%d%H%M%S")
    return parsed.strftime("%Y-%m-%d %H:%M:%S ") + tz_suffix
258 | 
259 | 
@st.cache_data
def parse_page_numbers(page_numbers_str):
    """
    Turn a page selection such as ``"1-3,5"`` into zero-based page indices.

    Args:
        page_numbers_str (str): Comma-separated page numbers and ``start-end``
            ranges, using one-based page numbers.

    Returns:
        list: Zero-based indices in the order given; ranges are inclusive.

    Raises:
        ValueError: If a part is not a number or range, or if a page number
            below 1 is given.
    """
    parsed_page_numbers = []

    # Parts are separated by commas; a hyphen inside a part marks a range
    for part in page_numbers_str.split(","):
        part = part.strip()

        # Tolerate stray or trailing commas such as "1,2,"
        if not part:
            continue

        if "-" in part:
            start, end = map(int, part.split("-"))
        else:
            start = end = int(part)

        # Page 0 would become index -1 and silently select the last page
        if start < 1:
            raise ValueError(f"Page numbers start at 1, got {start}")

        parsed_page_numbers.extend(range(start, end + 1))

    return [i - 1 for i in parsed_page_numbers]
282 | 
283 | 
def extract_text(
    reader: PdfReader,
    page_numbers_str: str = "all",
    mode: Literal["plain", "layout"] = "plain",
) -> str:
    """
    Extract text from all or selected pages of a PDF.

    Args:
        reader (PdfReader): Reader over the PDF.
        page_numbers_str (str): ``"all"`` or a selection understood by
            ``parse_page_numbers`` (e.g. ``"1-3,5"``).
        mode (Literal["plain", "layout"]): Extraction mode passed to every
            page's ``extract_text``.

    Returns:
        str: The text of each selected page, each preceded by a single space.
    """
    if page_numbers_str == "all":
        pages = reader.pages
    else:
        pages = (reader.pages[index] for index in parse_page_numbers(page_numbers_str))

    # The extraction mode applies to explicitly selected pages as well as to "all"
    return "".join(" " + page.extract_text(extraction_mode=mode) for page in pages)
300 | 
301 | 
def extract_images(
    reader: PdfReader, page_numbers_str: str = "all"
) -> Dict[bytes, str]:
    """
    Collect the images found on all or selected pages of a PDF.

    Args:
        reader (PdfReader): Reader over the PDF.
        page_numbers_str (str): ``"all"`` or a selection understood by
            ``parse_page_numbers`` (e.g. ``"1-3,5"``).

    Returns:
        Dict[bytes, str]: Mapping of image data to image name. Because the data
        is the key, images with identical data appear once.
    """
    if page_numbers_str == "all":
        pages = reader.pages
    else:
        pages = (reader.pages[index] for index in parse_page_numbers(page_numbers_str))

    images: Dict[bytes, str] = {}
    for page in pages:
        images.update({image.data: image.name for image in page.images})

    return images
316 | 
317 | 
def extract_tables(file, page_numbers_str):
    """
    Extract tables from a PDF with pdfplumber and write each one to the page.

    Args:
        file: PDF content as bytes, or anything else ``pdfplumber.open`` accepts.
        page_numbers_str (str): ``"all"`` or a selection understood by
            ``parse_page_numbers`` (e.g. ``"1-3,5"``).

    Returns:
        None
    """
    st.caption(
        "Adjust vertical and horizontal strategies for better extraction. Read details about the strategies [here](https://github.com/jsvine/pdfplumber?tab=readme-ov-file#table-extraction-strategies)."
    )
    strategy_choices = ("lines", "lines_strict", "text")
    vertical_col, horizontal_col = st.columns(2)
    vertical_strategy = vertical_col.selectbox(
        "Vertical strategy",
        list(strategy_choices),
        index=2,
    )
    horizontal_strategy = horizontal_col.selectbox(
        "Horizontal strategy",
        list(strategy_choices),
        index=2,
    )

    header = st.checkbox("Header")

    source = BytesIO(file) if isinstance(file, bytes) else file
    with pdfplumber.open(source, password=session_state["password"]) as table_pdf:
        if page_numbers_str == "all":
            selected_pages = table_pdf.pages
        else:
            # Slicing (rather than indexing) skips indices outside the document
            selected_pages = [
                pdf_page
                for index in parse_page_numbers(page_numbers_str)
                for pdf_page in table_pdf.pages[index : index + 1]
            ]

        for pdf_page in selected_pages:
            found_tables = pdf_page.extract_tables(
                {
                    "vertical_strategy": vertical_strategy,
                    "horizontal_strategy": horizontal_strategy,
                }
            )
            for table in found_tables:
                if header:
                    # First row supplies the column names, the rest is data
                    frame = pd.DataFrame(table[1:], columns=table[0])
                else:
                    frame = pd.DataFrame(table[0:], columns=None)
                st.write(frame)
372 | 
373 | 
def decrypt_pdf(reader: PdfReader, password: str, filename: str) -> None:
    """
    Decrypt a PDF and write its pages to a file.

    Args:
        reader (PdfReader): Reader over the encrypted PDF; it is decrypted in place.
        password (str): Password passed to ``reader.decrypt``.
        filename (str): Path of the output file, opened for binary writing.

    Returns:
        None
    """
    reader.decrypt(password)

    unlocked = PdfWriter()
    for source_page in reader.pages:
        unlocked.add_page(source_page)

    with open(filename, "wb") as output_file:
        unlocked.write(output_file)
384 | 
385 | 
@st.cache_data
def remove_images(pdf: bytes, remove_images: bool, password: str) -> bytes:
    """
    Rebuild a PDF, optionally stripping its images.

    Args:
        pdf (bytes): The source PDF content.
        remove_images (bool): When True, images are removed from the output.
        password (str): Password used if the PDF is encrypted.

    Returns:
        bytes: The rewritten PDF.
    """
    reader = PdfReader(BytesIO(pdf))

    if reader.is_encrypted:
        reader.decrypt(password)

    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)

    # metadata is None when the PDF has no document information dictionary
    source_metadata = reader.metadata
    if source_metadata is not None:
        writer.add_metadata(source_metadata)

    if remove_images:
        writer.remove_images()

    bytes_stream = BytesIO()
    writer.write(bytes_stream)

    return bytes_stream.getvalue()
409 | 
410 | 
def reduce_image_quality(pdf: bytes, quality: int, password: str) -> bytes:
    """
    Rebuild a PDF with every image re-encoded at the given quality.

    Args:
        pdf (bytes): The source PDF content.
        quality (int): Quality value passed to each image's ``replace`` call.
        password (str): Password used if the PDF is encrypted.

    Returns:
        bytes: The rewritten PDF.
    """
    reader = PdfReader(BytesIO(pdf))

    if reader.is_encrypted:
        reader.decrypt(password)

    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)

    # metadata is None when the PDF has no document information dictionary
    source_metadata = reader.metadata
    if source_metadata is not None:
        writer.add_metadata(source_metadata)

    for page in writer.pages:
        for img in page.images:
            img.replace(img.image, quality=quality)

    bytes_stream = BytesIO()
    writer.write(bytes_stream)

    return bytes_stream.getvalue()
434 | 
435 | 
@st.cache_data
def compress_pdf(pdf: bytes, password: str) -> bytes:
    """
    Rewrite a PDF with the content streams of every page compressed.

    Args:
        pdf (bytes): The source PDF content.
        password (str): Password used if the PDF is encrypted.

    Returns:
        bytes: The rewritten PDF.
    """
    source = PdfReader(BytesIO(pdf))
    if source.is_encrypted:
        source.decrypt(password)

    compressed = PdfWriter(clone_from=source)
    for cloned_page in compressed.pages:
        # This is CPU intensive!
        cloned_page.compress_content_streams()

    output = BytesIO()
    compressed.write(output)
    return output.getvalue()
453 | 
454 | 
@st.cache_data
def convert_pdf_to_word(pdf):
    """
    Convert a PDF to a Word document held in memory.

    The password is read from ``session_state.password``.

    Args:
        pdf: PDF content passed to ``Converter`` as its ``stream`` argument.

    Returns:
        BytesIO: Stream positioned at the start of the generated .docx data.
    """
    cv = Converter(stream=pdf, password=session_state.password)
    docx_stream = BytesIO()
    try:
        cv.convert(docx_stream, start=0, end=None)
    finally:
        # Close the converter even when the conversion raises
        cv.close()

    docx_stream.seek(0)
    return docx_stream
464 | 
465 | 
def hex_to_rgb(hex_color: str) -> Tuple[float, float, float]:
    """
    Convert a hexadecimal color code to an RGB color tuple.

    Both the six-digit form (``#rrggbb``) and the three-digit shorthand
    (``#rgb``) are accepted; the leading ``#`` is optional.

    Args:
        hex_color (str): The hexadecimal color code.

    Returns:
        Tuple[float, float, float]: The RGB color tuple, each channel in [0, 1].

    Raises:
        ValueError: If a channel is not valid hexadecimal.
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        # Expand shorthand: "abc" -> "aabbcc"
        hex_color = "".join(digit * 2 for digit in hex_color)
    return tuple(int(hex_color[i : i + 2], 16) / 255 for i in (0, 2, 4))
478 | 
479 | 
def draw_watermark_grid(
    can, stamp_label: str, step_x: int, step_y: int, width: float, height: float
) -> None:
    """
    Stamp the label across the canvas in a regular grid, each copy rotated 45 degrees.

    The grid starts 100 units before the origin and stops 100 units past the
    canvas size on both axes.

    Args:
        can (canvas.Canvas): The canvas to draw the watermarks on.
        stamp_label (str): The label to be displayed as the watermark.
        step_x (int): The horizontal spacing between watermarks.
        step_y (int): The vertical spacing between watermarks.
        width (float): The width of the canvas.
        height (float): The height of the canvas.

    Returns:
        None
    """
    column_offsets = range(-100, int(width) + 100, step_x)
    row_offsets = range(-100, int(height) + 100, step_y)

    for offset_x in column_offsets:
        for offset_y in row_offsets:
            # Save/restore keeps each stamp's translation and rotation isolated
            can.saveState()
            can.translate(offset_x, offset_y)
            can.rotate(45)
            can.drawCentredString(0, 0, stamp_label)
            can.restoreState()
504 | 
505 | 
def merge_watermark_into_pdf(pdf: bytes, watermark: BytesIO) -> bytes:
    """
    Overlay the first page of a watermark PDF onto every page of a document.

    Args:
        pdf (bytes): The PDF document to merge the watermark into.
        watermark (BytesIO): Stream holding the watermark PDF; only its first
            page is used.

    Returns:
        bytes: The merged PDF document.
    """
    output = PdfWriter()
    source = PdfReader(BytesIO(pdf))
    stamp_page = PdfReader(watermark).pages[0]

    for source_page in source.pages:
        source_page.merge_page(stamp_page)
        output.add_page(source_page)

    buffer = BytesIO()
    output.write(buffer)
    return buffer.getvalue()
528 | 
529 | 
def create_watermark_canvas(
    stamp_label: str, stamp_size: int, stamp_color: str, stamp_transparency: float
) -> BytesIO:
    """
    Build a letter-sized PDF page covered with a grid of the watermark label.

    Args:
        stamp_label (str): The label to be displayed as the watermark.
        stamp_size (int): The font size of the watermark.
        stamp_color (str): The color of the watermark in hexadecimal format.
        stamp_transparency (float): Value passed to the canvas as its fill alpha.

    Returns:
        BytesIO: Stream rewound to the start, containing the watermark PDF.
    """
    buffer = BytesIO()
    page_width, page_height = letter[0], letter[1]

    sheet = canvas.Canvas(buffer, pagesize=letter)
    sheet.setFont("Helvetica", stamp_size)
    red, green, blue = hex_to_rgb(stamp_color)
    sheet.setFillColorRGB(red, green, blue)
    sheet.setFillAlpha(stamp_transparency)
    sheet.saveState()
    draw_watermark_grid(
        sheet,
        stamp_label,
        step_x=150,
        step_y=100,
        width=page_width,
        height=page_height,
    )
    sheet.save()

    buffer.seek(0)
    return buffer
558 | 
559 | 
@st.cache_data
def watermark_pdf(
    pdf: bytes,
    stamp_label: str,
    stamp_size: int,
    stamp_color: str,
    stamp_transparency: float,
) -> bytes:
    """
    Add a repeated text watermark to every page of a PDF.

    Args:
        pdf (bytes): The PDF document to watermark.
        stamp_label (str): The label to be displayed as the watermark.
        stamp_size (int): The font size of the watermark.
        stamp_color (str): The color of the watermark in hexadecimal format.
        stamp_transparency (float): The transparency of the watermark.

    Returns:
        bytes: The watermarked PDF document.
    """
    return merge_watermark_into_pdf(
        pdf,
        create_watermark_canvas(
            stamp_label, stamp_size, stamp_color, stamp_transparency
        ),
    )
572 | 


--------------------------------------------------------------------------------
/utils/init_session_states.py:
--------------------------------------------------------------------------------
 1 | from streamlit import session_state
 2 | 
 3 | 
 4 | def init():
 5 |     session_state["decrypted_filename"] = (
 6 |         None
 7 |         if "decrypted_filename" not in session_state
 8 |         else session_state["decrypted_filename"]
 9 |     )
10 |     session_state["password"] = (
11 |         "" if "password" not in session_state else session_state["password"]
12 |     )
13 |     session_state["is_encrypted"] = (
14 |         False if "is_encrypted" not in session_state else session_state["is_encrypted"]
15 |     )
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     init()
20 | 


--------------------------------------------------------------------------------
/utils/page_config.py:
--------------------------------------------------------------------------------
 1 | from streamlit import set_page_config
 2 | 
 3 | from utils import version
 4 | 
 5 | 
 6 | def set():
 7 |     set_page_config(
 8 |         page_title="PDF WorkDesk",
 9 |         page_icon="📄",
10 |         menu_items={
11 |             "About": f"PDF WorkDesk v{version.__version__}  "
12 |             "\nDeveloper contact: [Siddhant Sadangi](mailto:siddhant.sadangi@gmail.com)",
13 |             "Report a Bug": "https://github.com/SiddhantSadangi/pdf-workdesk/issues/new",
14 |             "Get help": None,
15 |         },
16 |         layout="wide",
17 |     )
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     set()
22 | 


--------------------------------------------------------------------------------
/utils/render_sidebar.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from st_social_media_links import SocialMediaIcons
 3 | 
 4 | from utils.version import __version__
 5 | 
 6 | 
def render():
    """Draw the sidebar: supported operations, the HTML panel, share icons and license."""
    with st.sidebar:
        # Collapsible list of the operations the app offers
        with st.expander("✅ Supported operations"):
            st.write(
                "* Upload from disk/URL\n"
                "* Preview content/metadata\n"
                "* Extract text/images/tables\n"
                "* Convert PDF to Word\n"
                "* Add/remove password\n"
                "* Rotate/resize PDF\n"
                "* Merge PDFs\n"
                "* Reduce PDF size\n"
            )

        # The path is relative, so it is resolved against the current working directory
        with open("assets/sidebar.html", "r", encoding="UTF-8") as sidebar_file:
            # Substitute the {VERSION} placeholder with the package version
            sidebar_html = sidebar_file.read().replace("{VERSION}", __version__)

        st.components.v1.html(sidebar_html, height=290)

        st.html(
            """
                <div style="text-align:center; font-size:14px; color:lightgrey">
                    <hr style="margin-bottom: 6%; margin-top: 0%;">
                    Share the ❤️ on social media
                </div>"""
        )

        # Share links (Facebook, LinkedIn, X) pointing at the deployed app
        social_media_links = [
            "https://www.facebook.com/sharer/sharer.php?kid_directed_site=0&sdk=joey&u=https%3A%2F%2Fpdfworkdesk.streamlit.app%2F&display=popup&ref=plugin&src=share_button",
            "https://www.linkedin.com/sharing/share-offsite/?url=https%3A%2F%2Fpdfworkdesk.streamlit.app%2F",
            "https://x.com/intent/tweet?original_referer=https%3A%2F%2Fpdfworkdesk.streamlit.app%2F&ref_src=twsrc%5Etfw%7Ctwcamp%5Ebuttonembed%7Ctwterm%5Eshare%7Ctwgr%5E&text=Check%20out%20this%20open-source%20PDF-editing%20Streamlit%20app%21&url=https%3A%2F%2Fpdfworkdesk.streamlit.app%2F",
        ]

        # One "lightgray" color entry per link
        social_media_icons = SocialMediaIcons(
            social_media_links, colors=["lightgray"] * len(social_media_links)
        )

        social_media_icons.render(sidebar=True)

        # License notice at the bottom of the sidebar
        st.html(
            """
                <div style="text-align:center; font-size:12px; color:lightgrey">
                    <hr style="margin-bottom: 6%; margin-top: 6%;">
                    <a rel="license" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
                        <img alt="Creative Commons License" style="border-width:0"
                            src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" />
                    </a><br><br>
                    This work is licensed under a <b>Creative Commons
                        Attribution-NonCommercial-ShareAlike 4.0 International License</b>.<br>
                    You can modify and build upon this work non-commercially. All derivatives should be
                    credited to Siddhant Sadangi and
                    be licenced under the same terms.
                </div>
            """
        )
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     render()
66 | 


--------------------------------------------------------------------------------
/utils/version.py:
--------------------------------------------------------------------------------
# Application version; shown in the page "About" menu and substituted into the sidebar HTML
__version__ = "0.7.1"
2 | 


--------------------------------------------------------------------------------