├── README.md ├── example_short.pdf └── ncode.py /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Summarize a PDF using ChatGPT and Python 3 | 4 | Check out this incredible project! I have developed a command-line application that harnesses the power of OpenAI's GPT-3 models and Python to perform a variety of amazing feats, including: 5 | 6 | Converting an example.pdf file to an example.txt file and removing all pesky images 7 | Slicing the text into bite-sized chunks for easy reading 8 | Summarizing these chunks to extract the most important information 9 | Merging all of these chunks into a cohesive summary 10 | Creating key notes based on this summary for easy reference 11 | Developing a step-by-step guide to walk you through the material 12 | Distilling the bare essentials of the book into an easy-to-digest format 13 | Writing a captivating blog post based on the notes 14 | Generating Midjourney prompts to engage and excite readers with stunning illustrations 15 | With this groundbreaking technology at your fingertips, you'll be able to streamline your workflow, save valuable time, and delve deeper into the text than ever before! 
16 | 17 | 18 | 19 | 20 | ## Authors 21 | 22 | - [@alaminjava](https://github.com/alaminjava) 23 | 24 | 25 | ## Badges 26 | 27 | Add badges from somewhere like: [shields.io](https://shields.io/) 28 | 29 | [![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](https://choosealicense.com/licenses/mit/) 30 | 31 | 32 | ## Demo 33 | 34 | https://im4.ezgif.com/tmp/ezgif-4-c6506fab22.webm 35 | ## 🚀 About Me 36 | I'm a seasoned Java Developer and Digital Marketing professional who stays up to date with the constantly evolving technologies in online social networking. 37 | 38 | 39 | ## Features 40 | 41 | - Converting an example.pdf file to an example.txt file and removing all pesky images 42 | - Slicing the text into bite-sized chunks for easy reading 43 | - Summarizing these chunks to extract the most important information 44 | - Merging all of these chunks into a cohesive summary 45 | - Creating key notes based on this summary for easy reference 46 | - Developing a step-by-step guide to walk you through the material 47 | - Distilling the bare essentials of the book into an easy-to-digest format 48 | - Writing a captivating blog post based on the notes 49 | - Generating Midjourney prompts to engage and excite readers with stunning illustrations 50 | 51 | 52 | 53 | ## 🔗 Links 54 | [![portfolio](https://img.shields.io/badge/my_portfolio-000?style=for-the-badge&logo=ko-fi&logoColor=white)](https://www.upwork.com/freelancers/~01dce9578dac52e244/) 55 | [![linkedin](https://img.shields.io/badge/linkedin-0A66C2?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/marketeralamin) 56 | [![twitter](https://img.shields.io/badge/twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://twitter.com/marketeralamin_) 57 | 58 | -------------------------------------------------------------------------------- /example_short.pdf: -------------------------------------------------------------------------------- 
"""Summarize a PDF with the OpenAI completion API.

Pipeline: extract the text of a PDF, drop image-placeholder words, cut the
text into fixed-size character chunks, summarize every chunk, condense the
chunk summaries into one paragraph, and derive key notes, a step-by-step
guide, the bare essentials, a blog post and illustration prompts from it.

Usage:
    OPENAI_API_KEY=... python ncode.py [path/to/file.pdf]

The API key is read from the ``OPENAI_API_KEY`` environment variable and is
never stored in this file.
NOTE: a key was previously committed in this script; it must be treated as
leaked and revoked.
"""
import os
import sys
import time  # not used by the pipeline; kept from the original import list

# `openai` and `PyPDF2` are imported inside the functions that need them so
# the pure text helpers below stay importable without those packages.

DEFAULT_PDF_PATH = "example_short.pdf"
CHUNK_SIZE = 500  # characters per chunk sent to the model

# Words starting with one of these prefixes are treated as image placeholders.
IMAGE_WORD_PREFIXES = ("Image", "IMAGE")

# Engine names as used by the original script (legacy completion models).
CHUNK_ENGINE = "text-davinci-003"
TEXT_ENGINE = "text-davinci-002"
# NOTE(review): "davinci" is a base model, unlike the two above - confirm
# that it is really the intended engine for the illustration prompts.
ILLUSTRATION_ENGINE = "davinci"


def extract_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Works with both the current PyPDF2 reader (``PdfReader``) and the legacy
    one (``PdfFileReader``), whichever the installed version provides.
    """
    import PyPDF2

    # The pages are read inside the `with` block: the reader needs the file
    # to still be open, and the handle is closed even if extraction fails.
    with open(pdf_path, "rb") as pdf_file:
        if hasattr(PyPDF2, "PdfReader"):
            reader = PyPDF2.PdfReader(pdf_file)
            page_texts = [page.extract_text() for page in reader.pages]
        else:
            reader = PyPDF2.PdfFileReader(pdf_file)
            page_texts = [
                reader.getPage(page_num).extractText()
                for page_num in range(reader.numPages)
            ]
    # A page without extractable text may yield None; treat it as empty.
    return "".join(page_text or "" for page_text in page_texts)


def strip_image_words(text):
    """Return *text* with image-placeholder words removed.

    The text is split on whitespace, every word starting with "Image" or
    "IMAGE" is dropped, and the rest is re-joined with single spaces.
    """
    return " ".join(
        word for word in text.split()
        if not word.startswith(IMAGE_WORD_PREFIXES)
    )


def chunk_text(text, chunk_size=CHUNK_SIZE):
    """Split *text* into consecutive slices of at most *chunk_size* characters.

    Returns an empty list for empty text.

    Raises:
        ValueError: if *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]


def complete(prompt, max_tokens, engine):
    """Send *prompt* to the OpenAI completion endpoint and return its text.

    Uses the same legacy ``openai.Completion`` call and sampling settings as
    the original script; the returned text is stripped of outer whitespace.
    """
    import openai

    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text.strip()


def summarize_document(text, chunk_size=CHUNK_SIZE, complete_fn=None):
    """Run the whole summarization pipeline on *text*.

    Args:
        text: raw text extracted from the document.
        chunk_size: number of characters per chunk.
        complete_fn: callable ``(prompt, max_tokens, engine) -> str`` used to
            query the model; defaults to :func:`complete`.

    Returns:
        A dict, in output order, with the keys ``summary``,
        ``merged_summary``, ``key_notes``, ``steps``, ``bare_essentials``,
        ``blog_post`` and ``mid_journey_prompts``.

    Raises:
        ValueError: if *text* contains nothing to summarize.
    """
    ask = complete if complete_fn is None else complete_fn

    chunks = chunk_text(strip_image_words(text), chunk_size)
    if not chunks:
        raise ValueError("no text to summarize")

    # One sentence per chunk.
    summary = " ".join(
        ask(chunk + "\nSummarize the above text in one sentence.",
            50, CHUNK_ENGINE)
        for chunk in chunks
    )

    # Condense the chunk summaries, not the raw chunks: re-joining the raw
    # chunks would send the entire document in a single prompt, which is
    # exactly what the chunking is there to avoid.
    merged_summary = ask(
        summary + "\nSummarize the above text in one paragraph.",
        200, TEXT_ENGINE)

    key_notes = ask(
        merged_summary + "\nList the key points from the above text.",
        100, TEXT_ENGINE)

    steps = ask(
        key_notes
        + "\nWrite a step-by-step guide based on the above key points.",
        300, TEXT_ENGINE)

    bare_essentials = ask(
        key_notes
        + "\nSummarize the key points into the bare essentials of the book.",
        100, TEXT_ENGINE)

    blog_post = ask(
        key_notes + "\nWrite a blog post based on the above key points.",
        1000, TEXT_ENGINE)

    mid_journey_prompts = ask(
        key_notes + "\nCreate mid-journey prompts from the above key points.",
        300, ILLUSTRATION_ENGINE)

    return {
        "summary": summary,
        "merged_summary": merged_summary,
        "key_notes": key_notes,
        "steps": steps,
        "bare_essentials": bare_essentials,
        "blog_post": blog_post,
        "mid_journey_prompts": mid_journey_prompts,
    }


def main(argv=None):
    """Summarize the PDF named on the command line and print every result.

    Falls back to ``example_short.pdf`` when no path is given. Exits with an
    error message if the API key is missing or the PDF has no usable text.
    """
    args = sys.argv[1:] if argv is None else argv
    pdf_path = args[0] if args else DEFAULT_PDF_PATH

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise SystemExit(
            "Set the OPENAI_API_KEY environment variable before running.")

    import openai

    openai.api_key = api_key

    try:
        results = summarize_document(extract_text(pdf_path))
    except ValueError as err:
        raise SystemExit(
            f"No extractable text found in {pdf_path}: {err}") from err

    for title, body in results.items():
        print("------------" + title + "-----------")
        print(body)


if __name__ == "__main__":
    main()