├── README.md └── pdfs_ai_rename.py /README.md: -------------------------------------------------------------------------------- 1 | # PDFs-AI-rename 2 | 3 | > **We've transformed this open-source project into a user-friendly product that's ready to use instantly. For an easy AI-powered file management solution, visit [riffo.ai](https://riffo.ai). Our platform offers enhanced functionality with a simpler interface, making it accessible to everyone.** 4 | 5 | > Check out [riffo.ai](https://riffo.ai) to see how we've streamlined your document organization process. 6 | 7 | Imagine you have a folder full of PDF documents, and you're looking for a quick and creative way to rename them. This Python script is designed to help you do just that! It's like having a personal assistant who reads through your PDFs and suggests new, concise names for each file. 8 | 9 | Here's how it works: 10 | 11 | 1. **Read PDFs**: The script scans a specified folder on your computer, looking for PDF files. It then reads the text from each PDF, pulling out the information that will inspire the new file names. 12 | 13 | 2. **Generate Names**: With the content of your PDFs in hand, the script sends a message to a smart AI service (think of it like a really intelligent wordsmith). This AI takes the content, thinks about it, and comes up with a short, catchy name for each file – no more than 50 characters long. 14 | 15 | 3. **Rename Files**: Once the AI has suggested new names, the script goes back to your folder and renames each PDF accordingly. Now, instead of generic names like "document1.pdf" or "report.pdf," you have meaningful, concise titles that reflect the content of each file. 16 | 17 | This script is perfect for anyone who wants to organize their digital files more efficiently or add a touch of creativity to their file naming process. It's a simple tool that can save you time and make your file management a little more enjoyable.
"""Rename the PDF files in a directory using names suggested by an OpenAI model.

For every PDF in the chosen directory the script extracts the text of the
first page, sends it to the chat-completions API, sanitises the reply into a
safe file name and renames the file accordingly.

Requirements: Python with the third-party packages ``PyPDF2``, ``tiktoken``
and ``openai`` installed, plus an OpenAI API key.  The client is constructed
without arguments, so the key is presumably picked up from the environment
(e.g. ``OPENAI_API_KEY``) -- confirm against the openai package docs.  The
script renames files in place, so run it on a directory you are comfortable
modifying.
"""

import os
import re
import time
from functools import lru_cache

# NOTE: PyPDF2, tiktoken and openai are imported inside the functions that use
# them.  Importing this module therefore needs neither the packages nor an API
# key, and the pure helpers stay usable on their own.

# OpenAI client; created on first use by ``_get_client``.
client = None

# Maximum number of tokens of PDF text sent to the model.
max_length = 15000

# Hard cap on the length of a generated file name (extension excluded).
MAX_FILENAME_LENGTH = 100

# Anything that is not an ASCII letter, digit or underscore is dropped.
_DISALLOWED_CHARS = re.compile(r"[^A-Za-z0-9_]")

_PDF_SUFFIX = ".pdf"


def _get_client():
    """Return the shared OpenAI client, creating it on first use."""
    global client
    if client is None:
        from openai import OpenAI

        client = OpenAI()
    return client


@lru_cache(maxsize=1)
def _get_encoding():
    """Return the ``cl100k_base`` tiktoken encoding (loaded once)."""
    import tiktoken

    return tiktoken.get_encoding("cl100k_base")


def _count_tokens(text):
    """Return the number of ``cl100k_base`` tokens in *text*."""
    return len(_get_encoding().encode(text))


def get_new_filename_from_openai(pdf_content):
    """Ask the chat model for a file name describing *pdf_content*.

    Args:
        pdf_content: Text extracted from the PDF.

    Returns:
        A sanitised file name without extension (see
        ``validate_and_trim_filename``).
    """
    # NOTE(review): the prompt both claims the assistant is "designed to
    # output JSON" and forbids JSON replies.  The text is kept unchanged
    # because editing it alters model behaviour; consider rewording.
    system_prompt = (
        "You are a helpful assistant designed to output JSON. "
        "Please reply with a filename that consists only of English "
        "characters, numbers, and underscores, and is no longer than 50 "
        "characters. Do not include characters outside of these, as the "
        "system may crash. Do not reply in JSON format, just reply with text."
    )
    response = _get_client().chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": pdf_content},
        ],
    )
    initial_filename = response.choices[0].message.content
    return validate_and_trim_filename(initial_filename)


def validate_and_trim_filename(initial_filename):
    """Turn a model reply into a safe file name (without extension).

    Surrounding whitespace and a trailing ``.pdf`` are removed (the caller
    appends the extension itself), every character other than ASCII letters,
    digits and underscores is dropped, and the result is truncated to
    ``MAX_FILENAME_LENGTH`` characters.

    Args:
        initial_filename: Raw text returned by the model; may be empty or
            ``None``.

    Returns:
        The cleaned name, or ``empty_file_<UTC timestamp>`` when nothing
        usable remains.
    """
    if initial_filename:
        candidate = initial_filename.strip()
        if candidate.lower().endswith(_PDF_SUFFIX):
            candidate = candidate[: -len(_PDF_SUFFIX)]
        cleaned = _DISALLOWED_CHARS.sub("", candidate)
        if cleaned:
            return cleaned[:MAX_FILENAME_LENGTH]

    timestamp = time.strftime('%Y%m%d%H%M%S', time.gmtime())
    return f'empty_file_{timestamp}'


def _unique_pdf_path(directory, base_name, current_path):
    """Return a ``.pdf`` path in *directory* that does not clobber another file.

    Tries ``base_name.pdf`` first, then ``base_name_01.pdf``,
    ``base_name_02.pdf`` and so on.  A candidate that is the file being
    renamed itself is accepted, so an already well-named file keeps its name.
    """
    candidate = os.path.join(directory, base_name + _PDF_SUFFIX)
    counter = 0
    while os.path.exists(candidate) and not os.path.samefile(candidate, current_path):
        if counter == 0:
            print(f"The new filename '{base_name}' already exists.")
        counter += 1
        candidate = os.path.join(directory, f"{base_name}_{counter:02d}{_PDF_SUFFIX}")
    return candidate


def rename_pdfs_in_directory(directory):
    """Rename every PDF directly inside *directory* to a model-suggested name.

    Files are processed from most to least recently modified.  A failed
    rename is reported and the loop continues; errors while reading a PDF or
    calling the API propagate to the caller.

    Args:
        directory: Path of the directory to scan (not recursive).
    """
    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    files.sort(key=lambda name: os.path.getmtime(os.path.join(directory, name)), reverse=True)

    for filename in files:
        if not filename.lower().endswith(_PDF_SUFFIX):
            continue

        filepath = os.path.join(directory, filename)
        print(f"Reading file {filepath}")
        pdf_content = pdfs_to_text_string(filepath)
        new_file_name = get_new_filename_from_openai(pdf_content)

        # Compare full "<name>.pdf" paths so an existing file is never
        # overwritten by os.rename.
        new_filepath = _unique_pdf_path(directory, new_file_name, filepath)
        if new_filepath == filepath:
            print(f"File {filepath} already has the suggested name.")
            continue

        try:
            os.rename(filepath, new_filepath)
        except OSError as e:
            print(f"An error occurred while renaming the file: {e}")
        else:
            print(f"File renamed to {new_filepath}")


def pdfs_to_text_string(filepath):
    """Return the text of the first page of the PDF at *filepath*.

    When the PDF has no pages or the first page yields no text, a placeholder
    sentence is returned instead.  Text longer than ``max_length`` tokens is
    shortened with ``content_token_cut``.
    """
    from PyPDF2 import PdfReader

    with open(filepath, 'rb') as file:
        reader = PdfReader(file)
        pages = reader.pages
        # Guard against zero-page documents instead of raising IndexError.
        content = pages[0].extract_text() if len(pages) > 0 else ""

    if not content or not content.strip():
        content = "Content is empty or contains only whitespace."

    num_tokens = _count_tokens(content)
    if num_tokens > max_length:
        content = content_token_cut(content, num_tokens, max_length)
    return content


def content_token_cut(content, num_tokens, max_length, count_tokens=None):
    """Shorten *content* until it holds at most *max_length* tokens.

    Each round scales the character length by ``max_length / num_tokens``
    with a 10% safety margin, then recounts.  The text always loses at least
    one character per round, so the loop terminates.

    Args:
        content: Text to shorten.
        num_tokens: Current token count of *content*.
        max_length: Maximum number of tokens allowed.
        count_tokens: Optional callable mapping text to its token count;
            defaults to counting ``cl100k_base`` tokens with tiktoken.

    Returns:
        A prefix of *content* within the token budget (empty when
        *max_length* is not positive).
    """
    if count_tokens is None:
        count_tokens = _count_tokens
    if max_length <= 0:
        return ""

    while num_tokens > max_length and content:
        ratio = num_tokens / max_length
        target_length = int(len(content) / ratio * 0.9)
        # Force progress even when rounding leaves the length unchanged.
        new_length = min(target_length, len(content) - 1)
        content = content[:new_length]
        num_tokens = count_tokens(content)
    return content


def main():
    """Prompt for a directory if none is hard-coded, then rename its PDFs."""
    directory = ''  # Replace with your PDF directory path
    if directory == '':
        directory = input("Please input your path:").strip()
    if not os.path.isdir(directory):
        print(f"'{directory}' is not a directory.")
        return
    rename_pdfs_in_directory(directory)


if __name__ == "__main__":
    main()