# scrape_loras.py
"""Collect FLUX LoRA models from the Hugging Face Hub and publish them as a dataset.

For every model tagged both ``lora`` and ``flux`` the script records the repo
id, a human-readable title, an example image URL (when one can be found) and a
trigger word (when one can be found), writes the result to
``loras_fetched.json`` and pushes it to the Hub as a dataset.
"""

import json
import logging
import re

from datasets import Dataset
from huggingface_hub import HfApi, list_models

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# File suffixes treated as example images when scanning a repository.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".webp")

# README patterns, tried in order; the first capture group is the trigger word.
_README_TRIGGER_PATTERNS = (
    re.compile(r"trigger word[s]?[\s]*:[\s]*[\"']?([^\"'\n]+)[\"']?", re.IGNORECASE),
    re.compile(r"prompt[s]?[\s]*:[\s]*[\"']?([^\"'\n]+)[\"']?", re.IGNORECASE),
)


def _card_field(card_data, key):
    """Read ``key`` from card metadata that may be a dict or a card-data object."""
    if card_data is None:
        return None
    if isinstance(card_data, dict):
        return card_data.get(key)
    getter = getattr(card_data, "get", None)
    if callable(getter):
        return getter(key)
    return getattr(card_data, key, None)


def _trigger_from_tags(tags):
    """Return the trigger word encoded in the first matching tag, or ``None``.

    A tag matches when it contains ``trigger`` or ``prompt``; the text after
    the last ``:`` is used as the trigger word.
    """
    if isinstance(tags, str):
        tags = [tags]
    for tag in tags or ():
        if isinstance(tag, str) and ("trigger" in tag or "prompt" in tag):
            return tag.split(":")[-1].strip()
    return None


def _image_from_widgets(card_data, repo_id):
    """Return an example image URL declared in the model card metadata, if any."""
    # Shape the script originally looked for: widgets[].type == "image" with "src".
    for widget in _card_field(card_data, "widgets") or ():
        if isinstance(widget, dict) and widget.get("type") == "image" and "src" in widget:
            return widget["src"]

    # NOTE(review): Hub model cards document example images under
    # `widget[].output.url`, usually as a path relative to the repo root.
    for widget in _card_field(card_data, "widget") or ():
        if not isinstance(widget, dict):
            continue
        output = widget.get("output")
        url = output.get("url") if isinstance(output, dict) else None
        if isinstance(url, str) and url:
            if url.startswith(("http://", "https://")):
                return url
            return f"https://huggingface.co/{repo_id}/resolve/main/{url.lstrip('/')}"
    return None


def _first_repo_image(api, repo_id):
    """Return the URL of the first image file in the repo, or ``None``."""
    try:
        files = api.list_repo_files(repo_id)
    except Exception as e:  # best effort: a failed listing must not drop the model
        logger.warning("Error fetching files for %s: %s", repo_id, e)
        return None
    for name in files:
        if name.lower().endswith(_IMAGE_SUFFIXES):
            return f"https://huggingface.co/{repo_id}/resolve/main/{name}"
    return None


def _trigger_from_readme(api, repo_id):
    """Return a trigger word parsed from the repo's README.md, or ``None``."""
    try:
        # HfApi has no "get file content" call; download (cached) and read instead.
        readme_path = api.hf_hub_download(repo_id, "README.md")
        with open(readme_path, encoding="utf-8") as f:
            readme_content = f.read()
    except Exception as e:  # best effort: missing/unreadable README is not fatal
        logger.warning("Error reading README for %s: %s", repo_id, e)
        return None
    for pattern in _README_TRIGGER_PATTERNS:
        match = pattern.search(readme_content)
        if match:
            return match.group(1).strip()
    return None


def _build_record(api, model):
    """Build the output record for one model returned by ``list_models``."""
    repo_id = model.id
    record = {
        "repo": repo_id,
        "title": repo_id.split("/")[-1].replace("-", " ").title(),
    }

    # Newer huggingface_hub exposes `card_data`; older releases used `cardData`.
    card_data = getattr(model, "card_data", None) or getattr(model, "cardData", None)

    image_url = None
    if card_data:
        trigger_word = _trigger_from_tags(_card_field(card_data, "tags"))
        if trigger_word is not None:
            record["trigger_word"] = trigger_word
        image_url = _image_from_widgets(card_data, repo_id)

    # If no image found in metadata, fall back to the first image file in the repo.
    if not image_url:
        image_url = _first_repo_image(api, repo_id)
    if image_url:
        record["image"] = image_url

    # Extract trigger word from README if not found in tags.
    if "trigger_word" not in record:
        readme_trigger = _trigger_from_readme(api, repo_id)
        # Default to an empty string when no trigger word was found anywhere.
        record["trigger_word"] = readme_trigger if readme_trigger is not None else ""

    # Only mark the position when the trigger text itself mentions "prepend".
    if "prepend" in str(record.get("trigger_word")).lower():
        record["trigger_position"] = "prepend"

    return record


def fetch_lora_models():
    """Fetch LoRA models from Hugging Face that match the criteria.

    Returns:
        A list of dicts with the keys ``repo``, ``title`` and ``trigger_word``,
        plus ``image`` and ``trigger_position`` when they could be determined.
        Models whose processing raises are logged and skipped.
    """
    logger.info("Fetching LoRA models from Hugging Face Hub...")

    # Initialize the Hugging Face API
    api = HfApi()

    # Search for models tagged as LoRA and as FLUX, including card metadata.
    # list_models may return a lazy iterable, so materialize it before len().
    models = list(
        list_models(
            filter="lora",  # Look for LoRA models
            tags=["flux"],  # Filter for flux tagged models
            cardData=True,  # Get additional card data
        )
    )

    logger.info("Found %d potential models", len(models))

    lora_data = []
    for model in models:
        try:
            lora_data.append(_build_record(api, model))
        except Exception as e:  # one bad model must not abort the whole scrape
            logger.error(
                "Error processing model %s: %s", getattr(model, "id", "<unknown>"), e
            )
        else:
            logger.info("Processed %s", model.id)

    logger.info(
        "Successfully gathered information for %d LoRA models", len(lora_data)
    )
    return lora_data


def create_and_push_dataset(lora_data, dataset_name):
    """Create a dataset from LoRA data and push to Hugging Face Hub.

    Args:
        lora_data: List of record dicts as produced by ``fetch_lora_models``.
        dataset_name: Target dataset repo id, e.g. ``"user/flux-lora-models"``.
    """
    if not lora_data:
        logger.warning("No LoRA entries to publish; skipping push to %s", dataset_name)
        return

    logger.info("Creating dataset with %d entries", len(lora_data))

    # Dataset.from_list derives its columns from the first row only, so give
    # every row the union of all keys (missing values become None); otherwise
    # optional fields such as "image" can be silently dropped.
    columns = list(dict.fromkeys(key for row in lora_data for key in row))
    rows = [{column: row.get(column) for column in columns} for row in lora_data]

    # Create dataset
    dataset = Dataset.from_list(rows)

    # Print dataset info
    logger.info("Dataset created with schema: %s", dataset.features)
    logger.info("Dataset contains %d examples", len(dataset))

    # Push to Hub
    logger.info("Pushing dataset to %s", dataset_name)
    dataset.push_to_hub(dataset_name, private=False)
    logger.info(
        "Dataset successfully pushed to https://huggingface.co/datasets/%s",
        dataset_name,
    )


def main():
    """Scrape the models, save a local JSON backup, and publish the dataset."""
    # Fetch LoRA models
    lora_data = fetch_lora_models()

    # Save locally as JSON (backup)
    with open("loras_fetched.json", "w", encoding="utf-8") as f:
        json.dump(lora_data, f, indent=2)

    # Create and push dataset
    username = "your-username"  # Replace with your Hugging Face username
    dataset_name = f"{username}/flux-lora-models"
    create_and_push_dataset(lora_data, dataset_name)


if __name__ == "__main__":
    main()