├── requirements.txt ├── .gitattributes ├── labels.txt ├── app.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | torch 3 | pillow 4 | gradio 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 28 | *.tar filter=lfs diff=lfs merge=lfs -text 29 | *.tflite filter=lfs diff=lfs merge=lfs -text 30 | *.tgz filter=lfs diff=lfs merge=lfs -text 31 | *.wasm filter=lfs diff=lfs merge=lfs -text 32 | *.xz filter=lfs diff=lfs merge=lfs -text 33 | *.zip filter=lfs diff=lfs merge=lfs -text 34 | *.zst filter=lfs diff=lfs merge=lfs -text 35 | *tfevents* filter=lfs diff=lfs merge=lfs -text 36 | -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | 2 | # Food-101 labels 3 | labels = { 4 | "0": "apple_pie", "1": "baby_back_ribs", "2": "baklava", "3": "beef_carpaccio", "4": "beef_tartare", 5 | "5": "beet_salad", "6": "beignets", "7": "bibimbap", "8": "bread_pudding", "9": "breakfast_burrito", 6 | "10": "bruschetta", "11": "caesar_salad", "12": "cannoli", "13": "caprese_salad", "14": "carrot_cake", 7 | "15": "ceviche", "16": "cheesecake", "17": "cheese_plate", "18": "chicken_curry", "19": "chicken_quesadilla", 8 | "20": "chicken_wings", "21": "chocolate_cake", "22": "chocolate_mousse", "23": "churros", "24": "clam_chowder", 9 | "25": "club_sandwich", "26": "crab_cakes", "27": "creme_brulee", "28": "croque_madame", "29": "cup_cakes", 10 | "30": "deviled_eggs", "31": "donuts", "32": "dumplings", "33": "edamame", "34": "eggs_benedict", 11 | "35": "escargots", "36": "falafel", "37": "filet_mignon", "38": "fish_and_chips", "39": "foie_gras", 12 | "40": "french_fries", "41": "french_onion_soup", "42": "french_toast", "43": "fried_calamari", "44": "fried_rice", 13 | "45": "frozen_yogurt", "46": "garlic_bread", "47": "gnocchi", "48": "greek_salad", "49": "grilled_cheese_sandwich", 14 | "50": "grilled_salmon", "51": "guacamole", "52": "gyoza", "53": "hamburger", "54": "hot_and_sour_soup", 15 | "55": "hot_dog", "56": "huevos_rancheros", "57": "hummus", "58": "ice_cream", "59": "lasagna", 16 | "60": "lobster_bisque", "61": "lobster_roll_sandwich", "62": "macaroni_and_cheese", "63": "macarons", "64": "miso_soup", 17 | "65": "mussels", "66": "nachos", "67": "omelette", "68": "onion_rings", "69": "oysters", 18 | "70": "pad_thai", "71": "paella", "72": "pancakes", "73": "panna_cotta", "74": "peking_duck", 19 | "75": "pho", "76": "pizza", "77": "pork_chop", "78": "poutine", "79": "prime_rib", 20 | "80": "pulled_pork_sandwich", "81": "ramen", "82": "ravioli", "83": "red_velvet_cake", "84": "risotto", 21 | "85": "samosa", "86": "sashimi", "87": "scallops", "88": "seaweed_salad", "89": "shrimp_and_grits", 22 | "90": "spaghetti_bolognese", "91": "spaghetti_carbonara", "92": "spring_rolls", "93": "steak", "94": "strawberry_shortcake", 23 | "95": "sushi", "96": "tacos", "97": "takoyaki", "98": "tiramisu", "99": "tuna_tartare", "100": "waffles" 24 | } 25 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from transformers import AutoImageProcessor, SiglipForImageClassification 3 | from PIL import Image 4 | import torch 5 | 6 | # Load model and processor 7 | model_name = "prithivMLmods/Food-101-93M" 8 | model = SiglipForImageClassification.from_pretrained(model_name) 9 | processor = AutoImageProcessor.from_pretrained(model_name) 10 | 11 | # Food-101 labels 12 | labels = { 13 | "0": "apple_pie", "1": "baby_back_ribs", "2": "baklava", "3": "beef_carpaccio", "4": "beef_tartare", 14 | "5": "beet_salad", "6": "beignets", "7": "bibimbap", "8": "bread_pudding", "9": "breakfast_burrito", 15 | "10": "bruschetta", "11": "caesar_salad", "12": "cannoli", "13": "caprese_salad", "14": "carrot_cake", 16 | "15": "ceviche", "16": "cheesecake", "17": "cheese_plate", "18": "chicken_curry", "19": "chicken_quesadilla", 17 | "20": "chicken_wings", "21": "chocolate_cake", "22": "chocolate_mousse", "23": "churros", "24": "clam_chowder", 18 | "25": "club_sandwich", "26": "crab_cakes", "27": "creme_brulee", "28": "croque_madame", "29": "cup_cakes", 19 | "30": "deviled_eggs", "31": "donuts", "32": "dumplings", "33": "edamame", "34": "eggs_benedict", 20 | "35": "escargots", "36": "falafel", "37": "filet_mignon", "38": "fish_and_chips", "39": "foie_gras", 21 | "40": "french_fries", "41": "french_onion_soup", "42": "french_toast", "43": "fried_calamari", "44": "fried_rice", 22 | "45": "frozen_yogurt", "46": "garlic_bread", "47": "gnocchi", "48": "greek_salad", "49": "grilled_cheese_sandwich", 23 | "50": "grilled_salmon", "51": "guacamole", "52": "gyoza", "53": "hamburger", "54": "hot_and_sour_soup", 24 | "55": "hot_dog", "56": "huevos_rancheros", "57": "hummus", "58": "ice_cream", "59": "lasagna", 25 | "60": "lobster_bisque", "61": "lobster_roll_sandwich", "62": "macaroni_and_cheese", "63": "macarons", "64": "miso_soup", 26 | "65": "mussels", "66": "nachos", "67": "omelette", "68": "onion_rings", "69": "oysters", 27 | "70": "pad_thai", "71": "paella", "72": "pancakes", "73": "panna_cotta", "74": "peking_duck", 28 | "75": "pho", "76": "pizza", "77": "pork_chop", "78": "poutine", "79": "prime_rib", 29 | "80": "pulled_pork_sandwich", "81": "ramen", "82": "ravioli", "83": "red_velvet_cake", "84": "risotto", 30 | "85": "samosa", "86": "sashimi", "87": "scallops", "88": "seaweed_salad", "89": "shrimp_and_grits", 31 | "90": "spaghetti_bolognese", "91": "spaghetti_carbonara", "92": "spring_rolls", "93": "steak", "94": "strawberry_shortcake", 32 | "95": "sushi", "96": "tacos", "97": "takoyaki", "98": "tiramisu", "99": "tuna_tartare", "100": "waffles" 33 | } 34 | 35 | def classify_food(image): 36 | """Predicts the type of food in the image.""" 37 | image = Image.fromarray(image).convert("RGB") 38 | inputs = processor(images=image, return_tensors="pt") 39 | 40 | with torch.no_grad(): 41 | outputs = model(**inputs) 42 | logits = outputs.logits 43 | probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist() 44 | 45 | predictions = {labels[str(i)]: round(probs[i], 3) for i in range(len(probs))} 46 | # Sort by descending probability 47 | predictions = dict(sorted(predictions.items(), key=lambda item: item[1], reverse=True)[:5]) 48 | 49 | return predictions 50 | 51 | # Gradio Interface 52 | iface = gr.Interface( 53 | fn=classify_food, 54 | inputs=gr.Image(type="numpy"), 55 | outputs=gr.Label(num_top_classes=5, label="Top 5 Prediction Scores"), 56 | title="Food-101-93M 🍽️", 57 | description="Upload an image of food to classify it into one of 101 dish categories based on the Food-101 dataset." 58 | ) 59 | 60 | # Launch app 61 | if __name__ == "__main__": 62 | iface.launch() 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![zxdfdsxf.png](https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/EnlRwJl06-OKBOYm7l0Lh.png) 2 | 3 | # **Food-101-93M** 4 | 5 | > **Food-101-93M** is a fine-tuned image classification model built on top of **google/siglip2-base-patch16-224** using the **SiglipForImageClassification** architecture. It is trained to classify food images into one of 101 popular dishes, derived from the [Food-101 dataset](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/). 6 | 7 | ```py 8 | Classification Report: 9 | precision recall f1-score support 10 | 11 | apple_pie 0.8399 0.8253 0.8325 750 12 | baby_back_ribs 0.9445 0.8853 0.9140 750 13 | baklava 0.9736 0.9347 0.9537 750 14 | beef_carpaccio 0.9079 0.9200 0.9139 750 15 | beef_tartare 0.8486 0.8293 0.8388 750 16 | beet_salad 0.8649 0.8707 0.8678 750 17 | beignets 0.8961 0.9080 0.9020 750 18 | bibimbap 0.9361 0.9373 0.9367 750 19 | bread_pudding 0.7979 0.8000 0.7989 750 20 | breakfast_burrito 0.8784 0.9053 0.8917 750 21 | bruschetta 0.8672 0.8533 0.8602 750 22 | caesar_salad 0.9444 0.9293 0.9368 750 23 | cannoli 0.9263 0.9547 0.9402 750 24 | caprese_salad 0.9110 0.9280 0.9194 750 25 | carrot_cake 0.9068 0.8040 0.8523 750 26 | ceviche 0.8375 0.8453 0.8414 750 27 | cheesecake 0.8225 0.8093 0.8159 750 28 | cheese_plate 0.9627 0.9627 0.9627 750 29 | chicken_curry 0.8970 0.8827 0.8898 750 30 | chicken_quesadilla 0.9254 0.9093 0.9173 750 31 | chicken_wings 0.9512 0.9360 0.9435 750 32 | chocolate_cake 0.7958 0.8107 0.8032 750 33 | chocolate_mousse 0.6947 0.7827 0.7361 750 34 | churros 0.9440 0.9440 0.9440 750 35 | clam_chowder 0.8883 0.9120 0.9000 750 36 | club_sandwich 0.9396 0.9133 0.9263 750 37 | crab_cakes 0.9185 0.8720 0.8947 750 38 | creme_brulee 0.9141 0.9227 0.9184 750 39 | croque_madame 0.9106 0.8960 0.9032 750 40 | cup_cakes 0.8986 0.9333 0.9156 750 41 | deviled_eggs 0.9787 0.9813 0.9800 750 42 | donuts 0.8893 0.8787 0.8840 750 43 | dumplings 0.9212 0.8880 0.9043 750 44 | edamame 0.9960 0.9920 0.9940 750 45 | eggs_benedict 0.9207 0.9440 0.9322 750 46 | escargots 0.8709 0.8907 0.8807 750 47 | falafel 0.8945 0.8933 0.8939 750 48 | filet_mignon 0.7598 0.7467 0.7532 750 49 | fish_and_chips 0.9454 0.9467 0.9460 750 50 | foie_gras 0.6659 0.8027 0.7279 750 51 | french_fries 0.9447 0.9333 0.9390 750 52 | french_onion_soup 0.8667 0.9187 0.8919 750 53 | french_toast 0.8890 0.8760 0.8825 750 54 | fried_calamari 0.9448 0.9133 0.9288 750 55 | fried_rice 0.9325 0.9213 0.9269 750 56 | frozen_yogurt 0.8716 0.9507 0.9094 750 57 | garlic_bread 0.9103 0.8800 0.8949 750 58 | gnocchi 0.8554 0.8280 0.8415 750 59 | greek_salad 0.9203 0.9240 0.9222 750 60 | grilled_cheese_sandwich 0.8523 0.8773 0.8647 750 61 | grilled_salmon 0.8463 0.8960 0.8705 750 62 | guacamole 0.9537 0.9347 0.9441 750 63 | gyoza 0.8970 0.9173 0.9071 750 64 | hamburger 0.8899 0.8947 0.8923 750 65 | hot_and_sour_soup 0.9439 0.9413 0.9426 750 66 | hot_dog 0.8859 0.9320 0.9084 750 67 | huevos_rancheros 0.8465 0.8827 0.8642 750 68 | hummus 0.9394 0.9093 0.9241 750 69 | ice_cream 0.8633 0.8507 0.8570 750 70 | lasagna 0.8780 0.8733 0.8757 750 71 | lobster_bisque 0.8952 0.9107 0.9028 750 72 | lobster_roll_sandwich 0.9664 0.9573 0.9618 750 73 | macaroni_and_cheese 0.9273 0.9013 0.9141 750 74 | macarons 0.9892 0.9747 0.9819 750 75 | miso_soup 0.9565 0.9667 0.9615 750 76 | mussels 0.9602 0.9640 0.9621 750 77 | nachos 0.9337 0.9387 0.9362 750 78 | omelette 0.8889 0.8960 0.8924 750 79 | onion_rings 0.9493 0.9493 0.9493 750 80 | oysters 0.9808 0.9533 0.9669 750 81 | pad_thai 0.9188 0.9507 0.9345 750 82 | paella 0.9352 0.9240 0.9296 750 83 | pancakes 0.9277 0.9067 0.9171 750 84 | panna_cotta 0.8056 0.8507 0.8275 750 85 | peking_duck 0.8529 0.9120 0.8814 750 86 | pho 0.9746 0.9227 0.9479 750 87 | pizza 0.9512 0.9360 0.9435 750 88 | pork_chop 0.8085 0.7373 0.7713 750 89 | poutine 0.9424 0.9387 0.9405 750 90 | prime_rib 0.9106 0.8147 0.8600 750 91 | pulled_pork_sandwich 0.8887 0.9053 0.8970 750 92 | ramen 0.8986 0.9213 0.9098 750 93 | ravioli 0.8532 0.8293 0.8411 750 94 | red_velvet_cake 0.9330 0.8907 0.9113 750 95 | risotto 0.8809 0.8680 0.8744 750 96 | samosa 0.9153 0.9227 0.9190 750 97 | sashimi 0.9248 0.9187 0.9217 750 98 | scallops 0.8564 0.8507 0.8535 750 99 | seaweed_salad 0.9597 0.9533 0.9565 750 100 | shrimp_and_grits 0.8995 0.8947 0.8971 750 101 | spaghetti_bolognese 0.9667 0.9667 0.9667 750 102 | spaghetti_carbonara 0.9601 0.9627 0.9614 750 103 | spring_rolls 0.9045 0.9467 0.9251 750 104 | steak 0.6311 0.7027 0.6650 750 105 | strawberry_shortcake 0.8832 0.8467 0.8645 750 106 | sushi 0.9204 0.8947 0.9074 750 107 | tacos 0.9225 0.8893 0.9056 750 108 | takoyaki 0.9419 0.9507 0.9463 750 109 | tiramisu 0.9074 0.8627 0.8845 750 110 | tuna_tartare 0.7691 0.7773 0.7732 750 111 | waffles 0.9629 0.9347 0.9486 750 112 | 113 | accuracy 0.8973 75750 114 | macro avg 0.8987 0.8973 0.8977 75750 115 | weighted avg 0.8987 0.8973 0.8977 75750 116 | ``` 117 | 118 | The model categorizes images into 101 food classes such as `sushi`, `hamburger`, `waffles`, `pad_thai`, and more. 119 | 120 | --- 121 | 122 | # **Run with Transformers 🤗** 123 | 124 | ```python 125 | !pip install -q transformers torch pillow gradio 126 | ``` 127 | 128 | ```python 129 | import gradio as gr 130 | from transformers import AutoImageProcessor, SiglipForImageClassification 131 | from PIL import Image 132 | import torch 133 | 134 | # Load model and processor 135 | model_name = "prithivMLmods/Food-101-93M" 136 | model = SiglipForImageClassification.from_pretrained(model_name) 137 | processor = AutoImageProcessor.from_pretrained(model_name) 138 | 139 | # Food-101 labels 140 | labels = { 141 | "0": "apple_pie", "1": "baby_back_ribs", "2": "baklava", "3": "beef_carpaccio", "4": "beef_tartare", 142 | "5": "beet_salad", "6": "beignets", "7": "bibimbap", "8": "bread_pudding", "9": "breakfast_burrito", 143 | "10": "bruschetta", "11": "caesar_salad", "12": "cannoli", "13": "caprese_salad", "14": "carrot_cake", 144 | "15": "ceviche", "16": "cheesecake", "17": "cheese_plate", "18": "chicken_curry", "19": "chicken_quesadilla", 145 | "20": "chicken_wings", "21": "chocolate_cake", "22": "chocolate_mousse", "23": "churros", "24": "clam_chowder", 146 | "25": "club_sandwich", "26": "crab_cakes", "27": "creme_brulee", "28": "croque_madame", "29": "cup_cakes", 147 | "30": "deviled_eggs", "31": "donuts", "32": "dumplings", "33": "edamame", "34": "eggs_benedict", 148 | "35": "escargots", "36": "falafel", "37": "filet_mignon", "38": "fish_and_chips", "39": "foie_gras", 149 | "40": "french_fries", "41": "french_onion_soup", "42": "french_toast", "43": "fried_calamari", "44": "fried_rice", 150 | "45": "frozen_yogurt", "46": "garlic_bread", "47": "gnocchi", "48": "greek_salad", "49": "grilled_cheese_sandwich", 151 | "50": "grilled_salmon", "51": "guacamole", "52": "gyoza", "53": "hamburger", "54": "hot_and_sour_soup", 152 | "55": "hot_dog", "56": "huevos_rancheros", "57": "hummus", "58": "ice_cream", "59": "lasagna", 153 | "60": "lobster_bisque", "61": "lobster_roll_sandwich", "62": "macaroni_and_cheese", "63": "macarons", "64": "miso_soup", 154 | "65": "mussels", "66": "nachos", "67": "omelette", "68": "onion_rings", "69": "oysters", 155 | "70": "pad_thai", "71": "paella", "72": "pancakes", "73": "panna_cotta", "74": "peking_duck", 156 | "75": "pho", "76": "pizza", "77": "pork_chop", "78": "poutine", "79": "prime_rib", 157 | "80": "pulled_pork_sandwich", "81": "ramen", "82": "ravioli", "83": "red_velvet_cake", "84": "risotto", 158 | "85": "samosa", "86": "sashimi", "87": "scallops", "88": "seaweed_salad", "89": "shrimp_and_grits", 159 | "90": "spaghetti_bolognese", "91": "spaghetti_carbonara", "92": "spring_rolls", "93": "steak", "94": "strawberry_shortcake", 160 | "95": "sushi", "96": "tacos", "97": "takoyaki", "98": "tiramisu", "99": "tuna_tartare", "100": "waffles" 161 | } 162 | 163 | def classify_food(image): 164 | """Predicts the type of food in the image.""" 165 | image = Image.fromarray(image).convert("RGB") 166 | inputs = processor(images=image, return_tensors="pt") 167 | 168 | with torch.no_grad(): 169 | outputs = model(**inputs) 170 | logits = outputs.logits 171 | probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist() 172 | 173 | predictions = {labels[str(i)]: round(probs[i], 3) for i in range(len(probs))} 174 | # Sort by descending probability 175 | predictions = dict(sorted(predictions.items(), key=lambda item: item[1], reverse=True)[:5]) 176 | 177 | return predictions 178 | 179 | # Gradio Interface 180 | iface = gr.Interface( 181 | fn=classify_food, 182 | inputs=gr.Image(type="numpy"), 183 | outputs=gr.Label(num_top_classes=5, label="Top 5 Prediction Scores"), 184 | title="Food-101-93M 🍽️", 185 | description="Upload an image of food to classify it into one of 101 dish categories based on the Food-101 dataset." 186 | ) 187 | 188 | # Launch app 189 | if __name__ == "__main__": 190 | iface.launch() 191 | ``` 192 | 193 | --- 194 | 195 | # **Intended Use:** 196 | 197 | The **Food-101-93M** model is intended for: 198 | 199 | - **Recipe Recommendation Engines:** Automatically tagging food images to suggest recipes. 200 | - **Food Logging & Calorie Tracking Apps:** Categorizing meals based on photos. 201 | - **Smart Kitchens:** Assisting food recognition in smart appliances. 202 | - **Restaurant Menu Digitization:** Auto-classifying dishes for visual menus or ordering systems. 203 | - **Dataset Labeling:** Enabling automatic annotation of food datasets for training other ML models. 204 | --------------------------------------------------------------------------------