├── champions.csv ├── scrap.ipynb ├── .ipynb_checkpoints └── scrap-checkpoint.ipynb └── football_analysis.ipynb /champions.csv: -------------------------------------------------------------------------------- 1 | Rank,Team,Participations,Titles,Pld,W,D,L,GF,GA,GD,Pts 2 | 1,Brazil,21,5,109,73,18,18,229,105,124,237 3 | 2,Germany,19,4,109,67,20,22,226,125,101,221 4 | 3,Italy,18,4,83,45,21,17,128,77,51,156 5 | 4,Argentina,17,2,81,43,15,23,137,93,44,144 6 | 5,France,15,2,66,34,13,19,120,77,43,115 7 | 6,England,15,1,69,29,21,19,91,64,27,108 8 | 7,Spain,15,1,63,30,15,18,99,72,27,105 9 | 8,Uruguay,13,2,56,24,12,20,87,74,13,84 10 | -------------------------------------------------------------------------------- /scrap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 77, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from bs4 import BeautifulSoup as bs\n", 10 | "import requests\n", 11 | "import pandas as pd\n", 12 | "import csv\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 78, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "url='https://en.wikipedia.org/wiki/FIFA_World_Cup'\n", 22 | "headers = {\"User-Agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0\"}\n", 23 | "data=requests.get(url,headers=headers)\n", 24 | "soup=bs(data.text,'html.parser')\n", 25 | "\n", 26 | "#soup.prettify()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 79, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "\n", 36 | "resp_table=soup.select('table.wikitable')[7]\n", 37 | "#resp_table\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### listing only the headers\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 80, 50 | "metadata": { 51 | "scrolled": true 52 | }, 53 | "outputs": [], 
54 | "source": [ 55 | "headers=[]\n", 56 | "for h in resp_table.find_all('th'):\n", 57 | " headers.append(h.text.replace('\\n',\"\"))\n", 58 | "#print(headers)\n", 59 | "\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 84, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "
\n", 71 | "\n", 84 | "\n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
RankTeamParticipationsTitlesPldWDLGFGAGDPts
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | "Empty DataFrame\n", 109 | "Columns: [Rank, Team, Participations, Titles, Pld, W, D, L, GF, GA, GD, Pts]\n", 110 | "Index: []" 111 | ] 112 | }, 113 | "execution_count": 84, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "df=pd.DataFrame(columns=headers)\n", 120 | "#df\n" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 86, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "rows=[]\n", 130 | "row_tags=rows=resp_table.find_all('tr')\n", 131 | "for r in row_tags[1:]:\n", 132 | " dirty_rows=r.find_all('td')\n", 133 | " beauty_rows=[rw.text.strip() for rw in dirty_rows]\n", 134 | " #print(beauty_rows)\n", 135 | " length=len(df)\n", 136 | " df.loc[length]=beauty_rows\n", 137 | " \n", 138 | " " 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 87, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/html": [ 149 | "
\n", 150 | "\n", 163 | "\n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | "
RankTeamParticipationsTitlesPldWDLGFGAGDPts
01Brazil215109731818229105124237
12Germany[124]194109672022226125101221
23Italy184834521171287751156
34Argentina172814315231379344144
45France152663413191207743115
56England15169292119916427108
67Spain15163301518997227105
78Uruguay1325624122087741384
\n", 304 | "
" 305 | ], 306 | "text/plain": [ 307 | " Rank Team Participations Titles Pld W D L GF GA GD \\\n", 308 | "0 1 Brazil 21 5 109 73 18 18 229 105 124 \n", 309 | "1 2 Germany[124] 19 4 109 67 20 22 226 125 101 \n", 310 | "2 3 Italy 18 4 83 45 21 17 128 77 51 \n", 311 | "3 4 Argentina 17 2 81 43 15 23 137 93 44 \n", 312 | "4 5 France 15 2 66 34 13 19 120 77 43 \n", 313 | "5 6 England 15 1 69 29 21 19 91 64 27 \n", 314 | "6 7 Spain 15 1 63 30 15 18 99 72 27 \n", 315 | "7 8 Uruguay 13 2 56 24 12 20 87 74 13 \n", 316 | "\n", 317 | " Pts \n", 318 | "0 237 \n", 319 | "1 221 \n", 320 | "2 156 \n", 321 | "3 144 \n", 322 | "4 115 \n", 323 | "5 108 \n", 324 | "6 105 \n", 325 | "7 84 " 326 | ] 327 | }, 328 | "execution_count": 87, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "df\n", 335 | " " 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 88, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "df.to_csv('champions.csv',index=False)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [] 353 | } 354 | ], 355 | "metadata": { 356 | "kernelspec": { 357 | "display_name": "Python 3 (ipykernel)", 358 | "language": "python", 359 | "name": "python3" 360 | }, 361 | "language_info": { 362 | "codemirror_mode": { 363 | "name": "ipython", 364 | "version": 3 365 | }, 366 | "file_extension": ".py", 367 | "mimetype": "text/x-python", 368 | "name": "python", 369 | "nbconvert_exporter": "python", 370 | "pygments_lexer": "ipython3", 371 | "version": "3.11.0" 372 | }, 373 | "vscode": { 374 | "interpreter": { 375 | "hash": "d3e10ef16274dd72e574b8fa73b58450b957d8421a2901baded3cca26fcf5dda" 376 | } 377 | } 378 | }, 379 | "nbformat": 4, 380 | "nbformat_minor": 2 381 | } 382 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/scrap-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from bs4 import BeautifulSoup as bs\n", 10 | "import requests\n", 11 | "import pandas\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "url='https://en.wikipedia.org/wiki/FIFA_World_Cup'\n", 21 | "headers = {\"User-Agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0\"}\n", 22 | "data=requests.get(url,headers=headers)\n", 23 | "soup=bs(data.text,'html.parser')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "#print(soup.prettify())\n", 33 | "\n", 34 | "#access the titles of the website\n", 35 | "#tit=soup.title.text\n", 36 | "#print(tit)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": { 43 | "scrolled": true 44 | }, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "\n", 51 | "\n", 52 | "\n", 54 | "\n", 56 | "\n", 58 | "\n", 60 | "\n", 62 | "\n", 64 | "\n", 66 | "\n", 68 | "\n", 70 | "\n", 72 | "\n", 74 | "\n", 76 | "\n", 77 | "\n", 79 | "\n", 81 | "\n", 83 | "\n", 85 | "\n", 87 | "\n", 89 | "\n", 91 | "\n", 93 | "\n", 95 | "\n", 97 | "\n", 99 | "\n", 101 | "\n", 102 | "\n", 104 | "\n", 106 | "\n", 108 | "\n", 110 | "\n", 112 | "\n", 114 | "\n", 116 | "\n", 118 | "\n", 120 | "\n", 122 | "\n", 124 | "\n", 126 | "\n", 127 | "\n", 129 | "\n", 131 | "\n", 133 | "\n", 135 | "\n", 137 | "\n", 139 | "\n", 141 | "\n", 143 | "\n", 145 | "\n", 147 | "\n", 149 | "\n", 151 | "\n", 152 | "\n", 154 | "\n", 156 | "\n", 158 | "\n", 160 | "\n", 162 | "\n", 164 | "\n", 166 | "\n", 168 | "\n", 170 | "\n", 172 | "\n", 174 | "\n", 176 | "\n", 177 | "\n", 179 | 
"\n", 181 | "\n", 183 | "\n", 185 | "\n", 187 | "\n", 189 | "\n", 191 | "\n", 193 | "\n", 195 | "\n", 197 | "\n", 199 | "\n", 201 | "\n", 202 | "\n", 204 | "\n", 206 | "\n", 208 | "\n", 210 | "\n", 212 | "\n", 214 | "\n", 216 | "\n", 218 | "\n", 220 | "\n", 222 | "\n", 224 | "\n", 226 | "\n", 227 | "\n", 229 | "\n", 231 | "\n", 233 | "\n", 235 | "\n", 237 | "\n", 239 | "\n", 241 | "\n", 243 | "\n", 245 | "\n", 247 | "\n", 249 | "\n", 251 | "\n", 252 | "\n", 254 | "\n", 256 | "\n", 258 | "\n", 260 | "\n", 262 | "\n", 264 | "\n", 266 | "\n", 268 | "\n", 270 | "\n", 272 | "\n", 274 | "
Rank\n", 53 | "Team\n", 55 | "Participations\n", 57 | "Titles\n", 59 | "Pld\n", 61 | "W\n", 63 | "D\n", 65 | "L\n", 67 | "GF\n", 69 | "GA\n", 71 | "GD\n", 73 | "Pts\n", 75 | "
1\n", 78 | "\"\" Brazil\n", 80 | "21\n", 82 | "5\n", 84 | "109\n", 86 | "73\n", 88 | "18\n", 90 | "18\n", 92 | "229\n", 94 | "105\n", 96 | "124\n", 98 | "237\n", 100 | "
2\n", 103 | "\"\" Germany[123]\n", 105 | "19\n", 107 | "4\n", 109 | "109\n", 111 | "67\n", 113 | "20\n", 115 | "22\n", 117 | "226\n", 119 | "125\n", 121 | "101\n", 123 | "221\n", 125 | "
3\n", 128 | "\"\" Italy\n", 130 | "18\n", 132 | "4\n", 134 | "83\n", 136 | "45\n", 138 | "21\n", 140 | "17\n", 142 | "128\n", 144 | "77\n", 146 | "51\n", 148 | "156\n", 150 | "
4\n", 153 | "\"\" Argentina\n", 155 | "17\n", 157 | "2\n", 159 | "81\n", 161 | "43\n", 163 | "15\n", 165 | "23\n", 167 | "137\n", 169 | "93\n", 171 | "44\n", 173 | "144\n", 175 | "
5\n", 178 | "\"\" France\n", 180 | "15\n", 182 | "2\n", 184 | "66\n", 186 | "34\n", 188 | "13\n", 190 | "19\n", 192 | "120\n", 194 | "77\n", 196 | "43\n", 198 | "115\n", 200 | "
6\n", 203 | "\"\" England\n", 205 | "15\n", 207 | "1\n", 209 | "69\n", 211 | "29\n", 213 | "21\n", 215 | "19\n", 217 | "91\n", 219 | "64\n", 221 | "27\n", 223 | "108\n", 225 | "
7\n", 228 | "\"\" Spain\n", 230 | "15\n", 232 | "1\n", 234 | "63\n", 236 | "30\n", 238 | "15\n", 240 | "18\n", 242 | "99\n", 244 | "72\n", 246 | "27\n", 248 | "105\n", 250 | "
8\n", 253 | "\"\" Uruguay\n", 255 | "13\n", 257 | "2\n", 259 | "56\n", 261 | "24\n", 263 | "12\n", 265 | "20\n", 267 | "87\n", 269 | "74\n", 271 | "13\n", 273 | "84\n", 275 | "
\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "\n", 281 | "resp_table=soup.select('table.wikitable')[7]\n", 282 | "print(resp_table)\n" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "### listing only the headers" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 27, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "['Rank\\n', 'Team\\n', 'Participations\\n', 'Titles\\n', 'Pld\\n', 'W\\n', 'D\\n', 'L\\n', 'GF\\n', 'GA\\n', 'GD\\n', 'Pts\\n']\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "headers=[]\n", 307 | "for h in resp_table.find_all('th'):\n", 308 | " headers.append(h.text)\n", 309 | "print(headers)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 29, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "ename": "AttributeError", 319 | "evalue": "ResultSet object has no attribute 'text'. You're probably treating a list of elements like a single element. 
Did you call find_all() when you meant to call find()?", 320 | "output_type": "error", 321 | "traceback": [ 322 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 323 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 324 | "Cell \u001b[1;32mIn [29], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfor\u001b[39;00m row \u001b[39min\u001b[39;00m resp_table\u001b[39m.\u001b[39mfind_all(\u001b[39m'\u001b[39m\u001b[39mtr\u001b[39m\u001b[39m'\u001b[39m)[\u001b[39m1\u001b[39m:]:\n\u001b[1;32m----> 2\u001b[0m data\u001b[39m=\u001b[39mrow\u001b[39m.\u001b[39;49mfind_all(\u001b[39m'\u001b[39;49m\u001b[39mtd\u001b[39;49m\u001b[39m'\u001b[39;49m)\u001b[39m.\u001b[39;49mtext\n", 325 | "File \u001b[1;32mc:\\Python\\Python310\\lib\\site-packages\\bs4\\element.py:2289\u001b[0m, in \u001b[0;36mResultSet.__getattr__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2287\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__getattr__\u001b[39m(\u001b[39mself\u001b[39m, key):\n\u001b[0;32m 2288\u001b[0m \u001b[39m\"\"\"Raise a helpful exception to explain a common code fix.\"\"\"\u001b[39;00m\n\u001b[1;32m-> 2289\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m(\n\u001b[0;32m 2290\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mResultSet object has no attribute \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. You\u001b[39m\u001b[39m'\u001b[39m\u001b[39mre probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m key\n\u001b[0;32m 2291\u001b[0m )\n", 326 | "\u001b[1;31mAttributeError\u001b[0m: ResultSet object has no attribute 'text'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?" 
327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "for row in resp_table.find_all('tr')[1:]:\n", 332 | " data=row.find_all('td').text\n", 333 | "\n" 334 | ] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3 (ipykernel)", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.11.0" 354 | }, 355 | "vscode": { 356 | "interpreter": { 357 | "hash": "d3e10ef16274dd72e574b8fa73b58450b957d8421a2901baded3cca26fcf5dda" 358 | } 359 | } 360 | }, 361 | "nbformat": 4, 362 | "nbformat_minor": 2 363 | } 364 | -------------------------------------------------------------------------------- /football_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 27, 6 | "id": "eecc6fa6", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import requests\n", 12 | "import matplotlib.pyplot as mp" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 28, 18 | "id": "f07ee235", 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | "
RankTeamParticipationsTitlesPldWDLGFGAGDPts
01Brazil215109731818229105124237
12Germany194109672022226125101221
23Italy184834521171287751156
34Argentina172814315231379344144
45France152663413191207743115
56England15169292119916427108
67Spain15163301518997227105
78Uruguay1325624122087741384
\n", 179 | "
" 180 | ], 181 | "text/plain": [ 182 | " Rank Team Participations Titles Pld W D L GF GA GD \\\n", 183 | "0 1 Brazil 21 5 109 73 18 18 229 105 124 \n", 184 | "1 2 Germany 19 4 109 67 20 22 226 125 101 \n", 185 | "2 3 Italy 18 4 83 45 21 17 128 77 51 \n", 186 | "3 4 Argentina 17 2 81 43 15 23 137 93 44 \n", 187 | "4 5 France 15 2 66 34 13 19 120 77 43 \n", 188 | "5 6 England 15 1 69 29 21 19 91 64 27 \n", 189 | "6 7 Spain 15 1 63 30 15 18 99 72 27 \n", 190 | "7 8 Uruguay 13 2 56 24 12 20 87 74 13 \n", 191 | "\n", 192 | " Pts \n", 193 | "0 237 \n", 194 | "1 221 \n", 195 | "2 156 \n", 196 | "3 144 \n", 197 | "4 115 \n", 198 | "5 108 \n", 199 | "6 105 \n", 200 | "7 84 " 201 | ] 202 | }, 203 | "execution_count": 28, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "df=pd.read_csv('champions.csv')\n", 210 | "df\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 29, 216 | "id": "9d5d9f4f", 217 | "metadata": { 218 | "collapsed": true 219 | }, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/html": [ 224 | "
\n", 225 | "\n", 238 | "\n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | "
RankParticipationsTitlesPldWDLGFGAGDPts
count8.000008.0000008.0000008.0000008.0000008.0000008.0000008.0000008.0000008.0000008.000000
mean4.5000016.6250002.62500079.50000043.12500016.87500019.500000139.62500085.87500053.750000146.250000
std2.449492.6152031.50594120.25551118.0747463.6030742.07019757.02114320.41314438.68831555.984054
min1.0000013.0000001.00000056.00000024.00000012.00000017.00000087.00000064.00000013.00000084.000000
25%2.7500015.0000001.75000065.25000029.75000014.50000018.00000097.00000073.50000027.000000107.250000
50%4.5000016.0000002.00000075.00000038.50000016.50000019.000000124.00000077.00000043.500000129.500000
75%6.2500018.2500004.00000089.50000050.50000020.25000020.500000159.25000096.00000063.500000172.250000
max8.0000021.0000005.000000109.00000073.00000021.00000023.000000229.000000125.000000124.000000237.000000
\n", 370 | "
" 371 | ], 372 | "text/plain": [ 373 | " Rank Participations Titles Pld W D \\\n", 374 | "count 8.00000 8.000000 8.000000 8.000000 8.000000 8.000000 \n", 375 | "mean 4.50000 16.625000 2.625000 79.500000 43.125000 16.875000 \n", 376 | "std 2.44949 2.615203 1.505941 20.255511 18.074746 3.603074 \n", 377 | "min 1.00000 13.000000 1.000000 56.000000 24.000000 12.000000 \n", 378 | "25% 2.75000 15.000000 1.750000 65.250000 29.750000 14.500000 \n", 379 | "50% 4.50000 16.000000 2.000000 75.000000 38.500000 16.500000 \n", 380 | "75% 6.25000 18.250000 4.000000 89.500000 50.500000 20.250000 \n", 381 | "max 8.00000 21.000000 5.000000 109.000000 73.000000 21.000000 \n", 382 | "\n", 383 | " L GF GA GD Pts \n", 384 | "count 8.000000 8.000000 8.000000 8.000000 8.000000 \n", 385 | "mean 19.500000 139.625000 85.875000 53.750000 146.250000 \n", 386 | "std 2.070197 57.021143 20.413144 38.688315 55.984054 \n", 387 | "min 17.000000 87.000000 64.000000 13.000000 84.000000 \n", 388 | "25% 18.000000 97.000000 73.500000 27.000000 107.250000 \n", 389 | "50% 19.000000 124.000000 77.000000 43.500000 129.500000 \n", 390 | "75% 20.500000 159.250000 96.000000 63.500000 172.250000 \n", 391 | "max 23.000000 229.000000 125.000000 124.000000 237.000000 " 392 | ] 393 | }, 394 | "execution_count": 29, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "df.describe()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "id": "4d5babca", 406 | "metadata": {}, 407 | "source": [ 408 | "### selecting only team and titles \n" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 30, 414 | "id": "ce358583", 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/html": [ 420 | "
\n", 421 | "\n", 434 | "\n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | "
TeamTitles
0Brazil5
1Germany4
2Italy4
3Argentina2
4France2
5England1
6Spain1
7Uruguay2
\n", 485 | "
" 486 | ], 487 | "text/plain": [ 488 | " Team Titles\n", 489 | "0 Brazil 5\n", 490 | "1 Germany 4\n", 491 | "2 Italy 4\n", 492 | "3 Argentina 2\n", 493 | "4 France 2\n", 494 | "5 England 1\n", 495 | "6 Spain 1\n", 496 | "7 Uruguay 2" 497 | ] 498 | }, 499 | "execution_count": 30, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "winners=df.loc[:,['Team','Titles']]\n", 506 | "winners\n" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "id": "d2cc6948", 512 | "metadata": {}, 513 | "source": [ 514 | "### Plotting the number of World Cup titles per team" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 31, 520 | "id": "284101f5", 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "" 527 | ] 528 | }, 529 | "execution_count": 31, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | }, 533 | { 534 | "data": { 535 | "image/png": "\n", 536 | "text/plain": [ 537 | "
" 538 | ] 539 | }, 540 | "metadata": {}, 541 | "output_type": "display_data" 542 | } 543 | ], 544 | "source": [ 545 | "winners.plot(kind='bar',x='Team',y='Titles')" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 45, 551 | "id": "c769107e", 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/html": [ 557 | "
\n", 558 | "\n", 571 | "\n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | "
TeamGFGA
0Brazil229105
1Germany226125
2Italy12877
3Argentina13793
4France12077
5England9164
6Spain9972
7Uruguay8774
\n", 631 | "
" 632 | ], 633 | "text/plain": [ 634 | " Team GF GA\n", 635 | "0 Brazil 229 105\n", 636 | "1 Germany 226 125\n", 637 | "2 Italy 128 77\n", 638 | "3 Argentina 137 93\n", 639 | "4 France 120 77\n", 640 | "5 England 91 64\n", 641 | "6 Spain 99 72\n", 642 | "7 Uruguay 87 74" 643 | ] 644 | }, 645 | "execution_count": 45, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | } 649 | ], 650 | "source": [ 651 | "games=df.loc[:,['Team','GF','GA']]\n", 652 | "games" 653 | ] 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "id": "cde6f46a", 658 | "metadata": {}, 659 | "source": [ 660 | "### Goals for (GF) and goals against (GA) per team as a bar chart\n" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": 49, 666 | "id": "c74f974f", 667 | "metadata": { 668 | "scrolled": false 669 | }, 670 | "outputs": [ 671 | { 672 | "data": { 673 | "text/plain": [ 674 | "" 675 | ] 676 | }, 677 | "execution_count": 49, 678 | "metadata": {}, 679 | "output_type": "execute_result" 680 | }, 681 | { 682 | "data": { 683 | "image/png": "\n", 684 | "text/plain": [ 685 | "
" 686 | ] 687 | }, 688 | "metadata": {}, 689 | "output_type": "display_data" 690 | } 691 | ], 692 | "source": [ 693 | "games.plot(kind='bar',y=['GF','GA'],x='Team',color=['yellow','green'],figsize=(6,10),)" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 53, 699 | "id": "fc00336d", 700 | "metadata": {}, 701 | "outputs": [ 702 | { 703 | "data": { 704 | "text/plain": [ 705 | "(8, 12)" 706 | ] 707 | }, 708 | "execution_count": 53, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "df.shape" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 61, 720 | "id": "805fd47b", 721 | "metadata": {}, 722 | "outputs": [ 723 | { 724 | "data": { 725 | "text/html": [ 726 | "
\n", 727 | "\n", 740 | "\n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | "
TeamPts
0Brazil237
1Germany221
2Italy156
3Argentina144
4France115
5England108
6Spain105
7Uruguay84
\n", 791 | "
" 792 | ], 793 | "text/plain": [ 794 | " Team Pts\n", 795 | "0 Brazil 237\n", 796 | "1 Germany 221\n", 797 | "2 Italy 156\n", 798 | "3 Argentina 144\n", 799 | "4 France 115\n", 800 | "5 England 108\n", 801 | "6 Spain 105\n", 802 | "7 Uruguay 84" 803 | ] 804 | }, 805 | "execution_count": 61, 806 | "metadata": {}, 807 | "output_type": "execute_result" 808 | } 809 | ], 810 | "source": [ 811 | "points=df.iloc[:,[1,11]]\n", 812 | "points" 813 | ] 814 | }, 815 | { 816 | "cell_type": "code", 817 | "execution_count": 65, 818 | "id": "d8ae14a0", 819 | "metadata": {}, 820 | "outputs": [], 821 | "source": [ 822 | "my_team=df.Team\n", 823 | "my_pts=df.Pts" 824 | ] 825 | }, 826 | { 827 | "cell_type": "markdown", 828 | "id": "8b116cdc", 829 | "metadata": {}, 830 | "source": [ 831 | "### Showing each team's share of total points as a pie chart\n" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": 78, 837 | "id": "08058faf", 838 | "metadata": {}, 839 | "outputs": [ 840 | { 841 | "data": { 842 | "image/png": "\n", 843 | "text/plain": [ 844 | "
" 845 | ] 846 | }, 847 | "metadata": {}, 848 | "output_type": "display_data" 849 | } 850 | ], 851 | "source": [ 852 | "mp.pie(my_pts,labels=my_team,autopct='%1.0f%%')\n", 853 | "mp.title(\"pie chart to show total points gained\")\n", 854 | "mp.show()" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "id": "aa977979", 861 | "metadata": {}, 862 | "outputs": [], 863 | "source": [] 864 | } 865 | ], 866 | "metadata": { 867 | "kernelspec": { 868 | "display_name": "Python 3 (ipykernel)", 869 | "language": "python", 870 | "name": "python3" 871 | }, 872 | "language_info": { 873 | "codemirror_mode": { 874 | "name": "ipython", 875 | "version": 3 876 | }, 877 | "file_extension": ".py", 878 | "mimetype": "text/x-python", 879 | "name": "python", 880 | "nbconvert_exporter": "python", 881 | "pygments_lexer": "ipython3", 882 | "version": "3.11.0" 883 | } 884 | }, 885 | "nbformat": 4, 886 | "nbformat_minor": 5 887 | } 888 | --------------------------------------------------------------------------------