├── champions.csv
├── scrap.ipynb
├── .ipynb_checkpoints
│   └── scrap-checkpoint.ipynb
└── football_analysis.ipynb
/champions.csv:
--------------------------------------------------------------------------------
1 | Rank,Team,Participations,Titles,Pld,W,D,L,GF,GA,GD,Pts
2 | 1,Brazil,21,5,109,73,18,18,229,105,124,237
3 | 2,Germany,19,4,109,67,20,22,226,125,101,221
4 | 3,Italy,18,4,83,45,21,17,128,77,51,156
5 | 4,Argentina,17,2,81,43,15,23,137,93,44,144
6 | 5,France,15,2,66,34,13,19,120,77,43,115
7 | 6,England,15,1,69,29,21,19,91,64,27,108
8 | 7,Spain,15,1,63,30,15,18,99,72,27,105
9 | 8,Uruguay,13,2,56,24,12,20,87,74,13,84
10 |
--------------------------------------------------------------------------------
/scrap.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 77,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from bs4 import BeautifulSoup as bs\n",
10 | "import requests\n",
11 | "import pandas as pd\n",
12 | "import csv\n"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 78,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "url='https://en.wikipedia.org/wiki/FIFA_World_Cup'\n",
22 | "headers = {\"User-Agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0\"}\n",
23 | "data=requests.get(url,headers=headers)\n",
24 | "soup=bs(data.text,'html.parser')\n",
25 | "\n",
26 | "#soup.prettify()"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 79,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "\n",
36 | "resp_table=soup.select('table.wikitable')[7]\n",
37 | "#resp_table\n"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "### Listing only the table headers\n"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 80,
50 | "metadata": {
51 | "scrolled": true
52 | },
53 | "outputs": [],
54 | "source": [
55 | "headers=[]\n",
56 | "for h in resp_table.find_all('th'):\n",
57 | " headers.append(h.text.replace('\\n',\"\"))\n",
58 | "#print(headers)\n",
59 | "\n"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 84,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/html": [
70 | "
\n",
71 | "\n",
84 | "
\n",
85 | " \n",
86 | " \n",
87 | " | \n",
88 | " Rank | \n",
89 | " Team | \n",
90 | " Participations | \n",
91 | " Titles | \n",
92 | " Pld | \n",
93 | " W | \n",
94 | " D | \n",
95 | " L | \n",
96 | " GF | \n",
97 | " GA | \n",
98 | " GD | \n",
99 | " Pts | \n",
100 | "
\n",
101 | " \n",
102 | " \n",
103 | " \n",
104 | "
\n",
105 | "
"
106 | ],
107 | "text/plain": [
108 | "Empty DataFrame\n",
109 | "Columns: [Rank, Team, Participations, Titles, Pld, W, D, L, GF, GA, GD, Pts]\n",
110 | "Index: []"
111 | ]
112 | },
113 | "execution_count": 84,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "df=pd.DataFrame(columns=headers)\n",
120 | "#df\n"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 86,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "rows=[]\n",
130 | "row_tags=rows=resp_table.find_all('tr')\n",
131 | "for r in row_tags[1:]:\n",
132 | " dirty_rows=r.find_all('td')\n",
133 | " beauty_rows=[rw.text.strip() for rw in dirty_rows]\n",
134 | " #print(beauty_rows)\n",
135 | " length=len(df)\n",
136 | " df.loc[length]=beauty_rows\n",
137 | " \n",
138 | " "
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 87,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/html": [
149 | "\n",
150 | "\n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " | \n",
167 | " Rank | \n",
168 | " Team | \n",
169 | " Participations | \n",
170 | " Titles | \n",
171 | " Pld | \n",
172 | " W | \n",
173 | " D | \n",
174 | " L | \n",
175 | " GF | \n",
176 | " GA | \n",
177 | " GD | \n",
178 | " Pts | \n",
179 | "
\n",
180 | " \n",
181 | " \n",
182 | " \n",
183 | " | 0 | \n",
184 | " 1 | \n",
185 | " Brazil | \n",
186 | " 21 | \n",
187 | " 5 | \n",
188 | " 109 | \n",
189 | " 73 | \n",
190 | " 18 | \n",
191 | " 18 | \n",
192 | " 229 | \n",
193 | " 105 | \n",
194 | " 124 | \n",
195 | " 237 | \n",
196 | "
\n",
197 | " \n",
198 | " | 1 | \n",
199 | " 2 | \n",
200 | " Germany[124] | \n",
201 | " 19 | \n",
202 | " 4 | \n",
203 | " 109 | \n",
204 | " 67 | \n",
205 | " 20 | \n",
206 | " 22 | \n",
207 | " 226 | \n",
208 | " 125 | \n",
209 | " 101 | \n",
210 | " 221 | \n",
211 | "
\n",
212 | " \n",
213 | " | 2 | \n",
214 | " 3 | \n",
215 | " Italy | \n",
216 | " 18 | \n",
217 | " 4 | \n",
218 | " 83 | \n",
219 | " 45 | \n",
220 | " 21 | \n",
221 | " 17 | \n",
222 | " 128 | \n",
223 | " 77 | \n",
224 | " 51 | \n",
225 | " 156 | \n",
226 | "
\n",
227 | " \n",
228 | " | 3 | \n",
229 | " 4 | \n",
230 | " Argentina | \n",
231 | " 17 | \n",
232 | " 2 | \n",
233 | " 81 | \n",
234 | " 43 | \n",
235 | " 15 | \n",
236 | " 23 | \n",
237 | " 137 | \n",
238 | " 93 | \n",
239 | " 44 | \n",
240 | " 144 | \n",
241 | "
\n",
242 | " \n",
243 | " | 4 | \n",
244 | " 5 | \n",
245 | " France | \n",
246 | " 15 | \n",
247 | " 2 | \n",
248 | " 66 | \n",
249 | " 34 | \n",
250 | " 13 | \n",
251 | " 19 | \n",
252 | " 120 | \n",
253 | " 77 | \n",
254 | " 43 | \n",
255 | " 115 | \n",
256 | "
\n",
257 | " \n",
258 | " | 5 | \n",
259 | " 6 | \n",
260 | " England | \n",
261 | " 15 | \n",
262 | " 1 | \n",
263 | " 69 | \n",
264 | " 29 | \n",
265 | " 21 | \n",
266 | " 19 | \n",
267 | " 91 | \n",
268 | " 64 | \n",
269 | " 27 | \n",
270 | " 108 | \n",
271 | "
\n",
272 | " \n",
273 | " | 6 | \n",
274 | " 7 | \n",
275 | " Spain | \n",
276 | " 15 | \n",
277 | " 1 | \n",
278 | " 63 | \n",
279 | " 30 | \n",
280 | " 15 | \n",
281 | " 18 | \n",
282 | " 99 | \n",
283 | " 72 | \n",
284 | " 27 | \n",
285 | " 105 | \n",
286 | "
\n",
287 | " \n",
288 | " | 7 | \n",
289 | " 8 | \n",
290 | " Uruguay | \n",
291 | " 13 | \n",
292 | " 2 | \n",
293 | " 56 | \n",
294 | " 24 | \n",
295 | " 12 | \n",
296 | " 20 | \n",
297 | " 87 | \n",
298 | " 74 | \n",
299 | " 13 | \n",
300 | " 84 | \n",
301 | "
\n",
302 | " \n",
303 | "
\n",
304 | "
"
305 | ],
306 | "text/plain": [
307 | " Rank Team Participations Titles Pld W D L GF GA GD \\\n",
308 | "0 1 Brazil 21 5 109 73 18 18 229 105 124 \n",
309 | "1 2 Germany[124] 19 4 109 67 20 22 226 125 101 \n",
310 | "2 3 Italy 18 4 83 45 21 17 128 77 51 \n",
311 | "3 4 Argentina 17 2 81 43 15 23 137 93 44 \n",
312 | "4 5 France 15 2 66 34 13 19 120 77 43 \n",
313 | "5 6 England 15 1 69 29 21 19 91 64 27 \n",
314 | "6 7 Spain 15 1 63 30 15 18 99 72 27 \n",
315 | "7 8 Uruguay 13 2 56 24 12 20 87 74 13 \n",
316 | "\n",
317 | " Pts \n",
318 | "0 237 \n",
319 | "1 221 \n",
320 | "2 156 \n",
321 | "3 144 \n",
322 | "4 115 \n",
323 | "5 108 \n",
324 | "6 105 \n",
325 | "7 84 "
326 | ]
327 | },
328 | "execution_count": 87,
329 | "metadata": {},
330 | "output_type": "execute_result"
331 | }
332 | ],
333 | "source": [
334 | "df\n",
335 | " "
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": 88,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "df.to_csv('champions.csv',index=False)"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "metadata": {},
351 | "outputs": [],
352 | "source": []
353 | }
354 | ],
355 | "metadata": {
356 | "kernelspec": {
357 | "display_name": "Python 3 (ipykernel)",
358 | "language": "python",
359 | "name": "python3"
360 | },
361 | "language_info": {
362 | "codemirror_mode": {
363 | "name": "ipython",
364 | "version": 3
365 | },
366 | "file_extension": ".py",
367 | "mimetype": "text/x-python",
368 | "name": "python",
369 | "nbconvert_exporter": "python",
370 | "pygments_lexer": "ipython3",
371 | "version": "3.11.0"
372 | },
373 | "vscode": {
374 | "interpreter": {
375 | "hash": "d3e10ef16274dd72e574b8fa73b58450b957d8421a2901baded3cca26fcf5dda"
376 | }
377 | }
378 | },
379 | "nbformat": 4,
380 | "nbformat_minor": 2
381 | }
382 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/scrap-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from bs4 import BeautifulSoup as bs\n",
10 | "import requests\n",
11 | "import pandas\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "url='https://en.wikipedia.org/wiki/FIFA_World_Cup'\n",
21 | "headers = {\"User-Agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0\"}\n",
22 | "data=requests.get(url,headers=headers)\n",
23 | "soup=bs(data.text,'html.parser')"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "#print(soup.prettify())\n",
33 | "\n",
34 | "#access the titles of the website\n",
35 | "#tit=soup.title.text\n",
36 | "#print(tit)"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 4,
42 | "metadata": {
43 | "scrolled": true
44 | },
45 | "outputs": [
46 | {
47 | "name": "stdout",
48 | "output_type": "stream",
49 | "text": [
50 | "\n",
51 | "\n",
52 | "| Rank\n",
53 | " | \n",
54 | "Team\n",
55 | " | \n",
56 | "Participations\n",
57 | " | \n",
58 | "Titles\n",
59 | " | \n",
60 | "Pld\n",
61 | " | \n",
62 | "W\n",
63 | " | \n",
64 | "D\n",
65 | " | \n",
66 | "L\n",
67 | " | \n",
68 | "GF\n",
69 | " | \n",
70 | "GA\n",
71 | " | \n",
72 | "GD\n",
73 | " | \n",
74 | "Pts\n",
75 | " |
\n",
76 | "\n",
77 | "| 1\n",
78 | " | \n",
79 | " Brazil\n",
80 | " | \n",
81 | "21\n",
82 | " | \n",
83 | "5\n",
84 | " | \n",
85 | "109\n",
86 | " | \n",
87 | "73\n",
88 | " | \n",
89 | "18\n",
90 | " | \n",
91 | "18\n",
92 | " | \n",
93 | "229\n",
94 | " | \n",
95 | "105\n",
96 | " | \n",
97 | "124\n",
98 | " | \n",
99 | "237\n",
100 | " |
\n",
101 | "\n",
102 | "| 2\n",
103 | " | \n",
104 | " Germany[123]\n",
105 | " | \n",
106 | "19\n",
107 | " | \n",
108 | "4\n",
109 | " | \n",
110 | "109\n",
111 | " | \n",
112 | "67\n",
113 | " | \n",
114 | "20\n",
115 | " | \n",
116 | "22\n",
117 | " | \n",
118 | "226\n",
119 | " | \n",
120 | "125\n",
121 | " | \n",
122 | "101\n",
123 | " | \n",
124 | "221\n",
125 | " |
\n",
126 | "\n",
127 | "| 3\n",
128 | " | \n",
129 | " Italy\n",
130 | " | \n",
131 | "18\n",
132 | " | \n",
133 | "4\n",
134 | " | \n",
135 | "83\n",
136 | " | \n",
137 | "45\n",
138 | " | \n",
139 | "21\n",
140 | " | \n",
141 | "17\n",
142 | " | \n",
143 | "128\n",
144 | " | \n",
145 | "77\n",
146 | " | \n",
147 | "51\n",
148 | " | \n",
149 | "156\n",
150 | " |
\n",
151 | "\n",
152 | "| 4\n",
153 | " | \n",
154 | " Argentina\n",
155 | " | \n",
156 | "17\n",
157 | " | \n",
158 | "2\n",
159 | " | \n",
160 | "81\n",
161 | " | \n",
162 | "43\n",
163 | " | \n",
164 | "15\n",
165 | " | \n",
166 | "23\n",
167 | " | \n",
168 | "137\n",
169 | " | \n",
170 | "93\n",
171 | " | \n",
172 | "44\n",
173 | " | \n",
174 | "144\n",
175 | " |
\n",
176 | "\n",
177 | "| 5\n",
178 | " | \n",
179 | " France\n",
180 | " | \n",
181 | "15\n",
182 | " | \n",
183 | "2\n",
184 | " | \n",
185 | "66\n",
186 | " | \n",
187 | "34\n",
188 | " | \n",
189 | "13\n",
190 | " | \n",
191 | "19\n",
192 | " | \n",
193 | "120\n",
194 | " | \n",
195 | "77\n",
196 | " | \n",
197 | "43\n",
198 | " | \n",
199 | "115\n",
200 | " |
\n",
201 | "\n",
202 | "| 6\n",
203 | " | \n",
204 | " England\n",
205 | " | \n",
206 | "15\n",
207 | " | \n",
208 | "1\n",
209 | " | \n",
210 | "69\n",
211 | " | \n",
212 | "29\n",
213 | " | \n",
214 | "21\n",
215 | " | \n",
216 | "19\n",
217 | " | \n",
218 | "91\n",
219 | " | \n",
220 | "64\n",
221 | " | \n",
222 | "27\n",
223 | " | \n",
224 | "108\n",
225 | " |
\n",
226 | "\n",
227 | "| 7\n",
228 | " | \n",
229 | " Spain\n",
230 | " | \n",
231 | "15\n",
232 | " | \n",
233 | "1\n",
234 | " | \n",
235 | "63\n",
236 | " | \n",
237 | "30\n",
238 | " | \n",
239 | "15\n",
240 | " | \n",
241 | "18\n",
242 | " | \n",
243 | "99\n",
244 | " | \n",
245 | "72\n",
246 | " | \n",
247 | "27\n",
248 | " | \n",
249 | "105\n",
250 | " |
\n",
251 | "\n",
252 | "| 8\n",
253 | " | \n",
254 | " Uruguay\n",
255 | " | \n",
256 | "13\n",
257 | " | \n",
258 | "2\n",
259 | " | \n",
260 | "56\n",
261 | " | \n",
262 | "24\n",
263 | " | \n",
264 | "12\n",
265 | " | \n",
266 | "20\n",
267 | " | \n",
268 | "87\n",
269 | " | \n",
270 | "74\n",
271 | " | \n",
272 | "13\n",
273 | " | \n",
274 | "84\n",
275 | " |
\n"
276 | ]
277 | }
278 | ],
279 | "source": [
280 | "\n",
281 | "resp_table=soup.select('table.wikitable')[7]\n",
282 | "print(resp_table)\n"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "### listing only the headers"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 27,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "['Rank\\n', 'Team\\n', 'Participations\\n', 'Titles\\n', 'Pld\\n', 'W\\n', 'D\\n', 'L\\n', 'GF\\n', 'GA\\n', 'GD\\n', 'Pts\\n']\n"
302 | ]
303 | }
304 | ],
305 | "source": [
306 | "headers=[]\n",
307 | "for h in resp_table.find_all('th'):\n",
308 | " headers.append(h.text)\n",
309 | "print(headers)"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 29,
315 | "metadata": {},
316 | "outputs": [
317 | {
318 | "ename": "AttributeError",
319 | "evalue": "ResultSet object has no attribute 'text'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?",
320 | "output_type": "error",
321 | "traceback": [
322 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
323 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
324 | "Cell \u001b[1;32mIn [29], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfor\u001b[39;00m row \u001b[39min\u001b[39;00m resp_table\u001b[39m.\u001b[39mfind_all(\u001b[39m'\u001b[39m\u001b[39mtr\u001b[39m\u001b[39m'\u001b[39m)[\u001b[39m1\u001b[39m:]:\n\u001b[1;32m----> 2\u001b[0m data\u001b[39m=\u001b[39mrow\u001b[39m.\u001b[39;49mfind_all(\u001b[39m'\u001b[39;49m\u001b[39mtd\u001b[39;49m\u001b[39m'\u001b[39;49m)\u001b[39m.\u001b[39;49mtext\n",
325 | "File \u001b[1;32mc:\\Python\\Python310\\lib\\site-packages\\bs4\\element.py:2289\u001b[0m, in \u001b[0;36mResultSet.__getattr__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2287\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__getattr__\u001b[39m(\u001b[39mself\u001b[39m, key):\n\u001b[0;32m 2288\u001b[0m \u001b[39m\"\"\"Raise a helpful exception to explain a common code fix.\"\"\"\u001b[39;00m\n\u001b[1;32m-> 2289\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m(\n\u001b[0;32m 2290\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mResultSet object has no attribute \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. You\u001b[39m\u001b[39m'\u001b[39m\u001b[39mre probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m key\n\u001b[0;32m 2291\u001b[0m )\n",
326 | "\u001b[1;31mAttributeError\u001b[0m: ResultSet object has no attribute 'text'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?"
327 | ]
328 | }
329 | ],
330 | "source": [
331 | "for row in resp_table.find_all('tr')[1:]:\n",
332 | " data=row.find_all('td').text\n",
333 | "\n"
334 | ]
335 | }
336 | ],
337 | "metadata": {
338 | "kernelspec": {
339 | "display_name": "Python 3 (ipykernel)",
340 | "language": "python",
341 | "name": "python3"
342 | },
343 | "language_info": {
344 | "codemirror_mode": {
345 | "name": "ipython",
346 | "version": 3
347 | },
348 | "file_extension": ".py",
349 | "mimetype": "text/x-python",
350 | "name": "python",
351 | "nbconvert_exporter": "python",
352 | "pygments_lexer": "ipython3",
353 | "version": "3.11.0"
354 | },
355 | "vscode": {
356 | "interpreter": {
357 | "hash": "d3e10ef16274dd72e574b8fa73b58450b957d8421a2901baded3cca26fcf5dda"
358 | }
359 | }
360 | },
361 | "nbformat": 4,
362 | "nbformat_minor": 2
363 | }
364 |
--------------------------------------------------------------------------------
/football_analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 27,
6 | "id": "eecc6fa6",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import requests\n",
12 | "import matplotlib.pyplot as mp"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 28,
18 | "id": "f07ee235",
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " | \n",
42 | " Rank | \n",
43 | " Team | \n",
44 | " Participations | \n",
45 | " Titles | \n",
46 | " Pld | \n",
47 | " W | \n",
48 | " D | \n",
49 | " L | \n",
50 | " GF | \n",
51 | " GA | \n",
52 | " GD | \n",
53 | " Pts | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " | 0 | \n",
59 | " 1 | \n",
60 | " Brazil | \n",
61 | " 21 | \n",
62 | " 5 | \n",
63 | " 109 | \n",
64 | " 73 | \n",
65 | " 18 | \n",
66 | " 18 | \n",
67 | " 229 | \n",
68 | " 105 | \n",
69 | " 124 | \n",
70 | " 237 | \n",
71 | "
\n",
72 | " \n",
73 | " | 1 | \n",
74 | " 2 | \n",
75 | " Germany | \n",
76 | " 19 | \n",
77 | " 4 | \n",
78 | " 109 | \n",
79 | " 67 | \n",
80 | " 20 | \n",
81 | " 22 | \n",
82 | " 226 | \n",
83 | " 125 | \n",
84 | " 101 | \n",
85 | " 221 | \n",
86 | "
\n",
87 | " \n",
88 | " | 2 | \n",
89 | " 3 | \n",
90 | " Italy | \n",
91 | " 18 | \n",
92 | " 4 | \n",
93 | " 83 | \n",
94 | " 45 | \n",
95 | " 21 | \n",
96 | " 17 | \n",
97 | " 128 | \n",
98 | " 77 | \n",
99 | " 51 | \n",
100 | " 156 | \n",
101 | "
\n",
102 | " \n",
103 | " | 3 | \n",
104 | " 4 | \n",
105 | " Argentina | \n",
106 | " 17 | \n",
107 | " 2 | \n",
108 | " 81 | \n",
109 | " 43 | \n",
110 | " 15 | \n",
111 | " 23 | \n",
112 | " 137 | \n",
113 | " 93 | \n",
114 | " 44 | \n",
115 | " 144 | \n",
116 | "
\n",
117 | " \n",
118 | " | 4 | \n",
119 | " 5 | \n",
120 | " France | \n",
121 | " 15 | \n",
122 | " 2 | \n",
123 | " 66 | \n",
124 | " 34 | \n",
125 | " 13 | \n",
126 | " 19 | \n",
127 | " 120 | \n",
128 | " 77 | \n",
129 | " 43 | \n",
130 | " 115 | \n",
131 | "
\n",
132 | " \n",
133 | " | 5 | \n",
134 | " 6 | \n",
135 | " England | \n",
136 | " 15 | \n",
137 | " 1 | \n",
138 | " 69 | \n",
139 | " 29 | \n",
140 | " 21 | \n",
141 | " 19 | \n",
142 | " 91 | \n",
143 | " 64 | \n",
144 | " 27 | \n",
145 | " 108 | \n",
146 | "
\n",
147 | " \n",
148 | " | 6 | \n",
149 | " 7 | \n",
150 | " Spain | \n",
151 | " 15 | \n",
152 | " 1 | \n",
153 | " 63 | \n",
154 | " 30 | \n",
155 | " 15 | \n",
156 | " 18 | \n",
157 | " 99 | \n",
158 | " 72 | \n",
159 | " 27 | \n",
160 | " 105 | \n",
161 | "
\n",
162 | " \n",
163 | " | 7 | \n",
164 | " 8 | \n",
165 | " Uruguay | \n",
166 | " 13 | \n",
167 | " 2 | \n",
168 | " 56 | \n",
169 | " 24 | \n",
170 | " 12 | \n",
171 | " 20 | \n",
172 | " 87 | \n",
173 | " 74 | \n",
174 | " 13 | \n",
175 | " 84 | \n",
176 | "
\n",
177 | " \n",
178 | "
\n",
179 | "
"
180 | ],
181 | "text/plain": [
182 | " Rank Team Participations Titles Pld W D L GF GA GD \\\n",
183 | "0 1 Brazil 21 5 109 73 18 18 229 105 124 \n",
184 | "1 2 Germany 19 4 109 67 20 22 226 125 101 \n",
185 | "2 3 Italy 18 4 83 45 21 17 128 77 51 \n",
186 | "3 4 Argentina 17 2 81 43 15 23 137 93 44 \n",
187 | "4 5 France 15 2 66 34 13 19 120 77 43 \n",
188 | "5 6 England 15 1 69 29 21 19 91 64 27 \n",
189 | "6 7 Spain 15 1 63 30 15 18 99 72 27 \n",
190 | "7 8 Uruguay 13 2 56 24 12 20 87 74 13 \n",
191 | "\n",
192 | " Pts \n",
193 | "0 237 \n",
194 | "1 221 \n",
195 | "2 156 \n",
196 | "3 144 \n",
197 | "4 115 \n",
198 | "5 108 \n",
199 | "6 105 \n",
200 | "7 84 "
201 | ]
202 | },
203 | "execution_count": 28,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | }
207 | ],
208 | "source": [
209 | "df=pd.read_csv('champions.csv')\n",
210 | "df\n"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 29,
216 | "id": "9d5d9f4f",
217 | "metadata": {
218 | "collapsed": true
219 | },
220 | "outputs": [
221 | {
222 | "data": {
223 | "text/html": [
224 | "\n",
225 | "\n",
238 | "
\n",
239 | " \n",
240 | " \n",
241 | " | \n",
242 | " Rank | \n",
243 | " Participations | \n",
244 | " Titles | \n",
245 | " Pld | \n",
246 | " W | \n",
247 | " D | \n",
248 | " L | \n",
249 | " GF | \n",
250 | " GA | \n",
251 | " GD | \n",
252 | " Pts | \n",
253 | "
\n",
254 | " \n",
255 | " \n",
256 | " \n",
257 | " | count | \n",
258 | " 8.00000 | \n",
259 | " 8.000000 | \n",
260 | " 8.000000 | \n",
261 | " 8.000000 | \n",
262 | " 8.000000 | \n",
263 | " 8.000000 | \n",
264 | " 8.000000 | \n",
265 | " 8.000000 | \n",
266 | " 8.000000 | \n",
267 | " 8.000000 | \n",
268 | " 8.000000 | \n",
269 | "
\n",
270 | " \n",
271 | " | mean | \n",
272 | " 4.50000 | \n",
273 | " 16.625000 | \n",
274 | " 2.625000 | \n",
275 | " 79.500000 | \n",
276 | " 43.125000 | \n",
277 | " 16.875000 | \n",
278 | " 19.500000 | \n",
279 | " 139.625000 | \n",
280 | " 85.875000 | \n",
281 | " 53.750000 | \n",
282 | " 146.250000 | \n",
283 | "
\n",
284 | " \n",
285 | " | std | \n",
286 | " 2.44949 | \n",
287 | " 2.615203 | \n",
288 | " 1.505941 | \n",
289 | " 20.255511 | \n",
290 | " 18.074746 | \n",
291 | " 3.603074 | \n",
292 | " 2.070197 | \n",
293 | " 57.021143 | \n",
294 | " 20.413144 | \n",
295 | " 38.688315 | \n",
296 | " 55.984054 | \n",
297 | "
\n",
298 | " \n",
299 | " | min | \n",
300 | " 1.00000 | \n",
301 | " 13.000000 | \n",
302 | " 1.000000 | \n",
303 | " 56.000000 | \n",
304 | " 24.000000 | \n",
305 | " 12.000000 | \n",
306 | " 17.000000 | \n",
307 | " 87.000000 | \n",
308 | " 64.000000 | \n",
309 | " 13.000000 | \n",
310 | " 84.000000 | \n",
311 | "
\n",
312 | " \n",
313 | " | 25% | \n",
314 | " 2.75000 | \n",
315 | " 15.000000 | \n",
316 | " 1.750000 | \n",
317 | " 65.250000 | \n",
318 | " 29.750000 | \n",
319 | " 14.500000 | \n",
320 | " 18.000000 | \n",
321 | " 97.000000 | \n",
322 | " 73.500000 | \n",
323 | " 27.000000 | \n",
324 | " 107.250000 | \n",
325 | "
\n",
326 | " \n",
327 | " | 50% | \n",
328 | " 4.50000 | \n",
329 | " 16.000000 | \n",
330 | " 2.000000 | \n",
331 | " 75.000000 | \n",
332 | " 38.500000 | \n",
333 | " 16.500000 | \n",
334 | " 19.000000 | \n",
335 | " 124.000000 | \n",
336 | " 77.000000 | \n",
337 | " 43.500000 | \n",
338 | " 129.500000 | \n",
339 | "
\n",
340 | " \n",
341 | " | 75% | \n",
342 | " 6.25000 | \n",
343 | " 18.250000 | \n",
344 | " 4.000000 | \n",
345 | " 89.500000 | \n",
346 | " 50.500000 | \n",
347 | " 20.250000 | \n",
348 | " 20.500000 | \n",
349 | " 159.250000 | \n",
350 | " 96.000000 | \n",
351 | " 63.500000 | \n",
352 | " 172.250000 | \n",
353 | "
\n",
354 | " \n",
355 | " | max | \n",
356 | " 8.00000 | \n",
357 | " 21.000000 | \n",
358 | " 5.000000 | \n",
359 | " 109.000000 | \n",
360 | " 73.000000 | \n",
361 | " 21.000000 | \n",
362 | " 23.000000 | \n",
363 | " 229.000000 | \n",
364 | " 125.000000 | \n",
365 | " 124.000000 | \n",
366 | " 237.000000 | \n",
367 | "
\n",
368 | " \n",
369 | "
\n",
370 | "
"
371 | ],
372 | "text/plain": [
373 | " Rank Participations Titles Pld W D \\\n",
374 | "count 8.00000 8.000000 8.000000 8.000000 8.000000 8.000000 \n",
375 | "mean 4.50000 16.625000 2.625000 79.500000 43.125000 16.875000 \n",
376 | "std 2.44949 2.615203 1.505941 20.255511 18.074746 3.603074 \n",
377 | "min 1.00000 13.000000 1.000000 56.000000 24.000000 12.000000 \n",
378 | "25% 2.75000 15.000000 1.750000 65.250000 29.750000 14.500000 \n",
379 | "50% 4.50000 16.000000 2.000000 75.000000 38.500000 16.500000 \n",
380 | "75% 6.25000 18.250000 4.000000 89.500000 50.500000 20.250000 \n",
381 | "max 8.00000 21.000000 5.000000 109.000000 73.000000 21.000000 \n",
382 | "\n",
383 | " L GF GA GD Pts \n",
384 | "count 8.000000 8.000000 8.000000 8.000000 8.000000 \n",
385 | "mean 19.500000 139.625000 85.875000 53.750000 146.250000 \n",
386 | "std 2.070197 57.021143 20.413144 38.688315 55.984054 \n",
387 | "min 17.000000 87.000000 64.000000 13.000000 84.000000 \n",
388 | "25% 18.000000 97.000000 73.500000 27.000000 107.250000 \n",
389 | "50% 19.000000 124.000000 77.000000 43.500000 129.500000 \n",
390 | "75% 20.500000 159.250000 96.000000 63.500000 172.250000 \n",
391 | "max 23.000000 229.000000 125.000000 124.000000 237.000000 "
392 | ]
393 | },
394 | "execution_count": 29,
395 | "metadata": {},
396 | "output_type": "execute_result"
397 | }
398 | ],
399 | "source": [
400 | "df.describe()"
401 | ]
402 | },
403 | {
404 | "cell_type": "markdown",
405 | "id": "4d5babca",
406 | "metadata": {},
407 | "source": [
408 | "### Selecting only the Team and Titles columns\n"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 30,
414 | "id": "ce358583",
415 | "metadata": {},
416 | "outputs": [
417 | {
418 | "data": {
419 | "text/html": [
420 | "\n",
421 | "\n",
434 | "
\n",
435 | " \n",
436 | " \n",
437 | " | \n",
438 | " Team | \n",
439 | " Titles | \n",
440 | "
\n",
441 | " \n",
442 | " \n",
443 | " \n",
444 | " | 0 | \n",
445 | " Brazil | \n",
446 | " 5 | \n",
447 | "
\n",
448 | " \n",
449 | " | 1 | \n",
450 | " Germany | \n",
451 | " 4 | \n",
452 | "
\n",
453 | " \n",
454 | " | 2 | \n",
455 | " Italy | \n",
456 | " 4 | \n",
457 | "
\n",
458 | " \n",
459 | " | 3 | \n",
460 | " Argentina | \n",
461 | " 2 | \n",
462 | "
\n",
463 | " \n",
464 | " | 4 | \n",
465 | " France | \n",
466 | " 2 | \n",
467 | "
\n",
468 | " \n",
469 | " | 5 | \n",
470 | " England | \n",
471 | " 1 | \n",
472 | "
\n",
473 | " \n",
474 | " | 6 | \n",
475 | " Spain | \n",
476 | " 1 | \n",
477 | "
\n",
478 | " \n",
479 | " | 7 | \n",
480 | " Uruguay | \n",
481 | " 2 | \n",
482 | "
\n",
483 | " \n",
484 | "
\n",
485 | "
"
486 | ],
487 | "text/plain": [
488 | " Team Titles\n",
489 | "0 Brazil 5\n",
490 | "1 Germany 4\n",
491 | "2 Italy 4\n",
492 | "3 Argentina 2\n",
493 | "4 France 2\n",
494 | "5 England 1\n",
495 | "6 Spain 1\n",
496 | "7 Uruguay 2"
497 | ]
498 | },
499 | "execution_count": 30,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "winners=df.loc[:,['Team','Titles']]\n",
506 | "winners\n"
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "id": "d2cc6948",
512 | "metadata": {},
513 | "source": [
514 | "### Plotting the number of World Cup titles won by each team"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 31,
520 | "id": "284101f5",
521 | "metadata": {},
522 | "outputs": [
523 | {
524 | "data": {
525 | "text/plain": [
526 | ""
527 | ]
528 | },
529 | "execution_count": 31,
530 | "metadata": {},
531 | "output_type": "execute_result"
532 | },
533 | {
534 | "data": {
535 | "image/png": "\n",
536 | "text/plain": [
537 | ""
538 | ]
539 | },
540 | "metadata": {},
541 | "output_type": "display_data"
542 | }
543 | ],
544 | "source": [
545 | "winners.plot(kind='bar',x='Team',y='Titles')"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 45,
551 | "id": "c769107e",
552 | "metadata": {},
553 | "outputs": [
554 | {
555 | "data": {
556 | "text/html": [
557 | "\n",
558 | "\n",
571 | "
\n",
572 | " \n",
573 | " \n",
574 | " | \n",
575 | " Team | \n",
576 | " GF | \n",
577 | " GA | \n",
578 | "
\n",
579 | " \n",
580 | " \n",
581 | " \n",
582 | " | 0 | \n",
583 | " Brazil | \n",
584 | " 229 | \n",
585 | " 105 | \n",
586 | "
\n",
587 | " \n",
588 | " | 1 | \n",
589 | " Germany | \n",
590 | " 226 | \n",
591 | " 125 | \n",
592 | "
\n",
593 | " \n",
594 | " | 2 | \n",
595 | " Italy | \n",
596 | " 128 | \n",
597 | " 77 | \n",
598 | "
\n",
599 | " \n",
600 | " | 3 | \n",
601 | " Argentina | \n",
602 | " 137 | \n",
603 | " 93 | \n",
604 | "
\n",
605 | " \n",
606 | " | 4 | \n",
607 | " France | \n",
608 | " 120 | \n",
609 | " 77 | \n",
610 | "
\n",
611 | " \n",
612 | " | 5 | \n",
613 | " England | \n",
614 | " 91 | \n",
615 | " 64 | \n",
616 | "
\n",
617 | " \n",
618 | " | 6 | \n",
619 | " Spain | \n",
620 | " 99 | \n",
621 | " 72 | \n",
622 | "
\n",
623 | " \n",
624 | " | 7 | \n",
625 | " Uruguay | \n",
626 | " 87 | \n",
627 | " 74 | \n",
628 | "
\n",
629 | " \n",
630 | "
\n",
631 | "
"
632 | ],
633 | "text/plain": [
634 | " Team GF GA\n",
635 | "0 Brazil 229 105\n",
636 | "1 Germany 226 125\n",
637 | "2 Italy 128 77\n",
638 | "3 Argentina 137 93\n",
639 | "4 France 120 77\n",
640 | "5 England 91 64\n",
641 | "6 Spain 99 72\n",
642 | "7 Uruguay 87 74"
643 | ]
644 | },
645 | "execution_count": 45,
646 | "metadata": {},
647 | "output_type": "execute_result"
648 | }
649 | ],
650 | "source": [
651 | "games=df.loc[:,['Team','GF','GA']]\n",
652 | "games"
653 | ]
654 | },
655 | {
656 | "cell_type": "markdown",
657 | "id": "cde6f46a",
658 | "metadata": {},
659 | "source": [
660 | "### Goals For (GF) and Goals Against (GA) per team, shown as a bar chart\n"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": 49,
666 | "id": "c74f974f",
667 | "metadata": {
668 | "scrolled": false
669 | },
670 | "outputs": [
671 | {
672 | "data": {
673 | "text/plain": [
674 | ""
675 | ]
676 | },
677 | "execution_count": 49,
678 | "metadata": {},
679 | "output_type": "execute_result"
680 | },
681 | {
682 | "data": {
683 | "image/png": "\n",
684 | "text/plain": [
685 | ""
686 | ]
687 | },
688 | "metadata": {},
689 | "output_type": "display_data"
690 | }
691 | ],
692 | "source": [
693 | "games.plot(kind='bar',y=['GF','GA'],x='Team',color=['yellow','green'],figsize=(6,10),)"
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": 53,
699 | "id": "fc00336d",
700 | "metadata": {},
701 | "outputs": [
702 | {
703 | "data": {
704 | "text/plain": [
705 | "(8, 12)"
706 | ]
707 | },
708 | "execution_count": 53,
709 | "metadata": {},
710 | "output_type": "execute_result"
711 | }
712 | ],
713 | "source": [
714 | "df.shape"
715 | ]
716 | },
717 | {
718 | "cell_type": "code",
719 | "execution_count": 61,
720 | "id": "805fd47b",
721 | "metadata": {},
722 | "outputs": [
723 | {
724 | "data": {
725 | "text/html": [
726 | "\n",
727 | "\n",
740 | "
\n",
741 | " \n",
742 | " \n",
743 | " | \n",
744 | " Team | \n",
745 | " Pts | \n",
746 | "
\n",
747 | " \n",
748 | " \n",
749 | " \n",
750 | " | 0 | \n",
751 | " Brazil | \n",
752 | " 237 | \n",
753 | "
\n",
754 | " \n",
755 | " | 1 | \n",
756 | " Germany | \n",
757 | " 221 | \n",
758 | "
\n",
759 | " \n",
760 | " | 2 | \n",
761 | " Italy | \n",
762 | " 156 | \n",
763 | "
\n",
764 | " \n",
765 | " | 3 | \n",
766 | " Argentina | \n",
767 | " 144 | \n",
768 | "
\n",
769 | " \n",
770 | " | 4 | \n",
771 | " France | \n",
772 | " 115 | \n",
773 | "
\n",
774 | " \n",
775 | " | 5 | \n",
776 | " England | \n",
777 | " 108 | \n",
778 | "
\n",
779 | " \n",
780 | " | 6 | \n",
781 | " Spain | \n",
782 | " 105 | \n",
783 | "
\n",
784 | " \n",
785 | " | 7 | \n",
786 | " Uruguay | \n",
787 | " 84 | \n",
788 | "
\n",
789 | " \n",
790 | "
\n",
791 | "
"
792 | ],
793 | "text/plain": [
794 | " Team Pts\n",
795 | "0 Brazil 237\n",
796 | "1 Germany 221\n",
797 | "2 Italy 156\n",
798 | "3 Argentina 144\n",
799 | "4 France 115\n",
800 | "5 England 108\n",
801 | "6 Spain 105\n",
802 | "7 Uruguay 84"
803 | ]
804 | },
805 | "execution_count": 61,
806 | "metadata": {},
807 | "output_type": "execute_result"
808 | }
809 | ],
810 | "source": [
811 | "points=df.iloc[:,[1,11]]\n",
812 | "points"
813 | ]
814 | },
815 | {
816 | "cell_type": "code",
817 | "execution_count": 65,
818 | "id": "d8ae14a0",
819 | "metadata": {},
820 | "outputs": [],
821 | "source": [
822 | "my_team=df.Team\n",
823 | "my_pts=df.Pts"
824 | ]
825 | },
826 | {
827 | "cell_type": "markdown",
828 | "id": "8b116cdc",
829 | "metadata": {},
830 | "source": [
831 | "### Showing each team's share of the total points as a percentage, in a pie chart\n"
832 | ]
833 | },
834 | {
835 | "cell_type": "code",
836 | "execution_count": 78,
837 | "id": "08058faf",
838 | "metadata": {},
839 | "outputs": [
840 | {
841 | "data": {
842 | "image/png": "\n",
843 | "text/plain": [
844 | ""
845 | ]
846 | },
847 | "metadata": {},
848 | "output_type": "display_data"
849 | }
850 | ],
851 | "source": [
852 | "mp.pie(my_pts,labels=my_team,autopct='%1.0f%%')\n",
853 | "mp.title(\"pie chart to show total points gained\")\n",
854 | "mp.show()"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": null,
860 | "id": "aa977979",
861 | "metadata": {},
862 | "outputs": [],
863 | "source": []
864 | }
865 | ],
866 | "metadata": {
867 | "kernelspec": {
868 | "display_name": "Python 3 (ipykernel)",
869 | "language": "python",
870 | "name": "python3"
871 | },
872 | "language_info": {
873 | "codemirror_mode": {
874 | "name": "ipython",
875 | "version": 3
876 | },
877 | "file_extension": ".py",
878 | "mimetype": "text/x-python",
879 | "name": "python",
880 | "nbconvert_exporter": "python",
881 | "pygments_lexer": "ipython3",
882 | "version": "3.11.0"
883 | }
884 | },
885 | "nbformat": 4,
886 | "nbformat_minor": 5
887 | }
888 |
--------------------------------------------------------------------------------