├── src ├── Procfile ├── app.py ├── user_ratings.py ├── name_of_user.py ├── templates │ ├── styles.css │ └── index.html ├── user_ranks.py ├── last_laugh.py └── driver.py ├── LICENSE ├── .gitignore └── README.md /src/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -b:$PORT app:app 2 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | import driver as dr 3 | 4 | app = Flask(__name__) 5 | 6 | @app.route('/') 7 | def home(): 8 | return render_template('index.html') 9 | 10 | @app.route('/compare_rank/

/

') 11 | def compare_rank(h1,h2): 12 | return dr.driver_function(h1,h2) 13 | 14 | if __name__ == '__main__': 15 | app.run(debug=True) 16 | -------------------------------------------------------------------------------- /src/user_ratings.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from lxml import html 3 | import sys 4 | import lxml.etree 5 | import lxml._elementpath 6 | 7 | 8 | def ratings(handle, result): 9 | 10 | str = "" 11 | 12 | #valid username url 13 | url = 'https://www.codechef.com/users/' + handle 14 | 15 | page = requests.get(url) 16 | 17 | tree = html.fromstring(page.content) 18 | 19 | data = tree.xpath("//div/section/div/div/div/a/text()") 20 | 21 | if len(data) == 0: 22 | result = result + "Handle: " + handle + " is invalid." 23 | print("Wrong Handle: user_ratings_py") 24 | sys.exit() 25 | 26 | else: 27 | for i in range(0, len(data[0])-2): 28 | str += data[0][i] 29 | 30 | return str 31 | -------------------------------------------------------------------------------- /src/name_of_user.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from lxml import html 3 | import sys 4 | 5 | def name(handle, result): 6 | 7 | str = "" 8 | 9 | #valid username url 10 | url = 'https://www.codechef.com/users/' + handle 11 | 12 | page = requests.get(url) 13 | 14 | tree = html.fromstring(page.content) 15 | 16 | #Storing the text of the first 'main' tag's child 'aside' tag's child tag 'div' 17 | data = tree.xpath("//main[1]/aside/div/text()") 18 | 19 | if len(data) == 0: 20 | result = result + "Handle: " + handle + " is not a valid handle." 21 | print("Wrong Handle: name_of_user") 22 | 23 | else: 24 | for i in range(3, len(data[0])): 25 | str += data[0][i] 26 | 27 | return str 28 | -------------------------------------------------------------------------------- /src/templates/styles.css: -------------------------------------------------------------------------------- 1 | body{ 2 | font-family: 'Roboto', sans-serif; 3 | } 4 | label{ 5 | font-weight: normal; 6 | } 7 | .loader { 8 | border: 5px solid #f3f3f3; /* Light grey */ 9 | border-top: 5px solid #3498db; /* Blue */ 10 | border-radius: 50%; 11 | width: 50px; 12 | height: 50px; 13 | animation: spin 2s linear infinite; 14 | } 15 | @keyframes spin { 16 | 0% { transform: rotate(0deg); } 17 | 100% { transform: rotate(360deg); } 18 | } 19 | #loader_text{ 20 | font-size: 20px; 21 | } 22 | a{ 23 | text-decoration: none; 24 | color: white; 25 | } 26 | a:hover { 27 | color: #9E9E9E; 28 | } 29 | a: visited{ 30 | color: #9E9E9E; 31 | } 32 | a:active { 33 | color: #9E9E9E; 34 | } 35 | .footer { 36 | position: fixed; 37 | left: 0; 38 | bottom: 0; 39 | width: 100%; 40 | height: 30px; 41 | padding-top: 2px; 42 | background-color: #37474F; 43 | color: white; 44 | text-align: center; 45 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Rohit Thapliyal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/user_ranks.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys 3 | from lxml import html 4 | import lxml.etree 5 | import lxml._elementpath 6 | 7 | def details_for_one(handle, dic1, lis1, result): 8 | 9 | #valid username url 10 | url = 'https://www.codechef.com/users/' + handle 11 | 12 | page = requests.get(url) 13 | 14 | tree = html.fromstring(page.content) 15 | 16 | # data = tree.xpath("//script[contains(text(), 'jQuery(document).foundation();')]/text()") 17 | 18 | data = tree.xpath("//script[contains(text(), 'var all_rating')]/text()") 19 | print(data) 20 | # print(data) 21 | 22 | if len(data) == 0: 23 | result = result + "Handle: " + handle + " is invalid." 24 | print("Wrong Handle: name_of_user_py") 25 | sys.exit() 26 | 27 | 28 | i = 0 29 | cb = 0 30 | 31 | while(1): 32 | cb = 0 33 | if i != 0: 34 | if(data[0][i-1] == ']' and data[0][i] == ';'): 35 | break 36 | if data[0][i] == '{': 37 | str = "" 38 | ktr = "" 39 | while(cb < 9): 40 | if(data[0][i] == ':'): 41 | cb += 1 42 | if(cb == 1): 43 | i += 2 44 | while(data[0][i] != '"'): 45 | ktr += data[0][i] 46 | i += 1 47 | cb += 1 48 | if(cb == 9): 49 | i += 2 50 | while(data[0][i] != '"'): 51 | str += data[0][i] 52 | i += 1 53 | i += 1 54 | i += 1 55 | dic1[ktr] = str 56 | print(str) 57 | lis1.append(ktr) 58 | i += 1 59 | -------------------------------------------------------------------------------- /src/last_laugh.py: -------------------------------------------------------------------------------- 1 | def finish_off(handle1, handle2, name1, name2, rating1, rating2, dic1, dic2, lis1, lis2, result): 2 | 3 | par1 = 0 4 | par2 = 0 5 | result = result + "Handle : " + handle1 + "\n" 6 | result = result + "Name : " + name1+ "\n" 7 | result = result + "Current ratings : " + rating1 + '\n' 8 | result = result + "\n" 9 | result = result + "Handle : " + handle2 + "\n" 10 | result = result + "Name : " + name2 + "\n" 11 | result = result + "Current ratings : " + rating2 + '\n' 12 | result = result + "\n" 13 | 14 | lis = [] 15 | 16 | if(len(lis1) < len(lis2)): 17 | lis = lis1 18 | else: 19 | lis = lis2 20 | 21 | result = result + "Contest\t\t\tRank1\t\t\tRank2\t\t\tWinner\n" 22 | 23 | for i in range(0, len(lis)): 24 | if(lis[i] in dic1 and lis[i] in dic2): 25 | result = result + lis[i] + "\t\t\t" + dic1[lis[i]] + "\t\t\t" + dic2[lis[i]] 26 | if(int(dic1[lis[i]]) < int(dic2[lis[i]])): 27 | winner = name1 28 | par1 += 1 29 | elif(int(dic1[lis[i]]) > int(dic2[lis[i]])): 30 | winner = name2 31 | par2 += 1 32 | else: 33 | winner = "Tie" 34 | 35 | result = result + "\t\t\t" + winner + '\n' 36 | 37 | result += "\n" + "Wins: \n" 38 | result += name1 + " : " + str(par1) + "\t" 39 | result += name2 + " : " + str(par2) + "\n\n" 40 | if(par1 == par2): 41 | result += "Tie" + "\n\n\n" 42 | else: 43 | result += "OVERALL WINNER : " 44 | if(par1 > par2): 45 | result += name1 + "\n\n\n" 46 | else: 47 | result += name2 + "\n\n\n" 48 | return result 49 | -------------------------------------------------------------------------------- /src/driver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import user_ranks as us 3 | import user_ratings as ur 4 | import name_of_user as un 5 | import last_laugh as ll 6 | 7 | def driver_function(handle1, handle2): 8 | rating1 = "" 9 | rating2 = "" 10 | name1 = "" 11 | name2 = "" 12 | lis1 = [] 13 | lis2 = [] 14 | result = "" 15 | 16 | # dictionary [contest -> rank] for user1 17 | dic1 = {} 18 | 19 | print("Calling name_of_user for " + handle1) 20 | name1 = un.name(handle1, result) 21 | if(len(name1) > 0): 22 | print("Calling name_of_user for " + handle2) 23 | name2 = un.name(handle2, result) 24 | 25 | if(len(name2) > 0): 26 | # scraping contests and respective ranks for user1 27 | print("Calling user_ranks for " + handle1) 28 | us.details_for_one(handle1, dic1, lis1, result) 29 | 30 | print("Calling user_ratings for " + handle1) 31 | rating1 = ur.ratings(handle1, result) 32 | 33 | # dictionary [contest -> rank] for user2 34 | dic2 = {} 35 | 36 | # scraping contests and respective ranks for user2 37 | print("Calling user_ranks for " + handle2) 38 | us.details_for_one(handle2, dic2, lis2, result) 39 | 40 | print("Calling user_ratings for " + handle2) 41 | rating2 = ur.ratings(handle2, result) 42 | 43 | print("Calling finishing function") 44 | return ll.finish_off(handle1, handle2, name1, name2, rating1, rating2, dic1, dic2, lis1, lis2, result) 45 | else: 46 | print("Handle2 is wrong") 47 | result += "Handle: " + handle2 + " is not a valid handle." 48 | return result 49 | else: 50 | print("Handle1 is wrong") 51 | result += "Handle: " + handle1 + " is not a valid handle." 52 | return result 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # codechef-rank-comparator 2 | Web scraping in python using lxml package (XML Path Language) 3 | 4 | ### Input : 5 | 6 | *Two codechef usernames which are to be compared* 7 | 8 | ### Operations : 9 | 10 | *Checks if the entered username is valid or not* 11 | 12 | *Scraps the name of the user* 13 | 14 | *Scraps the current codechef ratings of the user* 15 | 16 | *Outputs the list of mutually participated contests of the users and the winner* 17 | 18 | 19 | ### Packages/tools used : 20 | 21 | *lxml library to use html element API* 22 | 23 | *requests library to send HTTP request to the webpage* 24 | 25 | *sys library to exit the system in case of errors* 26 | 27 | *Flask web framework* 28 | 29 | *HTML and Javascript to create web template* 30 | 31 | 32 | ### Running the source code locally : 33 | 34 | *Clone/Download the repo* 35 | 36 | *Compile/Run the application file i.e. app.py by running the following command:* 37 | 38 | $ python app.py 39 | 40 | *Enter the following URL in address bar of your browser* 41 | 42 | localhost:5000 43 | 44 | ### Requirements to run the source : 45 | 46 | *Python 3* 47 | 48 | *pip(Python Package Index) :* 49 | 50 | $ sudo apt-get install python3-pip 51 | 52 | *requests package :* 53 | 54 | $ pip3 install requests 55 | 56 | *lxml package :* 57 | 58 | $ sudo apt-get install libxml2-dev libxslt1-dev python-dev 59 | 60 | $ pip install lxml 61 | 62 | *Flask package :* 63 | 64 | $ pip install flask 65 | 66 | ### Description : 67 | 68 | The script works by sending request at URL : 'codechef.com/users/handle'. This part is handled by Python. XML Path Language is used for crawling. Separate files are specified for the information being scraped. A dictionary [contest -> rank] is created for both the users. The mutual contests becomes the part of the result. The information is stored in a string 'result'. 69 | 70 | Flask web framework is used for creating a Web API to link the HTML file with the python script. 71 | 72 | The UI is created in HTML. The application is then deployed on Heroku cloud platform. 73 | 74 | The application can be run using the terminal through local host or directly through the [Heroku platform](http://codechefcomparator.herokuapp.com/) 75 | 76 | This Project is a joint contribution of : 77 | 78 | [Rohit Thapliyal](https://www.linkedin.com/in/rohit-thapliyal-515b5913a/) and [Neeraj Negi](https://www.linkedin.com/in/iamneerajnegi/) 79 | 80 | -------------------------------------------------------------------------------- /src/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Codechef Rank Comparator 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 47 | 48 | 49 | 58 |
59 |
60 |

Enter Details Below:

61 |

Enter two handles of the users to compare their codechef ratings.

62 |

**Fetching data may take some time, so please be patient

63 |

**Insert new queries after previous result is fetched.

64 |
65 |
66 |
67 | 68 | 69 |
70 |
71 | 72 | 73 |
74 |
75 |
76 |
77 |
78 |
79 | 80 |
81 |
82 |
83 |
84 |
85 | 87 | 88 |
89 | 90 |
91 | 96 | 97 | 98 | --------------------------------------------------------------------------------