├── README.md ├── Screenshot from 2021-03-20 21-39-21.png ├── Screenshot from 2021-03-20 21-46-34.png └── wistalk.py /README.md: -------------------------------------------------------------------------------- 1 | # wi-stalk 2 | WikiStalk : Analyze Wikipedia User's Activity 3 | 4 | ## Live Demo 5 | https://replit.com/@rtnf141/wistalk 6 | 7 | ## How To Use 8 | * `pip install beautifulsoup4` 9 | * `pip install lxml` 10 | * `python3 wistalk.py targetWikipediaUsername wikipediaLanguageCode`\ 11 | Supported Wikipedia languages : `id` (default) and `en` \ 12 | Example : \ 13 | `python3 wistalk.py someUser en` \ 14 | `python3 wistalk.py namaPengguna` 15 | 16 | 17 | ## Screenshot 18 | ![Screenshot2](https://github.com/altilunium/wistalk/blob/main/Screenshot%20from%202021-03-20%2021-39-21.png?raw=true) 19 | ![Screenshot3](https://github.com/altilunium/wistalk/blob/main/Screenshot%20from%202021-03-20%2021-46-34.png?raw=true) 20 | 21 | ## Dependencies 22 | `pip install beautifulsoup4` 23 | `pip install requests` 24 | `pip install lxml` 25 | 26 | 27 | ### Update : Wistalk2 28 | Now you can query Wikipedia's database directly to analyze a Wikipedia user's activity (https://quarry.wmcloud.org/query/59144). It's much faster this way. 
"""WikiStalk: summarize a Wikipedia user's contribution history.

Usage:
    python3 wistalk.py USERNAME [LANGUAGE]
    python3 wistalk.py -i

LANGUAGE is ``en`` or ``id``; any other value (or omitting it) selects the
Indonesian Wikipedia.  ``-i`` prompts for both values and prompts again after
each report.  Pressing Ctrl-C prints the per-article totals gathered so far.
"""

import datetime
import signal
import sys
import urllib.parse as urlparse
from urllib.parse import parse_qs

# Number of contributions requested per page of Special:Contributions.
PAGE_LIMIT = 5000

# Seconds to wait for a single HTTP response before giving up.
REQUEST_TIMEOUT = 60

# Identifies this tool to the server instead of the generic library default.
USER_AGENT = "wistalk (https://github.com/altilunium/wistalk)"

# U+2212 MINUS SIGN, which the contributions page uses for negative diffs.
UNICODE_MINUS = "\u2212"

# Running per-article byte totals for the user currently being analysed.
# Kept at module level so signal_handler can print partial results.
topart = dict()


def sizeof_fmt(num, suffix='B'):
    """Format a byte count using binary (1024-based) unit prefixes.

    Args:
        num: Byte count; may be negative.
        suffix: Text appended after the unit prefix.

    Returns:
        A string such as ``"1.5KiB"`` or ``"-2.0KiB"``.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)


def parse_byte_delta(text):
    """Convert a rendered byte delta such as ``+1,234`` or ``−56`` to an int.

    Thousands separators differ between wikis (``,`` or ``.``), so every
    character that is not an ASCII digit is dropped and the sign is taken
    from the presence of an ASCII hyphen or a Unicode minus sign.

    Args:
        text: Text content of a ``mw-diff-bytes`` element.

    Returns:
        The signed byte delta, or ``None`` when *text* contains no digits.
    """
    digits = "".join(ch for ch in text if "0" <= ch <= "9")
    if not digits:
        return None
    value = int(digits)
    if "-" in text or UNICODE_MINUS in text:
        return -value
    return value


def contributions_url(uname, subwiki):
    """Build the first contributions-page URL for a user.

    Args:
        uname: Wikipedia user name, unescaped.
        subwiki: ``'en'`` for the English Wikipedia; any other value selects
            the Indonesian Wikipedia.

    Returns:
        A ``(url, baseurl)`` tuple; *baseurl* is the scheme and host used to
        resolve the relative pagination and diff links found in the page.
    """
    if subwiki == 'en':
        baseurl = "https://en.wikipedia.org"
        page = "Special:Contributions/"
    else:
        baseurl = "https://id.wikipedia.org"
        page = "Istimewa:Kontribusi_pengguna/"
    # Escape the name so characters like '&' or '#' cannot split the query.
    user = urlparse.quote(str(uname), safe="")
    url = (baseurl + "/w/index.php?title=" + page + user
           + "&dir=prev&limit=" + str(PAGE_LIMIT))
    return url, baseurl


def offset_timestamp(url):
    """Extract the timestamp carried by a URL's ``offset`` query parameter.

    Args:
        url: A contributions-page URL.

    Returns:
        A ``datetime.datetime``, or ``None`` when the parameter is absent or
        does not start with a ``YYYYmmddHHMMSS`` timestamp.
    """
    query = parse_qs(urlparse.urlparse(url).query)
    try:
        raw = query['offset'][0]
        # NOTE(review): offsets may look like "timestamp|revid"; only the
        # part before the bar is parsed -- confirm against live pages.
        return datetime.datetime.strptime(raw.split("|")[0], '%Y%m%d%H%M%S')
    except (KeyError, ValueError):
        return None


def print_article_totals(totals):
    """Print ``title : bytes`` lines in ascending byte order, then a blank line."""
    for title in sorted(totals, key=totals.get):
        print(str(title) + " : " + str(totals[title]))
    print()


def signal_handler(sig, frame):
    """SIGINT handler: print the totals gathered so far and exit with status 0."""
    print_article_totals(topart)
    sys.exit(0)


def fetch_soup(url):
    """Download *url* and parse it as HTML.

    The scraping packages are imported here rather than at module level so
    the helpers above stay importable when those packages are missing.

    Returns:
        A ``BeautifulSoup`` document, or ``None`` when the request failed
        (the error is reported on stderr).
    """
    import lxml  # noqa: F401 -- fail early if the 'lxml' parser is missing
    import requests
    from bs4 import BeautifulSoup

    try:
        response = requests.get(
            url,
            headers={"User-Agent": USER_AGENT},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print("Request failed: " + str(exc), file=sys.stderr)
        return None
    return BeautifulSoup(response.text, 'lxml')


def print_contribution(row, baseurl):
    """Print one contribution row and add its byte delta to ``topart``.

    Args:
        row: The ``<li>`` element of a single contribution.
        baseurl: Scheme and host prepended to the row's relative links.

    Returns:
        The signed byte delta of the edit, or ``None`` when the row has no
        parsable byte count.
    """
    title_el = row.find("a", {"class": "mw-contributions-title"})
    time_el = row.find("a", {"class": "mw-changeslist-date"})
    bytes_el = row.find("span", {"class": "mw-diff-bytes"})
    if bytes_el is None:
        # Some rows carry the byte count in a <strong> element instead
        # (presumably large changes -- TODO confirm).
        bytes_el = row.find("strong", {"class": "mw-diff-bytes"})
    diff_el = row.find("a", {"class": "mw-changeslist-diff"})

    title = title_el.text if title_el is not None else None
    when = time_el.text if time_el is not None else None
    delta = parse_byte_delta(bytes_el.text) if bytes_el is not None else None

    if title is not None and delta is not None:
        topart[title] = topart.get(title, 0) + delta

    # Print the most detailed header line the available elements allow.
    if title is not None and when is not None and delta is not None:
        print(when + " : " + title
              + " (" + bytes_el.text + " / " + str(topart[title]) + ")")
    elif title is not None and when is not None:
        print(when + " : " + title)
    elif title is not None:
        print(title)

    # Prefer the diff link; fall back to the revision link, marked "(B)".
    if diff_el is not None and "href" in diff_el.attrs:
        print(baseurl + diff_el.attrs["href"])
    elif time_el is not None and "href" in time_el.attrs:
        print(baseurl + time_el.attrs["href"] + " (B)")
    else:
        # No link at all: skip the trailing blank line as well.
        return delta
    print()
    return delta


def analyze_user(uname, subwiki):
    """Walk every contributions page of *uname* and print a report.

    Prints each edit as it is read, a progress line before every page after
    the first, the per-article totals, and the overall bytes added/removed.
    """
    url, baseurl = contributions_url(uname, subwiki)
    byte_add = 0
    byte_rem = 0
    edits_seen = 0
    # Reuse the same dict object so signal_handler always sees current data.
    topart.clear()

    while url is not None:
        when = offset_timestamp(url)
        if when is not None:
            print(str(when) + " " + str(edits_seen)
                  + " [+" + sizeof_fmt(byte_add) + "] ["
                  + sizeof_fmt(byte_rem) + "]")

        soup = fetch_soup(url)
        if soup is None:
            break

        rows = soup.find_all("li", {"data-mw-revid": True})
        for row in reversed(rows):
            edits_seen += 1
            delta = print_contribution(row, baseurl)
            if delta is None:
                continue
            if delta >= 0:
                byte_add += delta
            else:
                byte_rem += delta

        next_link = soup.find("a", {"class": "mw-prevlink"})
        if next_link is not None and "href" in next_link.attrs:
            url = baseurl + next_link.attrs["href"]
        else:
            url = None

    print_article_totals(topart)
    print("Addition " + sizeof_fmt(byte_add))
    print("Deletion " + sizeof_fmt(byte_rem))


def prompt_target():
    """Ask interactively for a user name and language; return both as typed."""
    print("https://YY.wikipedia.org/wiki/User:XXXX")
    uname = input("Enter Wikipedia Username (XXXX) : ")
    subwiki = input("Enter Wikipedia Language (en/id) : ")
    return uname, subwiki


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Arguments without the program name; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 when no argument was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("Usage: python3 wistalk.py USERNAME [en|id]"
              "  (or: python3 wistalk.py -i)", file=sys.stderr)
        return 2

    signal.signal(signal.SIGINT, signal_handler)

    interactive = args[0] == "-i"
    if interactive:
        uname, subwiki = prompt_target()
    else:
        uname = args[0]
        subwiki = args[1] if len(args) > 1 else 'id'

    while True:
        analyze_user(uname, subwiki)
        if not interactive:
            return 0
        print()
        uname, subwiki = prompt_target()


if __name__ == "__main__":
    sys.exit(main())