├── README.md
├── Screenshot from 2021-03-20 21-39-21.png
├── Screenshot from 2021-03-20 21-46-34.png
└── wistalk.py


/README.md:
--------------------------------------------------------------------------------
 1 | # wi-stalk
 2 | WikiStalk : Analyze Wikipedia User's Activity
 3 | 
 4 | ## Live Demo
 5 | https://replit.com/@rtnf141/wistalk
 6 | 
 7 | ## How To Use
* `pip install beautifulsoup4`
* `pip install requests`
* `pip install lxml`
* `python3 wistalk.py targetWikipediaUsername wikipediaLanguageCode`\
Supported Wikipedia languages: `id` (default) and `en`. \
Examples: \
`python3 wistalk.py someUser en` \
`python3 wistalk.py namaPengguna`
15 | 
16 | 
17 | ## Screenshot
18 | ![Screenshot2](https://github.com/altilunium/wistalk/blob/main/Screenshot%20from%202021-03-20%2021-39-21.png?raw=true)
19 | ![Screenshot3](https://github.com/altilunium/wistalk/blob/main/Screenshot%20from%202021-03-20%2021-46-34.png?raw=true)
20 | 
21 | ## Dependencies
22 | `pip install beautifulsoup4`
23 | `pip install requests`
24 | `pip install lxml`
25 | 
26 | 
27 | ### Update : Wistalk2
You can now query Wikipedia's database directly to analyze a Wikipedia user's activity (https://quarry.wmcloud.org/query/59144). It is much faster this way.
29 | * Create a Wikipedia account
30 | * Log in to Quarry
31 | * Fork that SQL query
* Set the target variable to the target username
33 | * Submit query
34 | 


--------------------------------------------------------------------------------
/Screenshot from 2021-03-20 21-39-21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altilunium/wistalk/c5ed79950718b42957d171182f1da909f71dd20a/Screenshot from 2021-03-20 21-39-21.png


--------------------------------------------------------------------------------
/Screenshot from 2021-03-20 21-46-34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altilunium/wistalk/c5ed79950718b42957d171182f1da909f71dd20a/Screenshot from 2021-03-20 21-46-34.png


--------------------------------------------------------------------------------
/wistalk.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from bs4 import BeautifulSoup
  3 | import requests
  4 | import urllib.parse as urlparse
  5 | from urllib.parse import parse_qs
  6 | import datetime
  7 | import lxml
  8 | import signal
  9 | 
 10 | 
 11 | def sizeof_fmt(num, suffix='B'):
 12 |     for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
 13 |         if abs(num) < 1024.0:
 14 |             return "%3.1f%s%s" % (num, unit, suffix)
 15 |         num /= 1024.0
 16 |     return "%.1f%s%s" % (num, 'Yi', suffix)
 17 | 
 18 | 
 19 | 
 20 | uname = sys.argv[1]
 21 | 
 22 | 
 23 | if uname == "-i":
 24 | 	print("https://YY.wikipedia.org/wiki/User:XXXX")
 25 | 	uname = input("Enter Wikipedia Username (XXXX) : ")
 26 | 	subwiki = input("Enter Wikipedia Language (en/id) : ")
 27 | 	iloop = True
 28 | else:
 29 | 	iloop = False
 30 | 	try:
 31 | 		subwiki = sys.argv[2]
 32 | 	except Exception as e:
 33 | 		subwiki = 'id'
 34 | stillOn = True
 35 | topart = dict()
 36 | 
 37 | def signal_handler(sig,frame):
 38 | 	for i in sorted(topart,key=topart.get,reverse=False):
 39 | 		print(str(i) +" : "+ str(topart[i])) 
 40 | 	print()
 41 | 	sys.exit(0)
 42 | 
 43 | 
 44 | 
 45 | signal.signal(signal.SIGINT,signal_handler)
 46 | while stillOn:
 47 | 	if subwiki == 'en':
 48 | 		url = "https://en.wikipedia.org/w/index.php?title=Special:Contributions/"+str(uname)+"&dir=prev&limit=5000"
 49 | 		baseurl = "https://en.wikipedia.org"
 50 | 	else:
 51 | 		url = "https://id.wikipedia.org/w/index.php?title=Istimewa:Kontribusi_pengguna/"+str(uname)+"&dir=prev&limit=5000"
 52 | 		baseurl = "https://id.wikipedia.org"
 53 | 
 54 | 	stop = False
 55 | 	wikiarticle = set()
 56 | 	byte_add = 0
 57 | 	byte_rem = 0
 58 | 	stage = 0
 59 | 	topart = dict()
 60 | 
 61 | 	while not stop:
 62 | 
 63 | 		stage += 500
 64 | 		#print("Edit count : "+str(stage))
 65 | 
 66 | 		parsed = urlparse.urlparse(url)
 67 | 		try:
 68 | 			x = parse_qs(parsed.query)['offset']
 69 | 			dobj = datetime.datetime.strptime(x[0],'%Y%m%d%H%M%S')
 70 | 			print(str(dobj) + "   " +str(stage) +" [+"+sizeof_fmt(byte_add)+"] ["+sizeof_fmt(byte_rem)+"]" )
 71 | 		except Exception as e:
 72 | 			None
 73 | 
 74 | 
 75 | 		#print(url)
 76 | 		actualPayload = bytearray()
 77 | 		response = requests.get(url)
 78 | 		actualPayload = response.text
 79 | 		soup = BeautifulSoup(actualPayload,'lxml')
 80 | 
 81 | 		titles = soup.find_all("a",{"class":"mw-contributions-title"})
 82 | 		titles_time = soup.find_all("a",{"class":"mw-changeslist-date"})
 83 | 		bytescontrib = soup.find_all("span",{"class":"mw-diff-bytes"})
 84 | 		next_url = soup.find("a",{"class":"mw-prevlink"})
 85 | 		theList = soup.findAll("li",{"data-mw-revid":True})
 86 | 
 87 | 
 88 | 		theList.reverse()
 89 | 		for i in theList:
 90 | 			c_title = i.find("a",{"class":"mw-contributions-title"})
 91 | 			c_time = i.find("a",{"class":"mw-changeslist-date"})
 92 | 			c_ctr = i.find("span",{"class":"mw-diff-bytes"})
 93 | 			c_ctrX = i.find("strong",{"class":"mw-diff-bytes"})
 94 | 			c_diff = i.find("a",{"class":"mw-changeslist-diff"})
 95 | 
 96 | 
 97 | 			try:
 98 | 				if c_ctr is None:
 99 | 					c_ctr = c_ctrX
100 | 				a = c_ctr.text.replace("−","-")
101 | 				a = c_ctr.text.replace(".","")
102 | 				a = int(a)
103 | 				if c_title.text not in topart:
104 | 					topart[c_title.text] = a
105 | 				else:
106 | 					topart[c_title.text] += a
107 | 				print(c_time.text +" : "+c_title.text + " (" + c_ctr.text +" / "+str(topart[c_title.text]) +")")
108 | 				
109 | 			except Exception as e:
110 | 				
111 | 				try:
112 | 					print(c_time.text +" : "+c_title.text)
113 | 				except Exception as d:
114 | 					print(c_title.text)
115 | 			try:
116 | 				print(baseurl+c_diff.attrs["href"])
117 | 			except Exception as e:
118 | 				try:
119 | 					print(baseurl+c_time.attrs["href"]+"  (B)")
120 | 				except Exception as e:
121 | 					continue
122 | 			print()
123 | 
124 | 
125 | 
126 | 		
127 | 
128 | 		
129 | 		for i in bytescontrib:
130 | 			#print(i.text)
131 | 			a = i.text.replace("−","-")
132 | 			contrib = int(a)
133 | 			if contrib >= 0:
134 | 				byte_add += contrib
135 | 			else:
136 | 				byte_rem += contrib
137 | 
138 | 
139 | 		try :
140 | 			next_url = next_url.attrs["href"]
141 | 			url = baseurl + next_url
142 | 		except Exception as e:
143 | 			stop = True
144 | 
145 | 
146 | 	'''
147 | 	for i in wikiarticle:
148 | 		print(i)
149 | 	'''
150 | 
151 | 
152 | 	for i in sorted(topart,key=topart.get,reverse=False):
153 | 		print(str(i) +" : "+ str(topart[i])) 
154 | 	print()
155 | 
156 | 	print("Addition "+sizeof_fmt(byte_add))
157 | 	print("Deletion "+sizeof_fmt(byte_rem))
158 | 
159 | 	if iloop:
160 | 		print()
161 | 		print("https://YY.wikipedia.org/wiki/User:XXXX")
162 | 		uname = input("Enter Wikipedia Username (XXXX) : ")
163 | 		subwiki = input("Enter Wikipedia Language (en/id) : ")
164 | 	else:
165 | 		stillOn = False	
166 | 
167 | 
168 | 


--------------------------------------------------------------------------------