├── Screenshot from 2021-03-20 21-39-21.png ├── Screenshot from 2021-03-20 21-46-34.png ├── README.md ├── wistalk.py └── wistalk2.py /Screenshot from 2021-03-20 21-39-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altilunium/wistalk/HEAD/Screenshot from 2021-03-20 21-39-21.png -------------------------------------------------------------------------------- /Screenshot from 2021-03-20 21-46-34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altilunium/wistalk/HEAD/Screenshot from 2021-03-20 21-46-34.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wi-stalk 2 | WikiStalk : Analyze Wikipedia User's Activity 3 | 4 | ![image](https://github.com/user-attachments/assets/8d9ef5e2-35ea-41e9-8393-aa034ad77228) 5 | 6 | -------------------------------------------------------------------------------- /wistalk.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from bs4 import BeautifulSoup 3 | import requests 4 | import urllib.parse as urlparse 5 | from urllib.parse import parse_qs 6 | import datetime 7 | import lxml 8 | import signal 9 | 10 | 11 | def sizeof_fmt(num, suffix='B'): 12 | for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: 13 | if abs(num) < 1024.0: 14 | return "%3.1f%s%s" % (num, unit, suffix) 15 | num /= 1024.0 16 | return "%.1f%s%s" % (num, 'Yi', suffix) 17 | 18 | 19 | 20 | uname = sys.argv[1] 21 | 22 | 23 | if uname == "-i": 24 | print("https://YY.wikipedia.org/wiki/User:XXXX") 25 | uname = input("Enter Wikipedia Username (XXXX) : ") 26 | subwiki = input("Enter Wikipedia Language (en/id) : ") 27 | iloop = True 28 | else: 29 | iloop = False 30 | try: 31 | subwiki = sys.argv[2] 32 | except Exception as e: 33 | subwiki = 'id' 34 | stillOn = True 35 | topart = dict() 36 | 37 | def signal_handler(sig,frame): 38 | for i in sorted(topart,key=topart.get,reverse=False): 39 | print(str(i) +" : "+ str(topart[i])) 40 | print() 41 | sys.exit(0) 42 | 43 | 44 | 45 | signal.signal(signal.SIGINT,signal_handler) 46 | while stillOn: 47 | if subwiki == 'en': 48 | url = "https://en.wikipedia.org/w/index.php?title=Special:Contributions/"+str(uname)+"&dir=prev&limit=5000" 49 | baseurl = "https://en.wikipedia.org" 50 | else: 51 | url = "https://id.wikipedia.org/w/index.php?title=Istimewa:Kontribusi_pengguna/"+str(uname)+"&dir=prev&limit=5000" 52 | baseurl = "https://id.wikipedia.org" 53 | 54 | stop = False 55 | wikiarticle = set() 56 | byte_add = 0 57 | byte_rem = 0 58 | stage = 0 59 | topart = dict() 60 | 61 | while not stop: 62 | 63 | stage += 500 64 | #print("Edit count : "+str(stage)) 65 | 66 | parsed = urlparse.urlparse(url) 67 | try: 68 | x = parse_qs(parsed.query)['offset'] 69 | dobj = datetime.datetime.strptime(x[0],'%Y%m%d%H%M%S') 70 | print(str(dobj) + " " +str(stage) +" [+"+sizeof_fmt(byte_add)+"] ["+sizeof_fmt(byte_rem)+"]" ) 71 | except Exception as e: 72 | None 73 | 74 | 75 | #print(url) 76 | actualPayload = bytearray() 77 | response = requests.get(url) 78 | actualPayload = response.text 79 | soup = BeautifulSoup(actualPayload,'lxml') 80 | 81 | titles = soup.find_all("a",{"class":"mw-contributions-title"}) 82 | titles_time = soup.find_all("a",{"class":"mw-changeslist-date"}) 83 | bytescontrib = soup.find_all("span",{"class":"mw-diff-bytes"}) 84 | next_url = soup.find("a",{"class":"mw-prevlink"}) 85 | theList = soup.findAll("li",{"data-mw-revid":True}) 86 | 87 | 88 | theList.reverse() 89 | for i in theList: 90 | c_title = i.find("a",{"class":"mw-contributions-title"}) 91 | c_time = i.find("a",{"class":"mw-changeslist-date"}) 92 | c_ctr = i.find("span",{"class":"mw-diff-bytes"}) 93 | c_ctrX = i.find("strong",{"class":"mw-diff-bytes"}) 94 | c_diff = i.find("a",{"class":"mw-changeslist-diff"}) 95 | 96 | 97 | try: 98 | if c_ctr is None: 99 | c_ctr = c_ctrX 100 | a = c_ctr.text.replace("−","-") 101 | a = c_ctr.text.replace(".","") 102 | a = int(a) 103 | if c_title.text not in topart: 104 | topart[c_title.text] = a 105 | else: 106 | topart[c_title.text] += a 107 | print(c_time.text +" : "+c_title.text + " (" + c_ctr.text +" / "+str(topart[c_title.text]) +")") 108 | 109 | except Exception as e: 110 | 111 | try: 112 | print(c_time.text +" : "+c_title.text) 113 | except Exception as d: 114 | print(c_title.text) 115 | try: 116 | print(baseurl+c_diff.attrs["href"]) 117 | except Exception as e: 118 | try: 119 | print(baseurl+c_time.attrs["href"]+" (B)") 120 | except Exception as e: 121 | continue 122 | print() 123 | 124 | 125 | 126 | 127 | 128 | 129 | for i in bytescontrib: 130 | #print(i.text) 131 | a = i.text.replace("−","-") 132 | contrib = int(a) 133 | if contrib >= 0: 134 | byte_add += contrib 135 | else: 136 | byte_rem += contrib 137 | 138 | 139 | try : 140 | next_url = next_url.attrs["href"] 141 | url = baseurl + next_url 142 | except Exception as e: 143 | stop = True 144 | 145 | 146 | ''' 147 | for i in wikiarticle: 148 | print(i) 149 | ''' 150 | 151 | 152 | for i in sorted(topart,key=topart.get,reverse=False): 153 | print(str(i) +" : "+ str(topart[i])) 154 | print() 155 | 156 | print("Addition "+sizeof_fmt(byte_add)) 157 | print("Deletion "+sizeof_fmt(byte_rem)) 158 | 159 | if iloop: 160 | print() 161 | print("https://YY.wikipedia.org/wiki/User:XXXX") 162 | uname = input("Enter Wikipedia Username (XXXX) : ") 163 | subwiki = input("Enter Wikipedia Language (en/id) : ") 164 | else: 165 | stillOn = False 166 | 167 | 168 | -------------------------------------------------------------------------------- /wistalk2.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | def get_user_contributions(username: str, lang_code: str) -> dict: 4 | """ 5 | Fetches all Wikipedia page contributions for a given user in a specific language, 6 | aggregating the total byte difference for each page. 7 | 8 | Args: 9 | username (str): The Wikipedia username. 10 | lang_code (str): The Wikipedia language code (e.g., 'en' for English, 'id' for Indonesian). 11 | 12 | Returns: 13 | dict: A dictionary where keys are page titles and values are the 14 | total byte count contributed by the user to that page. 15 | Returns an empty dictionary if the user has no contributions or an error occurs. 16 | """ 17 | base_url = f"https://{lang_code}.wikipedia.org/w/api.php" 18 | all_contributions = {} 19 | uccontinue = None # For pagination 20 | 21 | print(f"Fetching contributions for user '{username}' on {lang_code.upper()} Wikipedia...") 22 | 23 | while True: 24 | params = { 25 | "action": "query", 26 | "list": "usercontribs", 27 | "ucuser": username, 28 | "uclimit": "500", # Max limit per request 29 | "ucprop": "title|sizediff", # Get page title and byte difference for each edit 30 | "format": "json", 31 | "ucshow": "!new", # Exclude 'new page' creations as sizediff for new pages is often zero 32 | # and doesn't represent content added. For this task, we focus on actual content diff. 33 | } 34 | if uccontinue: 35 | params["uccontinue"] = uccontinue 36 | 37 | try: 38 | response = requests.get(base_url, params=params) 39 | response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx) 40 | data = response.json() 41 | 42 | user_contribs = data.get("query", {}).get("usercontribs", []) 43 | for contrib in user_contribs: 44 | title = contrib.get("title") 45 | # sizediff can be negative for removals, positive for additions 46 | # We are interested in the absolute change, but for "contributed bytes", 47 | # positive sizediff (additions) is what we count. 48 | # If a user deletes content, that's still a "touch" but might not 49 | # count as "contributed bytes" in the positive sense. 50 | # For this script, we'll sum all sizediff values as they represent 51 | # the net change the user made. 52 | size_diff = contrib.get("sizediff", 0) 53 | 54 | if title: 55 | all_contributions[title] = all_contributions.get(title, 0) + size_diff 56 | 57 | # Check if there are more results to fetch (pagination) 58 | if "continue" in data: 59 | uccontinue = data["continue"].get("uccontinue") 60 | print(f" Continuing fetch with: {uccontinue}") 61 | else: 62 | break # No more contributions 63 | except requests.exceptions.RequestException as e: 64 | print(f"Error during API request: {e}") 65 | return {} # Return empty on error 66 | except Exception as e: 67 | print(f"An unexpected error occurred: {e}") 68 | return {} 69 | 70 | return all_contributions 71 | 72 | def main(): 73 | """ 74 | Main function to get user input, fetch contributions, and display sorted results. 75 | """ 76 | wikipedia_username = input("Enter Wikipedia username: ") 77 | wikipedia_language_code = input("Enter Wikipedia language code (e.g., 'en', 'id', 'fr'): ").lower() 78 | 79 | if not wikipedia_username or not wikipedia_language_code: 80 | print("Username and language code cannot be empty.") 81 | return 82 | 83 | # Fetch contributions 84 | contributions = get_user_contributions(wikipedia_username, wikipedia_language_code) 85 | 86 | if not contributions: 87 | print(f"\nNo contributions found for '{wikipedia_username}' on {wikipedia_language_code.upper()} Wikipedia, or an error occurred.") 88 | return 89 | 90 | # Sort contributions by byte count (descending) 91 | # We convert to a list of tuples for sorting, then back to a dictionary for display. 92 | sorted_contributions = sorted(contributions.items(), key=lambda item: item[1], reverse=True) 93 | 94 | print(f"\n--- Contributions for '{wikipedia_username}' on {wikipedia_language_code.upper()} Wikipedia (Sorted by Byte Count) ---") 95 | for title, byte_count in sorted_contributions: 96 | # Filter out pages where net contribution is zero, as they don't represent significant "touches" 97 | if byte_count != 0: 98 | print(f"Page: {title:<50} | Contributed Bytes: {byte_count:,}") 99 | 100 | print("\nNote: 'Contributed Bytes' represents the net change in byte count made by the user to the page.") 101 | print(" A positive number indicates additions, a negative number indicates removals.") 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | --------------------------------------------------------------------------------