├── python project report.docx ├── README.md └── 3.py /python project report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shyxm1/PYHTON-PROJECT-/HEAD/python project report.docx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PYTHON-PROJECT 2 | 3 | The Stack Overflow Developer Survey serves as a critical annual benchmark for understanding the global developer ecosystem. By capturing data on skills, tools, education, and workplace conditions, it offers unparalleled insights into the evolving preferences and challenges of software professionals. This report presents a structured exploratory analysis of the 2023 survey dataset, focusing on key trends in technology adoption, compensation patterns, and demographic influences. The findings aim to illuminate industry-wide shifts and empower stakeholders—from developers to tech leaders—to make data-driven decisions in a rapidly changing digital landscape. 4 | 5 | ii. General Objectives of the Project 6 | This report addresses six core analysis objectives: 7 | 1. Analyse yearly compensation distribution. 8 | 2. Compare yearly compensation by employment type. 9 | 3. Compare yearly compensation by country. 10 | 4. Compare remote work distribution. 11 | 5. Compare average salary by job satisfaction. 12 | 6. Analyse correlation between yearly compensation and age. 
"""Interactive Tkinter front end for plotting developer-survey salary data.

Run as a script::

    python 3.py [path/to/survey_results_public.csv]

The CSV is loaded and cleaned once, then a small window lets the user pick
one of six matplotlib/seaborn charts to display.
"""

import sys
import tkinter as tk
from tkinter import messagebox
from tkinter import ttk

import matplotlib.pyplot as plt
import numpy as np  # noqa: F401  (unused here; kept from the original imports)
import pandas as pd
import seaborn as sns

# Default CSV location. Pass a different path as the first command-line
# argument to run the script on another machine.
DEFAULT_CSV_PATH = r"C:\Users\shyaa\OneDrive\Desktop\survey_results_public.csv"

# Columns kept from the raw survey file.
columns = ['MainBranch', 'Age', 'Employment', 'RemoteWork', 'EdLevel',
           'ConvertedCompYearly', 'JobSat', 'Country']

# Rows missing any of these are dropped during cleaning.
REQUIRED_COLUMNS = ['Employment', 'RemoteWork', 'ConvertedCompYearly', 'JobSat']

# Exclusive salary bounds used to discard implausible compensation values.
MIN_SALARY = 1000
MAX_SALARY = 500000

# Age brackets from youngest to oldest.
# NOTE(review): labels are assumed to match the survey's "Age" column
# verbatim -- confirm against the CSV in use. Any label not listed here
# (e.g. "Prefer not to say") is treated as missing.
AGE_ORDER = (
    "Under 18 years old",
    "18-24 years old",
    "25-34 years old",
    "35-44 years old",
    "45-54 years old",
    "55-64 years old",
    "65 years or older",
)


def load_clean_data(csv_path=DEFAULT_CSV_PATH):
    """Read the survey CSV and return the cleaned DataFrame used by the plots.

    Cleaning keeps only ``columns``, drops rows with a missing value in any
    of ``REQUIRED_COLUMNS``, coerces ``ConvertedCompYearly`` to numeric and
    keeps rows whose salary lies strictly between ``MIN_SALARY`` and
    ``MAX_SALARY``.

    Args:
        csv_path: Path of the survey CSV file.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If the file lacks one of the expected columns.
    """
    # usecols avoids loading survey columns that are never used.
    raw = pd.read_csv(csv_path, usecols=columns)
    data = raw[columns].dropna(subset=REQUIRED_COLUMNS).copy()
    data['ConvertedCompYearly'] = pd.to_numeric(
        data['ConvertedCompYearly'], errors='coerce')
    salary = data['ConvertedCompYearly']
    # NaN compares False on both sides, so coerced non-numeric rows drop out.
    return data[(salary > MIN_SALARY) & (salary < MAX_SALARY)]


def age_to_ordinal(age):
    """Map age-bracket labels to ordinal codes (0 = youngest bracket).

    Labels that are not in ``AGE_ORDER`` and missing values become NaN, so
    they are ignored by correlation computations instead of being counted as
    an arbitrary age.

    Args:
        age: ``pandas.Series`` of age-bracket labels.

    Returns:
        A ``pandas.Series`` of codes aligned with ``age``.
    """
    rank = {label: position for position, label in enumerate(AGE_ORDER)}
    return age.map(rank)


def _show_figure():
    """Apply the shared margins to the current figure and display it."""
    plt.subplots_adjust(top=0.9, bottom=0.1, hspace=0.4)
    plt.show()


# Graph Functions
# Each function plots from the module-level ``df_clean`` frame, which is
# created in the ``__main__`` section below.
def show_histogram():
    """Show a 40-bin histogram of yearly compensation."""
    plt.figure(figsize=(8, 5))
    plt.hist(df_clean['ConvertedCompYearly'], bins=40,
             color='skyblue', edgecolor='black')
    plt.title("Histogram of Yearly Compensation")
    plt.xlabel("Salary")
    plt.ylabel("Frequency")
    _show_figure()


def show_boxplot():
    """Show box plots of yearly compensation per employment type (log y-axis)."""
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df_clean, x='Employment', y='ConvertedCompYearly')
    plt.yscale('log')
    plt.title("Box Plot: Salary by Employment Type")
    plt.xticks(rotation=45)
    _show_figure()


def show_scatter():
    """Show yearly compensation for the five most frequent countries."""
    top_countries = df_clean['Country'].value_counts().index[:5]
    df_scatter = df_clean[df_clean['Country'].isin(top_countries)]
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=df_scatter, x='Country', y='ConvertedCompYearly',
                    alpha=0.6)
    plt.title("Scatter Plot: Salary vs Country")
    plt.xticks(rotation=45)
    _show_figure()


def show_donut():
    """Show a donut chart of the share of each remote-work category."""
    remote_counts = df_clean['RemoteWork'].value_counts()
    plt.figure(figsize=(6, 6))
    # A wedge width below 1 leaves the hole that turns the pie into a donut.
    plt.pie(remote_counts, labels=remote_counts.index, autopct='%1.1f%%',
            startangle=140, wedgeprops={'width': 0.4})
    plt.title("Donut Chart: Remote Work Distribution")
    _show_figure()


def show_bar():
    """Show mean yearly compensation per job-satisfaction value, highest first."""
    salary_by_job_sat = (
        df_clean.groupby('JobSat')['ConvertedCompYearly']
        .mean()
        .sort_values(ascending=False)
    )
    plt.figure(figsize=(10, 5))
    salary_by_job_sat.plot(kind='bar', color='orange')
    plt.title("Bar Chart: Avg Salary by Job Satisfaction")
    plt.ylabel("Avg Salary")
    plt.xticks(rotation=45)
    _show_figure()


def show_heatmap():
    """Show the correlation between yearly compensation and age bracket."""
    df_corr = df_clean[['ConvertedCompYearly']].copy()
    # Plain category codes follow alphabetical label order, which would rank
    # "Under 18 years old" as the oldest group; use the real bracket order.
    df_corr['Age_num'] = age_to_ordinal(df_clean['Age'])
    plt.figure(figsize=(6, 4))
    sns.heatmap(df_corr.corr(), annot=True, cmap='coolwarm')
    plt.title("Correlation Heatmap")
    _show_figure()


# Combobox label -> plotting function. Insertion order is the menu order.
GRAPHS = {
    "Histogram": show_histogram,
    "Box Plot": show_boxplot,
    "Scatter Plot": show_scatter,
    "Donut Chart": show_donut,
    "Bar Chart": show_bar,
    "Heatmap": show_heatmap,
}

options = list(GRAPHS)


def generate_graph(choice):
    """Display the graph named by ``choice``.

    Args:
        choice: One of the labels in ``options``. Any other value (including
            an empty selection) shows a warning dialog instead of a plot.
    """
    plot = GRAPHS.get(choice)
    if plot is None:
        messagebox.showwarning("No graph selected",
                               "Please select a graph type from the list.")
        return
    plot()


if __name__ == "__main__":
    # Load and clean data
    csv_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH
    try:
        df_clean = load_clean_data(csv_path)
    except FileNotFoundError:
        sys.exit(
            f"Survey CSV not found: {csv_path}\n"
            "Usage: python 3.py [path/to/survey_results_public.csv]"
        )

    # GUI Setup
    root = tk.Tk()
    root.title("Interactive Survey Graphs")
    root.geometry("400x400")

    tk.Label(root, text="Select Graph Type",
             font=('Helvetica', 16)).pack(pady=20)

    # Read-only so the user can only pick one of the known graph names.
    combo = ttk.Combobox(root, values=options, font=('Helvetica', 12),
                         state="readonly")
    combo.pack(pady=10)

    tk.Button(root, text="Generate Graph", font=('Helvetica', 12),
              command=lambda: generate_graph(combo.get())).pack(pady=20)

    root.mainloop()