├── Datathon-1 ├── .gitignore ├── Report.pdf ├── elevation_map.py ├── scalar_field_visualization.py ├── test.ipynb └── vector_field_visualization.py ├── Datathon-2 ├── .gitignore ├── Datathon_2.pdf ├── README.txt ├── current_statistics.txt ├── isosurfaces.py ├── isosurfaces_currents.py ├── salinity_statistics.txt ├── scalar_field_depth_profiling.py ├── scalar_field_visualization_constant_depth.py ├── temperature_statistics.txt ├── vector_field_visualization_constant_depth.py ├── vector_field_visualization_constant_time.py ├── volume_slice_rendering_currents.py └── volume_slice_rendering_scalar.py ├── Datathon-3 ├── .ipynb_checkpoints │ ├── Covid_graph-checkpoint.ipynb │ └── graph-theory-covid-19-spreading-clustering-checkpoint.ipynb ├── Covid_graph.ipynb ├── Datathon_3.pdf ├── cheatsheet-70-ggplot-charts.ipynb ├── coronavirus-covid-19-visualization-prediction.ipynb ├── covid-19-case-study-analysis-viz-comparisons.ipynb ├── covid_19_data.csv ├── covid_death_graph_networkx.py ├── covid_geo.py ├── covid_graph.py ├── graph-theory-covid-19-spreading-clustering.ipynb ├── time_series_covid_19_confirmed.csv ├── time_series_covid_19_deaths.csv └── time_series_covid_19_recovered.csv ├── Datathon-4 ├── Report.pdf ├── __pycache__ │ ├── optimal_leaf_ordering.cpython-37.pyc │ └── traveling_sales_person.cpython-37.pyc ├── data_matrix.py ├── optimal_leaf_ordering.py └── traveling_sales_person.py ├── Datathon-5 ├── Datathon_5.pdf ├── unece.csv └── visualization.py ├── README.md └── images ├── D1 ├── m1.png ├── ssha.png ├── sss.png └── sst.png ├── D2 └── temp_iso.png ├── D3 ├── 10_deaths_early.png └── 10_recoveries.png ├── D4 ├── olo_data_2.png ├── shuffled_data_2.png └── tsp_data_2.png └── D5 ├── figure_1.png ├── figure_10.png ├── figure_11.png ├── figure_12.png ├── figure_2.png ├── figure_3.png ├── figure_4.png ├── figure_5.png ├── figure_6.png ├── figure_7.png ├── figure_8.png └── figure_9.png /Datathon-1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | .vscode/ 3 | results/ -------------------------------------------------------------------------------- /Datathon-1/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-1/Report.pdf -------------------------------------------------------------------------------- /Datathon-1/elevation_map.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | import matplotlib.cm as cm 8 | 9 | # List of files to be visualised 10 | ssha_data_files = glob.glob("data/ssha/*.txt") 11 | ssha_data_files.sort() 12 | 13 | BAD_FLAG = '-1.E+34' 14 | 15 | idx = 0 16 | 17 | # Data structure to store the value of SSHA at location (LON,LAT) 18 | OCEAN = dict() 19 | date = "" 20 | 21 | with open(ssha_data_files[0],'r') as f: 22 | while(f): 23 | r = f.readline() 24 | if r != '': 25 | if idx >= 10: 26 | data = r.strip().split(',') 27 | date = data[0] 28 | lon = float(data[2]) 29 | lat = float(data[3]) 30 | 31 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 32 | ssha = np.nan 33 | if data[4] != BAD_FLAG: 34 | ssha = float(data[4]) 35 | if lon not in OCEAN: 36 | OCEAN[lon] = dict() 37 | OCEAN[lon][lat] = ssha 38 | 39 | else: 40 | break 41 | 42 | 43 | idx += 1 44 | 45 
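# The parsing loop above stores SSHA at OCEAN[lon][lat], mapping BAD_FLAG readings to NaN;
# the block below collects the unique longitudes/latitudes and regrids the values for plotting.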
| LAT = [] 46 | LON = [] 47 | 48 | for lon in OCEAN: 49 | LON.append(lon) 50 | for lat in OCEAN[lon]: 51 | LAT.append(lat) 52 | 53 | LON = list(set(LON)) 54 | LAT = list(set(LAT)) 55 | 56 | LON.sort() 57 | LAT.sort() 58 | 59 | # Convert SSHA into grid format 60 | SSHA = np.zeros((len(LON),len(LAT)),np.float) 61 | 62 | for i in range(len(LON)): 63 | for j in range(len(LAT)): 64 | SSHA[i][j] = OCEAN[LON[i]][LAT[j]] 65 | 66 | # Visualize the data 67 | fig = plt.figure() 68 | ax = fig.gca(projection='3d') 69 | 70 | # map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 71 | lon, lat = np.meshgrid(LON, LAT) 72 | # map.drawcoastlines() 73 | # map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 74 | # map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 75 | 76 | h = ax.plot_surface(lon,lat,SSHA.T,cmap=cm.hot) 77 | # plt.colorbar() 78 | ax.set_title("Indian Ocean SSHA on {}".format(date.strip("\""))) 79 | 80 | plt.show() 81 | 82 | -------------------------------------------------------------------------------- /Datathon-1/scalar_field_visualization.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | 4 | import matplotlib 5 | matplotlib.use("Agg") 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as manimation 8 | from mpl_toolkits.basemap import Basemap 9 | import matplotlib.cm as cm 10 | 11 | FFMpegWriter = manimation.writers['ffmpeg'] 12 | metadata = dict(title='Indian Ocean - SSHA', artist='Swasti', 13 | comment='Movie support!') 14 | writer = FFMpegWriter(fps=12, metadata=metadata) 15 | 16 | # List of files to be visualised 17 | data_files = glob.glob("data/ssha/*.txt") 18 | data_files.sort() 19 | 20 | BAD_FLAG = '-1.E+34' 21 | 22 | def update(data_file): 23 | idx = 0 24 | 25 | # Data structure to store the value of SSHA at location (LON,LAT) 26 | OCEAN = dict() 27 | date = "" 28 | 29 | with open(data_file,'r') as f: 30 | while(f): 31 | r = f.readline() 32 | if r != '': 33 | if idx >= 10: 34 | data = r.strip().split(',') 35 | date = data[0] 36 | lon = float(data[2]) 37 | lat = float(data[3]) 38 | 39 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 40 | ssha = np.nan 41 | if data[4] != BAD_FLAG: 42 | ssha = float(data[4]) 43 | if lon not in OCEAN: 44 | OCEAN[lon] = dict() 45 | OCEAN[lon][lat] = ssha 46 | 47 | else: 48 | break 49 | 50 | 51 | idx += 1 52 | 53 | LAT = [] 54 | LON = [] 55 | 56 | for lon in OCEAN: 57 | LON.append(lon) 58 | for lat in OCEAN[lon]: 59 | LAT.append(lat) 60 | 61 | LON = list(set(LON)) 62 | LAT = list(set(LAT)) 63 | 64 | LON.sort() 65 | LAT.sort() 66 | 67 | # Convert SSHA into grid format 68 | SSHA = np.zeros((len(LON),len(LAT)),np.float) 69 | 70 | for i in range(len(LON)): 71 | for j in range(len(LAT)): 72 | SSHA[i][j] = OCEAN[LON[i]][LAT[j]] 73 | 74 | # Visualize the data 75 | plt.clf() 76 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 77 | lon, lat = np.meshgrid(LON, LAT) 78 | map.drawcoastlines() 79 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 80 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 81 | 82 | h = map.contourf(lon,lat,SSHA.T,levels=np.linspace(-0.44,0.44,100),cmap=cm.BrBG) 83 | cbar = plt.colorbar() 84 | cbar.set_label("Relative hight of sea suface") 85 | 
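# The fixed contour range (-0.44 to 0.44) chosen above keeps the colour scale identical
# across animation frames, so changes between days remain directly comparable.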
plt.title("Indian Ocean Sea Surface Height Anomaly on {}".format(date.strip("\""))) 86 | 87 | return h 88 | 89 | 90 | fig = plt.figure(figsize=(16,8)) 91 | 92 | with writer.saving(fig, "writer_test.mp4", dpi=100): 93 | for f in data_files: 94 | update(f) 95 | writer.grab_frame() 96 | -------------------------------------------------------------------------------- /Datathon-1/vector_field_visualization.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | meridional_current_data_files = glob.glob("data/meridional_current/*.txt")[:20] 17 | meridional_current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | # Data structure to store the value of current at location (LON,LAT) 25 | OCEAN = dict() 26 | date = "" 27 | 28 | meridional_current_file = "data/meridional_current/"+current_file 29 | zonal_current_file = "data/zonal_current/"+current_file 30 | 31 | with open(meridional_current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | 41 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 42 | meridional_current = np.nan 43 | 44 | if data[5] != BAD_FLAG: 45 | meridional_current = float(data[5])*-1 46 | if lon not in OCEAN: 47 | OCEAN[lon] = dict() 48 | OCEAN[lon][lat] = [0,meridional_current] 49 | 50 | else: 51 | break 52 | 53 | 54 | idx += 1 55 | 56 | idx = 0 57 | with open(zonal_current_file,'r') as f: 58 | while(f): 59 | r = f.readline() 60 | if r != '': 61 | if idx >= 11: 62 | data = r.strip().split(',') 63 | lon = float(data[2]) 64 | lat = float(data[3]) 65 | 66 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 67 | zonal_current = np.nan 68 | 69 | if data[5] != BAD_FLAG: 70 | zonal_current = float(data[5])*-1 71 | 72 | OCEAN[lon][lat][0] = zonal_current 73 | 74 | else: 75 | break 76 | 77 | 78 | idx += 1 79 | 80 | LAT = [] 81 | LON = [] 82 | 83 | for lon in OCEAN: 84 | LON.append(lon) 85 | for lat in OCEAN[lon]: 86 | LAT.append(lat) 87 | 88 | LON = list(set(LON)) 89 | LAT = list(set(LAT)) 90 | 91 | LON.sort() 92 | LAT.sort() 93 | 94 | LON1 = LON[::2] 95 | LAT1 = LAT[::2] 96 | 97 | # Convert meridional_current into grid format 98 | meridional_current = np.zeros((len(LON1),len(LAT1)),np.float) 99 | zonal_current = np.zeros((len(LON1),len(LAT1)),np.float) 100 | magnitude = np.zeros((len(LON),len(LAT)),np.float) 101 | 102 | for i in range(len(LON)): 103 | for j in range(len(LAT)): 104 | zc = OCEAN[LON[i]][LAT[j]][0] 105 | mc = OCEAN[LON[i]][LAT[j]][1] 106 | mag = np.sqrt(zc**2 + mc**2) 107 | magnitude[i][j] = mag 108 | 109 | for i in range(len(LON1)): 110 | for j in range(len(LAT1)): 111 | zc = OCEAN[LON1[i]][LAT1[j]][0] 112 | mc = OCEAN[LON1[i]][LAT1[j]][1] 113 | # mag = np.sqrt(zc**2 + mc**2) 114 | zonal_current[i][j] = (zc)#/mag) 115 | meridional_current[i][j] = (mc)#/mag) 116 | 117 | # 
Visualize the data 118 | plt.clf() 119 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 120 | lon, lat = np.meshgrid(LON, LAT) 121 | lon1, lat1 = np.meshgrid(LON1, LAT1) 122 | map.drawcoastlines() 123 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 124 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 125 | #h = map.contourf(lon,lat,magnitude.T,levels=np.linspace(0,3,100)) 126 | q = map.quiver(lon1,lat1,zonal_current.T,meridional_current.T,width=0.01, color='black', scale=150) 127 | plt.quiverkey(q, 0, 20, 1, "Arrow length vs magnitude") 128 | #cbar = map.colorbar(h) 129 | #cbar.set_label("Magnitude of current value in m/sec") 130 | plt.title("Currents (zonal and meridional) at depth = 5 in Indian Ocean on {}".format(date.strip("\""))) 131 | 132 | return q#, h 133 | 134 | 135 | fig = plt.figure(figsize=(16,8)) 136 | 137 | with writer.saving(fig, "writer_test.mp4", dpi=250): 138 | for f in meridional_current_data_files: 139 | f = f[23:] 140 | update(f) 141 | writer.grab_frame() 142 | -------------------------------------------------------------------------------- /Datathon-2/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /Datathon-2/Datathon_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-2/Datathon_2.pdf -------------------------------------------------------------------------------- /Datathon-2/README.txt: -------------------------------------------------------------------------------- 1 | Submissions: Data Visualization - Datathon 2 2 | 3 | Google drive link to video visualizations: https://drive.google.com/drive/folders/1_D_ywfun_xBxxPrWgV7PBYOVCob8-nKM?usp=sharing 4 | 5 | Directory Structure: 6 | . 
7 | ├── Report.pdf 8 | ├── source codes 9 | │   ├── isosurfaces 10 | │   │   ├── isosurfaces_currents.py 11 | │   │   └── isosurfaces.py 12 | │   ├── scalar_field_visualization 13 | │   │   ├── scalar_field_depth_profiling.py 14 | │   │   └── scalar_field_visualization_constant_depth.py 15 | │   ├── vector_field_visualization 16 | │   │   ├── vector_field_visualization_constant_depth.py 17 | │   │   └── vector_field_visualization_constant_time.py 18 | │   └── volume_slice_rendering 19 | │   ├── volume_slice_rendering_currents.py 20 | │   └── volume_slice_rendering_scalar.py 21 | └── statistics 22 | ├── current_statistics.csv 23 | ├── salinity_statistics.csv 24 | └── temperature_statistics.csv 25 | -------------------------------------------------------------------------------- /Datathon-2/current_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 0.2634942502103688; 0.000503441515044454; 0.022437502424388835 3 | 15; 0.2489651476609433; 0.000477773350267354; 0.021858027135753914 4 | 25; 0.2354338583326177; 0.000463173160228980; 0.021521458134359316 5 | 35; 0.2228846622477850; 0.000431883839129440; 0.020781815106709053 6 | 45; 0.2108549591859987; 0.000387964509998090; 0.019696814717057434 7 | 55; 0.1991084264726173; 0.000334939633818405; 0.018301356065013482 8 | 65; 0.1876547911443133; 0.000284967249422415; 0.016880972999872243 9 | 75; 0.1766129143298025; 0.000225996616472560; 0.015033183843503026 10 | 85; 0.1651594652469909; 0.000162390281966655; 0.012743244561988755 11 | 95; 0.1520378122342999; 0.000109704636120663; 0.010473998096269787 12 | 105; 0.1466897563841745; 8.91990976029287e-05; 0.00944452738907187 13 | 115; 0.1375138462634111; 6.58093668285688e-05; 0.008112297259628053 14 | 125; 0.1291105077187464; 5.335497187524599e-05; 0.0073044487728538415 15 | 135; 0.1214819740523726; 4.742582572019154e-05; 0.006886641105807064 16 | 145; 0.1149101913745539; 4.3337151435385745e-05; 0.006583095885325212 17 | 155; 0.1090671400546524; 3.951883504182367e-05; 0.006286400801875718 18 | 165; 0.1037920916983562; 3.64305062401065e-05; 0.006035768902145484 19 | 175; 0.0991418385261553; 3.451358600773206e-05; 0.005874826466180262 20 | 185; 0.0950919389056526; 3.2375018219734675e-05; 0.005689904939428661 21 | 195; 0.0916113346603196; 3.084017975706223e-05; 0.005553393535223506 22 | 205; 0.0885771978760791; 2.895110311736927e-05; 0.005380622930234869 23 | 215; 0.0859321003877646; 2.7174918883266603e-05; 0.005212956827297403 24 | 225; 0.0836067124303342; 2.5429947507723384e-05; 0.0050428114685880715 25 | -------------------------------------------------------------------------------- /Datathon-2/isosurfaces.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly.graph_objects as go 3 | 4 | file_path = "data/datathon2_data/OneDrive_1_12-09-2020/Salinity_3D/001_29_Dec_2003.txt" 5 | 6 | idx = 0 7 | 8 | BAD_FLAG = '-1.E+34' 9 | 10 | date = "" 11 | LAT = [] 12 | LON = [] 13 | DEP = [] 14 | SALT = [] # Note that this can be changed to any variable 15 | 16 | OCEAN = dict() 17 | 18 | with open(file_path,'r') as f: 19 | while(f): 20 | r = f.readline() 21 | if r != '': 22 | if idx >= 11: 23 | data = r.strip().split(',') 24 | date = data[0] 25 | lon = float(data[2]) 26 | lat = float(data[3]) 27 | dep = float(data[4]) 28 | salt = data[5] 29 | if salt == BAD_FLAG: 30 | salt = np.nan 31 | else: 32 | salt = float(salt) 33 | OCEAN[(lat,lon,dep)] = salt 34 | 
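# Keep only samples from the top 50 m for the isosurface plot; deeper points remain in
# OCEAN but are skipped by the check below and never appended to the plotting lists.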
if dep > 50: 35 | continue 36 | LAT.append(lat) 37 | LON.append(lon) 38 | DEP.append(dep) 39 | SALT.append(salt) 40 | 41 | 42 | else: 43 | break 44 | 45 | 46 | idx += 1 47 | 48 | MAX_SALT = np.nanmax(SALT) 49 | MIN_SALT = np.nanmin(SALT) 50 | 51 | fig= go.Figure( 52 | data=go.Isosurface( 53 | x=LON, 54 | y=LAT, 55 | z=DEP, 56 | value=SALT, 57 | isomin=MIN_SALT, 58 | isomax=MAX_SALT, 59 | surface_count=50, 60 | colorbar_nticks=5, 61 | colorbar_title="Salinity (psu)", 62 | # opacity=0.7, 63 | caps=dict(x_show=False, y_show=False, z_show=False) 64 | ), 65 | layout=go.Layout( 66 | scene = dict( 67 | xaxis = dict(title='Longitude'), 68 | yaxis = dict(title='Latitude'), 69 | zaxis = dict(title='Depth in meters'), 70 | ), 71 | title = go.layout.Title( 72 | text='Indian Ocean Salinity with variation in depth in meters (z-direction) on 29 December 2003' 73 | ) 74 | ) 75 | ) 76 | 77 | fig.show() 78 | -------------------------------------------------------------------------------- /Datathon-2/isosurfaces_currents.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly.graph_objects as go 3 | 4 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 5 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 6 | 7 | idx = 0 8 | 9 | BAD_FLAG = '-1.E+34' 10 | 11 | date = "" 12 | LAT = [] 13 | LON = [] 14 | DEP = [] 15 | MAGNITUDE = [] 16 | 17 | OCEAN = dict() 18 | 19 | with open(meridional_curr_file_path,'r') as f: 20 | while(f): 21 | r = f.readline() 22 | if r != '': 23 | if idx >= 12: 24 | data = r.strip().split(',') 25 | date = data[0] 26 | lon = float(data[2]) 27 | lat = float(data[3]) 28 | dep = float(data[4]) 29 | meridional_current = data[5] 30 | if meridional_current == BAD_FLAG: 31 | meridional_current = np.nan 32 | else: 33 | meridional_current = float(data[5])*-1 34 | OCEAN[(lat,lon,dep)] = [0,meridional_current] 35 | 36 | else: 37 | break 38 | 39 | 40 | idx += 1 41 | 42 | idx = 0 43 | 44 | with open(zonal_curr_file_path,'r') as f: 45 | while(f): 46 | r = f.readline() 47 | if r != '': 48 | if idx >= 12: 49 | data = r.strip().split(',') 50 | date = data[0] 51 | lon = float(data[2]) 52 | lat = float(data[3]) 53 | dep = float(data[4]) 54 | zonal_current = data[5] 55 | if zonal_current == BAD_FLAG: 56 | zonal_current = np.nan 57 | else: 58 | zonal_current = float(data[5])*-1 59 | OCEAN[(lat,lon,dep)][0] = zonal_current 60 | mag = np.nan 61 | if OCEAN[(lat,lon,dep)][0] == np.nan or OCEAN[(lat,lon,dep)][1] == np.nan: 62 | mag = np.nan 63 | else: 64 | mag = np.sqrt(OCEAN[(lat,lon,dep)][0]**2 + OCEAN[(lat,lon,dep)][1]**2) 65 | OCEAN[(lat,lon,dep)] = mag 66 | if dep > 50: 67 | continue 68 | LAT.append(lat) 69 | LON.append(lon) 70 | DEP.append(dep) 71 | MAGNITUDE.append(mag) 72 | 73 | 74 | else: 75 | break 76 | 77 | 78 | idx += 1 79 | 80 | MAX_MAG = np.nanmax(MAGNITUDE) 81 | MIN_MAG = np.nanmin(MAGNITUDE) 82 | 83 | fig= go.Figure( 84 | data=go.Isosurface( 85 | x=LON, 86 | y=LAT, 87 | z=DEP, 88 | value=MAGNITUDE, 89 | isomin=MIN_MAG, 90 | isomax=MAX_MAG, 91 | surface_count=50, 92 | colorbar_nticks=10, 93 | colorscale="viridis", 94 | colorbar_title="Magnitude of currents (m/sec)", 95 | # opacity=0.7, 96 | caps=dict(x_show=False, y_show=False, z_show=False) 97 | ), 98 | layout=go.Layout( 99 | scene = dict( 100 | xaxis = dict(title='Longitude'), 101 | yaxis = dict(title='Latitude'), 102 | zaxis = dict(title='Depth in meters'), 103 
| ), 104 | title = go.layout.Title( 105 | text='Indian Ocean Magnitude of zonal and meridional currents with variation in depth in meters (z-direction) on 4 November 2004' 106 | ) 107 | ) 108 | ) 109 | 110 | fig.show() 111 | -------------------------------------------------------------------------------- /Datathon-2/salinity_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 34.5476460403180; 0.06007074323328246; 0.24509333575860945 3 | 15; 34.5567419239074; 0.05997460412994543; 0.24489712968907054 4 | 25; 34.5953786986189; 0.05940470900179379; 0.24373081258181903 5 | 35; 34.6533809416091; 0.05838554022094924; 0.24163100012405123 6 | 45; 34.7159942281470; 0.05766160942211035; 0.24012831865923343 7 | 55; 34.7737773656595; 0.05718184711656207; 0.23912726134124088 8 | 65; 34.8233653941431; 0.05690329247441789; 0.23854411012309211 9 | 75; 34.8640548571675; 0.05686164979542857; 0.23845680907751107 10 | 85; 34.8946684755262; 0.05690541135738735; 0.23854855136300315 11 | 95; 34.9145288027546; 0.05696304923805384; 0.23866933032556543 12 | 105; 34.9523827255303; 0.05693372561975969; 0.23860789094193782 13 | 115; 34.9564708081097; 0.05686253847836554; 0.23845867247463562 14 | 125; 34.9572196443218; 0.05679271401221725; 0.23831221960322818 15 | 135; 34.9580149818220; 0.05673752898979663; 0.23819640843177428 16 | 145; 34.9594041885092; 0.05670522886006821; 0.23812859731680322 17 | 155; 34.9617613453292; 0.05669875045701147; 0.23811499418770646 18 | 165; 34.9645535062396; 0.05670237475919637; 0.23812260446920275 19 | 175; 34.9676362343133; 0.05670461696652149; 0.23812731251690028 20 | 185; 34.9691550941273; 0.05670286285414368; 0.23812362934858794 21 | 195; 34.9710786242774; 0.05670626689237978; 0.23813077686930723 22 | 205; 34.9711766413845; 0.05670500214430586; 0.23812812127992333 23 | 215; 34.9709881521240; 0.05669342125369363; 0.23810380352630584 24 | 225; 34.9696517278573; 0.05668299852520193; 0.23808191557781522 25 | -------------------------------------------------------------------------------- /Datathon-2/scalar_field_depth_profiling.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Temperature', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/PotentialTemperature_3D/*.txt")[:1] 17 | current_file = current_data_files[0] 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | idx = 0 22 | 23 | LAT = set() 24 | LON = set() 25 | DEP = set() 26 | # Data structure to store the value of current at location (LON,LAT) 27 | OCEAN = dict() 28 | date = "" 29 | 30 | idx = 0 31 | with open(current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | try: 38 | date = data[0] 39 | lon = float(data[2]) 40 | lat = float(data[3]) 41 | dep = float(data[4]) 42 | 43 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 44 | temp = np.nan 45 | 46 | if data[5] != BAD_FLAG: 47 | temp = 
float(data[5]) 48 | 49 | OCEAN[lon,lat,dep] = temp 50 | LAT.add(lat) 51 | LON.add(lon) 52 | DEP.add(dep) 53 | except: 54 | continue 55 | 56 | else: 57 | break 58 | 59 | 60 | idx += 1 61 | 62 | 63 | LON = list(LON) 64 | LAT = list(LAT) 65 | DEP = list(DEP) 66 | 67 | LON.sort() 68 | LAT.sort() 69 | DEP.sort() 70 | 71 | 72 | def update(lon): 73 | 74 | # Convert TEMP into grid format 75 | TEMP = np.zeros((len(LAT),len(DEP)),np.float) 76 | 77 | for i in range(len(LAT)): 78 | for j in range(len(DEP)): 79 | try: 80 | TEMP[i][j] = OCEAN[lon,LAT[i],DEP[j]] 81 | except: 82 | TEMP[i][j] = np.nan 83 | continue 84 | 85 | # Visualize the data 86 | plt.clf() 87 | lat, dep = np.meshgrid(LAT, DEP) 88 | 89 | h = plt.contourf(lat,dep,TEMP.T,cmap=cm.hot) 90 | cbar = plt.colorbar() 91 | cbar.set_label("Potential Temperature (degree Celcius)") 92 | plt.title("Indian Ocean Potential Temperature at longitude = {}mdegrees on {}".format(lon,date.strip("\""))) 93 | plt.xlabel("Latitude in degrees") 94 | plt.ylabel("Depth in meters") 95 | 96 | return h 97 | 98 | 99 | fig = plt.figure(figsize=(16,8)) 100 | 101 | with writer.saving(fig, "writer_test.mp4", dpi=250): 102 | for lon in LON: 103 | update(lon) 104 | writer.grab_frame() 105 | -------------------------------------------------------------------------------- /Datathon-2/scalar_field_visualization_constant_depth.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Salinity', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/Salinity_3D/*.txt")[:20] 17 | current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | LAT = set() 25 | LON = set() 26 | # Data structure to store the value of current at location (LON,LAT) 27 | OCEAN = dict() 28 | date = "" 29 | 30 | idx = 0 31 | with open(current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | dep = float(data[4]) 41 | 42 | if dep != 45.0: 43 | continue 44 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 45 | salt = np.nan 46 | 47 | if data[5] != BAD_FLAG: 48 | salt = float(data[5]) 49 | 50 | OCEAN[lon,lat] = salt 51 | LAT.add(lat) 52 | LON.add(lon) 53 | 54 | else: 55 | break 56 | 57 | 58 | idx += 1 59 | 60 | 61 | LON = list(LON) 62 | LAT = list(LAT) 63 | 64 | LON.sort() 65 | LAT.sort() 66 | 67 | # Convert SALT into grid format 68 | SALT = np.zeros((len(LON),len(LAT)),np.float) 69 | 70 | for i in range(len(LON)): 71 | for j in range(len(LAT)): 72 | try: 73 | SALT[i][j] = OCEAN[LON[i],LAT[j]] 74 | except: 75 | SALT[i][j] = np.nan 76 | continue 77 | 78 | # Visualize the data 79 | plt.clf() 80 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 81 | lon, lat = np.meshgrid(LON, LAT) 82 | map.drawcoastlines() 83 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, 
labels=[1,0,0,0]) 84 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 85 | 86 | h = map.contourf(lon,lat,SALT.T,cmap=cm.plasma) 87 | cbar = plt.colorbar() 88 | cbar.set_label("Salinity (psu)") 89 | plt.title("Indian Ocean Salinity at depth = 45.0m on {}".format(date.strip("\""))) 90 | 91 | return h 92 | 93 | 94 | fig = plt.figure(figsize=(16,8)) 95 | 96 | with writer.saving(fig, "writer_test.mp4", dpi=250): 97 | for f in current_data_files: 98 | update(f) 99 | writer.grab_frame() 100 | -------------------------------------------------------------------------------- /Datathon-2/temperature_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 27.11250655356588; 0.5476857593587509; 0.7400579432441428 3 | 15; 27.03993716170805; 0.5373139832493482; 0.7330170415818095 4 | 25; 26.84029407042512; 0.4821577178820572; 0.6943757757022182 5 | 35; 26.48182443501643; 0.3652256417414011; 0.6043390122616618 6 | 45; 25.96732721296131; 0.2441088972320103; 0.4940737771143196 7 | 55; 25.31709901569217; 0.1496946378382129; 0.3869039129269862 8 | 65; 24.56975614817752; 0.0951665899098514; 0.3084908262977222 9 | 75; 23.73544411402076; 0.0718090781810706; 0.2679721593394930 10 | 85; 22.83602555384061; 0.0627572831439539; 0.2505140378181509 11 | 95; 21.92278382624695; 0.0628753052124298; 0.2507494869634433 12 | 105; 20.87278935279349; 0.0794405080409788; 0.2818519257357999 13 | 115; 19.95056975235749; 0.0779852228317582; 0.2792583442473264 14 | 125; 19.08908719596788; 0.0694392296677286; 0.2635132438184628 15 | 135; 18.30714756773401; 0.0577132054374724; 0.2402357288945016 16 | 145; 17.61300266303936; 0.0462526987244731; 0.2150644059914916 17 | 155; 16.99729512782661; 0.0365353727850427; 0.1911422841368249 18 | 165; 16.45414063891477; 0.0300029379191761; 0.1732135615913955 19 | 175; 15.97000871473394; 0.0251827300456243; 0.1586906740978319 20 | 185; 15.54376712077133; 0.0214443733671179; 0.1464389748909693 21 | 195; 15.16498681087736; 0.0188419278727541; 0.1372659020760587 22 | 205; 14.82257698705703; 0.0169301382856726; 0.1301158648500354 23 | 215; 14.51234226885430; 0.0156999296340625; 0.1252993600704432 24 | 225; 14.22235575155881; 0.0148140654231499; 0.1217130454107117 25 | -------------------------------------------------------------------------------- /Datathon-2/vector_field_visualization_constant_depth.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | meridional_current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/*.txt")[:20] 17 | meridional_current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | # Data structure to store the value of current at location (LON,LAT) 25 | OCEAN = dict() 26 | date = "" 27 | 28 | meridional_current_file = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/"+current_file 29 | zonal_current_file = 
"data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/"+current_file 30 | 31 | with open(meridional_current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 12: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | dep = float(data[4]) 41 | 42 | if dep != 5.0: 43 | continue 44 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 45 | meridional_current = np.nan 46 | 47 | if data[5] != BAD_FLAG: 48 | meridional_current = float(data[5])*-1 49 | if lon not in OCEAN: 50 | OCEAN[lon] = dict() 51 | OCEAN[lon][lat] = [0,meridional_current] 52 | 53 | else: 54 | break 55 | 56 | 57 | idx += 1 58 | 59 | idx = 0 60 | with open(zonal_current_file,'r') as f: 61 | while(f): 62 | r = f.readline() 63 | if r != '': 64 | if idx >= 12: 65 | data = r.strip().split(',') 66 | lon = float(data[2]) 67 | lat = float(data[3]) 68 | dep = float(data[4]) 69 | 70 | if dep != 5.0: 71 | continue 72 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 73 | zonal_current = np.nan 74 | 75 | if data[5] != BAD_FLAG: 76 | zonal_current = float(data[5])*-1 77 | 78 | OCEAN[lon][lat][0] = zonal_current 79 | 80 | else: 81 | break 82 | 83 | 84 | idx += 1 85 | 86 | LAT = [] 87 | LON = [] 88 | 89 | for lon in OCEAN: 90 | LON.append(lon) 91 | for lat in OCEAN[lon]: 92 | LAT.append(lat) 93 | 94 | LON = list(set(LON)) 95 | LAT = list(set(LAT)) 96 | 97 | LON.sort() 98 | LAT.sort() 99 | 100 | LON1 = LON#[::2] 101 | LAT1 = LAT#[::2] 102 | 103 | # Convert meridional_current into grid format 104 | meridional_current = np.zeros((len(LON1),len(LAT1)),np.float) 105 | zonal_current = np.zeros((len(LON1),len(LAT1)),np.float) 106 | 107 | for i in range(len(LON1)): 108 | for j in range(len(LAT1)): 109 | try: 110 | zc = OCEAN[LON1[i]][LAT1[j]][0] 111 | mc = OCEAN[LON1[i]][LAT1[j]][1] 112 | zonal_current[i][j] = zc 113 | meridional_current[i][j] = mc 114 | except: 115 | zonal_current[i][j] = np.nan 116 | meridional_current[i][j] = np.nan 117 | continue 118 | 119 | # Visualize the data 120 | plt.clf() 121 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 122 | lon, lat = np.meshgrid(LON, LAT) 123 | lon1, lat1 = np.meshgrid(LON1, LAT1) 124 | map.drawcoastlines() 125 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 126 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 127 | q = map.quiver(lon1,lat1,zonal_current.T,meridional_current.T,width=0.001, color='black', scale=150) 128 | _ = plt.quiverkey(q, 0.85, 0.85, 2,'2 m/sec', labelpos='E',coordinates='figure') 129 | plt.title("Currents (zonal and meridional) at depth = 5.0m in Indian Ocean on {}".format(date.strip("\""))) 130 | 131 | return q 132 | 133 | 134 | fig = plt.figure(figsize=(16,8)) 135 | 136 | with writer.saving(fig, "writer_test.mp4", dpi=250): 137 | for f in meridional_current_data_files: 138 | f = f[64:] 139 | update(f) 140 | writer.grab_frame() 141 | -------------------------------------------------------------------------------- /Datathon-2/vector_field_visualization_constant_time.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import 
matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | BAD_FLAG = '-1.E+34' 16 | 17 | idx = 0 18 | 19 | # Data structure to store the value of current at location (LON,LAT) 20 | OCEAN = dict() 21 | date = "" 22 | LAT = set() 23 | LON = set() 24 | DEP = set() 25 | 26 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 27 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 28 | 29 | with open(meridional_curr_file_path,'r') as f: 30 | while(f): 31 | r = f.readline() 32 | if r != '': 33 | if idx >= 12: 34 | data = r.strip().split(',') 35 | date = data[0] 36 | lon = float(data[2]) 37 | lat = float(data[3]) 38 | dep = float(data[4]) 39 | 40 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 41 | meridional_current = np.nan 42 | 43 | if data[5] != BAD_FLAG: 44 | meridional_current = float(data[5])*-1 45 | if lon not in OCEAN: 46 | OCEAN[lon] = dict() 47 | OCEAN[lon,lat,dep] = [0,meridional_current] 48 | 49 | else: 50 | break 51 | 52 | 53 | idx += 1 54 | 55 | idx = 0 56 | with open(zonal_curr_file_path,'r') as f: 57 | while(f): 58 | r = f.readline() 59 | if r != '': 60 | if idx >= 12: 61 | data = r.strip().split(',') 62 | lon = float(data[2]) 63 | lat = float(data[3]) 64 | dep = float(data[4]) 65 | 66 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 67 | zonal_current = np.nan 68 | 69 | if data[5] != BAD_FLAG: 70 | zonal_current = float(data[5])*-1 71 | 72 | OCEAN[lon,lat,dep][0] = zonal_current 73 | LAT.add(lat) 74 | LON.add(lon) 75 | DEP.add(dep) 76 | 77 | else: 78 | break 79 | 80 | 81 | idx += 1 82 | 83 | 84 | LON = list(LON) 85 | LON.sort() 86 | LAT = list(LAT) 87 | LAT.sort() 88 | DEP = list(DEP) 89 | DEP.sort() 90 | 91 | 92 | def update(dep): 93 | 94 | # Convert meridional_current into grid format 95 | meridional_current = np.zeros((len(LON),len(LAT)),np.float) 96 | zonal_current = np.zeros((len(LON),len(LAT)),np.float) 97 | 98 | for i in range(len(LON)): 99 | for j in range(len(LAT)): 100 | try: 101 | zc = OCEAN[LON[i],LAT[j],dep][0] 102 | mc = OCEAN[LON[i],LAT[j],dep][1] 103 | zonal_current[i][j] = zc 104 | meridional_current[i][j] = mc 105 | except: 106 | zonal_current[i][j] = np.nan 107 | meridional_current[i][j] = np.nan 108 | continue 109 | 110 | # Visualize the data 111 | plt.clf() 112 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 113 | lon, lat = np.meshgrid(LON, LAT) 114 | map.drawcoastlines() 115 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 116 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 117 | q = map.quiver(lon,lat,zonal_current.T,meridional_current.T,width=0.001, color='black', scale=150) 118 | _ = plt.quiverkey(q, 0.85, 0.85, 2,'2 m/sec', labelpos='E',coordinates='figure') 119 | plt.title("Currents (zonal and meridional) at depth = {}m in Indian Ocean on {}".format(dep,date.strip("\""))) 120 | 121 | return q 122 | 123 | 124 | fig = plt.figure(figsize=(16,8)) 125 | 126 | with writer.saving(fig, "writer_test.mp4", dpi=250): 127 | for dep in DEP: 128 | update(dep) 129 | writer.grab_frame() 130 | 
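# Side note (a minimal sketch, not part of the original pipeline): the nested loops that
# regrid OCEAN into 2-D arrays could also be expressed with a pandas pivot, assuming the
# samples were first loaded into a DataFrame with illustrative columns 'lon', 'lat' and a
# value column:
#
#   import pandas as pd
#
#   def to_grid(df, value_col):
#       """Return a 2-D array with sorted lon as rows and sorted lat as columns."""
#       return df.pivot_table(index='lon', columns='lat', values=value_col).to_numpy()
#
# Missing (lon, lat) combinations come out as NaN automatically, which contourf and quiver
# already ignore, so the try/except fallback above would not be needed.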
-------------------------------------------------------------------------------- /Datathon-2/volume_slice_rendering_currents.py: -------------------------------------------------------------------------------- 1 | # Import data 2 | import time 3 | import numpy as np 4 | 5 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 6 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 7 | 8 | idx = 0 9 | 10 | BAD_FLAG = '-1.E+34' 11 | 12 | date = "" 13 | LAT = set() 14 | LON = set() 15 | DEP = set() 16 | MAGNITUDE = [] 17 | 18 | OCEAN = dict() 19 | 20 | with open(meridional_curr_file_path,'r') as f: 21 | while(f): 22 | r = f.readline() 23 | if r != '': 24 | if idx >= 12: 25 | data = r.strip().split(',') 26 | date = data[0] 27 | lon = float(data[2]) 28 | lat = float(data[3]) 29 | dep = float(data[4]) 30 | meridional_current = data[5] 31 | if meridional_current == BAD_FLAG: 32 | meridional_current = np.nan 33 | else: 34 | meridional_current = float(data[5])*-1 35 | OCEAN[(lat,lon,dep)] = [0,meridional_current] 36 | LAT.add(lat) 37 | LON.add(lon) 38 | DEP.add(dep) 39 | 40 | 41 | else: 42 | break 43 | 44 | 45 | idx += 1 46 | 47 | idx = 0 48 | 49 | with open(zonal_curr_file_path,'r') as f: 50 | while(f): 51 | r = f.readline() 52 | if r != '': 53 | if idx >= 12: 54 | data = r.strip().split(',') 55 | date = data[0] 56 | lon = float(data[2]) 57 | lat = float(data[3]) 58 | dep = float(data[4]) 59 | zonal_current = data[5] 60 | if zonal_current == BAD_FLAG: 61 | zonal_current = np.nan 62 | else: 63 | zonal_current = float(data[5])*-1 64 | OCEAN[(lat,lon,dep)][0] = zonal_current 65 | LAT.add(lat) 66 | LON.add(lon) 67 | DEP.add(dep) 68 | if OCEAN[(lat,lon,dep)][0] == np.nan or OCEAN[(lat,lon,dep)][1] == np.nan: 69 | mag = np.nan 70 | else: 71 | mag = np.sqrt(OCEAN[(lat,lon,dep)][0]**2 + OCEAN[(lat,lon,dep)][1]**2) 72 | OCEAN[(lat,lon,dep)] = mag 73 | MAGNITUDE.append(mag) 74 | 75 | 76 | else: 77 | break 78 | 79 | 80 | idx += 1 81 | 82 | MAX_MAG = np.nanmax(MAGNITUDE) 83 | MIN_MAG = np.nanmin(MAGNITUDE) 84 | 85 | LAT = list(LAT) 86 | LAT.sort() 87 | LON = list(LON) 88 | LON.sort() 89 | DEP = list(DEP) 90 | DEP.sort() 91 | 92 | MIN_LAT, MAX_LAT = min(LAT),max(LAT) 93 | MIN_LON, MAX_LON = min(LON),max(LON) 94 | 95 | r,c = len(LON),len(LAT) 96 | 97 | def getMagnitudeForDepth(depth): 98 | mag = [] 99 | for x in LON: 100 | arr = [] 101 | for y in LAT: 102 | if (y,x,depth) not in OCEAN or type(OCEAN[(y,x,depth)]) == list: 103 | arr.append(np.nan) 104 | else: 105 | arr.append(OCEAN[(y,x,depth)]) 106 | mag.append(np.array(arr)) 107 | mag = np.array(mag) 108 | mag[np.isnan(mag)] = -100 109 | return mag 110 | 111 | # Define frames 112 | import plotly.graph_objects as go 113 | 114 | fig = go.Figure(frames=[go.Frame(data=go.Surface( 115 | z=dep * np.ones((r, c)), 116 | surfacecolor=getMagnitudeForDepth(dep).T, 117 | cmin=MAX_MAG - MAX_MAG/0.99, cmax=MAX_MAG, 118 | colorbar_title="Magnitude of currents (m/sec)", 119 | colorscale=[[0, 'white'], 120 | [0.01, 'white'], 121 | [0.01, 'blue'], 122 | [1, 'red']] 123 | ), 124 | name=str(dep) 125 | ) for dep in DEP]) 126 | 127 | # Add data to be displayed before animation starts 128 | fig.add_trace(go.Surface( 129 | z=225.0 * np.ones((r, c)), 130 | surfacecolor=getMagnitudeForDepth(5.0).T, 131 | cmin=MAX_MAG - MAX_MAG/0.99, cmax=MAX_MAG, 132 | colorbar_title="Magnitude of currents (m/sec)", 133 | colorscale=[[0, 'white'], 134 | [0.01, 'white'], 135 | [0.01, 'blue'], 
136 | [1, 'red']] 137 | )) 138 | 139 | def frame_args(duration): 140 | return { 141 | "frame": {"duration": duration}, 142 | "mode": "immediate", 143 | "fromcurrent": True, 144 | "transition": {"duration": duration, "easing": "linear"}, 145 | } 146 | 147 | sliders = [ 148 | { 149 | "pad": {"b": 10, "t": 60}, 150 | "len": 0.9, 151 | "x": 0.1, 152 | "y": 0, 153 | "steps": [ 154 | { 155 | "args": [[f.name], frame_args(0)], 156 | "label": str(k), 157 | "method": "animate", 158 | } 159 | for k, f in enumerate(fig.frames) 160 | ], 161 | } 162 | ] 163 | 164 | # Layout 165 | fig.update_layout( 166 | title='Indian Ocean Magnitude of zonal and meridional currents with variation in depth in meters (z-direction) on 4 November 2004', 167 | width=1200, 168 | height=800, 169 | scene=dict( 170 | zaxis=dict(range=[5.0, 225.0],autorange=False,title='Depth in meters'), 171 | xaxis = dict(title='Longitude'), 172 | yaxis = dict(title='Latitude'), 173 | aspectratio=dict(x=1.5, y=1, z=1), 174 | ), 175 | updatemenus = [ 176 | { 177 | "buttons": [ 178 | { 179 | "args": [None, frame_args(50)], 180 | "label": "▶", # play symbol 181 | "method": "animate", 182 | }, 183 | { 184 | "args": [[None], frame_args(0)], 185 | "label": "◼", # pause symbol 186 | "method": "animate", 187 | }, 188 | ], 189 | "direction": "left", 190 | "pad": {"r": 10, "t": 70}, 191 | "type": "buttons", 192 | "x": 0.1, 193 | "y": 0, 194 | } 195 | ], 196 | sliders=sliders 197 | ) 198 | 199 | fig.show() -------------------------------------------------------------------------------- /Datathon-2/volume_slice_rendering_scalar.py: -------------------------------------------------------------------------------- 1 | # Import data 2 | import time 3 | import numpy as np 4 | 5 | file_path = "data/datathon2_data/OneDrive_1_12-09-2020/PotentialTemperature_3D/001_29_Dec_2003.txt" 6 | 7 | idx = 0 8 | 9 | BAD_FLAG = '-1.E+34' 10 | 11 | date = "" 12 | LAT = set() 13 | LON = set() 14 | DEP = set() 15 | SALT = [] 16 | 17 | OCEAN = dict() 18 | 19 | with open(file_path,'r') as f: 20 | while(f): 21 | r = f.readline() 22 | if r != '': 23 | if idx >= 11: 24 | data = r.strip().split(',') 25 | date = data[0] 26 | lon = float(data[2]) 27 | lat = float(data[3]) 28 | dep = float(data[4]) 29 | salt = data[5] 30 | if salt == BAD_FLAG: 31 | salt = np.nan 32 | else: 33 | salt = float(salt) 34 | OCEAN[(lat,lon,dep)] = salt 35 | LAT.add(lat) 36 | LON.add(lon) 37 | DEP.add(dep) 38 | SALT.append(salt) 39 | 40 | 41 | else: 42 | break 43 | 44 | 45 | idx += 1 46 | 47 | MAX_SALT = np.nanmax(SALT) 48 | MIN_SALT = np.nanmin(SALT) 49 | 50 | LAT = list(LAT) 51 | LAT.sort() 52 | LON = list(LON) 53 | LON.sort() 54 | DEP = list(DEP) 55 | DEP.sort() 56 | 57 | MIN_LAT, MAX_LAT = min(LAT),max(LAT) 58 | MIN_LON, MAX_LON = min(LON),max(LON) 59 | 60 | r,c = len(LON),len(LAT) 61 | 62 | def getSaltForDepth(depth): 63 | salt = [] 64 | for x in LON: 65 | arr = [] 66 | for y in LAT: 67 | arr.append(OCEAN[(y,x,depth)]) 68 | salt.append(np.array(arr)) 69 | return np.array(salt) 70 | 71 | # Define frames 72 | import plotly.graph_objects as go 73 | 74 | fig = go.Figure(frames=[go.Frame(data=go.Surface( 75 | z=dep * np.ones((r, c)), 76 | surfacecolor=getSaltForDepth(dep).T, 77 | cmin=MIN_SALT, cmax=MAX_SALT, 78 | colorbar_title="Potential Temperature (degree Celcius)", 79 | colorscale=[[0, 'white'], 80 | [0.01, 'white'], 81 | [0.01, 'red'], 82 | [1, 'yellow']], 83 | # [1, 'green']] 84 | ), 85 | name=str(dep) 86 | ) for dep in DEP]) 87 | 88 | # Add data to be displayed before animation starts 89 | 
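# Initial surface shown before playback starts. Missing cells were filled with -100 in
# getMagnitudeForDepth; that sentinel lies below cmin, so it clips to the white band of
# the colorscale and reads as "no data".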
fig.add_trace(go.Surface( 90 | z=5.0 * np.ones((r, c)), 91 | surfacecolor=getSaltForDepth(5.0).T, 92 | cmin=MIN_SALT, cmax=MAX_SALT, 93 | colorbar_title="Potential Temperature (degree Celcius)", 94 | colorscale=[[0, 'white'], 95 | [0.01, 'red'], 96 | [1, 'yellow']], 97 | # [1, 'green']] 98 | )) 99 | 100 | 101 | def frame_args(duration): 102 | return { 103 | "frame": {"duration": duration}, 104 | "mode": "immediate", 105 | "fromcurrent": True, 106 | "transition": {"duration": duration, "easing": "linear"}, 107 | } 108 | 109 | sliders = [ 110 | { 111 | "pad": {"b": 10, "t": 60}, 112 | "len": 0.9, 113 | "x": 0.1, 114 | "y": 0, 115 | "steps": [ 116 | { 117 | "args": [[f.name], frame_args(0)], 118 | "label": str(k), 119 | "method": "animate", 120 | } 121 | for k, f in enumerate(fig.frames) 122 | ], 123 | } 124 | ] 125 | 126 | # Layout 127 | fig.update_layout( 128 | title='Indian Ocean Potential Temperature with variation in depth in meters (z-direction) on 29 December 2003', 129 | width=1200, 130 | height=800, 131 | scene=dict( 132 | zaxis=dict(range=[5.0, 225.0],autorange=False,title='Depth in meters'), 133 | xaxis = dict(title='Longitude'), 134 | yaxis = dict(title='Latitude'), 135 | aspectratio=dict(x=1.5, y=1, z=1), 136 | ), 137 | updatemenus = [ 138 | { 139 | "buttons": [ 140 | { 141 | "args": [None, frame_args(50)], 142 | "label": "▶", # play symbol 143 | "method": "animate", 144 | }, 145 | { 146 | "args": [[None], frame_args(0)], 147 | "label": "◼", # pause symbol 148 | "method": "animate", 149 | }, 150 | ], 151 | "direction": "left", 152 | "pad": {"r": 10, "t": 70}, 153 | "type": "buttons", 154 | "x": 0.1, 155 | "y": 0, 156 | } 157 | ], 158 | sliders=sliders 159 | ) 160 | 161 | fig.show() -------------------------------------------------------------------------------- /Datathon-3/Datathon_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-3/Datathon_3.pdf -------------------------------------------------------------------------------- /Datathon-3/covid_death_graph_networkx.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | import matplotlib.pyplot as plt 5 | 6 | df_in = pd.read_csv('time_series_covid_19_deaths.csv') 7 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 8 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 9 | df_in.iloc[:,:].head() 10 | 11 | num_deaths = {} 12 | for index in df_in.index: 13 | num_deaths[df_in['Country/Region'][index]] = float(df_in['9/23/20'][index])/246 14 | 15 | 16 | dates_vec = list(df_in.columns)[1:] 17 | average_time_vec = [None] * df_in.shape[0] 18 | 19 | for i, row_index in enumerate(df_in.index): 20 | 21 | weighted_sum, total_deaths = 0, 0 22 | 23 | for j, date in enumerate(dates_vec): 24 | current_term = df_in.at[row_index, date] 25 | weighted_sum += j * current_term 26 | total_deaths += current_term 27 | 28 | average_time_vec[i] = weighted_sum / total_deaths 29 | 30 | df_in['avg_time'] = average_time_vec 31 | 32 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 33 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 34 | 35 | line_index = 0 36 | epsilon = 0.001 37 | for i in range(0, df_in.shape[0] - 1): 38 | for j in range(i + 1, df_in.shape[0]): 39 | index_i, index_j = 
df_in.index[i], df_in.index[j] 40 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 41 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 42 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 43 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 44 | list_d[line_index] = abs(diff_time) 45 | line_index += 1 46 | 47 | df_graph = pd.DataFrame(dict( 48 | Country1 = list_country1, 49 | Country2 = list_country2, 50 | Weight = list_w, 51 | Distance = list_d 52 | )) 53 | 54 | 55 | df_graph = df_graph.dropna(axis=0) 56 | df_graph.to_csv('df_graph.csv', index=False) 57 | 58 | 59 | covid_graph = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 60 | sparse_covid_graph = nx.Graph(((u, v, e) for u,v,e in covid_graph.edges(data=True) if e['Weight'] > 0.0 and e['Weight'] < 0.0150)) 61 | sparse_vertex = set() 62 | for (u,v) in sparse_covid_graph.edges(): 63 | sparse_vertex.add(u) 64 | sparse_vertex.add(v) 65 | sparse_vertex = list(sparse_vertex) 66 | sparse_vertex.sort() 67 | d = [e['Weight'] for u,v,e in covid_graph.edges(data=True)] 68 | print(np.mean(np.array(d))) 69 | # nx.draw(sparse_covid_graph,with_labels=True) 70 | 71 | vertex_attributes = {u:num_deaths[u] for u in sparse_vertex} 72 | nx.draw(sparse_covid_graph, with_labels=True,nodelist=vertex_attributes.keys(), node_size=[v*100 for v in vertex_attributes.values()]) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /Datathon-3/covid_geo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import plotly.graph_objects as go 7 | from mpl_toolkits.basemap import Basemap 8 | 9 | deaths = 'time_series_covid_19_deaths.csv' 10 | recovered = 'time_series_covid_19_recovered.csv' 11 | 12 | def read_data(filename): 13 | df_in = pd.read_csv(filename) 14 | dict_date = {date:'sum' for date in list(df_in.columns[4:54])} 15 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 16 | print(df_in.columns) 17 | return df_in 18 | 19 | def get_lat_lon(filename): 20 | df_in = pd.read_csv(filename) 21 | df_lat_lon = df_in.iloc[:,:4] 22 | # dict_lat_lon = {val:'mean' for val in list(df_lat_lon.columns[2:])} 23 | # df_lat_lon = df_lat_lon.groupby(['Country/Region']).agg(dict_lat_lon).reset_index() 24 | df_lat_lon = df_lat_lon.groupby(['Country/Region']).nth(-1).reset_index() 25 | return df_lat_lon 26 | 27 | def get_transpose(df_in): 28 | invert_columns = df_in['Country/Region'].unique() 29 | invert_index = df_in.columns[1:] 30 | 31 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 32 | invert_df = invert_df.fillna(0) 33 | for i in tqdm(df_in['Country/Region']): 34 | for j in invert_index: 35 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 36 | 37 | return invert_df 38 | 39 | def get_correlation_edges(invert_df): 40 | corr_matrix = invert_df.corr() 41 | countries = corr_matrix.index.values 42 | corr_matrix = np.asmatrix(corr_matrix) 43 | 44 | edges = [] 45 | for i in range(len(corr_matrix)): 46 | for j in range(i+1, len(corr_matrix[i])): 47 | if pd.isnull(corr_matrix[i][j]) == False: 48 | edges.append((countries[i],countries[j],{'Weight':corr_matrix[i][j]})) 49 | 50 | return edges 51 | 52 | def get_graph_from_edges(edges): 53 | G = nx.Graph(edges) 54 | return G 55 | 56 | def get_node_weight(df_in): 57 | num_cases = {} 
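# Weight each country by its cumulative count on 3/11/20, scaled by 1/50 so the value can
# be used directly as a node size when the graph is drawn on the basemap.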
58 | for index in df_in.index: 59 | num_cases[df_in['Country/Region'][index]] = float(df_in['3/11/20'][index])/50 60 | 61 | return num_cases 62 | 63 | def get_average_time(df_in): 64 | dates_vec = list(df_in.columns)[1:] 65 | average_time_array = [None] * df_in.shape[0] 66 | 67 | for i, row in enumerate(df_in.index): 68 | 69 | weighted_sum, total_cases = 0, 0 70 | 71 | for j, date in enumerate(dates_vec): 72 | current = df_in.at[row, date] 73 | weighted_sum += j * current 74 | total_cases += current 75 | 76 | average_time_array[i] = weighted_sum / total_cases 77 | 78 | df_in['avg_time'] = average_time_array 79 | 80 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 81 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 82 | 83 | line_index = 0 84 | epsilon = 0.001 85 | for i in range(0, df_in.shape[0] - 1): 86 | for j in range(i + 1, df_in.shape[0]): 87 | index_i, index_j = df_in.index[i], df_in.index[j] 88 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 89 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 90 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 91 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 92 | list_d[line_index] = abs(diff_time) 93 | line_index += 1 94 | 95 | df_graph = pd.DataFrame(dict( 96 | Country1 = list_country1, 97 | Country2 = list_country2, 98 | Weight = list_w 99 | )) 100 | 101 | df_graph = df_graph.dropna(axis=0) 102 | 103 | return df_graph 104 | 105 | def get_graph_from_df(df_graph): 106 | G = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 107 | return G 108 | 109 | def get_sparse_graph(graph, num_cases, min_threshold, max_threshold): 110 | sparse_graph = nx.Graph(((u, v, e) for u,v,e in graph.edges(data=True) if e['Weight'] > min_threshold and e['Weight'] < max_threshold)) 111 | sparse_vertex = set() 112 | for (u,v) in sparse_graph.edges(): 113 | sparse_vertex.add(u) 114 | sparse_vertex.add(v) 115 | sparse_vertex = list(sparse_vertex) 116 | sparse_vertex.sort() 117 | vertex_attributes = {u:num_cases[u] for u in sparse_vertex} 118 | 119 | return sparse_graph, vertex_attributes 120 | 121 | def get_graph_stats(G, cases_dict): 122 | graph_dict = {(u, v): e for (u,v,e) in G.edges(data=True)} 123 | weights = [e['Weight'] for (u,v,e) in G.edges(data=True)] 124 | print("Maximum weight: ", max(weights)) 125 | print("Minimum weight: ", min(weights)) 126 | print("Mean weight: ", np.mean(weights)) 127 | 128 | top_10_cases = list(zip(cases_dict.values(),cases_dict.keys())) 129 | top_10_cases.sort(reverse=True) 130 | top_10_cases = top_10_cases[:10] 131 | return graph_dict, top_10_cases 132 | 133 | df_deaths = read_data(deaths) 134 | df_country = get_lat_lon(deaths) 135 | num_deaths = get_node_weight(df_deaths) 136 | df_death_graph = get_average_time(df_deaths) 137 | deaths_graph = get_graph_from_df(df_death_graph) 138 | 139 | """Sparse death graph""" 140 | 141 | # sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 10, 100) 142 | # e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 143 | # nx.draw(sparse_deaths_graph, with_labels=True,pos=positions,edge_color=e_color, width=3, edge_cmap=plt.cm.hot, nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 144 | 145 | """Top 10 graph""" 146 | graph_d, top_10 = get_graph_stats(deaths_graph, num_deaths) 147 | edges = [] 148 | death_attributes = {v:w for w,v in top_10} 
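# Collect edges between the ten countries with the highest counts, checking both
# orientations of each pair because graph_d keys each undirected edge under a single (u, v).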
149 | for i in range(10): 150 | for j in range(i+1, 10): 151 | if (top_10[i][1],top_10[j][1]) in graph_d: 152 | edges.append((top_10[i][1],top_10[j][1],graph_d[top_10[i][1],top_10[j][1]])) 153 | elif (top_10[j][1],top_10[i][1]) in graph_d: 154 | edges.append((top_10[j][1],top_10[i][1],graph_d[top_10[j][1],top_10[i][1]])) 155 | 156 | sparse_deaths_graph = nx.Graph(edges) 157 | sparse_deaths_graph, death_attributes = get_sparse_graph(sparse_deaths_graph, num_deaths, 0, 100) 158 | e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 159 | # nx.draw(sparse_deaths_graph, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 160 | 161 | 162 | # Set up base map 163 | plt.figure(figsize=(15,20)) 164 | m = Basemap( 165 | projection='merc', 166 | llcrnrlon=-180, 167 | llcrnrlat=-60, 168 | urcrnrlon=180, 169 | urcrnrlat=70, 170 | lat_ts=0, 171 | resolution='l', 172 | suppress_ticks=True) 173 | 174 | # import long lat as m attribute 175 | mx, my = m(df_country['Long'].values, df_country['Lat'].values) 176 | pos = {} 177 | for idx, elem in enumerate (df_country['Country/Region']): 178 | pos[elem] = (mx[idx], my[idx]) 179 | 180 | # draw nodes and edges and over aly on basemap 181 | nx.draw_networkx_nodes(G = sparse_deaths_graph, pos = pos, node_list = sparse_deaths_graph.nodes(), node_color = 'r', alpha = 0.6, 182 | node_size = [num_deaths[s]*5 for s in sparse_deaths_graph.nodes()], with_labels=True) 183 | nx.draw_networkx_edges(G = sparse_deaths_graph, pos = pos, edge_color=e_color, width=2, edge_cmap=plt.cm.hot, 184 | alpha=0.6, arrows = False) 185 | nx.draw_networkx_labels(G = sparse_deaths_graph, pos = pos, font_size=15, font_color='y', 186 | labels = {x:x for x in sparse_deaths_graph.nodes() if num_deaths[x] >= 0}) 187 | 188 | m.drawcountries(linewidth = 1) 189 | m.drawstates(linewidth = 0.2) 190 | m.drawcoastlines(linewidth=1) 191 | plt.tight_layout() 192 | sm = plt.cm.ScalarMappable(cmap=plt.cm.hot, norm=plt.Normalize(vmin = min(e_color), vmax=max(e_color))) 193 | cbar = plt.colorbar(sm) 194 | cbar.set_label("Edge weights") 195 | plt.title("Top 10 countries with maximum deaths due to COVID-19 by the end of 11/3/20") 196 | plt.show() -------------------------------------------------------------------------------- /Datathon-3/covid_graph.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import plotly.graph_objects as go 7 | from mpl_toolkits.basemap import Basemap 8 | 9 | deaths = 'time_series_covid_19_deaths.csv' 10 | recovered = 'time_series_covid_19_recovered.csv' 11 | 12 | def read_data(filename): 13 | df_in = pd.read_csv(filename) 14 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 15 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 16 | return df_in 17 | 18 | def get_transpose(df_in): 19 | invert_columns = df_in['Country/Region'].unique() 20 | invert_index = df_in.columns[1:] 21 | 22 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 23 | invert_df = invert_df.fillna(0) 24 | for i in tqdm(df_in['Country/Region']): 25 | for j in invert_index: 26 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 27 | 28 | return invert_df 29 | 30 | def get_correlation_edges(invert_df): 31 | corr_matrix = invert_df.corr() 32 | countries = corr_matrix.index.values 33 | # corr_matrix = 
np.asmatrix(corr_matrix) 34 | corr_matrix = corr_matrix.values 35 | print(corr_matrix) 36 | edges = [] 37 | for i in range(len(corr_matrix)): 38 | for j in range(i+1, len(corr_matrix[i])): 39 | if pd.isnull(corr_matrix[i][j]) == False: 40 | edges.append((countries[i],countries[j],{'Weight':corr_matrix[i][j]})) 41 | 42 | return edges 43 | 44 | def get_graph_from_edges(edges): 45 | G = nx.Graph(edges) 46 | return G 47 | 48 | def get_node_weight(df_in): 49 | num_cases = {} 50 | for index in df_in.index: 51 | num_cases[df_in['Country/Region'][index]] = float(df_in['9/23/20'][index])/246 52 | 53 | return num_cases 54 | 55 | def get_average_time(df_in): 56 | dates_vec = list(df_in.columns)[1:] 57 | average_time_array = [None] * df_in.shape[0] 58 | 59 | for i, row in enumerate(df_in.index): 60 | 61 | weighted_sum, total_cases = 0, 0 62 | 63 | for j, date in enumerate(dates_vec): 64 | current = df_in.at[row, date] 65 | weighted_sum += j * current 66 | total_cases += current 67 | 68 | average_time_array[i] = weighted_sum / total_cases 69 | 70 | df_in['avg_time'] = average_time_array 71 | 72 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 73 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 74 | 75 | line_index = 0 76 | epsilon = 0.001 77 | for i in range(0, df_in.shape[0] - 1): 78 | for j in range(i + 1, df_in.shape[0]): 79 | index_i, index_j = df_in.index[i], df_in.index[j] 80 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 81 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 82 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 83 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 84 | list_d[line_index] = abs(diff_time) 85 | line_index += 1 86 | 87 | df_graph = pd.DataFrame(dict( 88 | Country1 = list_country1, 89 | Country2 = list_country2, 90 | Weight = list_w 91 | )) 92 | 93 | df_graph = df_graph.dropna(axis=0) 94 | 95 | return df_graph 96 | 97 | def get_graph_from_df(df_graph): 98 | G = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 99 | return G 100 | 101 | def get_sparse_graph(graph, num_cases, min_threshold, max_threshold): 102 | sparse_graph = nx.Graph(((u, v, e) for u,v,e in graph.edges(data=True) if e['Weight'] >= min_threshold and e['Weight'] <= max_threshold)) 103 | sparse_vertex = set() 104 | for (u,v) in sparse_graph.edges(): 105 | sparse_vertex.add(u) 106 | sparse_vertex.add(v) 107 | sparse_vertex = list(sparse_vertex) 108 | sparse_vertex.sort() 109 | vertex_attributes = {u:num_cases[u] for u in sparse_vertex} 110 | 111 | return sparse_graph, vertex_attributes 112 | 113 | def get_graph_stats(G, cases_dict): 114 | graph_dict = {(u, v): e for (u,v,e) in G.edges(data=True)} 115 | weights = [e['Weight'] for (u,v,e) in G.edges(data=True)] 116 | print("Maximum weight: ", max(weights)) 117 | print("Minimum weight: ", min(weights)) 118 | print("Mean weight: ", np.mean(weights)) 119 | 120 | top_10_cases = list(zip(cases_dict.values(),cases_dict.keys())) 121 | top_10_cases.sort(reverse=True) 122 | top_10_cases = top_10_cases[:10] 123 | return graph_dict, top_10_cases 124 | 125 | df_deaths = read_data(deaths) 126 | num_deaths = get_node_weight(df_deaths) 127 | df_death_graph = get_average_time(df_deaths) 128 | deaths_graph = get_graph_from_df(df_death_graph) 129 | 130 | """Sparse deaths correlation graph""" 131 | # invert_df = get_transpose(df_deaths) 132 | # corr_edges = get_correlation_edges(invert_df) 133 | # deaths_graph = 
nx.Graph(corr_edges) 134 | # sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 0.998, 1) 135 | # positions = nx.circular_layout(sparse_deaths_graph) 136 | # nx.draw(sparse_deaths_graph, pos=positions, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 137 | 138 | """Sparse death graph""" 139 | 140 | sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 10, 50) 141 | positions=nx.circular_layout(sparse_deaths_graph) 142 | e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 143 | nx.draw(sparse_deaths_graph, with_labels=True,pos=positions,edge_color=e_color, width=3, edge_cmap=plt.cm.hot, nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 144 | sm = plt.cm.ScalarMappable(cmap=plt.cm.hot, norm=plt.Normalize(vmin = min(e_color), vmax=max(e_color))) 145 | cbar = plt.colorbar(sm) 146 | cbar.set_label("Edge weights") 147 | 148 | """Top 10 graph""" 149 | # graph_d, top_10 = get_graph_stats(deaths_graph, num_deaths) 150 | # edges = [] 151 | # death_attributes = {v:w for w,v in top_10} 152 | # for i in range(10): 153 | # for j in range(i+1, 10): 154 | # if (top_10[i][1],top_10[j][1]) in graph_d: 155 | # edges.append((top_10[i][1],top_10[j][1],graph_d[top_10[i][1],top_10[j][1]])) 156 | # elif (top_10[j][1],top_10[i][1]) in graph_d: 157 | # edges.append((top_10[j][1],top_10[i][1],graph_d[top_10[j][1],top_10[i][1]])) 158 | 159 | # sparse_deaths_graph = nx.Graph(edges) 160 | # sparse_deaths_graph, death_attributes = get_sparse_graph(sparse_deaths_graph, num_deaths, 0, 0.08) 161 | # nx.draw(sparse_deaths_graph, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 162 | 163 | plt.show() 164 | 165 | """Graph visualization using plotly""" 166 | # positions=nx.circular_layout(sparse_deaths_graph) 167 | 168 | # edge_x = [] 169 | # edge_y = [] 170 | # for edge in sparse_deaths_graph.edges(): 171 | # x0, y0 = positions[edge[0]] 172 | # x1, y1 = positions[edge[1]] 173 | # edge_x.append(x0) 174 | # edge_x.append(x1) 175 | # edge_x.append(None) 176 | # edge_y.append(y0) 177 | # edge_y.append(y1) 178 | # edge_y.append(None) 179 | 180 | # edge_trace = go.Scatter( 181 | # x=edge_x, y=edge_y, 182 | # line=dict(width=0.5, color='#888'), 183 | # hoverinfo='none', 184 | # mode='lines') 185 | 186 | # node_x = [] 187 | # node_y = [] 188 | # for node in sparse_deaths_graph.nodes(): 189 | # x, y = positions[node] 190 | # node_x.append(x) 191 | # node_y.append(y) 192 | 193 | # node_trace = go.Scatter( 194 | # x=node_x, y=node_y, 195 | # mode='markers', 196 | # hoverinfo='text', 197 | # marker=dict( 198 | # showscale=True, 199 | # # colorscale options 200 | # #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' | 201 | # #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' | 202 | # #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' | 203 | # colorscale='YlGnBu', 204 | # reversescale=True, 205 | # color=[], 206 | # size=10, 207 | # colorbar=dict( 208 | # thickness=15, 209 | # title='Number of deaths', 210 | # xanchor='left', 211 | # titleside='right' 212 | # ), 213 | # line_width=2)) 214 | 215 | # node_cases = [] 216 | # node_text = [] 217 | # for node in sparse_deaths_graph.nodes(): 218 | # node_cases.append(num_deaths[node]) 219 | # node_text.append("# of deaths in {}: {}".format(node,num_deaths[node])) 220 | 221 | # node_trace.marker.color = node_cases 222 | # 
node_trace.text = node_text 223 | 224 | # fig = go.Figure(data=[edge_trace, node_trace], 225 | # layout=go.Layout( 226 | # title='
Covid network graph', 227 | # titlefont_size=16, 228 | # showlegend=False, 229 | # hovermode='closest', 230 | # margin=dict(b=20,l=5,r=5,t=40), 231 | # annotations=[ dict( 232 | # text="Covid graph", 233 | # showarrow=False, 234 | # xref="paper", yref="paper", 235 | # x=0.005, y=-0.002 ) ], 236 | # xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), 237 | # yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)) 238 | # ) 239 | # fig.show() -------------------------------------------------------------------------------- /Datathon-4/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/Report.pdf -------------------------------------------------------------------------------- /Datathon-4/__pycache__/optimal_leaf_ordering.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/__pycache__/optimal_leaf_ordering.cpython-37.pyc -------------------------------------------------------------------------------- /Datathon-4/__pycache__/traveling_sales_person.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/__pycache__/traveling_sales_person.cpython-37.pyc -------------------------------------------------------------------------------- /Datathon-4/data_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | import matplotlib.pyplot as plt 5 | from matplotlib import cm 6 | import seaborn 7 | 8 | from traveling_sales_person import TravelingSalesPerson 9 | from optimal_leaf_ordering import OptimalLeafOrdering 10 | 11 | """Data file paths""" 12 | deaths = "../Datathon-3/time_series_covid_19_deaths.csv" 13 | recovered = "../Datathon-3/time_series_covid_19_recovered.csv" 14 | confirmed = "../Datathon-3/time_series_covid_19_confirmed.csv" 15 | 16 | def read_data(filename): 17 | df_in = pd.read_csv(filename) 18 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 19 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 20 | assert df_in.isnull().values.any() == False, "Dataframe has null values" 21 | return df_in 22 | 23 | def get_transpose(df_in): 24 | invert_columns = df_in['Country/Region'].unique() 25 | invert_index = df_in.columns[1:] 26 | 27 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 28 | invert_df = invert_df.fillna(0) 29 | for i in tqdm(df_in['Country/Region']): 30 | for j in invert_index: 31 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 32 | 33 | return invert_df 34 | 35 | def get_correlation_edges(invert_df): 36 | corr_matrix = invert_df.corr() 37 | corr_matrix = corr_matrix.fillna(0) 38 | 39 | # Change the range of similarity values 40 | corr_matrix = 1 + corr_matrix 41 | 42 | return corr_matrix 43 | 44 | def get_average_time(df_in): 45 | dates_vec = list(df_in.columns)[1:] 46 | average_time_array = [None] * df_in.shape[0] 47 | 48 | for i, row in enumerate(df_in.index): 49 | 50 | weighted_sum, total_cases = 0, 0 51 | 52 | for j, date in enumerate(dates_vec): 53 | current = df_in.at[row, date] 54 | weighted_sum 
+= j * current 55 | total_cases += current 56 | 57 | average_time_array[i] = weighted_sum / total_cases 58 | 59 | df_in['avg_time'] = average_time_array 60 | 61 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 62 | list_country1, list_country2, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines 63 | 64 | line_index = 0 65 | for i in range(0, df_in.shape[0] - 1): 66 | for j in range(i + 1, df_in.shape[0]): 67 | index_i, index_j = df_in.index[i], df_in.index[j] 68 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 69 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 70 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 71 | list_d[line_index] = abs(diff_time) 72 | line_index += 1 73 | 74 | print(list_d, len(list_d)) 75 | countries = list(set(list_country1+list_country2)) 76 | countries = {countries[i]:i for i in range(len(countries))} 77 | # print(countries) 78 | 79 | matrix = np.full((len(countries), len(countries)),100) 80 | for i in range(len(countries)): 81 | matrix[i,i] = 0 82 | 83 | idx = 0 84 | for i,j in zip(list_country1,list_country2): 85 | if np.isnan(list_d[idx]): 86 | list_d[idx] = 100 87 | matrix[countries[i],countries[j]] = list_d[idx] 88 | matrix[countries[j],countries[i]] = list_d[idx] 89 | idx += 1 90 | 91 | return matrix 92 | 93 | 94 | """Average time matrix""" 95 | df_deaths = read_data(deaths) 96 | # deaths_matrix = get_average_time(df_deaths) 97 | 98 | """Correlation matrix""" 99 | invert_df = get_transpose(df_deaths) 100 | corr_edges = get_correlation_edges(invert_df) 101 | 102 | """Choose which matrix to visualize""" 103 | data = corr_edges 104 | # data = deaths_matrix 105 | 106 | tsp = TravelingSalesPerson(data, data_type='data') 107 | # olo = OptimalLeafOrdering(pd.DataFrame(data), data_type='data', metric='euclidean', method='complete') 108 | seaborn.heatmap(data, cmap = cm.Blues,xticklabels=True, yticklabels=True) 109 | plt.figure() 110 | 111 | # Visualize the output data 112 | Y = tsp.get_ordered_data() 113 | # Y = olo.get_ordered_data() 114 | seaborn.heatmap(Y, cmap = cm.Blues,xticklabels=True, yticklabels=True) 115 | plt.show() 116 | 117 | -------------------------------------------------------------------------------- /Datathon-4/optimal_leaf_ordering.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy.cluster import hierarchy 4 | from scipy.spatial import distance 5 | from scipy.cluster.hierarchy import linkage 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | import seaborn 9 | 10 | class OptimalLeafOrdering: 11 | def __init__(self, data, data_type='data', metric="euclidean", method='single'): 12 | 13 | self.data = data 14 | self.data_type = data_type 15 | self.metric = metric 16 | self.method = method 17 | 18 | def get_ordered_data(self): 19 | row_order = self.compute_dendrogram(axis=0) 20 | col_order = self.compute_dendrogram(axis=1) 21 | return self.data.iloc[row_order, col_order] 22 | 23 | def compute_dendrogram(self, axis=0): 24 | if axis == 1: 25 | data = self.data.T 26 | else: 27 | data = self.data 28 | 29 | # Calculate pairwise distances and linkage 30 | if self.data_type == 'data': 31 | pairwise_dists = distance.pdist(data.values, metric=self.metric) 32 | elif self.data_type == 'dist': 33 | pairwise_dists = [] 34 | for i in range(len(data.values)): 35 | for j in range(i+1, len(data.values)): 36 | pairwise_dists.append(data.values[i][j]) 37 | pairwise_dists = 
np.array(pairwise_dists) 38 | else: 39 | raise NotImplementedError 40 | linkage = hierarchy.linkage(pairwise_dists, method=self.method) 41 | 42 | self.M = {} 43 | tree = hierarchy.to_tree(linkage) 44 | dists = distance.squareform(pairwise_dists) 45 | tree = self.order_tree(tree, dists) 46 | order = self.leaves(tree) 47 | del self.M 48 | return order 49 | 50 | def optimal_scores(self, v, D, fast=True): 51 | """ Implementation of Ziv-Bar-Joseph et al.'s leaf order algorithm 52 | v is a ClusterNode 53 | D is a distance matrix """ 54 | 55 | def score(left, right, u, m, w, k): 56 | return get_M(left, u, m) + get_M(right, w, k) + D[m, k] 57 | 58 | def get_M(v, a, b): 59 | if a == b: 60 | self.M[v.get_id(), a, b] = 0 61 | return self.M[v.get_id(), a, b] 62 | 63 | if v.is_leaf(): 64 | n = v.get_id() 65 | self.M[v.get_id(), n, n] = 0 66 | return 0 67 | else: 68 | L = self.leaves(v.left) 69 | R = self.leaves(v.right) 70 | LL = self.leaves(v.left.left, v.left) 71 | LR = self.leaves(v.left.right, v.left) 72 | RL = self.leaves(v.right.left, v.right) 73 | RR = self.leaves(v.right.right, v.right) 74 | for l in L: 75 | for r in R: 76 | self.M[v.left.get_id(), l, r] = self.optimal_scores(v.left, D, fast=False) 77 | self.M[v.right.get_id(), l, r] = self.optimal_scores(v.right, D, fast=False) 78 | for u in L: 79 | for w in R: 80 | if fast: 81 | m_order = sorted(self.other(u, LL, LR), key=lambda m: get_M(v.left, u, m)) 82 | k_order = sorted(self.other(w, RL, RR), key=lambda k: get_M(v.right, w, k)) 83 | C = min([D[m, k] for m in self.other(u, LL, LR) for k in self.other(w, RL, RR)]) 84 | Cmin = 1e10 85 | for m in m_order: 86 | if self.M[v.left.get_id(), u, m] + self.M[v.right.get_id(), w, k_order[0]] + C >= Cmin: 87 | break 88 | for k in k_order: 89 | if self.M[v.left.get_id(), u, m] + self.M[v.right.get_id(), w, k] + C >= Cmin: 90 | break 91 | C = score(v.left, v.right, u, m, w, k) 92 | if C < Cmin: 93 | Cmin = C 94 | self.M[v.get_id(), u, w] = self.M[v.get_id(), w, u] = Cmin 95 | else: 96 | self.M[v.get_id(), u, w] = self.M[v.get_id(), w, u] = \ 97 | min([score(v.left, v.right, u, m, w, k) \ 98 | for m in self.other(u, LL, LR) \ 99 | for k in self.other(w, RL, RR)]) 100 | return self.M[v.get_id(), l, r] 101 | 102 | def order_tree(self, v, D, fM=None, fast=True): 103 | 104 | if fM is None: 105 | fM = 1 106 | self.optimal_scores(v, D, fast=fast) 107 | 108 | L = self.leaves(v.left) 109 | R = self.leaves(v.right) 110 | if len(L) and len(R): 111 | def getkey(z): 112 | u,w = z 113 | return self.M[v.get_id(),u,w] 114 | if len(L) and len(R): 115 | u, w = min(itertools.product(L,R), key=getkey) 116 | if w in self.leaves(v.right.left): 117 | v.right.right, v.right.left = v.right.left, v.right.right 118 | if u in self.leaves(v.left.right): 119 | v.left.left, v.left.right = v.left.right, v.left.left 120 | v.left = self.order_tree(v.left, D, fM) 121 | v.right = self.order_tree(v.right, D, fM) 122 | return v 123 | 124 | def other(self, x, V, W): 125 | # For an element x, returns the set that x isn't in 126 | if x in V: 127 | return W 128 | else: 129 | return V 130 | 131 | def leaves(self, t, t2=None): 132 | """ Returns the leaves of a ClusterNode """ 133 | try: 134 | return t.pre_order() 135 | except AttributeError: 136 | if t2 is not None: 137 | return t2.pre_order() 138 | else: 139 | return [] 140 | 141 | if __name__ == "__main__": 142 | # Create a staircase matrix 143 | X = np.zeros((100, 100)) 144 | for n in [0,10,20,30,40,50,60]: 145 | X[int(10.*n/7):int(10.*(n+10)/7):,n:n+40] = 1 146 | 147 | X = 
distance.squareform(distance.pdist(X, metric="euclidean")) 148 | # X = distance.squareform(distance.pdist(X, metric="hamming")) 149 | 150 | seaborn.heatmap(X) 151 | plt.figure() 152 | 153 | # Since we know the data has a staircase pattern, we can now shuffle the rows and columns 154 | np.random.shuffle(X) 155 | X = X.T 156 | np.random.shuffle(X) 157 | X = X.T 158 | 159 | # Visualize the input data 160 | seaborn.heatmap(X) 161 | olo = OptimalLeafOrdering(pd.DataFrame(X), metric='hamming', method='complete') 162 | plt.figure() 163 | 164 | # Visualize the output data 165 | Y = olo.get_ordered_data() 166 | seaborn.heatmap(Y) 167 | plt.show() -------------------------------------------------------------------------------- /Datathon-4/traveling_sales_person.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ortools # Using ortools version 7 3 | from ortools.constraint_solver import pywrapcp, routing_enums_pb2 4 | from scipy.spatial.distance import squareform, pdist 5 | import matplotlib.pyplot as plt 6 | import seaborn 7 | import pandas as pd 8 | 9 | class TravelingSalesPerson: 10 | def __init__(self, data, data_type='dist', metric='euclidean', approximation_multiplier=1000, timeout=2.0): 11 | self.data = data 12 | self.data_type = data_type 13 | self.metric = metric 14 | self.approximation_multiplier = approximation_multiplier 15 | self.timeout = timeout 16 | 17 | def get_ordered_data(self): 18 | if self.data_type == 'dist': 19 | row_order = self.seriate(self.data) 20 | column_order = self.seriate(self.data.T) 21 | elif self.data_type == 'data': 22 | # Get distances along rows 23 | dist1 = squareform(pdist(self.data, metric=self.metric)) 24 | # Get distances along columns 25 | dist2 = squareform(pdist(self.data.T, metric=self.metric)) 26 | 27 | row_order = self.seriate(dist1) 28 | column_order = self.seriate(dist2) 29 | else: 30 | raise NotImplementedError 31 | 32 | ordered_data = pd.DataFrame(self.data) 33 | ordered_data = ordered_data.iloc[row_order, column_order] 34 | return ordered_data 35 | 36 | def get_ordered_data_recompute(self): 37 | """Re-compute distances for column after computing row_order""" 38 | if self.data_type == 'data': 39 | # Get distances along rows 40 | dist1 = squareform(pdist(self.data, metric=self.metric)) 41 | row_order = self.seriate(dist1) 42 | data = pd.DataFrame(self.data) 43 | data = data.iloc[row_order,:] 44 | 45 | # Get distances along columns 46 | dist2 = squareform(pdist(data.values.T, metric=self.metric)) 47 | column_order = self.seriate(dist2) 48 | ordered_data = data.iloc[:, column_order] 49 | else: 50 | raise NotImplementedError 51 | return ordered_data 52 | 53 | def validate_data(self, dists): 54 | """Check dists contains valid values.""" 55 | try: 56 | isinf = np.isinf(dists).any() 57 | isnan = np.isnan(dists).any() 58 | except Exception as e: 59 | raise InvalidDistanceValues() from e 60 | if isinf: 61 | raise InvalidDistanceValues("Data contains inf values.") 62 | if isnan: 63 | raise InvalidDistanceValues("Data contains NaN values.") 64 | 65 | def seriate(self, dists): 66 | # Validate distances 67 | self.validate_data(dists) 68 | if self.timeout > 0: 69 | return self._seriate(dists=dists) 70 | elif self.timeout < 0: 71 | raise ValueError("timeout cannot be negative.") 72 | self.timeout = 1. 
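# When timeout == 0, fall back to a 1-second limit and keep doubling it (in the loop below)
# until the TSP solver returns a complete route instead of raising IncompleteSolutionError.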
73 | route = None 74 | while route is None: 75 | try: 76 | route = self._seriate(dists=dists) 77 | except IncompleteSolutionError: 78 | self.timeout *= 2 79 | return route 80 | 81 | 82 | def _seriate(self, dists): 83 | assert dists[dists < 0].size == 0, "distances must be non-negative" 84 | assert self.timeout > 0 85 | squareform = len(dists.shape) == 2 86 | if squareform: 87 | assert dists.shape[0] == dists.shape[1] 88 | size = dists.shape[0] 89 | else: 90 | raise InvalidDistanceValues("Data is not squareform.") 91 | 92 | manager = pywrapcp.RoutingIndexManager(size + 1, 1, size) 93 | routing = pywrapcp.RoutingModel(manager) 94 | 95 | def dist_callback(x, y): 96 | x = manager.IndexToNode(x) 97 | y = manager.IndexToNode(y) 98 | if x == size or y == size or x == y: 99 | return 0 100 | if squareform: 101 | dist = dists[x, y] 102 | else: 103 | # convert to the condensed index 104 | if x < y: 105 | x, y = y, x 106 | dist = dists[size * y - y * (y + 1) // 2 + x - y - 1] 107 | # ortools wants integers, so we approximate here 108 | return int(dist * self.approximation_multiplier) 109 | 110 | routing.SetArcCostEvaluatorOfAllVehicles(routing.RegisterTransitCallback(dist_callback)) 111 | search_parameters = pywrapcp.DefaultRoutingSearchParameters() 112 | search_parameters.time_limit.FromMilliseconds(int(self.timeout * 1000)) 113 | search_parameters.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH 114 | search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC 115 | assignment = routing.SolveWithParameters(search_parameters) 116 | if assignment is None: 117 | raise IncompleteSolutionError("No solution was found. Please increase the timeout value or set it to 0.") 118 | index = routing.Start(0) 119 | route = [] 120 | while not routing.IsEnd(index): 121 | node = manager.IndexToNode(index) 122 | if node < size: 123 | route.append(node) 124 | index = assignment.Value(routing.NextVar(index)) 125 | return route 126 | 127 | class IncompleteSolutionError(Exception): 128 | """Indicate that a solution for the TSP problem was not found.""" 129 | pass 130 | 131 | 132 | class InvalidDistanceValues(ValueError): 133 | """Indicate that the distance array contains invalid values.""" 134 | pass 135 | 136 | if __name__ == "__main__": 137 | # Create simulated data as in the paper 138 | X = np.zeros((100, 100)) 139 | for n in [0,10,20,30,40,50,60]: 140 | X[int(10.*n/7):int(10.*(n+10)/7):,n:n+40] = 1 141 | 142 | X = squareform(pdist(X, metric="euclidean")) 143 | # X = squareform(pdist(X, metric="hamming")) 144 | 145 | seaborn.heatmap(X) 146 | plt.figure() 147 | 148 | np.random.shuffle(X) 149 | X = X.T 150 | np.random.shuffle(X) 151 | X = X.T 152 | 153 | seaborn.heatmap(X) 154 | tsp = TravelingSalesPerson(X, data_type='data') 155 | plt.figure() 156 | 157 | # Visualize the output data 158 | Y = tsp.get_ordered_data() 159 | seaborn.heatmap(Y) 160 | plt.show() 161 | -------------------------------------------------------------------------------- /Datathon-5/Datathon_5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-5/Datathon_5.pdf -------------------------------------------------------------------------------- /Datathon-5/visualization.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import 
plotly.express as px 4 | 5 | def viz_lifeExp_country(data, years=[i for i in range(2000,2015)]): 6 | data = data.loc[data['year'].isin(years)] 7 | data["life_expectancy_at_birth_all"] = (data["life_expectancy_at_birth_men"]*data["total_population_male"] + data["life_expectancy_at_birth_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 8 | 9 | fig = px.sunburst(data, path=['country','year'], values='life_expectancy_at_birth_all', 10 | color='life_expectancy_at_birth_all', hover_data=['life_expectancy_at_birth_all'], 11 | color_continuous_scale='RdBu',) 12 | 13 | fig.show() 14 | 15 | def viz_lifeExp_europe(data, years=[i for i in range(2000,2015)]): 16 | data = data.loc[data['year'].isin(years)] 17 | data = data.loc[data['country'].isin(['United Kingdom','Germany','France','Italy','Netherlands','Malta','Israel','Belgium','Russia'])] 18 | data['population_in_M_per_sq_km'] = data['total_population']/data['area_square_kilometres'] 19 | 20 | fig = px.sunburst(data, path=['country','year'], values='population_in_M_per_sq_km', 21 | color='population_in_M_per_sq_km', hover_data=['total_population'],) 22 | # color_continuous_scale='RdBu',) 23 | 24 | fig.show() 25 | 26 | def viz_lifeExp_female_fertility(data): 27 | # Female life expectancy 28 | fig = px.parallel_coordinates(data[["country_index","life_expectancy_at_birth_women","mean_age_of_women_at_birth_of_first_child","adolescent_fertility_rate","life_expectancy_at_age_65_women"]], color="life_expectancy_at_age_65_women", 29 | labels={"country_index":"country_index","adolescent_fertility_rate":"adolescent_fertility_rate","life_expectancy_at_birth_women": "life_expectancy_at_birth_women", "mean_age_of_women_at_birth_of_first_child": "mean_age_of_women_at_birth_of_first_child","life_expectancy_at_age_65_women":"life_expectancy_at_age_65_women",}, 30 | color_continuous_scale=px.colors.diverging.Tealrose, 31 | color_continuous_midpoint=2) 32 | fig.show() 33 | 34 | def viz_computer_usage_employment(data, years=[i for i in range(2000,2015)]): 35 | data = data.loc[data['year'].isin(years)] 36 | 37 | data["life_expectancy_at_birth_all"] = (data["life_expectancy_at_birth_men"]*data["total_population_male"] + data["life_expectancy_at_birth_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 38 | data["life_expectancy_at_age_65_all"] = (data["life_expectancy_at_age_65_men"]*data["total_population_male"] + data["life_expectancy_at_age_65_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 39 | data["computer_use_16_24_all"] = data["computer_use_16_24_male"] + data["computer_use_16_24_female"] 40 | data["computer_use_25_54_all"] = data["computer_use_25_54_male"] + data["computer_use_25_54_female"] 41 | data["computer_use_55_74_all"] = data["computer_use_55_74_male"] + data["computer_use_55_74_female"] 42 | 43 | # Parallel coordinates plot 44 | columnsNew = ["life_expectancy_at_birth_all", "life_expectancy_at_age_65_all","computer_use_16_24_all", "computer_use_25_54_all", "computer_use_55_74_all", "youth_unemployment_rate", "unemployment_rate"] 45 | cols = {i:i for i in columnsNew} 46 | 47 | fig = px.parallel_coordinates(data[columnsNew], color="life_expectancy_at_age_65_all", 48 | labels=cols, 49 | color_continuous_scale=px.colors.diverging.Tealrose, 50 | color_continuous_midpoint=2) 51 | 52 | # # Scatter matrix plot 53 | # columnsNew = ["life_expectancy_at_birth_all", 
"computer_use_16_24_all","unemployment_rate"] 54 | # cols = {i:i for i in columnsNew} 55 | # fig = px.scatter_matrix(data[columnsNew], 56 | # dimensions=columnsNew, 57 | # color="unemployment_rate", 58 | # title="Scatter matrix of UNECE employement data", 59 | # labels=cols) 60 | 61 | fig.show() 62 | 63 | def viz_germany(data, years=[i for i in range(2000,2015)]): 64 | data = data.loc[data['year'].isin(years)] 65 | data = data.loc[data['country'] == 'Germany'] 66 | columnsNew = ["year", "total_fertility_rate", "total_population"] 67 | cols = {i:i for i in columnsNew} 68 | 69 | fig = px.scatter_matrix(data[columnsNew], 70 | dimensions=columnsNew, 71 | color="total_population", 72 | title="Scatter matrix of UNECE Germany data", 73 | labels=cols) 74 | 75 | fig.show() 76 | 77 | def viz_employement(data): 78 | columnsNew = ["economic_activity_rate_men_15_64", "economic_acivity_rate_women_15_64", "unemployment_rate"] 79 | cols = {i:i for i in columnsNew} 80 | 81 | fig = px.scatter_matrix(data[columnsNew], 82 | dimensions=columnsNew, 83 | color="unemployment_rate", 84 | title="Scatter matrix of UNECE employement data", 85 | labels=cols) 86 | 87 | fig.show() 88 | 89 | def viz_gender_pay_gap(data, years=[i for i in range(2000,2015)]): 90 | data = data.loc[data['year'].isin(years)] 91 | data["world"] = "world" 92 | fig = px.treemap(data, path=['world','country','year'], values='gender_pay_gap_in_monthly_earnings', 93 | color='gender_pay_gap_in_monthly_earnings', hover_data=['gender_pay_gap_in_monthly_earnings'],) 94 | fig.show() 95 | 96 | if __name__ == '__main__': 97 | data = pd.read_csv("unece.csv",header=0) 98 | 99 | print(data.columns) 100 | 101 | country = list(set(data["country"])) 102 | country_index = {country[i]:i for i in range(len(country))} 103 | 104 | data["country_index"] = data.apply(lambda row: country_index[row.country], axis = 1) 105 | data = data.fillna(data.mean()) 106 | 107 | # viz_lifeExp_country(data) 108 | # viz_computer_usage_employment(data) 109 | # viz_employement(data) 110 | # viz_lifeExp_europe(data) 111 | # viz_gender_pay_gap(data) 112 | # viz_germany(data) 113 | # viz_lifeExp_female_fertility(data) 114 | 115 | 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Visualisation 2 | This repository is a compilation of Assignments that I have attempted for my Data Visualisation course. 3 | 4 | ## Datathon 1 5 | We are given the data of India Ocean, generated by the ocean model MOM, run by Indian National Center for Ocean Information Services, INCOIS, Hyderabad. The data values are 5-day moving average of the following variables: 6 | 1) Sea Surface Salinity (SSS) 7 | 2) Sea Surface Temperature (SST) 8 | 3) Sea Surface Height Anomaly (SSHA) 9 | 4) Meridional current 10 | 5) Zonal current 11 | Our main aim is to effectively visualize the above data to find interesting underlying patterns of the Indian Ocean. We would like to study the effect spatial and temporal patterns of Indian Ocean has on the monsoons of India. Since we have the data around the time of the Tsunami of December 2004, we will also look at the visualizations to infer the impact of the tsunami on the above mentioned variables. 
12 | 13 | ### Visualizations 14 | 15 | ![](./images/D1/m1.png) 16 | ![](./images/D1/ssha.png) 17 | ![](./images/D1/sst.png) 18 | ![](./images/D1/sss.png) 19 | 20 | ## Datathon 2 21 | We are given the data of the Indian Ocean, generated by the ocean model MOM, run by the Indian National Centre for Ocean Information Services (INCOIS), Hyderabad. The data 22 | values are available for the following variables at different depths ranging from 5m to 225m with an interval of 10m: 23 | 1) Salinity – It is the amount of salt dissolved in a body of water. 24 | 2) Potential Temperature – The temperature that an unsaturated parcel of dry air would have if brought adiabatically and reversibly from its initial state to a standard pressure. 25 | 3) Meridional current – Meridional currents flow from north to south, or from south to north, along the Earth’s longitude lines (or meridian circles) 26 | 4) Zonal current – Zonal currents flow from east to west, or from west to east, along the Earth’s latitude lines 27 | 28 | ### Visualizations 29 | 30 | ![](./images/D2/temp_iso.png) 31 | 32 | ## Datathon 3 33 | We are given the tabular datasets published by the World Health Organization for COVID-19 cases. These include State/Country/Region-wise time series data (over a period of 246 days starting from January 2020) for confirmed cases, recovered cases and deaths. Our main aim is to effectively visualize the above data as a network to find underlying patterns or communities formed. 34 | 35 | ### Visualizations 36 | 37 | ![](./images/D3/10_deaths_early.png) 38 | ![](./images/D3/10_recoveries.png) 39 | 40 | ## Datathon 4 41 | We have used the same data as provided in Datathon 3. Our main aim is to effectively visualize the above data as a matrix and find the underlying patterns or clusters formed using two different methods of matrix seriation: fast optimal leaf ordering, and seriation based on a Traveling Salesperson Problem (TSP) formulation. These methods are implemented in the Datathon 4 folder; a short usage sketch is included after the Datathon 5 overview below. 42 | 43 | ### Visualizations 44 | Shuffled data:\ 45 | ![](./images/D4/shuffled_data_2.png)\ 46 | Matrix after seriation using fast OLO:\ 47 | ![](./images/D4/olo_data_2.png)\ 48 | Matrix after seriation using TSP solver:\ 49 | ![](./images/D4/tsp_data_2.png) 50 | 51 | ## Datathon 5 52 | We are given a tabular dataset published by the United Nations Economic Commission for Europe (UNECE) which has the Country Overview data. It consists of data for 52 53 | distinct countries from 2000 to 2016. It has 79 columns that range over different characteristics measuring the performance of the nations. I will be using a subset of this dataset to test some of my hypotheses and will make inferences from the same. 
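For reference, the following is a minimal sketch of how the two seriation helpers in the Datathon-4 folder are driven; it mirrors the `__main__` blocks of `traveling_sales_person.py` and `data_matrix.py`. It assumes it is run from the `Datathon-4` directory with `ortools` installed, and the toy staircase matrix is only an illustration, not one of the COVID-19 matrices.

```python
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt

from traveling_sales_person import TravelingSalesPerson
from optimal_leaf_ordering import OptimalLeafOrdering

# Toy "staircase" matrix whose block structure is hidden by shuffling the rows
X = np.zeros((100, 100))
for n in [0, 10, 20, 30, 40, 50, 60]:
    X[int(10. * n / 7):int(10. * (n + 10) / 7), n:n + 40] = 1
np.random.shuffle(X)

# Seriation via a TSP formulation: rows and columns are reordered along a short tour
tsp_ordered = TravelingSalesPerson(X, data_type='data').get_ordered_data()

# Seriation via fast optimal leaf ordering of a hierarchical clustering
olo_ordered = OptimalLeafOrdering(pd.DataFrame(X), data_type='data',
                                  metric='euclidean', method='complete').get_ordered_data()

seaborn.heatmap(tsp_ordered, cmap=plt.cm.Blues)
plt.figure()
seaborn.heatmap(olo_ordered, cmap=plt.cm.Blues)
plt.show()
```

Both helpers return a reordered DataFrame, so the same `seaborn.heatmap` call can be used to compare the two orderings against the shuffled input.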
54 | 55 | ### Visualizations 56 | 57 | ![](./images/D5/figure_1.png) 58 | ![](./images/D5/figure_3.png) 59 | ![](./images/D5/figure_4.png) 60 | ![](./images/D5/figure_5.png) 61 | ![](./images/D5/figure_7.png) 62 | ![](./images/D5/figure_9.png) 63 | -------------------------------------------------------------------------------- /images/D1/m1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/m1.png -------------------------------------------------------------------------------- /images/D1/ssha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/ssha.png -------------------------------------------------------------------------------- /images/D1/sss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/sss.png -------------------------------------------------------------------------------- /images/D1/sst.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/sst.png -------------------------------------------------------------------------------- /images/D2/temp_iso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D2/temp_iso.png -------------------------------------------------------------------------------- /images/D3/10_deaths_early.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D3/10_deaths_early.png -------------------------------------------------------------------------------- /images/D3/10_recoveries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D3/10_recoveries.png -------------------------------------------------------------------------------- /images/D4/olo_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/olo_data_2.png -------------------------------------------------------------------------------- /images/D4/shuffled_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/shuffled_data_2.png -------------------------------------------------------------------------------- /images/D4/tsp_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/tsp_data_2.png -------------------------------------------------------------------------------- /images/D5/figure_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_1.png -------------------------------------------------------------------------------- /images/D5/figure_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_10.png -------------------------------------------------------------------------------- /images/D5/figure_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_11.png -------------------------------------------------------------------------------- /images/D5/figure_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_12.png -------------------------------------------------------------------------------- /images/D5/figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_2.png -------------------------------------------------------------------------------- /images/D5/figure_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_3.png -------------------------------------------------------------------------------- /images/D5/figure_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_4.png -------------------------------------------------------------------------------- /images/D5/figure_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_5.png -------------------------------------------------------------------------------- /images/D5/figure_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_6.png -------------------------------------------------------------------------------- /images/D5/figure_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_7.png -------------------------------------------------------------------------------- /images/D5/figure_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_8.png -------------------------------------------------------------------------------- /images/D5/figure_9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_9.png --------------------------------------------------------------------------------