├── Datathon-1 ├── .gitignore ├── Report.pdf ├── elevation_map.py ├── scalar_field_visualization.py ├── test.ipynb └── vector_field_visualization.py ├── Datathon-2 ├── .gitignore ├── Datathon_2.pdf ├── README.txt ├── current_statistics.txt ├── isosurfaces.py ├── isosurfaces_currents.py ├── salinity_statistics.txt ├── scalar_field_depth_profiling.py ├── scalar_field_visualization_constant_depth.py ├── temperature_statistics.txt ├── vector_field_visualization_constant_depth.py ├── vector_field_visualization_constant_time.py ├── volume_slice_rendering_currents.py └── volume_slice_rendering_scalar.py ├── Datathon-3 ├── .ipynb_checkpoints │ ├── Covid_graph-checkpoint.ipynb │ └── graph-theory-covid-19-spreading-clustering-checkpoint.ipynb ├── Covid_graph.ipynb ├── Datathon_3.pdf ├── cheatsheet-70-ggplot-charts.ipynb ├── coronavirus-covid-19-visualization-prediction.ipynb ├── covid-19-case-study-analysis-viz-comparisons.ipynb ├── covid_19_data.csv ├── covid_death_graph_networkx.py ├── covid_geo.py ├── covid_graph.py ├── graph-theory-covid-19-spreading-clustering.ipynb ├── time_series_covid_19_confirmed.csv ├── time_series_covid_19_deaths.csv └── time_series_covid_19_recovered.csv ├── Datathon-4 ├── Report.pdf ├── __pycache__ │ ├── optimal_leaf_ordering.cpython-37.pyc │ └── traveling_sales_person.cpython-37.pyc ├── data_matrix.py ├── optimal_leaf_ordering.py └── traveling_sales_person.py ├── Datathon-5 ├── Datathon_5.pdf ├── unece.csv └── visualization.py ├── README.md └── images ├── D1 ├── m1.png ├── ssha.png ├── sss.png └── sst.png ├── D2 └── temp_iso.png ├── D3 ├── 10_deaths_early.png └── 10_recoveries.png ├── D4 ├── olo_data_2.png ├── shuffled_data_2.png └── tsp_data_2.png └── D5 ├── figure_1.png ├── figure_10.png ├── figure_11.png ├── figure_12.png ├── figure_2.png ├── figure_3.png ├── figure_4.png ├── figure_5.png ├── figure_6.png ├── figure_7.png ├── figure_8.png └── figure_9.png /Datathon-1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | .vscode/ 3 | results/ -------------------------------------------------------------------------------- /Datathon-1/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-1/Report.pdf -------------------------------------------------------------------------------- /Datathon-1/elevation_map.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | import matplotlib.cm as cm 8 | 9 | # List of files to be visualised 10 | ssha_data_files = glob.glob("data/ssha/*.txt") 11 | ssha_data_files.sort() 12 | 13 | BAD_FLAG = '-1.E+34' 14 | 15 | idx = 0 16 | 17 | # Data structure to store the value of SSHA at location (LON,LAT) 18 | OCEAN = dict() 19 | date = "" 20 | 21 | with open(ssha_data_files[0],'r') as f: 22 | while(f): 23 | r = f.readline() 24 | if r != '': 25 | if idx >= 10: 26 | data = r.strip().split(',') 27 | date = data[0] 28 | lon = float(data[2]) 29 | lat = float(data[3]) 30 | 31 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 32 | ssha = np.nan 33 | if data[4] != BAD_FLAG: 34 | ssha = float(data[4]) 35 | if lon not in OCEAN: 36 | OCEAN[lon] = dict() 37 | OCEAN[lon][lat] = ssha 38 | 39 | else: 40 | break 41 | 42 | 43 | idx += 1 44 | 45 
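# The parsing loop above stores SSHA at OCEAN[lon][lat], mapping BAD_FLAG readings to NaN;
# the block below collects the unique longitudes/latitudes and regrids the values for plotting.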
| LAT = [] 46 | LON = [] 47 | 48 | for lon in OCEAN: 49 | LON.append(lon) 50 | for lat in OCEAN[lon]: 51 | LAT.append(lat) 52 | 53 | LON = list(set(LON)) 54 | LAT = list(set(LAT)) 55 | 56 | LON.sort() 57 | LAT.sort() 58 | 59 | # Convert SSHA into grid format 60 | SSHA = np.zeros((len(LON),len(LAT)),np.float) 61 | 62 | for i in range(len(LON)): 63 | for j in range(len(LAT)): 64 | SSHA[i][j] = OCEAN[LON[i]][LAT[j]] 65 | 66 | # Visualize the data 67 | fig = plt.figure() 68 | ax = fig.gca(projection='3d') 69 | 70 | # map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 71 | lon, lat = np.meshgrid(LON, LAT) 72 | # map.drawcoastlines() 73 | # map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 74 | # map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 75 | 76 | h = ax.plot_surface(lon,lat,SSHA.T,cmap=cm.hot) 77 | # plt.colorbar() 78 | ax.set_title("Indian Ocean SSHA on {}".format(date.strip("\""))) 79 | 80 | plt.show() 81 | 82 | -------------------------------------------------------------------------------- /Datathon-1/scalar_field_visualization.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | 4 | import matplotlib 5 | matplotlib.use("Agg") 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as manimation 8 | from mpl_toolkits.basemap import Basemap 9 | import matplotlib.cm as cm 10 | 11 | FFMpegWriter = manimation.writers['ffmpeg'] 12 | metadata = dict(title='Indian Ocean - SSHA', artist='Swasti', 13 | comment='Movie support!') 14 | writer = FFMpegWriter(fps=12, metadata=metadata) 15 | 16 | # List of files to be visualised 17 | data_files = glob.glob("data/ssha/*.txt") 18 | data_files.sort() 19 | 20 | BAD_FLAG = '-1.E+34' 21 | 22 | def update(data_file): 23 | idx = 0 24 | 25 | # Data structure to store the value of SSHA at location (LON,LAT) 26 | OCEAN = dict() 27 | date = "" 28 | 29 | with open(data_file,'r') as f: 30 | while(f): 31 | r = f.readline() 32 | if r != '': 33 | if idx >= 10: 34 | data = r.strip().split(',') 35 | date = data[0] 36 | lon = float(data[2]) 37 | lat = float(data[3]) 38 | 39 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 40 | ssha = np.nan 41 | if data[4] != BAD_FLAG: 42 | ssha = float(data[4]) 43 | if lon not in OCEAN: 44 | OCEAN[lon] = dict() 45 | OCEAN[lon][lat] = ssha 46 | 47 | else: 48 | break 49 | 50 | 51 | idx += 1 52 | 53 | LAT = [] 54 | LON = [] 55 | 56 | for lon in OCEAN: 57 | LON.append(lon) 58 | for lat in OCEAN[lon]: 59 | LAT.append(lat) 60 | 61 | LON = list(set(LON)) 62 | LAT = list(set(LAT)) 63 | 64 | LON.sort() 65 | LAT.sort() 66 | 67 | # Convert SSHA into grid format 68 | SSHA = np.zeros((len(LON),len(LAT)),np.float) 69 | 70 | for i in range(len(LON)): 71 | for j in range(len(LAT)): 72 | SSHA[i][j] = OCEAN[LON[i]][LAT[j]] 73 | 74 | # Visualize the data 75 | plt.clf() 76 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 77 | lon, lat = np.meshgrid(LON, LAT) 78 | map.drawcoastlines() 79 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 80 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 81 | 82 | h = map.contourf(lon,lat,SSHA.T,levels=np.linspace(-0.44,0.44,100),cmap=cm.BrBG) 83 | cbar = plt.colorbar() 84 | cbar.set_label("Relative hight of sea suface") 85 | 
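# The fixed contour range (-0.44 to 0.44) chosen above keeps the colour scale identical
# across animation frames, so changes between days remain directly comparable.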
plt.title("Indian Ocean Sea Surface Height Anomaly on {}".format(date.strip("\""))) 86 | 87 | return h 88 | 89 | 90 | fig = plt.figure(figsize=(16,8)) 91 | 92 | with writer.saving(fig, "writer_test.mp4", dpi=100): 93 | for f in data_files: 94 | update(f) 95 | writer.grab_frame() 96 | -------------------------------------------------------------------------------- /Datathon-1/vector_field_visualization.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | meridional_current_data_files = glob.glob("data/meridional_current/*.txt")[:20] 17 | meridional_current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | # Data structure to store the value of current at location (LON,LAT) 25 | OCEAN = dict() 26 | date = "" 27 | 28 | meridional_current_file = "data/meridional_current/"+current_file 29 | zonal_current_file = "data/zonal_current/"+current_file 30 | 31 | with open(meridional_current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | 41 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 42 | meridional_current = np.nan 43 | 44 | if data[5] != BAD_FLAG: 45 | meridional_current = float(data[5])*-1 46 | if lon not in OCEAN: 47 | OCEAN[lon] = dict() 48 | OCEAN[lon][lat] = [0,meridional_current] 49 | 50 | else: 51 | break 52 | 53 | 54 | idx += 1 55 | 56 | idx = 0 57 | with open(zonal_current_file,'r') as f: 58 | while(f): 59 | r = f.readline() 60 | if r != '': 61 | if idx >= 11: 62 | data = r.strip().split(',') 63 | lon = float(data[2]) 64 | lat = float(data[3]) 65 | 66 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 67 | zonal_current = np.nan 68 | 69 | if data[5] != BAD_FLAG: 70 | zonal_current = float(data[5])*-1 71 | 72 | OCEAN[lon][lat][0] = zonal_current 73 | 74 | else: 75 | break 76 | 77 | 78 | idx += 1 79 | 80 | LAT = [] 81 | LON = [] 82 | 83 | for lon in OCEAN: 84 | LON.append(lon) 85 | for lat in OCEAN[lon]: 86 | LAT.append(lat) 87 | 88 | LON = list(set(LON)) 89 | LAT = list(set(LAT)) 90 | 91 | LON.sort() 92 | LAT.sort() 93 | 94 | LON1 = LON[::2] 95 | LAT1 = LAT[::2] 96 | 97 | # Convert meridional_current into grid format 98 | meridional_current = np.zeros((len(LON1),len(LAT1)),np.float) 99 | zonal_current = np.zeros((len(LON1),len(LAT1)),np.float) 100 | magnitude = np.zeros((len(LON),len(LAT)),np.float) 101 | 102 | for i in range(len(LON)): 103 | for j in range(len(LAT)): 104 | zc = OCEAN[LON[i]][LAT[j]][0] 105 | mc = OCEAN[LON[i]][LAT[j]][1] 106 | mag = np.sqrt(zc**2 + mc**2) 107 | magnitude[i][j] = mag 108 | 109 | for i in range(len(LON1)): 110 | for j in range(len(LAT1)): 111 | zc = OCEAN[LON1[i]][LAT1[j]][0] 112 | mc = OCEAN[LON1[i]][LAT1[j]][1] 113 | # mag = np.sqrt(zc**2 + mc**2) 114 | zonal_current[i][j] = (zc)#/mag) 115 | meridional_current[i][j] = (mc)#/mag) 116 | 117 | # 
Visualize the data 118 | plt.clf() 119 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 120 | lon, lat = np.meshgrid(LON, LAT) 121 | lon1, lat1 = np.meshgrid(LON1, LAT1) 122 | map.drawcoastlines() 123 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 124 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 125 | #h = map.contourf(lon,lat,magnitude.T,levels=np.linspace(0,3,100)) 126 | q = map.quiver(lon1,lat1,zonal_current.T,meridional_current.T,width=0.01, color='black', scale=150) 127 | plt.quiverkey(q, 0, 20, 1, "Arrow length vs magnitude") 128 | #cbar = map.colorbar(h) 129 | #cbar.set_label("Magnitude of current value in m/sec") 130 | plt.title("Currents (zonal and meridional) at depth = 5 in Indian Ocean on {}".format(date.strip("\""))) 131 | 132 | return q#, h 133 | 134 | 135 | fig = plt.figure(figsize=(16,8)) 136 | 137 | with writer.saving(fig, "writer_test.mp4", dpi=250): 138 | for f in meridional_current_data_files: 139 | f = f[23:] 140 | update(f) 141 | writer.grab_frame() 142 | -------------------------------------------------------------------------------- /Datathon-2/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /Datathon-2/Datathon_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-2/Datathon_2.pdf -------------------------------------------------------------------------------- /Datathon-2/README.txt: -------------------------------------------------------------------------------- 1 | Submissions: Data Visualization - Datathon 2 2 | 3 | Google drive link to video visualizations: https://drive.google.com/drive/folders/1_D_ywfun_xBxxPrWgV7PBYOVCob8-nKM?usp=sharing 4 | 5 | Directory Structure: 6 | . 
7 | ├── Report.pdf 8 | ├── source codes 9 | │   ├── isosurfaces 10 | │   │   ├── isosurfaces_currents.py 11 | │   │   └── isosurfaces.py 12 | │   ├── scalar_field_visualization 13 | │   │   ├── scalar_field_depth_profiling.py 14 | │   │   └── scalar_field_visualization_constant_depth.py 15 | │   ├── vector_field_visualization 16 | │   │   ├── vector_field_visualization_constant_depth.py 17 | │   │   └── vector_field_visualization_constant_time.py 18 | │   └── volume_slice_rendering 19 | │   ├── volume_slice_rendering_currents.py 20 | │   └── volume_slice_rendering_scalar.py 21 | └── statistics 22 | ├── current_statistics.csv 23 | ├── salinity_statistics.csv 24 | └── temperature_statistics.csv 25 | -------------------------------------------------------------------------------- /Datathon-2/current_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 0.2634942502103688; 0.000503441515044454; 0.022437502424388835 3 | 15; 0.2489651476609433; 0.000477773350267354; 0.021858027135753914 4 | 25; 0.2354338583326177; 0.000463173160228980; 0.021521458134359316 5 | 35; 0.2228846622477850; 0.000431883839129440; 0.020781815106709053 6 | 45; 0.2108549591859987; 0.000387964509998090; 0.019696814717057434 7 | 55; 0.1991084264726173; 0.000334939633818405; 0.018301356065013482 8 | 65; 0.1876547911443133; 0.000284967249422415; 0.016880972999872243 9 | 75; 0.1766129143298025; 0.000225996616472560; 0.015033183843503026 10 | 85; 0.1651594652469909; 0.000162390281966655; 0.012743244561988755 11 | 95; 0.1520378122342999; 0.000109704636120663; 0.010473998096269787 12 | 105; 0.1466897563841745; 8.91990976029287e-05; 0.00944452738907187 13 | 115; 0.1375138462634111; 6.58093668285688e-05; 0.008112297259628053 14 | 125; 0.1291105077187464; 5.335497187524599e-05; 0.0073044487728538415 15 | 135; 0.1214819740523726; 4.742582572019154e-05; 0.006886641105807064 16 | 145; 0.1149101913745539; 4.3337151435385745e-05; 0.006583095885325212 17 | 155; 0.1090671400546524; 3.951883504182367e-05; 0.006286400801875718 18 | 165; 0.1037920916983562; 3.64305062401065e-05; 0.006035768902145484 19 | 175; 0.0991418385261553; 3.451358600773206e-05; 0.005874826466180262 20 | 185; 0.0950919389056526; 3.2375018219734675e-05; 0.005689904939428661 21 | 195; 0.0916113346603196; 3.084017975706223e-05; 0.005553393535223506 22 | 205; 0.0885771978760791; 2.895110311736927e-05; 0.005380622930234869 23 | 215; 0.0859321003877646; 2.7174918883266603e-05; 0.005212956827297403 24 | 225; 0.0836067124303342; 2.5429947507723384e-05; 0.0050428114685880715 25 | -------------------------------------------------------------------------------- /Datathon-2/isosurfaces.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly.graph_objects as go 3 | 4 | file_path = "data/datathon2_data/OneDrive_1_12-09-2020/Salinity_3D/001_29_Dec_2003.txt" 5 | 6 | idx = 0 7 | 8 | BAD_FLAG = '-1.E+34' 9 | 10 | date = "" 11 | LAT = [] 12 | LON = [] 13 | DEP = [] 14 | SALT = [] # Note that this can be changed to any variable 15 | 16 | OCEAN = dict() 17 | 18 | with open(file_path,'r') as f: 19 | while(f): 20 | r = f.readline() 21 | if r != '': 22 | if idx >= 11: 23 | data = r.strip().split(',') 24 | date = data[0] 25 | lon = float(data[2]) 26 | lat = float(data[3]) 27 | dep = float(data[4]) 28 | salt = data[5] 29 | if salt == BAD_FLAG: 30 | salt = np.nan 31 | else: 32 | salt = float(salt) 33 | OCEAN[(lat,lon,dep)] = salt 34 | 
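# Keep only samples from the top 50 m for the isosurface plot; deeper points remain in
# OCEAN but are skipped by the check below and never appended to the plotting lists.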
if dep > 50: 35 | continue 36 | LAT.append(lat) 37 | LON.append(lon) 38 | DEP.append(dep) 39 | SALT.append(salt) 40 | 41 | 42 | else: 43 | break 44 | 45 | 46 | idx += 1 47 | 48 | MAX_SALT = np.nanmax(SALT) 49 | MIN_SALT = np.nanmin(SALT) 50 | 51 | fig= go.Figure( 52 | data=go.Isosurface( 53 | x=LON, 54 | y=LAT, 55 | z=DEP, 56 | value=SALT, 57 | isomin=MIN_SALT, 58 | isomax=MAX_SALT, 59 | surface_count=50, 60 | colorbar_nticks=5, 61 | colorbar_title="Salinity (psu)", 62 | # opacity=0.7, 63 | caps=dict(x_show=False, y_show=False, z_show=False) 64 | ), 65 | layout=go.Layout( 66 | scene = dict( 67 | xaxis = dict(title='Longitude'), 68 | yaxis = dict(title='Latitude'), 69 | zaxis = dict(title='Depth in meters'), 70 | ), 71 | title = go.layout.Title( 72 | text='Indian Ocean Salinity with variation in depth in meters (z-direction) on 29 December 2003' 73 | ) 74 | ) 75 | ) 76 | 77 | fig.show() 78 | -------------------------------------------------------------------------------- /Datathon-2/isosurfaces_currents.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import plotly.graph_objects as go 3 | 4 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 5 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 6 | 7 | idx = 0 8 | 9 | BAD_FLAG = '-1.E+34' 10 | 11 | date = "" 12 | LAT = [] 13 | LON = [] 14 | DEP = [] 15 | MAGNITUDE = [] 16 | 17 | OCEAN = dict() 18 | 19 | with open(meridional_curr_file_path,'r') as f: 20 | while(f): 21 | r = f.readline() 22 | if r != '': 23 | if idx >= 12: 24 | data = r.strip().split(',') 25 | date = data[0] 26 | lon = float(data[2]) 27 | lat = float(data[3]) 28 | dep = float(data[4]) 29 | meridional_current = data[5] 30 | if meridional_current == BAD_FLAG: 31 | meridional_current = np.nan 32 | else: 33 | meridional_current = float(data[5])*-1 34 | OCEAN[(lat,lon,dep)] = [0,meridional_current] 35 | 36 | else: 37 | break 38 | 39 | 40 | idx += 1 41 | 42 | idx = 0 43 | 44 | with open(zonal_curr_file_path,'r') as f: 45 | while(f): 46 | r = f.readline() 47 | if r != '': 48 | if idx >= 12: 49 | data = r.strip().split(',') 50 | date = data[0] 51 | lon = float(data[2]) 52 | lat = float(data[3]) 53 | dep = float(data[4]) 54 | zonal_current = data[5] 55 | if zonal_current == BAD_FLAG: 56 | zonal_current = np.nan 57 | else: 58 | zonal_current = float(data[5])*-1 59 | OCEAN[(lat,lon,dep)][0] = zonal_current 60 | mag = np.nan 61 | if OCEAN[(lat,lon,dep)][0] == np.nan or OCEAN[(lat,lon,dep)][1] == np.nan: 62 | mag = np.nan 63 | else: 64 | mag = np.sqrt(OCEAN[(lat,lon,dep)][0]**2 + OCEAN[(lat,lon,dep)][1]**2) 65 | OCEAN[(lat,lon,dep)] = mag 66 | if dep > 50: 67 | continue 68 | LAT.append(lat) 69 | LON.append(lon) 70 | DEP.append(dep) 71 | MAGNITUDE.append(mag) 72 | 73 | 74 | else: 75 | break 76 | 77 | 78 | idx += 1 79 | 80 | MAX_MAG = np.nanmax(MAGNITUDE) 81 | MIN_MAG = np.nanmin(MAGNITUDE) 82 | 83 | fig= go.Figure( 84 | data=go.Isosurface( 85 | x=LON, 86 | y=LAT, 87 | z=DEP, 88 | value=MAGNITUDE, 89 | isomin=MIN_MAG, 90 | isomax=MAX_MAG, 91 | surface_count=50, 92 | colorbar_nticks=10, 93 | colorscale="viridis", 94 | colorbar_title="Magnitude of currents (m/sec)", 95 | # opacity=0.7, 96 | caps=dict(x_show=False, y_show=False, z_show=False) 97 | ), 98 | layout=go.Layout( 99 | scene = dict( 100 | xaxis = dict(title='Longitude'), 101 | yaxis = dict(title='Latitude'), 102 | zaxis = dict(title='Depth in meters'), 103 
| ), 104 | title = go.layout.Title( 105 | text='Indian Ocean Magnitude of zonal and meridional currents with variation in depth in meters (z-direction) on 4 November 2004' 106 | ) 107 | ) 108 | ) 109 | 110 | fig.show() 111 | -------------------------------------------------------------------------------- /Datathon-2/salinity_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 34.5476460403180; 0.06007074323328246; 0.24509333575860945 3 | 15; 34.5567419239074; 0.05997460412994543; 0.24489712968907054 4 | 25; 34.5953786986189; 0.05940470900179379; 0.24373081258181903 5 | 35; 34.6533809416091; 0.05838554022094924; 0.24163100012405123 6 | 45; 34.7159942281470; 0.05766160942211035; 0.24012831865923343 7 | 55; 34.7737773656595; 0.05718184711656207; 0.23912726134124088 8 | 65; 34.8233653941431; 0.05690329247441789; 0.23854411012309211 9 | 75; 34.8640548571675; 0.05686164979542857; 0.23845680907751107 10 | 85; 34.8946684755262; 0.05690541135738735; 0.23854855136300315 11 | 95; 34.9145288027546; 0.05696304923805384; 0.23866933032556543 12 | 105; 34.9523827255303; 0.05693372561975969; 0.23860789094193782 13 | 115; 34.9564708081097; 0.05686253847836554; 0.23845867247463562 14 | 125; 34.9572196443218; 0.05679271401221725; 0.23831221960322818 15 | 135; 34.9580149818220; 0.05673752898979663; 0.23819640843177428 16 | 145; 34.9594041885092; 0.05670522886006821; 0.23812859731680322 17 | 155; 34.9617613453292; 0.05669875045701147; 0.23811499418770646 18 | 165; 34.9645535062396; 0.05670237475919637; 0.23812260446920275 19 | 175; 34.9676362343133; 0.05670461696652149; 0.23812731251690028 20 | 185; 34.9691550941273; 0.05670286285414368; 0.23812362934858794 21 | 195; 34.9710786242774; 0.05670626689237978; 0.23813077686930723 22 | 205; 34.9711766413845; 0.05670500214430586; 0.23812812127992333 23 | 215; 34.9709881521240; 0.05669342125369363; 0.23810380352630584 24 | 225; 34.9696517278573; 0.05668299852520193; 0.23808191557781522 25 | -------------------------------------------------------------------------------- /Datathon-2/scalar_field_depth_profiling.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Temperature', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/PotentialTemperature_3D/*.txt")[:1] 17 | current_file = current_data_files[0] 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | idx = 0 22 | 23 | LAT = set() 24 | LON = set() 25 | DEP = set() 26 | # Data structure to store the value of current at location (LON,LAT) 27 | OCEAN = dict() 28 | date = "" 29 | 30 | idx = 0 31 | with open(current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | try: 38 | date = data[0] 39 | lon = float(data[2]) 40 | lat = float(data[3]) 41 | dep = float(data[4]) 42 | 43 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 44 | temp = np.nan 45 | 46 | if data[5] != BAD_FLAG: 47 | temp = 
float(data[5]) 48 | 49 | OCEAN[lon,lat,dep] = temp 50 | LAT.add(lat) 51 | LON.add(lon) 52 | DEP.add(dep) 53 | except: 54 | continue 55 | 56 | else: 57 | break 58 | 59 | 60 | idx += 1 61 | 62 | 63 | LON = list(LON) 64 | LAT = list(LAT) 65 | DEP = list(DEP) 66 | 67 | LON.sort() 68 | LAT.sort() 69 | DEP.sort() 70 | 71 | 72 | def update(lon): 73 | 74 | # Convert TEMP into grid format 75 | TEMP = np.zeros((len(LAT),len(DEP)),np.float) 76 | 77 | for i in range(len(LAT)): 78 | for j in range(len(DEP)): 79 | try: 80 | TEMP[i][j] = OCEAN[lon,LAT[i],DEP[j]] 81 | except: 82 | TEMP[i][j] = np.nan 83 | continue 84 | 85 | # Visualize the data 86 | plt.clf() 87 | lat, dep = np.meshgrid(LAT, DEP) 88 | 89 | h = plt.contourf(lat,dep,TEMP.T,cmap=cm.hot) 90 | cbar = plt.colorbar() 91 | cbar.set_label("Potential Temperature (degree Celcius)") 92 | plt.title("Indian Ocean Potential Temperature at longitude = {}mdegrees on {}".format(lon,date.strip("\""))) 93 | plt.xlabel("Latitude in degrees") 94 | plt.ylabel("Depth in meters") 95 | 96 | return h 97 | 98 | 99 | fig = plt.figure(figsize=(16,8)) 100 | 101 | with writer.saving(fig, "writer_test.mp4", dpi=250): 102 | for lon in LON: 103 | update(lon) 104 | writer.grab_frame() 105 | -------------------------------------------------------------------------------- /Datathon-2/scalar_field_visualization_constant_depth.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Salinity', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/Salinity_3D/*.txt")[:20] 17 | current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | LAT = set() 25 | LON = set() 26 | # Data structure to store the value of current at location (LON,LAT) 27 | OCEAN = dict() 28 | date = "" 29 | 30 | idx = 0 31 | with open(current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 11: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | dep = float(data[4]) 41 | 42 | if dep != 45.0: 43 | continue 44 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 45 | salt = np.nan 46 | 47 | if data[5] != BAD_FLAG: 48 | salt = float(data[5]) 49 | 50 | OCEAN[lon,lat] = salt 51 | LAT.add(lat) 52 | LON.add(lon) 53 | 54 | else: 55 | break 56 | 57 | 58 | idx += 1 59 | 60 | 61 | LON = list(LON) 62 | LAT = list(LAT) 63 | 64 | LON.sort() 65 | LAT.sort() 66 | 67 | # Convert SALT into grid format 68 | SALT = np.zeros((len(LON),len(LAT)),np.float) 69 | 70 | for i in range(len(LON)): 71 | for j in range(len(LAT)): 72 | try: 73 | SALT[i][j] = OCEAN[LON[i],LAT[j]] 74 | except: 75 | SALT[i][j] = np.nan 76 | continue 77 | 78 | # Visualize the data 79 | plt.clf() 80 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 81 | lon, lat = np.meshgrid(LON, LAT) 82 | map.drawcoastlines() 83 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, 
labels=[1,0,0,0]) 84 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 85 | 86 | h = map.contourf(lon,lat,SALT.T,cmap=cm.plasma) 87 | cbar = plt.colorbar() 88 | cbar.set_label("Salinity (psu)") 89 | plt.title("Indian Ocean Salinity at depth = 45.0m on {}".format(date.strip("\""))) 90 | 91 | return h 92 | 93 | 94 | fig = plt.figure(figsize=(16,8)) 95 | 96 | with writer.saving(fig, "writer_test.mp4", dpi=250): 97 | for f in current_data_files: 98 | update(f) 99 | writer.grab_frame() 100 | -------------------------------------------------------------------------------- /Datathon-2/temperature_statistics.txt: -------------------------------------------------------------------------------- 1 | Depth; Mean; Variance; Standard deviation 2 | 5; 27.11250655356588; 0.5476857593587509; 0.7400579432441428 3 | 15; 27.03993716170805; 0.5373139832493482; 0.7330170415818095 4 | 25; 26.84029407042512; 0.4821577178820572; 0.6943757757022182 5 | 35; 26.48182443501643; 0.3652256417414011; 0.6043390122616618 6 | 45; 25.96732721296131; 0.2441088972320103; 0.4940737771143196 7 | 55; 25.31709901569217; 0.1496946378382129; 0.3869039129269862 8 | 65; 24.56975614817752; 0.0951665899098514; 0.3084908262977222 9 | 75; 23.73544411402076; 0.0718090781810706; 0.2679721593394930 10 | 85; 22.83602555384061; 0.0627572831439539; 0.2505140378181509 11 | 95; 21.92278382624695; 0.0628753052124298; 0.2507494869634433 12 | 105; 20.87278935279349; 0.0794405080409788; 0.2818519257357999 13 | 115; 19.95056975235749; 0.0779852228317582; 0.2792583442473264 14 | 125; 19.08908719596788; 0.0694392296677286; 0.2635132438184628 15 | 135; 18.30714756773401; 0.0577132054374724; 0.2402357288945016 16 | 145; 17.61300266303936; 0.0462526987244731; 0.2150644059914916 17 | 155; 16.99729512782661; 0.0365353727850427; 0.1911422841368249 18 | 165; 16.45414063891477; 0.0300029379191761; 0.1732135615913955 19 | 175; 15.97000871473394; 0.0251827300456243; 0.1586906740978319 20 | 185; 15.54376712077133; 0.0214443733671179; 0.1464389748909693 21 | 195; 15.16498681087736; 0.0188419278727541; 0.1372659020760587 22 | 205; 14.82257698705703; 0.0169301382856726; 0.1301158648500354 23 | 215; 14.51234226885430; 0.0156999296340625; 0.1252993600704432 24 | 225; 14.22235575155881; 0.0148140654231499; 0.1217130454107117 25 | -------------------------------------------------------------------------------- /Datathon-2/vector_field_visualization_constant_depth.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | # List of files to be visualised 16 | meridional_current_data_files = glob.glob("data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/*.txt")[:20] 17 | meridional_current_data_files.sort() 18 | 19 | BAD_FLAG = '-1.E+34' 20 | 21 | def update(current_file): 22 | idx = 0 23 | 24 | # Data structure to store the value of current at location (LON,LAT) 25 | OCEAN = dict() 26 | date = "" 27 | 28 | meridional_current_file = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/"+current_file 29 | zonal_current_file = 
"data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/"+current_file 30 | 31 | with open(meridional_current_file,'r') as f: 32 | while(f): 33 | r = f.readline() 34 | if r != '': 35 | if idx >= 12: 36 | data = r.strip().split(',') 37 | date = data[0] 38 | lon = float(data[2]) 39 | lat = float(data[3]) 40 | dep = float(data[4]) 41 | 42 | if dep != 5.0: 43 | continue 44 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 45 | meridional_current = np.nan 46 | 47 | if data[5] != BAD_FLAG: 48 | meridional_current = float(data[5])*-1 49 | if lon not in OCEAN: 50 | OCEAN[lon] = dict() 51 | OCEAN[lon][lat] = [0,meridional_current] 52 | 53 | else: 54 | break 55 | 56 | 57 | idx += 1 58 | 59 | idx = 0 60 | with open(zonal_current_file,'r') as f: 61 | while(f): 62 | r = f.readline() 63 | if r != '': 64 | if idx >= 12: 65 | data = r.strip().split(',') 66 | lon = float(data[2]) 67 | lat = float(data[3]) 68 | dep = float(data[4]) 69 | 70 | if dep != 5.0: 71 | continue 72 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 73 | zonal_current = np.nan 74 | 75 | if data[5] != BAD_FLAG: 76 | zonal_current = float(data[5])*-1 77 | 78 | OCEAN[lon][lat][0] = zonal_current 79 | 80 | else: 81 | break 82 | 83 | 84 | idx += 1 85 | 86 | LAT = [] 87 | LON = [] 88 | 89 | for lon in OCEAN: 90 | LON.append(lon) 91 | for lat in OCEAN[lon]: 92 | LAT.append(lat) 93 | 94 | LON = list(set(LON)) 95 | LAT = list(set(LAT)) 96 | 97 | LON.sort() 98 | LAT.sort() 99 | 100 | LON1 = LON#[::2] 101 | LAT1 = LAT#[::2] 102 | 103 | # Convert meridional_current into grid format 104 | meridional_current = np.zeros((len(LON1),len(LAT1)),np.float) 105 | zonal_current = np.zeros((len(LON1),len(LAT1)),np.float) 106 | 107 | for i in range(len(LON1)): 108 | for j in range(len(LAT1)): 109 | try: 110 | zc = OCEAN[LON1[i]][LAT1[j]][0] 111 | mc = OCEAN[LON1[i]][LAT1[j]][1] 112 | zonal_current[i][j] = zc 113 | meridional_current[i][j] = mc 114 | except: 115 | zonal_current[i][j] = np.nan 116 | meridional_current[i][j] = np.nan 117 | continue 118 | 119 | # Visualize the data 120 | plt.clf() 121 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 122 | lon, lat = np.meshgrid(LON, LAT) 123 | lon1, lat1 = np.meshgrid(LON1, LAT1) 124 | map.drawcoastlines() 125 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 126 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 127 | q = map.quiver(lon1,lat1,zonal_current.T,meridional_current.T,width=0.001, color='black', scale=150) 128 | _ = plt.quiverkey(q, 0.85, 0.85, 2,'2 m/sec', labelpos='E',coordinates='figure') 129 | plt.title("Currents (zonal and meridional) at depth = 5.0m in Indian Ocean on {}".format(date.strip("\""))) 130 | 131 | return q 132 | 133 | 134 | fig = plt.figure(figsize=(16,8)) 135 | 136 | with writer.saving(fig, "writer_test.mp4", dpi=250): 137 | for f in meridional_current_data_files: 138 | f = f[64:] 139 | update(f) 140 | writer.grab_frame() 141 | -------------------------------------------------------------------------------- /Datathon-2/vector_field_visualization_constant_time.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.animation as manimation 7 | from mpl_toolkits.basemap import Basemap 8 | import 
matplotlib.cm as cm 9 | 10 | FFMpegWriter = manimation.writers['ffmpeg'] 11 | metadata = dict(title='Indian Ocean - Currents', artist='Swasti', 12 | comment='Movie support!') 13 | writer = FFMpegWriter(fps=2, metadata=metadata) 14 | 15 | BAD_FLAG = '-1.E+34' 16 | 17 | idx = 0 18 | 19 | # Data structure to store the value of current at location (LON,LAT) 20 | OCEAN = dict() 21 | date = "" 22 | LAT = set() 23 | LON = set() 24 | DEP = set() 25 | 26 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 27 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 28 | 29 | with open(meridional_curr_file_path,'r') as f: 30 | while(f): 31 | r = f.readline() 32 | if r != '': 33 | if idx >= 12: 34 | data = r.strip().split(',') 35 | date = data[0] 36 | lon = float(data[2]) 37 | lat = float(data[3]) 38 | dep = float(data[4]) 39 | 40 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 41 | meridional_current = np.nan 42 | 43 | if data[5] != BAD_FLAG: 44 | meridional_current = float(data[5])*-1 45 | if lon not in OCEAN: 46 | OCEAN[lon] = dict() 47 | OCEAN[lon,lat,dep] = [0,meridional_current] 48 | 49 | else: 50 | break 51 | 52 | 53 | idx += 1 54 | 55 | idx = 0 56 | with open(zonal_curr_file_path,'r') as f: 57 | while(f): 58 | r = f.readline() 59 | if r != '': 60 | if idx >= 12: 61 | data = r.strip().split(',') 62 | lon = float(data[2]) 63 | lat = float(data[3]) 64 | dep = float(data[4]) 65 | 66 | # If the data is a BAD_FLAG, convert it into NaN (so that it is ignored by matplotlib) 67 | zonal_current = np.nan 68 | 69 | if data[5] != BAD_FLAG: 70 | zonal_current = float(data[5])*-1 71 | 72 | OCEAN[lon,lat,dep][0] = zonal_current 73 | LAT.add(lat) 74 | LON.add(lon) 75 | DEP.add(dep) 76 | 77 | else: 78 | break 79 | 80 | 81 | idx += 1 82 | 83 | 84 | LON = list(LON) 85 | LON.sort() 86 | LAT = list(LAT) 87 | LAT.sort() 88 | DEP = list(DEP) 89 | DEP.sort() 90 | 91 | 92 | def update(dep): 93 | 94 | # Convert meridional_current into grid format 95 | meridional_current = np.zeros((len(LON),len(LAT)),np.float) 96 | zonal_current = np.zeros((len(LON),len(LAT)),np.float) 97 | 98 | for i in range(len(LON)): 99 | for j in range(len(LAT)): 100 | try: 101 | zc = OCEAN[LON[i],LAT[j],dep][0] 102 | mc = OCEAN[LON[i],LAT[j],dep][1] 103 | zonal_current[i][j] = zc 104 | meridional_current[i][j] = mc 105 | except: 106 | zonal_current[i][j] = np.nan 107 | meridional_current[i][j] = np.nan 108 | continue 109 | 110 | # Visualize the data 111 | plt.clf() 112 | map = Basemap(projection='cyl',llcrnrlon=min(LON),llcrnrlat=min(LAT),urcrnrlon=max(LON),urcrnrlat=max(LAT),lat_0=0,lon_0=74.9544) 113 | lon, lat = np.meshgrid(LON, LAT) 114 | map.drawcoastlines() 115 | map.drawparallels(np.arange(-90., 90., 10.), linewidth=2, labels=[1,0,0,0]) 116 | map.drawmeridians(np.arange(-180., 180., 10.), linewidth=2, labels=[0,0,0,1]) 117 | q = map.quiver(lon,lat,zonal_current.T,meridional_current.T,width=0.001, color='black', scale=150) 118 | _ = plt.quiverkey(q, 0.85, 0.85, 2,'2 m/sec', labelpos='E',coordinates='figure') 119 | plt.title("Currents (zonal and meridional) at depth = {}m in Indian Ocean on {}".format(dep,date.strip("\""))) 120 | 121 | return q 122 | 123 | 124 | fig = plt.figure(figsize=(16,8)) 125 | 126 | with writer.saving(fig, "writer_test.mp4", dpi=250): 127 | for dep in DEP: 128 | update(dep) 129 | writer.grab_frame() 130 | 
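# Side note (a minimal sketch, not part of the original pipeline): the nested loops that
# regrid OCEAN into 2-D arrays could also be expressed with a pandas pivot, assuming the
# samples were first loaded into a DataFrame with illustrative columns 'lon', 'lat' and a
# value column:
#
#   import pandas as pd
#
#   def to_grid(df, value_col):
#       """Return a 2-D array with sorted lon as rows and sorted lat as columns."""
#       return df.pivot_table(index='lon', columns='lat', values=value_col).to_numpy()
#
# Missing (lon, lat) combinations come out as NaN automatically, which contourf and quiver
# already ignore, so the try/except fallback above would not be needed.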
-------------------------------------------------------------------------------- /Datathon-2/volume_slice_rendering_currents.py: -------------------------------------------------------------------------------- 1 | # Import data 2 | import time 3 | import numpy as np 4 | 5 | meridional_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/meridional-current_3D/063_04_Nov_2004.txt" 6 | zonal_curr_file_path = "data/datathon2_data/OneDrive_1_12-09-2020/zonal-current_3D/063_04_Nov_2004.txt" 7 | 8 | idx = 0 9 | 10 | BAD_FLAG = '-1.E+34' 11 | 12 | date = "" 13 | LAT = set() 14 | LON = set() 15 | DEP = set() 16 | MAGNITUDE = [] 17 | 18 | OCEAN = dict() 19 | 20 | with open(meridional_curr_file_path,'r') as f: 21 | while(f): 22 | r = f.readline() 23 | if r != '': 24 | if idx >= 12: 25 | data = r.strip().split(',') 26 | date = data[0] 27 | lon = float(data[2]) 28 | lat = float(data[3]) 29 | dep = float(data[4]) 30 | meridional_current = data[5] 31 | if meridional_current == BAD_FLAG: 32 | meridional_current = np.nan 33 | else: 34 | meridional_current = float(data[5])*-1 35 | OCEAN[(lat,lon,dep)] = [0,meridional_current] 36 | LAT.add(lat) 37 | LON.add(lon) 38 | DEP.add(dep) 39 | 40 | 41 | else: 42 | break 43 | 44 | 45 | idx += 1 46 | 47 | idx = 0 48 | 49 | with open(zonal_curr_file_path,'r') as f: 50 | while(f): 51 | r = f.readline() 52 | if r != '': 53 | if idx >= 12: 54 | data = r.strip().split(',') 55 | date = data[0] 56 | lon = float(data[2]) 57 | lat = float(data[3]) 58 | dep = float(data[4]) 59 | zonal_current = data[5] 60 | if zonal_current == BAD_FLAG: 61 | zonal_current = np.nan 62 | else: 63 | zonal_current = float(data[5])*-1 64 | OCEAN[(lat,lon,dep)][0] = zonal_current 65 | LAT.add(lat) 66 | LON.add(lon) 67 | DEP.add(dep) 68 | if OCEAN[(lat,lon,dep)][0] == np.nan or OCEAN[(lat,lon,dep)][1] == np.nan: 69 | mag = np.nan 70 | else: 71 | mag = np.sqrt(OCEAN[(lat,lon,dep)][0]**2 + OCEAN[(lat,lon,dep)][1]**2) 72 | OCEAN[(lat,lon,dep)] = mag 73 | MAGNITUDE.append(mag) 74 | 75 | 76 | else: 77 | break 78 | 79 | 80 | idx += 1 81 | 82 | MAX_MAG = np.nanmax(MAGNITUDE) 83 | MIN_MAG = np.nanmin(MAGNITUDE) 84 | 85 | LAT = list(LAT) 86 | LAT.sort() 87 | LON = list(LON) 88 | LON.sort() 89 | DEP = list(DEP) 90 | DEP.sort() 91 | 92 | MIN_LAT, MAX_LAT = min(LAT),max(LAT) 93 | MIN_LON, MAX_LON = min(LON),max(LON) 94 | 95 | r,c = len(LON),len(LAT) 96 | 97 | def getMagnitudeForDepth(depth): 98 | mag = [] 99 | for x in LON: 100 | arr = [] 101 | for y in LAT: 102 | if (y,x,depth) not in OCEAN or type(OCEAN[(y,x,depth)]) == list: 103 | arr.append(np.nan) 104 | else: 105 | arr.append(OCEAN[(y,x,depth)]) 106 | mag.append(np.array(arr)) 107 | mag = np.array(mag) 108 | mag[np.isnan(mag)] = -100 109 | return mag 110 | 111 | # Define frames 112 | import plotly.graph_objects as go 113 | 114 | fig = go.Figure(frames=[go.Frame(data=go.Surface( 115 | z=dep * np.ones((r, c)), 116 | surfacecolor=getMagnitudeForDepth(dep).T, 117 | cmin=MAX_MAG - MAX_MAG/0.99, cmax=MAX_MAG, 118 | colorbar_title="Magnitude of currents (m/sec)", 119 | colorscale=[[0, 'white'], 120 | [0.01, 'white'], 121 | [0.01, 'blue'], 122 | [1, 'red']] 123 | ), 124 | name=str(dep) 125 | ) for dep in DEP]) 126 | 127 | # Add data to be displayed before animation starts 128 | fig.add_trace(go.Surface( 129 | z=225.0 * np.ones((r, c)), 130 | surfacecolor=getMagnitudeForDepth(5.0).T, 131 | cmin=MAX_MAG - MAX_MAG/0.99, cmax=MAX_MAG, 132 | colorbar_title="Magnitude of currents (m/sec)", 133 | colorscale=[[0, 'white'], 134 | [0.01, 'white'], 135 | [0.01, 'blue'], 
136 | [1, 'red']] 137 | )) 138 | 139 | def frame_args(duration): 140 | return { 141 | "frame": {"duration": duration}, 142 | "mode": "immediate", 143 | "fromcurrent": True, 144 | "transition": {"duration": duration, "easing": "linear"}, 145 | } 146 | 147 | sliders = [ 148 | { 149 | "pad": {"b": 10, "t": 60}, 150 | "len": 0.9, 151 | "x": 0.1, 152 | "y": 0, 153 | "steps": [ 154 | { 155 | "args": [[f.name], frame_args(0)], 156 | "label": str(k), 157 | "method": "animate", 158 | } 159 | for k, f in enumerate(fig.frames) 160 | ], 161 | } 162 | ] 163 | 164 | # Layout 165 | fig.update_layout( 166 | title='Indian Ocean Magnitude of zonal and meridional currents with variation in depth in meters (z-direction) on 4 November 2004', 167 | width=1200, 168 | height=800, 169 | scene=dict( 170 | zaxis=dict(range=[5.0, 225.0],autorange=False,title='Depth in meters'), 171 | xaxis = dict(title='Longitude'), 172 | yaxis = dict(title='Latitude'), 173 | aspectratio=dict(x=1.5, y=1, z=1), 174 | ), 175 | updatemenus = [ 176 | { 177 | "buttons": [ 178 | { 179 | "args": [None, frame_args(50)], 180 | "label": "▶", # play symbol 181 | "method": "animate", 182 | }, 183 | { 184 | "args": [[None], frame_args(0)], 185 | "label": "◼", # pause symbol 186 | "method": "animate", 187 | }, 188 | ], 189 | "direction": "left", 190 | "pad": {"r": 10, "t": 70}, 191 | "type": "buttons", 192 | "x": 0.1, 193 | "y": 0, 194 | } 195 | ], 196 | sliders=sliders 197 | ) 198 | 199 | fig.show() -------------------------------------------------------------------------------- /Datathon-2/volume_slice_rendering_scalar.py: -------------------------------------------------------------------------------- 1 | # Import data 2 | import time 3 | import numpy as np 4 | 5 | file_path = "data/datathon2_data/OneDrive_1_12-09-2020/PotentialTemperature_3D/001_29_Dec_2003.txt" 6 | 7 | idx = 0 8 | 9 | BAD_FLAG = '-1.E+34' 10 | 11 | date = "" 12 | LAT = set() 13 | LON = set() 14 | DEP = set() 15 | SALT = [] 16 | 17 | OCEAN = dict() 18 | 19 | with open(file_path,'r') as f: 20 | while(f): 21 | r = f.readline() 22 | if r != '': 23 | if idx >= 11: 24 | data = r.strip().split(',') 25 | date = data[0] 26 | lon = float(data[2]) 27 | lat = float(data[3]) 28 | dep = float(data[4]) 29 | salt = data[5] 30 | if salt == BAD_FLAG: 31 | salt = np.nan 32 | else: 33 | salt = float(salt) 34 | OCEAN[(lat,lon,dep)] = salt 35 | LAT.add(lat) 36 | LON.add(lon) 37 | DEP.add(dep) 38 | SALT.append(salt) 39 | 40 | 41 | else: 42 | break 43 | 44 | 45 | idx += 1 46 | 47 | MAX_SALT = np.nanmax(SALT) 48 | MIN_SALT = np.nanmin(SALT) 49 | 50 | LAT = list(LAT) 51 | LAT.sort() 52 | LON = list(LON) 53 | LON.sort() 54 | DEP = list(DEP) 55 | DEP.sort() 56 | 57 | MIN_LAT, MAX_LAT = min(LAT),max(LAT) 58 | MIN_LON, MAX_LON = min(LON),max(LON) 59 | 60 | r,c = len(LON),len(LAT) 61 | 62 | def getSaltForDepth(depth): 63 | salt = [] 64 | for x in LON: 65 | arr = [] 66 | for y in LAT: 67 | arr.append(OCEAN[(y,x,depth)]) 68 | salt.append(np.array(arr)) 69 | return np.array(salt) 70 | 71 | # Define frames 72 | import plotly.graph_objects as go 73 | 74 | fig = go.Figure(frames=[go.Frame(data=go.Surface( 75 | z=dep * np.ones((r, c)), 76 | surfacecolor=getSaltForDepth(dep).T, 77 | cmin=MIN_SALT, cmax=MAX_SALT, 78 | colorbar_title="Potential Temperature (degree Celcius)", 79 | colorscale=[[0, 'white'], 80 | [0.01, 'white'], 81 | [0.01, 'red'], 82 | [1, 'yellow']], 83 | # [1, 'green']] 84 | ), 85 | name=str(dep) 86 | ) for dep in DEP]) 87 | 88 | # Add data to be displayed before animation starts 89 | 
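# Initial surface shown before playback starts. Missing cells were filled with -100 in
# getMagnitudeForDepth; that sentinel lies below cmin, so it clips to the white band of
# the colorscale and reads as "no data".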
fig.add_trace(go.Surface( 90 | z=5.0 * np.ones((r, c)), 91 | surfacecolor=getSaltForDepth(5.0).T, 92 | cmin=MIN_SALT, cmax=MAX_SALT, 93 | colorbar_title="Potential Temperature (degree Celcius)", 94 | colorscale=[[0, 'white'], 95 | [0.01, 'red'], 96 | [1, 'yellow']], 97 | # [1, 'green']] 98 | )) 99 | 100 | 101 | def frame_args(duration): 102 | return { 103 | "frame": {"duration": duration}, 104 | "mode": "immediate", 105 | "fromcurrent": True, 106 | "transition": {"duration": duration, "easing": "linear"}, 107 | } 108 | 109 | sliders = [ 110 | { 111 | "pad": {"b": 10, "t": 60}, 112 | "len": 0.9, 113 | "x": 0.1, 114 | "y": 0, 115 | "steps": [ 116 | { 117 | "args": [[f.name], frame_args(0)], 118 | "label": str(k), 119 | "method": "animate", 120 | } 121 | for k, f in enumerate(fig.frames) 122 | ], 123 | } 124 | ] 125 | 126 | # Layout 127 | fig.update_layout( 128 | title='Indian Ocean Potential Temperature with variation in depth in meters (z-direction) on 29 December 2003', 129 | width=1200, 130 | height=800, 131 | scene=dict( 132 | zaxis=dict(range=[5.0, 225.0],autorange=False,title='Depth in meters'), 133 | xaxis = dict(title='Longitude'), 134 | yaxis = dict(title='Latitude'), 135 | aspectratio=dict(x=1.5, y=1, z=1), 136 | ), 137 | updatemenus = [ 138 | { 139 | "buttons": [ 140 | { 141 | "args": [None, frame_args(50)], 142 | "label": "▶", # play symbol 143 | "method": "animate", 144 | }, 145 | { 146 | "args": [[None], frame_args(0)], 147 | "label": "◼", # pause symbol 148 | "method": "animate", 149 | }, 150 | ], 151 | "direction": "left", 152 | "pad": {"r": 10, "t": 70}, 153 | "type": "buttons", 154 | "x": 0.1, 155 | "y": 0, 156 | } 157 | ], 158 | sliders=sliders 159 | ) 160 | 161 | fig.show() -------------------------------------------------------------------------------- /Datathon-3/Datathon_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-3/Datathon_3.pdf -------------------------------------------------------------------------------- /Datathon-3/covid_death_graph_networkx.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | import matplotlib.pyplot as plt 5 | 6 | df_in = pd.read_csv('time_series_covid_19_deaths.csv') 7 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 8 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 9 | df_in.iloc[:,:].head() 10 | 11 | num_deaths = {} 12 | for index in df_in.index: 13 | num_deaths[df_in['Country/Region'][index]] = float(df_in['9/23/20'][index])/246 14 | 15 | 16 | dates_vec = list(df_in.columns)[1:] 17 | average_time_vec = [None] * df_in.shape[0] 18 | 19 | for i, row_index in enumerate(df_in.index): 20 | 21 | weighted_sum, total_deaths = 0, 0 22 | 23 | for j, date in enumerate(dates_vec): 24 | current_term = df_in.at[row_index, date] 25 | weighted_sum += j * current_term 26 | total_deaths += current_term 27 | 28 | average_time_vec[i] = weighted_sum / total_deaths 29 | 30 | df_in['avg_time'] = average_time_vec 31 | 32 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 33 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 34 | 35 | line_index = 0 36 | epsilon = 0.001 37 | for i in range(0, df_in.shape[0] - 1): 38 | for j in range(i + 1, df_in.shape[0]): 39 | index_i, index_j = 
df_in.index[i], df_in.index[j] 40 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 41 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 42 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 43 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 44 | list_d[line_index] = abs(diff_time) 45 | line_index += 1 46 | 47 | df_graph = pd.DataFrame(dict( 48 | Country1 = list_country1, 49 | Country2 = list_country2, 50 | Weight = list_w, 51 | Distance = list_d 52 | )) 53 | 54 | 55 | df_graph = df_graph.dropna(axis=0) 56 | df_graph.to_csv('df_graph.csv', index=False) 57 | 58 | 59 | covid_graph = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 60 | sparse_covid_graph = nx.Graph(((u, v, e) for u,v,e in covid_graph.edges(data=True) if e['Weight'] > 0.0 and e['Weight'] < 0.0150)) 61 | sparse_vertex = set() 62 | for (u,v) in sparse_covid_graph.edges(): 63 | sparse_vertex.add(u) 64 | sparse_vertex.add(v) 65 | sparse_vertex = list(sparse_vertex) 66 | sparse_vertex.sort() 67 | d = [e['Weight'] for u,v,e in covid_graph.edges(data=True)] 68 | print(np.mean(np.array(d))) 69 | # nx.draw(sparse_covid_graph,with_labels=True) 70 | 71 | vertex_attributes = {u:num_deaths[u] for u in sparse_vertex} 72 | nx.draw(sparse_covid_graph, with_labels=True,nodelist=vertex_attributes.keys(), node_size=[v*100 for v in vertex_attributes.values()]) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /Datathon-3/covid_geo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import plotly.graph_objects as go 7 | from mpl_toolkits.basemap import Basemap 8 | 9 | deaths = 'time_series_covid_19_deaths.csv' 10 | recovered = 'time_series_covid_19_recovered.csv' 11 | 12 | def read_data(filename): 13 | df_in = pd.read_csv(filename) 14 | dict_date = {date:'sum' for date in list(df_in.columns[4:54])} 15 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 16 | print(df_in.columns) 17 | return df_in 18 | 19 | def get_lat_lon(filename): 20 | df_in = pd.read_csv(filename) 21 | df_lat_lon = df_in.iloc[:,:4] 22 | # dict_lat_lon = {val:'mean' for val in list(df_lat_lon.columns[2:])} 23 | # df_lat_lon = df_lat_lon.groupby(['Country/Region']).agg(dict_lat_lon).reset_index() 24 | df_lat_lon = df_lat_lon.groupby(['Country/Region']).nth(-1).reset_index() 25 | return df_lat_lon 26 | 27 | def get_transpose(df_in): 28 | invert_columns = df_in['Country/Region'].unique() 29 | invert_index = df_in.columns[1:] 30 | 31 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 32 | invert_df = invert_df.fillna(0) 33 | for i in tqdm(df_in['Country/Region']): 34 | for j in invert_index: 35 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 36 | 37 | return invert_df 38 | 39 | def get_correlation_edges(invert_df): 40 | corr_matrix = invert_df.corr() 41 | countries = corr_matrix.index.values 42 | corr_matrix = np.asmatrix(corr_matrix) 43 | 44 | edges = [] 45 | for i in range(len(corr_matrix)): 46 | for j in range(i+1, len(corr_matrix[i])): 47 | if pd.isnull(corr_matrix[i][j]) == False: 48 | edges.append((countries[i],countries[j],{'Weight':corr_matrix[i][j]})) 49 | 50 | return edges 51 | 52 | def get_graph_from_edges(edges): 53 | G = nx.Graph(edges) 54 | return G 55 | 56 | def get_node_weight(df_in): 57 | num_cases = {} 
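# Weight each country by its cumulative count on 3/11/20, scaled by 1/50 so the value can
# be used directly as a node size when the graph is drawn on the basemap.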
58 | for index in df_in.index: 59 | num_cases[df_in['Country/Region'][index]] = float(df_in['3/11/20'][index])/50 60 | 61 | return num_cases 62 | 63 | def get_average_time(df_in): 64 | dates_vec = list(df_in.columns)[1:] 65 | average_time_array = [None] * df_in.shape[0] 66 | 67 | for i, row in enumerate(df_in.index): 68 | 69 | weighted_sum, total_cases = 0, 0 70 | 71 | for j, date in enumerate(dates_vec): 72 | current = df_in.at[row, date] 73 | weighted_sum += j * current 74 | total_cases += current 75 | 76 | average_time_array[i] = weighted_sum / total_cases 77 | 78 | df_in['avg_time'] = average_time_array 79 | 80 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 81 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 82 | 83 | line_index = 0 84 | epsilon = 0.001 85 | for i in range(0, df_in.shape[0] - 1): 86 | for j in range(i + 1, df_in.shape[0]): 87 | index_i, index_j = df_in.index[i], df_in.index[j] 88 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 89 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 90 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 91 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 92 | list_d[line_index] = abs(diff_time) 93 | line_index += 1 94 | 95 | df_graph = pd.DataFrame(dict( 96 | Country1 = list_country1, 97 | Country2 = list_country2, 98 | Weight = list_w 99 | )) 100 | 101 | df_graph = df_graph.dropna(axis=0) 102 | 103 | return df_graph 104 | 105 | def get_graph_from_df(df_graph): 106 | G = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 107 | return G 108 | 109 | def get_sparse_graph(graph, num_cases, min_threshold, max_threshold): 110 | sparse_graph = nx.Graph(((u, v, e) for u,v,e in graph.edges(data=True) if e['Weight'] > min_threshold and e['Weight'] < max_threshold)) 111 | sparse_vertex = set() 112 | for (u,v) in sparse_graph.edges(): 113 | sparse_vertex.add(u) 114 | sparse_vertex.add(v) 115 | sparse_vertex = list(sparse_vertex) 116 | sparse_vertex.sort() 117 | vertex_attributes = {u:num_cases[u] for u in sparse_vertex} 118 | 119 | return sparse_graph, vertex_attributes 120 | 121 | def get_graph_stats(G, cases_dict): 122 | graph_dict = {(u, v): e for (u,v,e) in G.edges(data=True)} 123 | weights = [e['Weight'] for (u,v,e) in G.edges(data=True)] 124 | print("Maximum weight: ", max(weights)) 125 | print("Minimum weight: ", min(weights)) 126 | print("Mean weight: ", np.mean(weights)) 127 | 128 | top_10_cases = list(zip(cases_dict.values(),cases_dict.keys())) 129 | top_10_cases.sort(reverse=True) 130 | top_10_cases = top_10_cases[:10] 131 | return graph_dict, top_10_cases 132 | 133 | df_deaths = read_data(deaths) 134 | df_country = get_lat_lon(deaths) 135 | num_deaths = get_node_weight(df_deaths) 136 | df_death_graph = get_average_time(df_deaths) 137 | deaths_graph = get_graph_from_df(df_death_graph) 138 | 139 | """Sparse death graph""" 140 | 141 | # sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 10, 100) 142 | # e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 143 | # nx.draw(sparse_deaths_graph, with_labels=True,pos=positions,edge_color=e_color, width=3, edge_cmap=plt.cm.hot, nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 144 | 145 | """Top 10 graph""" 146 | graph_d, top_10 = get_graph_stats(deaths_graph, num_deaths) 147 | edges = [] 148 | death_attributes = {v:w for w,v in top_10} 
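# Collect edges between the ten countries with the highest counts, checking both
# orientations of each pair because graph_d keys each undirected edge under a single (u, v).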
149 | for i in range(10): 150 | for j in range(i+1, 10): 151 | if (top_10[i][1],top_10[j][1]) in graph_d: 152 | edges.append((top_10[i][1],top_10[j][1],graph_d[top_10[i][1],top_10[j][1]])) 153 | elif (top_10[j][1],top_10[i][1]) in graph_d: 154 | edges.append((top_10[j][1],top_10[i][1],graph_d[top_10[j][1],top_10[i][1]])) 155 | 156 | sparse_deaths_graph = nx.Graph(edges) 157 | sparse_deaths_graph, death_attributes = get_sparse_graph(sparse_deaths_graph, num_deaths, 0, 100) 158 | e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 159 | # nx.draw(sparse_deaths_graph, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 160 | 161 | 162 | # Set up base map 163 | plt.figure(figsize=(15,20)) 164 | m = Basemap( 165 | projection='merc', 166 | llcrnrlon=-180, 167 | llcrnrlat=-60, 168 | urcrnrlon=180, 169 | urcrnrlat=70, 170 | lat_ts=0, 171 | resolution='l', 172 | suppress_ticks=True) 173 | 174 | # import long lat as m attribute 175 | mx, my = m(df_country['Long'].values, df_country['Lat'].values) 176 | pos = {} 177 | for idx, elem in enumerate (df_country['Country/Region']): 178 | pos[elem] = (mx[idx], my[idx]) 179 | 180 | # draw nodes and edges and over aly on basemap 181 | nx.draw_networkx_nodes(G = sparse_deaths_graph, pos = pos, node_list = sparse_deaths_graph.nodes(), node_color = 'r', alpha = 0.6, 182 | node_size = [num_deaths[s]*5 for s in sparse_deaths_graph.nodes()], with_labels=True) 183 | nx.draw_networkx_edges(G = sparse_deaths_graph, pos = pos, edge_color=e_color, width=2, edge_cmap=plt.cm.hot, 184 | alpha=0.6, arrows = False) 185 | nx.draw_networkx_labels(G = sparse_deaths_graph, pos = pos, font_size=15, font_color='y', 186 | labels = {x:x for x in sparse_deaths_graph.nodes() if num_deaths[x] >= 0}) 187 | 188 | m.drawcountries(linewidth = 1) 189 | m.drawstates(linewidth = 0.2) 190 | m.drawcoastlines(linewidth=1) 191 | plt.tight_layout() 192 | sm = plt.cm.ScalarMappable(cmap=plt.cm.hot, norm=plt.Normalize(vmin = min(e_color), vmax=max(e_color))) 193 | cbar = plt.colorbar(sm) 194 | cbar.set_label("Edge weights") 195 | plt.title("Top 10 countries with maximum deaths due to COVID-19 by the end of 11/3/20") 196 | plt.show() -------------------------------------------------------------------------------- /Datathon-3/covid_graph.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import networkx as nx 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import plotly.graph_objects as go 7 | from mpl_toolkits.basemap import Basemap 8 | 9 | deaths = 'time_series_covid_19_deaths.csv' 10 | recovered = 'time_series_covid_19_recovered.csv' 11 | 12 | def read_data(filename): 13 | df_in = pd.read_csv(filename) 14 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 15 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 16 | return df_in 17 | 18 | def get_transpose(df_in): 19 | invert_columns = df_in['Country/Region'].unique() 20 | invert_index = df_in.columns[1:] 21 | 22 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 23 | invert_df = invert_df.fillna(0) 24 | for i in tqdm(df_in['Country/Region']): 25 | for j in invert_index: 26 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 27 | 28 | return invert_df 29 | 30 | def get_correlation_edges(invert_df): 31 | corr_matrix = invert_df.corr() 32 | countries = corr_matrix.index.values 33 | # corr_matrix = 
np.asmatrix(corr_matrix) 34 | corr_matrix = corr_matrix.values 35 | print(corr_matrix) 36 | edges = [] 37 | for i in range(len(corr_matrix)): 38 | for j in range(i+1, len(corr_matrix[i])): 39 | if pd.isnull(corr_matrix[i][j]) == False: 40 | edges.append((countries[i],countries[j],{'Weight':corr_matrix[i][j]})) 41 | 42 | return edges 43 | 44 | def get_graph_from_edges(edges): 45 | G = nx.Graph(edges) 46 | return G 47 | 48 | def get_node_weight(df_in): 49 | num_cases = {} 50 | for index in df_in.index: 51 | num_cases[df_in['Country/Region'][index]] = float(df_in['9/23/20'][index])/246 52 | 53 | return num_cases 54 | 55 | def get_average_time(df_in): 56 | dates_vec = list(df_in.columns)[1:] 57 | average_time_array = [None] * df_in.shape[0] 58 | 59 | for i, row in enumerate(df_in.index): 60 | 61 | weighted_sum, total_cases = 0, 0 62 | 63 | for j, date in enumerate(dates_vec): 64 | current = df_in.at[row, date] 65 | weighted_sum += j * current 66 | total_cases += current 67 | 68 | average_time_array[i] = weighted_sum / total_cases 69 | 70 | df_in['avg_time'] = average_time_array 71 | 72 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 73 | list_country1, list_country2, list_w, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines, [None] * n_lines 74 | 75 | line_index = 0 76 | epsilon = 0.001 77 | for i in range(0, df_in.shape[0] - 1): 78 | for j in range(i + 1, df_in.shape[0]): 79 | index_i, index_j = df_in.index[i], df_in.index[j] 80 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 81 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 82 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 83 | list_w[line_index] = (1 / (abs(diff_time) + epsilon)) 84 | list_d[line_index] = abs(diff_time) 85 | line_index += 1 86 | 87 | df_graph = pd.DataFrame(dict( 88 | Country1 = list_country1, 89 | Country2 = list_country2, 90 | Weight = list_w 91 | )) 92 | 93 | df_graph = df_graph.dropna(axis=0) 94 | 95 | return df_graph 96 | 97 | def get_graph_from_df(df_graph): 98 | G = nx.from_pandas_edgelist(df_graph, 'Country1', 'Country2', 'Weight') 99 | return G 100 | 101 | def get_sparse_graph(graph, num_cases, min_threshold, max_threshold): 102 | sparse_graph = nx.Graph(((u, v, e) for u,v,e in graph.edges(data=True) if e['Weight'] >= min_threshold and e['Weight'] <= max_threshold)) 103 | sparse_vertex = set() 104 | for (u,v) in sparse_graph.edges(): 105 | sparse_vertex.add(u) 106 | sparse_vertex.add(v) 107 | sparse_vertex = list(sparse_vertex) 108 | sparse_vertex.sort() 109 | vertex_attributes = {u:num_cases[u] for u in sparse_vertex} 110 | 111 | return sparse_graph, vertex_attributes 112 | 113 | def get_graph_stats(G, cases_dict): 114 | graph_dict = {(u, v): e for (u,v,e) in G.edges(data=True)} 115 | weights = [e['Weight'] for (u,v,e) in G.edges(data=True)] 116 | print("Maximum weight: ", max(weights)) 117 | print("Minimum weight: ", min(weights)) 118 | print("Mean weight: ", np.mean(weights)) 119 | 120 | top_10_cases = list(zip(cases_dict.values(),cases_dict.keys())) 121 | top_10_cases.sort(reverse=True) 122 | top_10_cases = top_10_cases[:10] 123 | return graph_dict, top_10_cases 124 | 125 | df_deaths = read_data(deaths) 126 | num_deaths = get_node_weight(df_deaths) 127 | df_death_graph = get_average_time(df_deaths) 128 | deaths_graph = get_graph_from_df(df_death_graph) 129 | 130 | """Sparse deaths correlation graph""" 131 | # invert_df = get_transpose(df_deaths) 132 | # corr_edges = get_correlation_edges(invert_df) 133 | # deaths_graph = 
nx.Graph(corr_edges) 134 | # sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 0.998, 1) 135 | # positions = nx.circular_layout(sparse_deaths_graph) 136 | # nx.draw(sparse_deaths_graph, pos=positions, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 137 | 138 | """Sparse death graph""" 139 | 140 | sparse_deaths_graph, death_attributes = get_sparse_graph(deaths_graph, num_deaths, 10, 50) 141 | positions=nx.circular_layout(sparse_deaths_graph) 142 | e_color = sorted([e['Weight'] for u,v,e in sparse_deaths_graph.edges(data=True)]) 143 | nx.draw(sparse_deaths_graph, with_labels=True,pos=positions,edge_color=e_color, width=3, edge_cmap=plt.cm.hot, nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 144 | sm = plt.cm.ScalarMappable(cmap=plt.cm.hot, norm=plt.Normalize(vmin = min(e_color), vmax=max(e_color))) 145 | cbar = plt.colorbar(sm) 146 | cbar.set_label("Edge weights") 147 | 148 | """Top 10 graph""" 149 | # graph_d, top_10 = get_graph_stats(deaths_graph, num_deaths) 150 | # edges = [] 151 | # death_attributes = {v:w for w,v in top_10} 152 | # for i in range(10): 153 | # for j in range(i+1, 10): 154 | # if (top_10[i][1],top_10[j][1]) in graph_d: 155 | # edges.append((top_10[i][1],top_10[j][1],graph_d[top_10[i][1],top_10[j][1]])) 156 | # elif (top_10[j][1],top_10[i][1]) in graph_d: 157 | # edges.append((top_10[j][1],top_10[i][1],graph_d[top_10[j][1],top_10[i][1]])) 158 | 159 | # sparse_deaths_graph = nx.Graph(edges) 160 | # sparse_deaths_graph, death_attributes = get_sparse_graph(sparse_deaths_graph, num_deaths, 0, 0.08) 161 | # nx.draw(sparse_deaths_graph, with_labels=True,nodelist=death_attributes.keys(), node_size=[v*10 for v in death_attributes.values()]) 162 | 163 | plt.show() 164 | 165 | """Graph visualization using plotly""" 166 | # positions=nx.circular_layout(sparse_deaths_graph) 167 | 168 | # edge_x = [] 169 | # edge_y = [] 170 | # for edge in sparse_deaths_graph.edges(): 171 | # x0, y0 = positions[edge[0]] 172 | # x1, y1 = positions[edge[1]] 173 | # edge_x.append(x0) 174 | # edge_x.append(x1) 175 | # edge_x.append(None) 176 | # edge_y.append(y0) 177 | # edge_y.append(y1) 178 | # edge_y.append(None) 179 | 180 | # edge_trace = go.Scatter( 181 | # x=edge_x, y=edge_y, 182 | # line=dict(width=0.5, color='#888'), 183 | # hoverinfo='none', 184 | # mode='lines') 185 | 186 | # node_x = [] 187 | # node_y = [] 188 | # for node in sparse_deaths_graph.nodes(): 189 | # x, y = positions[node] 190 | # node_x.append(x) 191 | # node_y.append(y) 192 | 193 | # node_trace = go.Scatter( 194 | # x=node_x, y=node_y, 195 | # mode='markers', 196 | # hoverinfo='text', 197 | # marker=dict( 198 | # showscale=True, 199 | # # colorscale options 200 | # #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' | 201 | # #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' | 202 | # #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' | 203 | # colorscale='YlGnBu', 204 | # reversescale=True, 205 | # color=[], 206 | # size=10, 207 | # colorbar=dict( 208 | # thickness=15, 209 | # title='Number of deaths', 210 | # xanchor='left', 211 | # titleside='right' 212 | # ), 213 | # line_width=2)) 214 | 215 | # node_cases = [] 216 | # node_text = [] 217 | # for node in sparse_deaths_graph.nodes(): 218 | # node_cases.append(num_deaths[node]) 219 | # node_text.append("# of deaths in {}: {}".format(node,num_deaths[node])) 220 | 221 | # node_trace.marker.color = node_cases 222 | # 
node_trace.text = node_text 223 | 224 | # fig = go.Figure(data=[edge_trace, node_trace], 225 | # layout=go.Layout( 226 | # title='
Covid network graph', 227 | # titlefont_size=16, 228 | # showlegend=False, 229 | # hovermode='closest', 230 | # margin=dict(b=20,l=5,r=5,t=40), 231 | # annotations=[ dict( 232 | # text="Covid graph", 233 | # showarrow=False, 234 | # xref="paper", yref="paper", 235 | # x=0.005, y=-0.002 ) ], 236 | # xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), 237 | # yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)) 238 | # ) 239 | # fig.show() -------------------------------------------------------------------------------- /Datathon-4/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/Report.pdf -------------------------------------------------------------------------------- /Datathon-4/__pycache__/optimal_leaf_ordering.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/__pycache__/optimal_leaf_ordering.cpython-37.pyc -------------------------------------------------------------------------------- /Datathon-4/__pycache__/traveling_sales_person.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-4/__pycache__/traveling_sales_person.cpython-37.pyc -------------------------------------------------------------------------------- /Datathon-4/data_matrix.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | import matplotlib.pyplot as plt 5 | from matplotlib import cm 6 | import seaborn 7 | 8 | from traveling_sales_person import TravelingSalesPerson 9 | from optimal_leaf_ordering import OptimalLeafOrdering 10 | 11 | """Data file paths""" 12 | deaths = "../Datathon-3/time_series_covid_19_deaths.csv" 13 | recovered = "../Datathon-3/time_series_covid_19_recovered.csv" 14 | confirmed = "../Datathon-3/time_series_covid_19_confirmed.csv" 15 | 16 | def read_data(filename): 17 | df_in = pd.read_csv(filename) 18 | dict_date = {date:'sum' for date in list(df_in.columns[4:])} 19 | df_in = df_in.groupby(['Country/Region']).agg(dict_date).reset_index() 20 | assert df_in.isnull().values.any() == False, "Dataframe has null values" 21 | return df_in 22 | 23 | def get_transpose(df_in): 24 | invert_columns = df_in['Country/Region'].unique() 25 | invert_index = df_in.columns[1:] 26 | 27 | invert_df = pd.DataFrame(index=invert_index,columns=invert_columns) 28 | invert_df = invert_df.fillna(0) 29 | for i in tqdm(df_in['Country/Region']): 30 | for j in invert_index: 31 | invert_df.at[j,i] += list(df_in[df_in['Country/Region'] == i][j])[0] 32 | 33 | return invert_df 34 | 35 | def get_correlation_edges(invert_df): 36 | corr_matrix = invert_df.corr() 37 | corr_matrix = corr_matrix.fillna(0) 38 | 39 | # Change the range of similarity values 40 | corr_matrix = 1 + corr_matrix 41 | 42 | return corr_matrix 43 | 44 | def get_average_time(df_in): 45 | dates_vec = list(df_in.columns)[1:] 46 | average_time_array = [None] * df_in.shape[0] 47 | 48 | for i, row in enumerate(df_in.index): 49 | 50 | weighted_sum, total_cases = 0, 0 51 | 52 | for j, date in enumerate(dates_vec): 53 | current = df_in.at[row, date] 54 | weighted_sum 
+= j * current 55 | total_cases += current 56 | 57 | average_time_array[i] = weighted_sum / total_cases 58 | 59 | df_in['avg_time'] = average_time_array 60 | 61 | n_lines = int((df_in.shape[0] * (df_in.shape[0] - 1)) / 2) 62 | list_country1, list_country2, list_d = [None] * n_lines, [None] * n_lines, [None] * n_lines 63 | 64 | line_index = 0 65 | for i in range(0, df_in.shape[0] - 1): 66 | for j in range(i + 1, df_in.shape[0]): 67 | index_i, index_j = df_in.index[i], df_in.index[j] 68 | list_country1[line_index] = df_in.at[index_i, 'Country/Region'] 69 | list_country2[line_index] = df_in.at[index_j, 'Country/Region'] 70 | diff_time = df_in.at[index_i, 'avg_time'] - df_in.at[index_j,'avg_time'] 71 | list_d[line_index] = abs(diff_time) 72 | line_index += 1 73 | 74 | print(list_d, len(list_d)) 75 | countries = list(set(list_country1+list_country2)) 76 | countries = {countries[i]:i for i in range(len(countries))} 77 | # print(countries) 78 | 79 | matrix = np.full((len(countries), len(countries)),100) 80 | for i in range(len(countries)): 81 | matrix[i,i] = 0 82 | 83 | idx = 0 84 | for i,j in zip(list_country1,list_country2): 85 | if np.isnan(list_d[idx]): 86 | list_d[idx] = 100 87 | matrix[countries[i],countries[j]] = list_d[idx] 88 | matrix[countries[j],countries[i]] = list_d[idx] 89 | idx += 1 90 | 91 | return matrix 92 | 93 | 94 | """Average time matrix""" 95 | df_deaths = read_data(deaths) 96 | # deaths_matrix = get_average_time(df_deaths) 97 | 98 | """Correlation matrix""" 99 | invert_df = get_transpose(df_deaths) 100 | corr_edges = get_correlation_edges(invert_df) 101 | 102 | """Choose which matrix to visualize""" 103 | data = corr_edges 104 | # data = deaths_matrix 105 | 106 | tsp = TravelingSalesPerson(data, data_type='data') 107 | # olo = OptimalLeafOrdering(pd.DataFrame(data), data_type='data', metric='euclidean', method='complete') 108 | seaborn.heatmap(data, cmap = cm.Blues,xticklabels=True, yticklabels=True) 109 | plt.figure() 110 | 111 | # Visualize the output data 112 | Y = tsp.get_ordered_data() 113 | # Y = olo.get_ordered_data() 114 | seaborn.heatmap(Y, cmap = cm.Blues,xticklabels=True, yticklabels=True) 115 | plt.show() 116 | 117 | -------------------------------------------------------------------------------- /Datathon-4/optimal_leaf_ordering.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy.cluster import hierarchy 4 | from scipy.spatial import distance 5 | from scipy.cluster.hierarchy import linkage 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | import seaborn 9 | 10 | class OptimalLeafOrdering: 11 | def __init__(self, data, data_type='data', metric="euclidean", method='single'): 12 | 13 | self.data = data 14 | self.data_type = data_type 15 | self.metric = metric 16 | self.method = method 17 | 18 | def get_ordered_data(self): 19 | row_order = self.compute_dendrogram(axis=0) 20 | col_order = self.compute_dendrogram(axis=1) 21 | return self.data.iloc[row_order, col_order] 22 | 23 | def compute_dendrogram(self, axis=0): 24 | if axis == 1: 25 | data = self.data.T 26 | else: 27 | data = self.data 28 | 29 | # Calculate pairwise distances and linkage 30 | if self.data_type == 'data': 31 | pairwise_dists = distance.pdist(data.values, metric=self.metric) 32 | elif self.data_type == 'dist': 33 | pairwise_dists = [] 34 | for i in range(len(data.values)): 35 | for j in range(i+1, len(data.values)): 36 | pairwise_dists.append(data.values[i][j]) 37 | pairwise_dists = 
np.array(pairwise_dists) 38 | else: 39 | raise NotImplementedError 40 | linkage = hierarchy.linkage(pairwise_dists, method=self.method) 41 | 42 | self.M = {} 43 | tree = hierarchy.to_tree(linkage) 44 | dists = distance.squareform(pairwise_dists) 45 | tree = self.order_tree(tree, dists) 46 | order = self.leaves(tree) 47 | del self.M 48 | return order 49 | 50 | def optimal_scores(self, v, D, fast=True): 51 | """ Implementation of Ziv-Bar-Joseph et al.'s leaf order algorithm 52 | v is a ClusterNode 53 | D is a distance matrix """ 54 | 55 | def score(left, right, u, m, w, k): 56 | return get_M(left, u, m) + get_M(right, w, k) + D[m, k] 57 | 58 | def get_M(v, a, b): 59 | if a == b: 60 | self.M[v.get_id(), a, b] = 0 61 | return self.M[v.get_id(), a, b] 62 | 63 | if v.is_leaf(): 64 | n = v.get_id() 65 | self.M[v.get_id(), n, n] = 0 66 | return 0 67 | else: 68 | L = self.leaves(v.left) 69 | R = self.leaves(v.right) 70 | LL = self.leaves(v.left.left, v.left) 71 | LR = self.leaves(v.left.right, v.left) 72 | RL = self.leaves(v.right.left, v.right) 73 | RR = self.leaves(v.right.right, v.right) 74 | for l in L: 75 | for r in R: 76 | self.M[v.left.get_id(), l, r] = self.optimal_scores(v.left, D, fast=False) 77 | self.M[v.right.get_id(), l, r] = self.optimal_scores(v.right, D, fast=False) 78 | for u in L: 79 | for w in R: 80 | if fast: 81 | m_order = sorted(self.other(u, LL, LR), key=lambda m: get_M(v.left, u, m)) 82 | k_order = sorted(self.other(w, RL, RR), key=lambda k: get_M(v.right, w, k)) 83 | C = min([D[m, k] for m in self.other(u, LL, LR) for k in self.other(w, RL, RR)]) 84 | Cmin = 1e10 85 | for m in m_order: 86 | if self.M[v.left.get_id(), u, m] + self.M[v.right.get_id(), w, k_order[0]] + C >= Cmin: 87 | break 88 | for k in k_order: 89 | if self.M[v.left.get_id(), u, m] + self.M[v.right.get_id(), w, k] + C >= Cmin: 90 | break 91 | C = score(v.left, v.right, u, m, w, k) 92 | if C < Cmin: 93 | Cmin = C 94 | self.M[v.get_id(), u, w] = self.M[v.get_id(), w, u] = Cmin 95 | else: 96 | self.M[v.get_id(), u, w] = self.M[v.get_id(), w, u] = \ 97 | min([score(v.left, v.right, u, m, w, k) \ 98 | for m in self.other(u, LL, LR) \ 99 | for k in self.other(w, RL, RR)]) 100 | return self.M[v.get_id(), l, r] 101 | 102 | def order_tree(self, v, D, fM=None, fast=True): 103 | 104 | if fM is None: 105 | fM = 1 106 | self.optimal_scores(v, D, fast=fast) 107 | 108 | L = self.leaves(v.left) 109 | R = self.leaves(v.right) 110 | if len(L) and len(R): 111 | def getkey(z): 112 | u,w = z 113 | return self.M[v.get_id(),u,w] 114 | if len(L) and len(R): 115 | u, w = min(itertools.product(L,R), key=getkey) 116 | if w in self.leaves(v.right.left): 117 | v.right.right, v.right.left = v.right.left, v.right.right 118 | if u in self.leaves(v.left.right): 119 | v.left.left, v.left.right = v.left.right, v.left.left 120 | v.left = self.order_tree(v.left, D, fM) 121 | v.right = self.order_tree(v.right, D, fM) 122 | return v 123 | 124 | def other(self, x, V, W): 125 | # For an element x, returns the set that x isn't in 126 | if x in V: 127 | return W 128 | else: 129 | return V 130 | 131 | def leaves(self, t, t2=None): 132 | """ Returns the leaves of a ClusterNode """ 133 | try: 134 | return t.pre_order() 135 | except AttributeError: 136 | if t2 is not None: 137 | return t2.pre_order() 138 | else: 139 | return [] 140 | 141 | if __name__ == "__main__": 142 | # Create a staircase matrix 143 | X = np.zeros((100, 100)) 144 | for n in [0,10,20,30,40,50,60]: 145 | X[int(10.*n/7):int(10.*(n+10)/7):,n:n+40] = 1 146 | 147 | X = 
distance.squareform(distance.pdist(X, metric="euclidean")) 148 | # X = distance.squareform(distance.pdist(X, metric="hamming")) 149 | 150 | seaborn.heatmap(X) 151 | plt.figure() 152 | 153 | # Since we know the data has a staircase pattern, we can now shuffle the rows and columns 154 | np.random.shuffle(X) 155 | X = X.T 156 | np.random.shuffle(X) 157 | X = X.T 158 | 159 | # Visualize the input data 160 | seaborn.heatmap(X) 161 | olo = OptimalLeafOrdering(pd.DataFrame(X), metric='hamming', method='complete') 162 | plt.figure() 163 | 164 | # Visualize the output data 165 | Y = olo.get_ordered_data() 166 | seaborn.heatmap(Y) 167 | plt.show() -------------------------------------------------------------------------------- /Datathon-4/traveling_sales_person.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ortools # Using ortools version 7 3 | from ortools.constraint_solver import pywrapcp, routing_enums_pb2 4 | from scipy.spatial.distance import squareform, pdist 5 | import matplotlib.pyplot as plt 6 | import seaborn 7 | import pandas as pd 8 | 9 | class TravelingSalesPerson: 10 | def __init__(self, data, data_type='dist', metric='euclidean', approximation_multiplier=1000, timeout=2.0): 11 | self.data = data 12 | self.data_type = data_type 13 | self.metric = metric 14 | self.approximation_multiplier = approximation_multiplier 15 | self.timeout = timeout 16 | 17 | def get_ordered_data(self): 18 | if self.data_type == 'dist': 19 | row_order = self.seriate(self.data) 20 | column_order = self.seriate(self.data.T) 21 | elif self.data_type == 'data': 22 | # Get distances along rows 23 | dist1 = squareform(pdist(self.data, metric=self.metric)) 24 | # Get distances along columns 25 | dist2 = squareform(pdist(self.data.T, metric=self.metric)) 26 | 27 | row_order = self.seriate(dist1) 28 | column_order = self.seriate(dist2) 29 | else: 30 | raise NotImplementedError 31 | 32 | ordered_data = pd.DataFrame(self.data) 33 | ordered_data = ordered_data.iloc[row_order, column_order] 34 | return ordered_data 35 | 36 | def get_ordered_data_recompute(self): 37 | """Re-compute distances for column after computing row_order""" 38 | if self.data_type == 'data': 39 | # Get distances along rows 40 | dist1 = squareform(pdist(self.data, metric=self.metric)) 41 | row_order = self.seriate(dist1) 42 | data = pd.DataFrame(self.data) 43 | data = data.iloc[row_order,:] 44 | 45 | # Get distances along columns 46 | dist2 = squareform(pdist(data.values.T, metric=self.metric)) 47 | column_order = self.seriate(dist2) 48 | ordered_data = data.iloc[:, column_order] 49 | else: 50 | raise NotImplementedError 51 | return ordered_data 52 | 53 | def validate_data(self, dists): 54 | """Check dists contains valid values.""" 55 | try: 56 | isinf = np.isinf(dists).any() 57 | isnan = np.isnan(dists).any() 58 | except Exception as e: 59 | raise InvalidDistanceValues() from e 60 | if isinf: 61 | raise InvalidDistanceValues("Data contains inf values.") 62 | if isnan: 63 | raise InvalidDistanceValues("Data contains NaN values.") 64 | 65 | def seriate(self, dists): 66 | # Validate distances 67 | self.validate_data(dists) 68 | if self.timeout > 0: 69 | return self._seriate(dists=dists) 70 | elif self.timeout < 0: 71 | raise ValueError("timeout cannot be negative.") 72 | self.timeout = 1. 
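# When timeout == 0, fall back to a 1-second limit and keep doubling it (in the loop below)
# until the TSP solver returns a complete route instead of raising IncompleteSolutionError.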
73 | route = None 74 | while route is None: 75 | try: 76 | route = self._seriate(dists=dists) 77 | except IncompleteSolutionError: 78 | self.timeout *= 2 79 | return route 80 | 81 | 82 | def _seriate(self, dists): 83 | assert dists[dists < 0].size == 0, "distances must be non-negative" 84 | assert self.timeout > 0 85 | squareform = len(dists.shape) == 2 86 | if squareform: 87 | assert dists.shape[0] == dists.shape[1] 88 | size = dists.shape[0] 89 | else: 90 | raise InvalidDistanceValues("Data is not squareform.") 91 | 92 | manager = pywrapcp.RoutingIndexManager(size + 1, 1, size) 93 | routing = pywrapcp.RoutingModel(manager) 94 | 95 | def dist_callback(x, y): 96 | x = manager.IndexToNode(x) 97 | y = manager.IndexToNode(y) 98 | if x == size or y == size or x == y: 99 | return 0 100 | if squareform: 101 | dist = dists[x, y] 102 | else: 103 | # convert to the condensed index 104 | if x < y: 105 | x, y = y, x 106 | dist = dists[size * y - y * (y + 1) // 2 + x - y - 1] 107 | # ortools wants integers, so we approximate here 108 | return int(dist * self.approximation_multiplier) 109 | 110 | routing.SetArcCostEvaluatorOfAllVehicles(routing.RegisterTransitCallback(dist_callback)) 111 | search_parameters = pywrapcp.DefaultRoutingSearchParameters() 112 | search_parameters.time_limit.FromMilliseconds(int(self.timeout * 1000)) 113 | search_parameters.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH 114 | search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC 115 | assignment = routing.SolveWithParameters(search_parameters) 116 | if assignment is None: 117 | raise IncompleteSolutionError("No solution was found. Please increase the timeout value or set it to 0.") 118 | index = routing.Start(0) 119 | route = [] 120 | while not routing.IsEnd(index): 121 | node = manager.IndexToNode(index) 122 | if node < size: 123 | route.append(node) 124 | index = assignment.Value(routing.NextVar(index)) 125 | return route 126 | 127 | class IncompleteSolutionError(Exception): 128 | """Indicate that a solution for the TSP problem was not found.""" 129 | pass 130 | 131 | 132 | class InvalidDistanceValues(ValueError): 133 | """Indicate that the distance array contains invalid values.""" 134 | pass 135 | 136 | if __name__ == "__main__": 137 | # Create simulated data as in the paper 138 | X = np.zeros((100, 100)) 139 | for n in [0,10,20,30,40,50,60]: 140 | X[int(10.*n/7):int(10.*(n+10)/7):,n:n+40] = 1 141 | 142 | X = squareform(pdist(X, metric="euclidean")) 143 | # X = squareform(pdist(X, metric="hamming")) 144 | 145 | seaborn.heatmap(X) 146 | plt.figure() 147 | 148 | np.random.shuffle(X) 149 | X = X.T 150 | np.random.shuffle(X) 151 | X = X.T 152 | 153 | seaborn.heatmap(X) 154 | tsp = TravelingSalesPerson(X, data_type='data') 155 | plt.figure() 156 | 157 | # Visualize the output data 158 | Y = tsp.get_ordered_data() 159 | seaborn.heatmap(Y) 160 | plt.show() 161 | -------------------------------------------------------------------------------- /Datathon-5/Datathon_5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/Datathon-5/Datathon_5.pdf -------------------------------------------------------------------------------- /Datathon-5/visualization.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import 
plotly.express as px 4 | 5 | def viz_lifeExp_country(data, years=[i for i in range(2000,2015)]): 6 | data = data.loc[data['year'].isin(years)] 7 | data["life_expectancy_at_birth_all"] = (data["life_expectancy_at_birth_men"]*data["total_population_male"] + data["life_expectancy_at_birth_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 8 | 9 | fig = px.sunburst(data, path=['country','year'], values='life_expectancy_at_birth_all', 10 | color='life_expectancy_at_birth_all', hover_data=['life_expectancy_at_birth_all'], 11 | color_continuous_scale='RdBu',) 12 | 13 | fig.show() 14 | 15 | def viz_lifeExp_europe(data, years=[i for i in range(2000,2015)]): 16 | data = data.loc[data['year'].isin(years)] 17 | data = data.loc[data['country'].isin(['United Kingdom','Germany','France','Italy','Netherlands','Malta','Israel','Belgium','Russia'])] 18 | data['population_in_M_per_sq_km'] = data['total_population']/data['area_square_kilometres'] 19 | 20 | fig = px.sunburst(data, path=['country','year'], values='population_in_M_per_sq_km', 21 | color='population_in_M_per_sq_km', hover_data=['total_population'],) 22 | # color_continuous_scale='RdBu',) 23 | 24 | fig.show() 25 | 26 | def viz_lifeExp_female_fertility(data): 27 | # Female life expectancy 28 | fig = px.parallel_coordinates(data[["country_index","life_expectancy_at_birth_women","mean_age_of_women_at_birth_of_first_child","adolescent_fertility_rate","life_expectancy_at_age_65_women"]], color="life_expectancy_at_age_65_women", 29 | labels={"country_index":"country_index","adolescent_fertility_rate":"adolescent_fertility_rate","life_expectancy_at_birth_women": "life_expectancy_at_birth_women", "mean_age_of_women_at_birth_of_first_child": "mean_age_of_women_at_birth_of_first_child","life_expectancy_at_age_65_women":"life_expectancy_at_age_65_women",}, 30 | color_continuous_scale=px.colors.diverging.Tealrose, 31 | color_continuous_midpoint=2) 32 | fig.show() 33 | 34 | def viz_computer_usage_employment(data, years=[i for i in range(2000,2015)]): 35 | data = data.loc[data['year'].isin(years)] 36 | 37 | data["life_expectancy_at_birth_all"] = (data["life_expectancy_at_birth_men"]*data["total_population_male"] + data["life_expectancy_at_birth_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 38 | data["life_expectancy_at_age_65_all"] = (data["life_expectancy_at_age_65_men"]*data["total_population_male"] + data["life_expectancy_at_age_65_women"]*data["total_population_female"])/(data["total_population_male"] + data["total_population_female"]) 39 | data["computer_use_16_24_all"] = data["computer_use_16_24_male"] + data["computer_use_16_24_female"] 40 | data["computer_use_25_54_all"] = data["computer_use_25_54_male"] + data["computer_use_25_54_female"] 41 | data["computer_use_55_74_all"] = data["computer_use_55_74_male"] + data["computer_use_55_74_female"] 42 | 43 | # Parallel coordinates plot 44 | columnsNew = ["life_expectancy_at_birth_all", "life_expectancy_at_age_65_all","computer_use_16_24_all", "computer_use_25_54_all", "computer_use_55_74_all", "youth_unemployment_rate", "unemployment_rate"] 45 | cols = {i:i for i in columnsNew} 46 | 47 | fig = px.parallel_coordinates(data[columnsNew], color="life_expectancy_at_age_65_all", 48 | labels=cols, 49 | color_continuous_scale=px.colors.diverging.Tealrose, 50 | color_continuous_midpoint=2) 51 | 52 | # # Scatter matrix plot 53 | # columnsNew = ["life_expectancy_at_birth_all", 
"computer_use_16_24_all","unemployment_rate"] 54 | # cols = {i:i for i in columnsNew} 55 | # fig = px.scatter_matrix(data[columnsNew], 56 | # dimensions=columnsNew, 57 | # color="unemployment_rate", 58 | # title="Scatter matrix of UNECE employement data", 59 | # labels=cols) 60 | 61 | fig.show() 62 | 63 | def viz_germany(data, years=[i for i in range(2000,2015)]): 64 | data = data.loc[data['year'].isin(years)] 65 | data = data.loc[data['country'] == 'Germany'] 66 | columnsNew = ["year", "total_fertility_rate", "total_population"] 67 | cols = {i:i for i in columnsNew} 68 | 69 | fig = px.scatter_matrix(data[columnsNew], 70 | dimensions=columnsNew, 71 | color="total_population", 72 | title="Scatter matrix of UNECE Germany data", 73 | labels=cols) 74 | 75 | fig.show() 76 | 77 | def viz_employement(data): 78 | columnsNew = ["economic_activity_rate_men_15_64", "economic_acivity_rate_women_15_64", "unemployment_rate"] 79 | cols = {i:i for i in columnsNew} 80 | 81 | fig = px.scatter_matrix(data[columnsNew], 82 | dimensions=columnsNew, 83 | color="unemployment_rate", 84 | title="Scatter matrix of UNECE employement data", 85 | labels=cols) 86 | 87 | fig.show() 88 | 89 | def viz_gender_pay_gap(data, years=[i for i in range(2000,2015)]): 90 | data = data.loc[data['year'].isin(years)] 91 | data["world"] = "world" 92 | fig = px.treemap(data, path=['world','country','year'], values='gender_pay_gap_in_monthly_earnings', 93 | color='gender_pay_gap_in_monthly_earnings', hover_data=['gender_pay_gap_in_monthly_earnings'],) 94 | fig.show() 95 | 96 | if __name__ == '__main__': 97 | data = pd.read_csv("unece.csv",header=0) 98 | 99 | print(data.columns) 100 | 101 | country = list(set(data["country"])) 102 | country_index = {country[i]:i for i in range(len(country))} 103 | 104 | data["country_index"] = data.apply(lambda row: country_index[row.country], axis = 1) 105 | data = data.fillna(data.mean()) 106 | 107 | # viz_lifeExp_country(data) 108 | # viz_computer_usage_employment(data) 109 | # viz_employement(data) 110 | # viz_lifeExp_europe(data) 111 | # viz_gender_pay_gap(data) 112 | # viz_germany(data) 113 | # viz_lifeExp_female_fertility(data) 114 | 115 | 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Visualisation 2 | This repository is a compilation of Assignments that I have attempted for my Data Visualisation course. 3 | 4 | ## Datathon 1 5 | We are given the data of India Ocean, generated by the ocean model MOM, run by Indian National Center for Ocean Information Services, INCOIS, Hyderabad. The data values are 5-day moving average of the following variables: 6 | 1) Sea Surface Salinity (SSS) 7 | 2) Sea Surface Temperature (SST) 8 | 3) Sea Surface Height Anomaly (SSHA) 9 | 4) Meridional current 10 | 5) Zonal current 11 | Our main aim is to effectively visualize the above data to find interesting underlying patterns of the Indian Ocean. We would like to study the effect spatial and temporal patterns of Indian Ocean has on the monsoons of India. Since we have the data around the time of the Tsunami of December 2004, we will also look at the visualizations to infer the impact of the tsunami on the above mentioned variables. 
12 | 13 | ### Visualizations 14 | 15 | ![](./images/D1/m1.png) 16 | ![](./images/D1/ssha.png) 17 | ![](./images/D1/sst.png) 18 | ![](./images/D1/sss.png) 19 | 20 | ## Datathon 2 21 | We are given the data of the Indian Ocean, generated by the ocean model MOM, run by the Indian National Centre for Ocean Information Services (INCOIS), Hyderabad. The data 22 | values are available for the following variables at different depths ranging from 5m to 225m with an interval of 10m: 23 | 1) Salinity – It is the amount of salt dissolved in a body of water. 24 | 2) Potential Temperature – The temperature that an unsaturated parcel of dry air would have if brought adiabatically and reversibly from its initial state to a standard pressure. 25 | 3) Meridional current – Meridional currents flow from north to south, or from south to north, along the Earth’s longitude lines (or meridian circles) 26 | 4) Zonal current – Zonal currents flow from east to west, or from west to east, along the Earth’s latitude lines 27 | 28 | ### Visualizations 29 | 30 | ![](./images/D2/temp_iso.png) 31 | 32 | ## Datathon 3 33 | We are given the tabular datasets published by the World Health Organization for COVID-19 cases. These include State/Country/Region-wise time series data (over a period of 246 days starting from January 2020) for confirmed cases, recovered cases and deaths. Our main aim is to effectively visualize the above data as a network to find underlying patterns or communities formed. 34 | 35 | ### Visualizations 36 | 37 | ![](./images/D3/10_deaths_early.png) 38 | ![](./images/D3/10_recoveries.png) 39 | 40 | ## Datathon 4 41 | We have used the same data as provided in Datathon 3. Our main aim is to effectively visualize the above data as a matrix and find the underlying patterns or clusters formed using two different methods of matrix seriation: fast optimal leaf ordering, and seriation based on a Traveling Salesperson Problem (TSP) formulation. These methods are implemented in the Datathon 4 folder; a short usage sketch is included after the Datathon 5 overview below. 42 | 43 | ### Visualizations 44 | Shuffled data:\ 45 | ![](./images/D4/shuffled_data_2.png)\ 46 | Matrix after seriation using fast OLO:\ 47 | ![](./images/D4/olo_data_2.png)\ 48 | Matrix after seriation using TSP solver:\ 49 | ![](./images/D4/tsp_data_2.png) 50 | 51 | ## Datathon 5 52 | We are given a tabular dataset published by the United Nations Economic Commission for Europe (UNECE) which has the Country Overview data. It consists of data for 52 53 | distinct countries from 2000 to 2016. It has 79 columns that range over different characteristics measuring the performance of the nations. I will be using a subset of this dataset to test some of my hypotheses and will make inferences from the same. 
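For reference, the following is a minimal sketch of how the two seriation helpers in the Datathon-4 folder are driven; it mirrors the `__main__` blocks of `traveling_sales_person.py` and `data_matrix.py`. It assumes it is run from the `Datathon-4` directory with `ortools` installed, and the toy staircase matrix is only an illustration, not one of the COVID-19 matrices.

```python
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt

from traveling_sales_person import TravelingSalesPerson
from optimal_leaf_ordering import OptimalLeafOrdering

# Toy "staircase" matrix whose block structure is hidden by shuffling the rows
X = np.zeros((100, 100))
for n in [0, 10, 20, 30, 40, 50, 60]:
    X[int(10. * n / 7):int(10. * (n + 10) / 7), n:n + 40] = 1
np.random.shuffle(X)

# Seriation via a TSP formulation: rows and columns are reordered along a short tour
tsp_ordered = TravelingSalesPerson(X, data_type='data').get_ordered_data()

# Seriation via fast optimal leaf ordering of a hierarchical clustering
olo_ordered = OptimalLeafOrdering(pd.DataFrame(X), data_type='data',
                                  metric='euclidean', method='complete').get_ordered_data()

seaborn.heatmap(tsp_ordered, cmap=plt.cm.Blues)
plt.figure()
seaborn.heatmap(olo_ordered, cmap=plt.cm.Blues)
plt.show()
```

Both helpers return a reordered DataFrame, so the same `seaborn.heatmap` call can be used to compare the two orderings against the shuffled input.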
54 | 55 | ### Visualizations 56 | 57 | ![](./images/D5/figure_1.png) 58 | ![](./images/D5/figure_3.png) 59 | ![](./images/D5/figure_4.png) 60 | ![](./images/D5/figure_5.png) 61 | ![](./images/D5/figure_7.png) 62 | ![](./images/D5/figure_9.png) 63 | -------------------------------------------------------------------------------- /images/D1/m1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/m1.png -------------------------------------------------------------------------------- /images/D1/ssha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/ssha.png -------------------------------------------------------------------------------- /images/D1/sss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/sss.png -------------------------------------------------------------------------------- /images/D1/sst.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D1/sst.png -------------------------------------------------------------------------------- /images/D2/temp_iso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D2/temp_iso.png -------------------------------------------------------------------------------- /images/D3/10_deaths_early.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D3/10_deaths_early.png -------------------------------------------------------------------------------- /images/D3/10_recoveries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D3/10_recoveries.png -------------------------------------------------------------------------------- /images/D4/olo_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/olo_data_2.png -------------------------------------------------------------------------------- /images/D4/shuffled_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/shuffled_data_2.png -------------------------------------------------------------------------------- /images/D4/tsp_data_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D4/tsp_data_2.png -------------------------------------------------------------------------------- /images/D5/figure_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_1.png -------------------------------------------------------------------------------- /images/D5/figure_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_10.png -------------------------------------------------------------------------------- /images/D5/figure_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_11.png -------------------------------------------------------------------------------- /images/D5/figure_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_12.png -------------------------------------------------------------------------------- /images/D5/figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_2.png -------------------------------------------------------------------------------- /images/D5/figure_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_3.png -------------------------------------------------------------------------------- /images/D5/figure_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_4.png -------------------------------------------------------------------------------- /images/D5/figure_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_5.png -------------------------------------------------------------------------------- /images/D5/figure_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_6.png -------------------------------------------------------------------------------- /images/D5/figure_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_7.png -------------------------------------------------------------------------------- /images/D5/figure_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_8.png -------------------------------------------------------------------------------- /images/D5/figure_9.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/swastishreya/Data-Visualisation/0761b65c64251ebf3d0c17a3ef994ebf364e89ca/images/D5/figure_9.png --------------------------------------------------------------------------------