/<'Deployment ID'>_<'Project ID'>/annotations/
72 |
73 | # path names
74 | audio_path = os.path.join(export_folder_dictionary['Export folder'],
75 | export_folder_dictionary['Project ID'] + '_' +
76 | export_folder_dictionary['Deployment ID'],
77 | 'audio')
78 | annot_path = os.path.join(export_folder_dictionary['Export folder'],
79 | export_folder_dictionary['Project ID'] + '_' +
80 | export_folder_dictionary['Deployment ID'],
81 | 'annotations')
82 | metadata_path = os.path.join(export_folder_dictionary['Export folder'],
83 | export_folder_dictionary['Project ID'] + '_' +
84 | export_folder_dictionary['Deployment ID'])
85 |
86 | export_folder_dictionary['Audio export folder'] = audio_path
87 | export_folder_dictionary['Annotation export folder'] = annot_path
88 | export_folder_dictionary['Metadata folder'] = metadata_path
89 |
90 | # Metadata, annotation csv and audio-selection table map names
91 | metadata_filename = \
92 | os.path.join(export_folder_dictionary['Metadata folder'],
93 | export_folder_dictionary['Project ID'] + '_' + \
94 | export_folder_dictionary['Deployment ID'] + \
95 | '_metadata.json')
96 |
97 | annotation_csv_filename = \
98 | os.path.join(export_folder_dictionary['Metadata folder'],
99 | export_folder_dictionary['Project ID'] + '_' + \
100 | export_folder_dictionary['Deployment ID']
101 | + '_annotations.csv')
102 | audio_sel_map_csv_filename = \
103 | os.path.join(export_folder_dictionary['Metadata folder'],
104 | export_folder_dictionary['Project ID'] + '_' + \
105 | export_folder_dictionary['Deployment ID']
106 | + '_audio_seltab_map.csv')
107 |
108 | export_folder_dictionary['Metadata file'] = metadata_filename
109 | export_folder_dictionary['Annotation CSV file'] = annotation_csv_filename
110 | export_folder_dictionary['Audio-Seltab Map CSV file'] = audio_sel_map_csv_filename
111 |
112 |
113 | # Create directories
# Option 1 -- The audio folder does not exist in path
if not os.path.exists(audio_path):
    # Create the audio and annotations folders. exist_ok=True guards the
    # edge case where only the audio folder is missing: a leftover
    # annotations folder would otherwise make os.makedirs raise
    # FileExistsError and abort the page.
    os.makedirs(audio_path)
    os.makedirs(annot_path, exist_ok=True)

    st.success(':white_check_mark: New folders created!')
    st.session_state.stage = 2
122 |
123 | # Option 2 -- the audio folder already exists
124 | else:
125 | # Display a warning message
126 | st.write(f'Warning: This folder already exists, data may be deleted: \n')
127 |
128 | output = st.empty()
129 | with folders.st_capture(output.code):
130 | folders.path_print(os.path.join(export_folder_dictionary['Export folder'],
131 | export_folder_dictionary['Project ID'] + '_' +
132 | export_folder_dictionary['Deployment ID']))
133 |
134 | col1, col2, col3, col4 = st.columns([0.2, 0.2, 0.4, 0.3])
135 | # Ask the user whether to delete existing data
136 | if col1.button('Delete data', help=None, on_click=set_state, args=[2]):
137 | # Delete existing audio and annotations folders
138 | #shutil.rmtree(audio_path)
139 | #shutil.rmtree(annot_path)
140 | shutil.rmtree(metadata_path)
141 |
142 | # Recreate audio and annotations folders
143 | os.makedirs(audio_path)
144 | os.makedirs(annot_path)
145 |
146 | st.success(':white_check_mark: Data successfully deleted & new folders created!')
147 |
148 | if col2.button('Abort', help=None, on_click=set_state, args=[1]):
149 | # Prompt the user to change the export folder path
150 | output = st.empty()
151 | with folders.st_capture(output.code):
152 | raise ValueError("Please change the export folder path")
153 |
154 | if st.session_state.stage >= 2:
155 | # Show the info on the sidebar
156 | st.sidebar.subheader('Project settings')
157 | st.sidebar.write('Export folder')
158 | st.sidebar.success(export_folder_dictionary['Export folder'])
159 | st.sidebar.write('Project ID')
160 | st.sidebar.success(export_folder_dictionary['Project ID'])
161 | st.sidebar.write('Deployment ID')
162 | st.sidebar.success(export_folder_dictionary['Deployment ID'])
163 |
164 | # Save
165 | st.session_state.export_folder_dictionary = export_folder_dictionary
166 |
167 | # Activate next session state
168 | st.session_state.stage = 3
169 | link_to_metadata = "pages" + os.sep + "2_Metadata_creator.py"
170 | st.page_link(link_to_metadata, label=":green[Continue to Metadata Creator]", icon="➡️")
171 |
# Option for people who already have a correctly formatted metadata file
173 | json_data = st.checkbox('I already have a metadata file in the correct format')
174 | if json_data:
175 | st.write('In construction, please use the Metadata Creator')
176 | #st.text_input('Path to metadata JSON file')
177 | #
178 | #if st.button('Verify metadata', help=None):
179 | #
180 | # missing_data = metadata.test_json_fields(json_data)
181 | # # TODO implement a test to check if all metadata fields are present
182 | # if missing_data: # Call this function with your JSON data
183 | # st.write('JSON file does not contain all the necessary fields, please use the Metadata Creator')
184 | # else:
185 | # # Activate next session state and get link to data creator
186 | # st.session_state.stage = 9
187 | # link_to_dataset = "pages" + os.sep + "3_Dataset_creator.py"
188 | # st.page_link(link_to_dataset, label=":green[Continue to Dataset Creator]", icon="➡️")
189 |
--------------------------------------------------------------------------------
/BenchmarkDatasetCreator_app/pages/2_Metadata_creator.py:
--------------------------------------------------------------------------------
1 | # Streamlit app page 2, Metadata input
2 | # This page is associated with a series of functions, in metadata.py
3 | # The text help for streamlit user inputs is integrated in help_dictionary.py in the metadata dict
4 |
5 | # Creates original_data_dictionary
6 |
7 | # Imports
8 | import json
9 | import os
10 | import sys
11 | import copy
12 |
13 |
14 | import pandas as pd
15 | import streamlit as st
16 |
17 | sys.path.insert(1, '.' + os.sep)
18 | from BenchmarkDatasetCreator_app import help_dictionary as hd
19 | from BenchmarkDatasetCreator import metadata
20 |
21 | # Page title (tab and page), Header
22 | st.set_page_config(
23 | page_title='Benchmark Dataset Creator: Metadata',
24 | )
25 | st.title('Benchmark Dataset Creator')
26 | st.header('Collect metadata')
27 |
28 | # Retrieve data from previous page
29 | if not hasattr(st.session_state, 'export_folder_dictionary'):
30 | st.error('Project information missing')
31 | link_to_project = "pages" + os.sep + "1_Project_creator.py"
32 | st.page_link(link_to_project, label=":white[Go to Project creator]", icon="➡️")
33 | else:
34 | export_folder_dictionary = st.session_state.export_folder_dictionary
35 |
36 | # Show the info on the sidebar
37 | st.sidebar.subheader('Project settings')
38 | st.sidebar.write('Project ID')
39 | st.sidebar.success(export_folder_dictionary['Project ID'])
40 | st.sidebar.write('Deployment ID')
41 | st.sidebar.success(export_folder_dictionary['Deployment ID'])
42 | st.sidebar.write('Export folder')
43 | st.sidebar.success(export_folder_dictionary['Export folder'])
44 |
45 | # Initialize the data saving dict
46 | original_data_dictionary = {}
47 |
48 | # 1) Collect info on project & project ID
49 |
50 | # Create the dictionary to store the information
51 | original_data_dictionary = {
52 | 'Project ID': export_folder_dictionary['Project ID'],
53 | 'Deployment ID': export_folder_dictionary['Deployment ID'],
54 | }
55 |
56 | if st.session_state.stage >= 3:
57 | # 2) Collect info on Data owners/curators
58 | # TODO: Add terms for local/indigenous partners
59 | # TODO: Land acknowledgment
60 | st.subheader('Data stewardship',
61 | help=hd.metadata['Data stewardship']['General'])
62 |
63 | # Create list of authorized roles (based on Zenodo)
64 | authorized_roles = ['Contact person', 'Data collector', 'Data analyst', 'Dataset curator', 'Distributor',
65 | 'Hosting institution', 'Principal Investigator', 'Rights holder', 'Sponsor']
66 |
67 | # Check if 'rows' is not in the session state and initialize it to 0
68 | if 'rows' not in st.session_state:
69 | st.session_state['rows'] = 0
70 |
71 |
72 | # Add rows
def increase_rows():
    """Increment the co-creator row counter.

    Callback for the 'Add co-creator' button; each click adds one more
    person-entry row to the Data stewardship form.
    """
    st.session_state['rows'] = st.session_state['rows'] + 1
78 |
79 |
80 | # Button to add a new person; calls the increase_rows function when clicked
81 | st.button('Add co-creator', on_click=increase_rows)
82 |
83 | # Loop through the number of rows and display input fields for each person
84 | for i in range(st.session_state['rows']):
85 | metadata.display_input_row(i, authorized_roles)
86 |
87 | # Display the entered information for each person as an interactive DataFrame
88 | # Create a list to store the entered data
89 | people_data = []
90 |
91 | # Loop through the rows and append the entered data to the list
92 | for i in range(st.session_state['rows']):
93 | person_data = {
94 | 'Role': st.session_state[f'role_{i}'],
95 | 'Name': st.session_state[f'name_{i}'],
96 | 'Affiliation': st.session_state[f'affiliation_{i}'],
97 | 'Email Address': st.session_state[f'email_{i}']
98 | }
99 | people_data.append(person_data)
100 |
101 | # Create a DataFrame from the collected data
102 | people_df = pd.DataFrame(people_data)
103 |
104 | # Display the DataFrame
105 | st.write('Entered dataset co-creators')
106 | st.dataframe(people_df, use_container_width=True, hide_index=True)
107 |
108 | original_data_dictionary['Data stewardship'] = {
109 | # Information about permits
110 | 'Permits': st.text_area(
111 | 'Permit # or Permission and permitting authority',
112 | placeholder=hd.metadata['Data stewardship']['Permits'],
113 | height=None, max_chars=None, key=None,
114 | label_visibility="visible"),
115 |
116 | # Optional associated publication DOI
117 | 'DOI': st.text_input(
118 | 'Associated publication (DOI) ',
119 | value="https://doi.org/XX.XXXXX",
120 | type="default",
121 | help=hd.metadata['Data stewardship']['DOI'],
122 | label_visibility="visible")}
123 |
124 | st.button('Next', key='Next2', help=None, on_click=metadata.set_state, args=[4])
125 |
126 | # 3) Add information on the instrumentation
127 | if st.session_state.stage >= 4:
# Save the previous data: reshape 'Data stewardship' into the wanted format
# NOTE(review): the assignment below replaces the dict of 'Permits'/'DOI'
# built in the previous step with the people_data list — confirm those two
# fields are meant to be discarded from the saved metadata.
130 | original_data_dictionary['Data stewardship'] = people_data
131 |
132 | st.subheader('Instrumentation',
133 | help=hd.metadata['Instrument']['General'])
134 |
135 | # Create two columns for app display
136 | instrumentation_col, settings_col = st.columns(2)
137 |
# List of authorized recording equipment + sort + add "Other" at the end
authorized_instruments = ['Cornell - SwiftOne', 'Cornell - Swift',
                          'Cornell - Rockhopper', 'Cornell - MARU',
                          'Open Acoustic Devices - AudioMoth',
                          'Open Acoustic Devices - HydroMoth',
                          # Typo fix: "Instrunents" -> "Instruments"
                          "Ocean Instruments - SoundTrap ST600 STD",
                          "Ocean Instruments - SoundTrap ST600 HF",
                          "Scripps - HARP", "Wildlife Acoustics - Song Meter SM4",
                          "Wildlife Acoustics - Song Meter Mini 2",
                          "Wildlife Acoustics - Song Meter Micro",
                          "Wildlife Acoustics - Song Meter Micro 2",
                          "Wildlife Acoustics - Song Meter SM4BAT FS",
                          "Wildlife Acoustics - Song Meter Mini Bat2"]
authorized_instruments.sort()
# "Other" stays last so it is not mixed into the alphabetical listing
authorized_instruments.append("Other")
153 |
154 | # Add the user inputs to the dictionary
155 | original_data_dictionary['Instrument'] = {
156 | 'Type': instrumentation_col.selectbox(
157 | 'Select recording equipment',
158 | authorized_instruments,
159 | help=hd.metadata['Instrument']['Type']),
160 | 'Settings': settings_col.text_area(
161 | 'Details on instrument settings',
162 | placeholder=hd.metadata['Instrument']['Settings'],
163 | height=None, max_chars=None, key=None,
164 | label_visibility="visible")
165 | }
166 | st.button('Next', key='Next3', help=None, on_click=metadata.set_state, args=[5])
167 |
168 | # 4) Add information about the deployment
169 | if st.session_state.stage >= 5:
170 | st.subheader('Deployment',
171 | help=hd.metadata['Deployment']['General'])
172 |
173 | # Create two columns with different width for app display
174 | deployment_input_col, map_col = st.columns([0.3, 0.7])
175 |
176 | # Get user inputs
177 | original_data_dictionary['Deployment'] = {
178 | 'Position': {
179 | 'Lat.': float(
180 | deployment_input_col.number_input(
181 | 'Recorder latitude (°)',
182 | value=42.478327,
183 | min_value=-90.0,
184 | max_value=90.0,
185 | format='%.6f',
186 | step=0.000001,
187 | label_visibility="visible")),
188 | 'Lon.': float(
189 | deployment_input_col.number_input(
190 | 'Recorder longitude (°)',
191 | value=-76.450438,
192 | min_value=-180.0,
193 | max_value=180.0,
194 | format='%.6f',
195 | step=0.000001,
196 | # help="Enter Longitude",
197 | label_visibility="visible")),
198 | },
199 | 'Height/depth (m)': int(
200 | deployment_input_col.number_input('Recorder height/depth (m)',
201 | value=10,
202 | min_value=0,
203 | max_value=None,
204 | format='%i',
205 | step=1,
206 | help=hd.metadata['Deployment']['Height/depth (m)'],
207 | label_visibility="visible")),
208 | 'Terrain elevation/water depth (m)': int(
209 | deployment_input_col.number_input(
210 | 'Elevation/water depth (m)',
211 | value=10,
212 | min_value=0,
213 | max_value=None,
214 | format='%i',
215 | step=1,
216 | help=hd.metadata['Deployment']['Terrain elevation/water depth (m)'],
217 | label_visibility="visible")),
218 | 'Env. context': deployment_input_col.text_area(
219 | 'Details on environmental context',
220 | placeholder=hd.metadata['Deployment']['Env. context'],
221 | label_visibility="visible",
222 | height=143)
223 | }
224 |
225 | # Show map for the user to check their entry
226 | df_map = pd.DataFrame({
227 | 'lat': [original_data_dictionary['Deployment']['Position']['Lat.']],
228 | 'lon': [original_data_dictionary['Deployment']['Position']['Lon.']]
229 | })
230 | map_col.map(df_map, size=5, zoom=15)
231 | st.button('Next', key='Next4', help=None, on_click=metadata.set_state, args=[6])
232 |
233 | # 5) Enter sampling details
234 | if st.session_state.stage >= 6:
235 | st.subheader('Sampling details',
236 | help=hd.metadata['Sampling details']['General'])
237 |
238 | # Declare the dictionary structure for sampling details
239 | original_data_dictionary['Sampling details'] = {
240 | 'Time': '',
241 | 'Digital sampling': '',
242 | }
243 | # Get the start and end time in both local time and UTC
244 | start_date_time_utc, start_date_time_local = \
245 | metadata.get_date_time('Recording start', original_data_dictionary)
246 |
247 | end_date_time_utc, end_date_time_local = \
248 | metadata.get_date_time('Recording end', original_data_dictionary)
249 |
250 | # If the dates are filled
251 | if (start_date_time_local is not None and end_date_time_local is not None) and \
252 | (start_date_time_local != '' and end_date_time_local != ''):
253 | # Check the dates make sense:
254 | metadata.check_dates(start_date_time_local, end_date_time_local)
255 |
256 | # Fill times in the dictionary
257 | original_data_dictionary['Sampling details']['Time'] = {
258 | 'UTC Start': start_date_time_utc,
259 | 'UTC End': end_date_time_utc,
260 | 'Local Start': start_date_time_local,
261 | 'Local End': end_date_time_local}
262 |
263 | # Get the information on the digital sampling
264 | st.write('Digital sampling')
265 |
266 | # Create two columns with different width for app display
267 | digital_sampling_col, data_mod_col = st.columns([0.5, 0.5])
268 |
269 | # Values for bit depth
270 | authorized_bit_depths = [8, 16, 24]
271 |
272 | # User inputs for all digital sampling
273 | original_data_dictionary['Sampling details']['Digital sampling'] = {
274 | 'Sample rate (kHz)': float(digital_sampling_col.number_input(
275 | 'Sample rate (kHz)',
276 | value=1.000,
277 | min_value=0.100,
278 | max_value=None,
279 | format='%.3f',
280 | step=1.000,
281 | help=
282 | hd.metadata['Sampling details']['Digital sampling'][
283 | 'Sample rate (kHz)'],
284 | label_visibility="visible")),
285 |
286 | 'Sample Bits': int(digital_sampling_col.selectbox(
287 | 'Bit depth',
288 | authorized_bit_depths,
289 | index=1,
290 | help=hd.metadata['Sampling details']['Digital sampling'][
291 | 'Sample Bits'])),
292 |
293 | 'Clipping': digital_sampling_col.radio(
294 | 'Clipping',
295 | ['Yes', 'No', 'Don\'t know'],
296 | horizontal=True,
297 | index=None,
298 | help=hd.metadata['Sampling details']['Digital sampling'][
299 | 'Clipping']),
300 | 'Data Modifications': data_mod_col.text_area(
301 | 'Data Modifications',
302 | placeholder=
303 | hd.metadata['Sampling details']['Digital sampling'][
304 | 'Data Modifications'],
305 | label_visibility="visible",
306 | height=185)
307 | }
308 |
309 | st.button('Next', key='Next5', help=None, on_click=metadata.set_state, args=[7])
310 |
311 | # 6) Get information on the annotation protocol
312 | if st.session_state.stage >= 7:
313 | st.subheader('Annotations',
314 | help=hd.metadata['Annotations']['General'])
315 | # Add columns
316 | annotation_questions_col, annotation_protocol_col = st.columns([0.5, 0.5])
317 |
318 | # About the target signals
319 | annotation_questions_col.write('Target signals')
320 |
321 | # Authorized annotation types
322 | authorized_annotations = ['SpeciesID', 'CallID']
323 |
324 | # Initialize annotations section of the dictionary
325 | original_data_dictionary['Annotations'] = {
326 | 'Target signals': '',
327 | 'Non-target signals': '',
328 | 'Annotation protocol': ''
329 | }
330 |
331 | original_data_dictionary['Annotations']['Target signals'] = {
332 | 'Kind': annotation_questions_col.radio(
333 | 'Annotation type',
334 | authorized_annotations,
335 | horizontal=True,
336 | index=None,
337 | help=hd.metadata['Annotations']['Target signals']['Kind']
338 | ),
339 | }
340 |
341 | # About non-target signals
342 | annotation_protocol_col.write('Non-target signals')
343 |
344 | # Authorized answers
345 | yes_no = ['Yes', 'No']
346 |
347 | # noinspection PyTypedDict
348 | original_data_dictionary['Annotations']['Non-target signals'] = {
349 | 'Noise': annotation_protocol_col.radio(
350 | 'Does the dataset contain a background noise class?',
351 | yes_no,
352 | index=None,
353 | horizontal=True),
354 | 'Bio': '',
355 | 'Anthro': '',
356 | 'Geo': '',
357 | }
358 |
359 | st.markdown("""
360 |
365 | """, unsafe_allow_html=True)
# Prompt for the non-target-signal radio group.
# NOTE(review): the string literal was split across a raw line break in the
# source (a syntax error); reconstructed with a trailing <br>, since
# unsafe_allow_html=True implies the message embedded HTML — confirm the
# original tag.
annotation_protocol_col.markdown(
    'Does the dataset contain selections with unique labels for: <br>',
    unsafe_allow_html=True)
369 |
370 | original_data_dictionary['Annotations']['Non-target signals']['Bio'] = \
371 | annotation_protocol_col.radio(
372 | ':heavy_minus_sign: Other biological sounds (e.g., insect chorus, un-IDed call types, etc)?',
373 | yes_no,
374 | index=None,
375 | horizontal=True,
376 | help='')
377 | original_data_dictionary['Annotations']['Non-target signals']['Anthro'] = \
378 | annotation_protocol_col.radio(
379 | ':heavy_minus_sign: Anthropogenic sounds (e.g., ship noise, piling, vehicles, chainsaw etc.)?',
380 | yes_no,
381 | index=None,
382 | horizontal=True,
383 | help='')
384 | original_data_dictionary['Annotations']['Non-target signals']['Geo'] = \
385 | annotation_protocol_col.radio(
386 | ':heavy_minus_sign: Geophysical sounds (e.g., thunder, heavy rain, earthquakes etc.)?',
387 | yes_no,
388 | index=None,
389 | horizontal=True,
390 | help='')
391 |
392 | # Optional field for annotation protocol
393 |
394 | # Free field for annotation protocol
395 | original_data_dictionary['Annotations']['Annotation protocol'] = \
396 | annotation_questions_col.text_area(
397 | 'Annotation protocol',
398 | placeholder=hd.metadata['Annotations']['Annotation protocol'],
399 | label_visibility="visible",
400 | height=254)
401 |
402 | st.button('Submit', key='Submit', help=None, on_click=metadata.set_state, args=[8])
403 |
404 | # 7) Submit button to write JSON file
405 | if st.session_state.stage >= 8:
406 | dict_oj = copy.deepcopy(original_data_dictionary)
407 | metadata_save = {
408 | 'Original data': metadata.transform_original_metadata_to_ASA_standard(dict_oj),
409 | 'Benchmarked data': ''
410 | }
411 | with open(export_folder_dictionary['Metadata file'], 'w') as fp:
412 | json.dump(metadata_save, fp, indent=4)
413 |
414 | # Metadata announcement
415 | meta_txt_col, meta_check_col = st.columns(2)
416 | meta_txt_col.success('Metadata successfully created!')
417 | #
418 | if meta_txt_col.button('Show metadata'):
419 | st.write('The metadata is saved at:', export_folder_dictionary['Metadata file'])
420 | st.write('Here is a preview: ')
421 | st.json(original_data_dictionary)
422 |
423 | # Save
424 | st.session_state.export_folder_dictionary = export_folder_dictionary
425 | st.session_state.original_data_dictionary = original_data_dictionary
426 |
427 | # Show on sidebar
428 | st.sidebar.write('Metadata file')
429 | st.sidebar.success(export_folder_dictionary['Metadata file'])
430 |
431 |
432 |
433 | # Activate next session state
434 | st.session_state.stage = 9
435 | link_to_dataset = "pages" + os.sep + "3_Dataset_creator.py"
436 | st.page_link(link_to_dataset, label=":green[Continue to Dataset Creator]", icon="➡️")
--------------------------------------------------------------------------------
/BenchmarkDatasetCreator_app/pages/3_Dataset_creator.py:
--------------------------------------------------------------------------------
1 | # Streamlit app page 3, Dataset creator
2 | # This page is associated with a series of functions, in dataset.py
# The text help for streamlit user inputs is in help_dictionary.py
4 | # Imports
5 | import sys
6 | import os
7 | import streamlit as st
8 | import pandas as pd
9 | import json
10 | import copy
11 |
12 | sys.path.insert(1, '.' + os.sep)
13 | from BenchmarkDatasetCreator_app import help_dictionary as hd
14 | from BenchmarkDatasetCreator import dataset, folders, metadata
15 |
16 |
17 | # Titles
18 | st.set_page_config(
19 | page_title='Benchmark Dataset Creator: Dataset',
20 | )
21 | st.title('Benchmark Dataset Creator')
22 | st.header('Create benchmark dataset')
23 |
# Retrieve data from previous pages; each guard sends the user back to the
# page whose output is missing from the session state.
if not hasattr(st.session_state, 'export_folder_dictionary'):
    st.error('Project information missing')
    link_to_project = "pages" + os.sep + "1_Project_creator.py"
    st.page_link(link_to_project, label=":white[Go to Project creator]", icon="➡️")

# BUG FIX: this branch previously re-tested 'export_folder_dictionary'
# (a copy-paste of the first condition), so the 'Metadata missing' message
# could never be shown; it must test the metadata page's output instead.
elif not hasattr(st.session_state, 'original_data_dictionary'):
    st.error('Metadata missing')
    link_to_project = "pages" + os.sep + "2_Metadata_creator.py"
    st.page_link(link_to_project, label=":white[Go to Metadata creator]", icon="➡️")
34 |
35 | else:
36 | export_folder_dictionary = st.session_state.export_folder_dictionary
37 | original_data_dictionary = st.session_state.original_data_dictionary
38 |
39 | # Show the info on the sidebar
40 | st.sidebar.subheader('Project settings')
41 | st.sidebar.write('Export folder')
42 | st.sidebar.success(export_folder_dictionary['Export folder'])
43 | st.sidebar.write('Project ID')
44 | st.sidebar.success(export_folder_dictionary['Project ID'])
45 | st.sidebar.write('Deployment ID')
46 | st.sidebar.success(export_folder_dictionary['Deployment ID'])
47 | st.sidebar.write('Metadata file')
48 | st.sidebar.success(export_folder_dictionary['Metadata file'])
49 |
50 | # Initialize the data saving variables
51 | label_key = []
52 | export_settings = {}
53 |
54 | # TODO: Continue editing the species list csv
55 | # TODO: have all of the text in a language-specific file -> https://phrase.com/blog/posts/translate-python-gnu-gettext/
56 | # TODO: Finalize this piece of code with the new functions
57 | # TODO: add BDC info to the metadata
58 | # could be a solution
59 |
60 | # User-defined export settings dictionary
61 | if st.session_state.stage >= 9:
62 | st.subheader('Export settings selection')
63 |
64 | # Needed variables
65 | authorized_user_fs = ['1 kHz', '2 kHz', '8 kHz', '16 kHz', '32 kHz', '48 kHz',
66 | '96 kHz', '192 kHz', '256 kHz', '384 kHz', '500 kHz']
67 | authorized_user_bit_depth = ['8 Bits', '16 Bits', '24 bits']
68 |
69 | export_settings_user_input = {
70 | #'Original project name':
71 | # st.text_input(
72 | # 'Original project name',
73 | # value="e.g., 2013_UnivMD_Maryland_71485_MD02",
74 | # type="default",
75 | # help="This entry will be used to keep track of the origin of "
76 | # "the data, as a part of the folder architecture and file naming."
77 | # "please do not end this entry by / or \ and avoid spaces",
78 | # label_visibility="visible"),
79 |
80 | 'Audio duration (s)':
81 | st.slider(
82 | 'Audio duration (min)',
83 | min_value=1, max_value=60, value=10, step=1, format='%i',
84 | help=hd.export['Digital sampling']['Audio duration (s)'],
85 | label_visibility="visible") * 60,
86 |
87 | 'fs (Hz)':
88 | st.selectbox(
89 | 'Sampling Frequency', authorized_user_fs,
90 | index=5,
91 | help=hd.export['Digital sampling']['fs (Hz)'],
92 | label_visibility="visible"),
93 |
94 | 'Bit depth':
95 | st.selectbox(
96 | 'Bit depth', authorized_user_bit_depth,
97 | index=2,
98 | help=hd.export['Digital sampling']['Bit depth'],
99 | label_visibility="visible"),
100 |
101 | 'Export label':
102 | st.text_input(
103 | 'Export label',
104 | value="Tags",
105 | type="default",
106 | help=hd.export['Selections']['Export label'],
107 | label_visibility="visible"),
108 |
109 | 'Split export selections':
110 | st.toggle(
111 | 'Split export selections',
112 | value=False,
113 | help=hd.export['Selections']['Split export selections']['General'],
114 | label_visibility="visible")}
115 |
116 | # User-chosen split output
117 | if export_settings_user_input['Split export selections']:
118 | export_settings_user_input['Split export selections'] = [
119 | export_settings_user_input['Split export selections'],
120 | st.number_input(
121 | 'Minimum duration (s)',
122 | value=float(1.0),
123 | min_value=float(0),
124 | max_value=float(
125 | export_settings_user_input[
126 | 'Audio duration (s)']),
127 | format='%.1f',
128 | step=0.1,
129 | help=hd.export['Split export selections']['Minimum duration (s)'],
130 | label_visibility="visible")
131 | ]
132 | else:
133 | export_settings_user_input['Split export selections'] = [
134 | export_settings_user_input['Split export selections'], 0]
135 |
136 |
137 | st.button('Done', help=None, on_click=metadata.set_state, args=[10])
138 |
139 | if st.session_state.stage >= 10:
140 | # 1) continued, Entries in the correct format
141 | # Create export_settings based on the user input:
142 | export_settings = {
143 | 'Project ID': export_folder_dictionary['Project ID'],
144 | 'Deployment ID': export_folder_dictionary['Deployment ID'],
145 | 'Method': hd.benchmark_creator_info['Method'],
146 | 'Signal Processing': hd.benchmark_creator_info['Signal Processing'],
147 | 'Digital sampling': {
148 | 'Audio duration (s)': export_settings_user_input['Audio duration (s)'],
149 | },
150 |
151 | 'Selections': {
152 | 'Export label': export_settings_user_input['Export label'],
153 | 'Split export selections': export_settings_user_input['Split export selections'],
154 | },
155 |
156 | 'Export folders': {
157 | 'Export folder': export_folder_dictionary['Export folder'],
158 | 'Audio export folder': export_folder_dictionary['Audio export folder'],
159 | 'Annotation export folder': export_folder_dictionary['Annotation export folder'],
160 | 'Metadata folder': export_folder_dictionary['Metadata folder'],
161 | 'Metadata file': export_folder_dictionary['Metadata file'],
162 | 'Annotation CSV file': export_folder_dictionary['Annotation CSV file'],
163 | 'Audio-Seltab Map CSV file': export_folder_dictionary['Audio-Seltab Map CSV file']
164 | },
165 | }
166 |
167 | # Write fs in the correct format (str to num)
168 | fs_wanted = [1, 2, 8, 16, 32, 48, 96, 192, 256, 384, 500]
169 | export_settings['Digital sampling']['fs (Hz)'] = \
170 | fs_wanted[authorized_user_fs.index(export_settings_user_input['fs (Hz)'])] * 1000
171 |
172 | # Write fs in the correct format (str to num)
173 | bit_depth_wanted = [8, 16, 24]
174 | export_settings['Digital sampling']['Bit depth'] = \
175 | bit_depth_wanted[authorized_user_bit_depth.index(export_settings_user_input['Bit depth'])]
176 |
177 | # 3) Run check on the user-defined entries and show output
178 | output = st.empty()
179 | with folders.st_capture(output.code):
180 | dataset.check_export_settings(export_settings)
181 |
182 | st.subheader('Load selections')
183 | # # User-defined path to selection table(s)
184 | selection_table_path = \
185 | st.text_input(
186 | 'Path to a selection table or selection table folder',
187 | value="e.g., SelectionTable/MD02_truth_selections.txt",
188 | type="default",
189 | help=hd.export['Selections']['Path'],
190 | label_visibility="visible")
191 |
192 | # 4) Load selection table and show output
193 | output = st.empty()
194 | with folders.st_capture(output.code):
195 | selection_table_df = dataset.load_selection_table(selection_table_path)
196 |
197 | # 5) Run dataset.check_selection_tab and show output of the function
198 | output = st.empty()
199 | with folders.st_capture(output.code):
200 | dataset.check_selection_tab(selection_table_path)
201 |
202 | # 6) Show selection table
203 | col3, col4 = st.columns([3, 1])
204 | col3.subheader('Uploaded Selection table')
205 | if not selection_table_df.empty:
206 | col3.dataframe(selection_table_df)
207 |
208 | # 7) Ask for user-defined label key, should be in the Selection table keys displayed above
209 | col4.subheader('Label')
210 | label_key = \
211 | col4.text_input(
212 | 'Selection table label',
213 | value="e.g., Tags",
214 | type="default",
215 | help=hd.export['Selections']['Label'],
216 | label_visibility="visible",
217 | on_change=metadata.set_state, args=[11]),
218 |
219 | if st.session_state.stage >= 11:
220 | label_key = label_key[0]
221 |
222 | # 8) Remove duplicates (e.g., if we have both the spectrogram and waveform view)
223 | selection_table_df.drop_duplicates(subset='Begin Time (s)', keep="last")
224 |
225 | # 9) Estimate the size of the dataset and show output
226 | st.subheader('Estimate Benchmark Dataset size')
227 | with st.spinner("Estimating the size of the Benchmark dataset..."):
228 | output = st.empty()
229 | with folders.st_capture(output.code):
230 | dataset.benchmark_size_estimator(selection_table_df, export_settings, label_key)
231 |
232 | # 10) Check & update labels
233 | st.subheader('Edit labels (Optional)')
234 | # Get a list of unique labels from the selection table
235 | unique_labels = selection_table_df[label_key].unique()
236 |
237 | # Create a dataframe
238 | remap_label_df = pd.DataFrame({'Original labels': unique_labels,
239 | 'New labels': unique_labels})
240 | # Show dataframe
241 | col5, col6 = st.columns([1, 1.5])
242 | new_labels_df = \
243 | col5.data_editor(
244 | remap_label_df,
245 | num_rows="fixed",
246 | disabled=["Original labels"],
247 | hide_index=True)
248 | col6.write(hd.export['Selections']['Label editor']['Help'])
249 | col6.image(
250 | 'docs/illustrations/method_schematicV2_zoom.png',
251 | caption=None, width=None, use_column_width=True,
252 | clamp=False,
253 | channels="RGB", output_format="auto")
254 |
255 | col6.write(hd.export['Selections']['Label editor']['Label list'])
256 |
257 | # Show button for creating Benchmark dataset
258 | col6.button('Continue', help=None, on_click=metadata.set_state, args=[12])
259 |
260 | if st.session_state.stage >= 12:
261 |
262 | # Show button for creating Benchmark dataset
263 | st.button('Create Benchmark Dataset', help=None, on_click=metadata.set_state, args=[13])
264 |
265 | if st.session_state.stage >= 13:
266 | # 11) Swap the labels
267 | # We want labels in a dictionary format with Key (old label): Value (new label)
268 | new_labels_dict = new_labels_df.set_index('Original labels')['New labels'].to_dict()
269 |
270 | # Update the selection table
271 | selection_table_df_updated = dataset.update_labels(selection_table_df, new_labels_dict, label_key)
272 |
273 | # Add the new labels to the Metadata dictionary
274 | export_settings['Annotations'] = {
275 | 'LabelKey': label_key,
276 | 'Used Label List': list(new_labels_dict.values()),
277 | 'Standard': hd.benchmark_creator_info['Annotations']['Standard'],
278 | }
279 |
280 | # 12) Write the metadata
281 | dict_oj = copy.deepcopy(original_data_dictionary)
282 | dict_export = copy.deepcopy(export_settings)
283 |
284 | metadata_save = {
285 | 'Original data': metadata.transform_original_metadata_to_ASA_standard(dict_oj),
286 | 'Benchmarked data': metadata.transform_export_metadata_to_ASA_standard(dict_export)
287 | }
288 |
289 | with open(export_folder_dictionary['Metadata file'], 'w') as fp:
290 | json.dump(metadata_save, fp, indent=4)
291 |
292 | # 13) Create the dataset
293 | with st.spinner("Creating the Benchmark dataset..."):
294 | dataset.benchmark_creator(selection_table_df_updated, export_settings, label_key)
295 |
296 | st.success('Benchmark dataset successfully created!')
297 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
2 | Public License
3 |
4 | By exercising the Licensed Rights (defined below), You accept and agree
5 | to be bound by the terms and conditions of this Creative Commons
6 | Attribution-NonCommercial-ShareAlike 4.0 International Public License
7 | ("Public License"). To the extent this Public License may be
8 | interpreted as a contract, You are granted the Licensed Rights in
9 | consideration of Your acceptance of these terms and conditions, and the
10 | Licensor grants You such rights in consideration of benefits the
11 | Licensor receives from making the Licensed Material available under
12 | these terms and conditions.
13 |
14 |
15 | Section 1 -- Definitions.
16 |
17 | a. Adapted Material means material subject to Copyright and Similar
18 | Rights that is derived from or based upon the Licensed Material
19 | and in which the Licensed Material is translated, altered,
20 | arranged, transformed, or otherwise modified in a manner requiring
21 | permission under the Copyright and Similar Rights held by the
22 | Licensor. For purposes of this Public License, where the Licensed
23 | Material is a musical work, performance, or sound recording,
24 | Adapted Material is always produced where the Licensed Material is
25 | synched in timed relation with a moving image.
26 |
27 | b. Adapter's License means the license You apply to Your Copyright
28 | and Similar Rights in Your contributions to Adapted Material in
29 | accordance with the terms and conditions of this Public License.
30 |
31 | c. BY-NC-SA Compatible License means a license listed at
32 | creativecommons.org/compatiblelicenses, approved by Creative
33 | Commons as essentially the equivalent of this Public License.
34 |
35 | d. Copyright and Similar Rights means copyright and/or similar rights
36 | closely related to copyright including, without limitation,
37 | performance, broadcast, sound recording, and Sui Generis Database
38 | Rights, without regard to how the rights are labeled or
39 | categorized. For purposes of this Public License, the rights
40 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
41 | Rights.
42 |
43 | e. Effective Technological Measures means those measures that, in the
44 | absence of proper authority, may not be circumvented under laws
45 | fulfilling obligations under Article 11 of the WIPO Copyright
46 | Treaty adopted on December 20, 1996, and/or similar international
47 | agreements.
48 |
49 | f. Exceptions and Limitations means fair use, fair dealing, and/or
50 | any other exception or limitation to Copyright and Similar Rights
51 | that applies to Your use of the Licensed Material.
52 |
53 | g. License Elements means the license attributes listed in the name
54 | of a Creative Commons Public License. The License Elements of this
55 | Public License are Attribution, NonCommercial, and ShareAlike.
56 |
57 | h. Licensed Material means the artistic or literary work, database,
58 | or other material to which the Licensor applied this Public
59 | License.
60 |
61 | i. Licensed Rights means the rights granted to You subject to the
62 | terms and conditions of this Public License, which are limited to
63 | all Copyright and Similar Rights that apply to Your use of the
64 | Licensed Material and that the Licensor has authority to license.
65 |
66 | j. Licensor means the individual(s) or entity(ies) granting rights
67 | under this Public License.
68 |
69 | k. NonCommercial means not primarily intended for or directed towards
70 | commercial advantage or monetary compensation. For purposes of
71 | this Public License, the exchange of the Licensed Material for
72 | other material subject to Copyright and Similar Rights by digital
73 | file-sharing or similar means is NonCommercial provided there is
74 | no payment of monetary compensation in connection with the
75 | exchange.
76 |
77 | l. Share means to provide material to the public by any means or
78 | process that requires permission under the Licensed Rights, such
79 | as reproduction, public display, public performance, distribution,
80 | dissemination, communication, or importation, and to make material
81 | available to the public including in ways that members of the
82 | public may access the material from a place and at a time
83 | individually chosen by them.
84 |
85 | m. Sui Generis Database Rights means rights other than copyright
86 | resulting from Directive 96/9/EC of the European Parliament and of
87 | the Council of 11 March 1996 on the legal protection of databases,
88 | as amended and/or succeeded, as well as other essentially
89 | equivalent rights anywhere in the world.
90 |
91 | n. You means the individual or entity exercising the Licensed Rights
92 | under this Public License. Your has a corresponding meaning.
93 |
94 |
95 | Section 2 -- Scope.
96 |
97 | a. License grant.
98 |
99 | 1. Subject to the terms and conditions of this Public License,
100 | the Licensor hereby grants You a worldwide, royalty-free,
101 | non-sublicensable, non-exclusive, irrevocable license to
102 | exercise the Licensed Rights in the Licensed Material to:
103 |
104 | a. reproduce and Share the Licensed Material, in whole or
105 | in part, for NonCommercial purposes only; and
106 |
107 | b. produce, reproduce, and Share Adapted Material for
108 | NonCommercial purposes only.
109 |
110 | 2. Exceptions and Limitations. For the avoidance of doubt, where
111 | Exceptions and Limitations apply to Your use, this Public
112 | License does not apply, and You do not need to comply with
113 | its terms and conditions.
114 |
115 | 3. Term. The term of this Public License is specified in Section
116 | 6(a).
117 |
118 | 4. Media and formats; technical modifications allowed. The
119 | Licensor authorizes You to exercise the Licensed Rights in
120 | all media and formats whether now known or hereafter created,
121 | and to make technical modifications necessary to do so. The
122 | Licensor waives and/or agrees not to assert any right or
123 | authority to forbid You from making technical modifications
124 | necessary to exercise the Licensed Rights, including
125 | technical modifications necessary to circumvent Effective
126 | Technological Measures. For purposes of this Public License,
127 | simply making modifications authorized by this Section 2(a)
128 | (4) never produces Adapted Material.
129 |
130 | 5. Downstream recipients.
131 |
132 | a. Offer from the Licensor -- Licensed Material. Every
133 | recipient of the Licensed Material automatically
134 | receives an offer from the Licensor to exercise the
135 | Licensed Rights under the terms and conditions of this
136 | Public License.
137 |
138 | b. Additional offer from the Licensor -- Adapted Material.
139 | Every recipient of Adapted Material from You
140 | automatically receives an offer from the Licensor to
141 | exercise the Licensed Rights in the Adapted Material
142 | under the conditions of the Adapter's License You apply.
143 |
144 | c. No downstream restrictions. You may not offer or impose
145 | any additional or different terms or conditions on, or
146 | apply any Effective Technological Measures to, the
147 | Licensed Material if doing so restricts exercise of the
148 | Licensed Rights by any recipient of the Licensed
149 | Material.
150 |
151 | 6. No endorsement. Nothing in this Public License constitutes or
152 | may be construed as permission to assert or imply that You
153 | are, or that Your use of the Licensed Material is, connected
154 | with, or sponsored, endorsed, or granted official status by,
155 | the Licensor or others designated to receive attribution as
156 | provided in Section 3(a)(1)(A)(i).
157 |
158 | b. Other rights.
159 |
160 | 1. Moral rights, such as the right of integrity, are not
161 | licensed under this Public License, nor are publicity,
162 | privacy, and/or other similar personality rights; however, to
163 | the extent possible, the Licensor waives and/or agrees not to
164 | assert any such rights held by the Licensor to the limited
165 | extent necessary to allow You to exercise the Licensed
166 | Rights, but not otherwise.
167 |
168 | 2. Patent and trademark rights are not licensed under this
169 | Public License.
170 |
171 | 3. To the extent possible, the Licensor waives any right to
172 | collect royalties from You for the exercise of the Licensed
173 | Rights, whether directly or through a collecting society
174 | under any voluntary or waivable statutory or compulsory
175 | licensing scheme. In all other cases the Licensor expressly
176 | reserves any right to collect such royalties, including when
177 | the Licensed Material is used other than for NonCommercial
178 | purposes.
179 |
180 |
181 | Section 3 -- License Conditions.
182 |
183 | Your exercise of the Licensed Rights is expressly made subject to the
184 | following conditions.
185 |
186 | a. Attribution.
187 |
188 | 1. If You Share the Licensed Material (including in modified
189 | form), You must:
190 |
191 | a. retain the following if it is supplied by the Licensor
192 | with the Licensed Material:
193 |
194 | i. identification of the creator(s) of the Licensed
195 | Material and any others designated to receive
196 | attribution, in any reasonable manner requested by
197 | the Licensor (including by pseudonym if
198 | designated);
199 |
200 | ii. a copyright notice;
201 |
202 | iii. a notice that refers to this Public License;
203 |
204 | iv. a notice that refers to the disclaimer of
205 | warranties;
206 |
207 | v. a URI or hyperlink to the Licensed Material to the
208 | extent reasonably practicable;
209 |
210 | b. indicate if You modified the Licensed Material and
211 | retain an indication of any previous modifications; and
212 |
213 | c. indicate the Licensed Material is licensed under this
214 | Public License, and include the text of, or the URI or
215 | hyperlink to, this Public License.
216 |
217 | 2. You may satisfy the conditions in Section 3(a)(1) in any
218 | reasonable manner based on the medium, means, and context in
219 | which You Share the Licensed Material. For example, it may be
220 | reasonable to satisfy the conditions by providing a URI or
221 | hyperlink to a resource that includes the required
222 | information.
223 | 3. If requested by the Licensor, You must remove any of the
224 | information required by Section 3(a)(1)(A) to the extent
225 | reasonably practicable.
226 |
227 | b. ShareAlike.
228 |
229 | In addition to the conditions in Section 3(a), if You Share
230 | Adapted Material You produce, the following conditions also apply.
231 |
232 | 1. The Adapter's License You apply must be a Creative Commons
233 | license with the same License Elements, this version or
234 | later, or a BY-NC-SA Compatible License.
235 |
236 | 2. You must include the text of, or the URI or hyperlink to, the
237 | Adapter's License You apply. You may satisfy this condition
238 | in any reasonable manner based on the medium, means, and
239 | context in which You Share Adapted Material.
240 |
241 | 3. You may not offer or impose any additional or different terms
242 | or conditions on, or apply any Effective Technological
243 | Measures to, Adapted Material that restrict exercise of the
244 | rights granted under the Adapter's License You apply.
245 |
246 |
247 | Section 4 -- Sui Generis Database Rights.
248 |
249 | Where the Licensed Rights include Sui Generis Database Rights that
250 | apply to Your use of the Licensed Material:
251 |
252 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
253 | to extract, reuse, reproduce, and Share all or a substantial
254 | portion of the contents of the database for NonCommercial purposes
255 | only;
256 |
257 | b. if You include all or a substantial portion of the database
258 | contents in a database in which You have Sui Generis Database
259 | Rights, then the database in which You have Sui Generis Database
260 | Rights (but not its individual contents) is Adapted Material,
261 | including for purposes of Section 3(b); and
262 |
263 | c. You must comply with the conditions in Section 3(a) if You Share
264 | all or a substantial portion of the contents of the database.
265 |
266 | For the avoidance of doubt, this Section 4 supplements and does not
267 | replace Your obligations under this Public License where the Licensed
268 | Rights include other Copyright and Similar Rights.
269 |
270 |
271 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
272 |
273 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
274 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
275 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
276 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
277 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
278 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
279 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
280 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
281 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
282 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
283 |
284 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
285 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
286 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
287 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
288 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
289 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
290 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
291 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
292 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
293 |
294 | c. The disclaimer of warranties and limitation of liability provided
295 | above shall be interpreted in a manner that, to the extent
296 | possible, most closely approximates an absolute disclaimer and
297 | waiver of all liability.
298 |
299 |
300 | Section 6 -- Term and Termination.
301 |
302 | a. This Public License applies for the term of the Copyright and
303 | Similar Rights licensed here. However, if You fail to comply with
304 | this Public License, then Your rights under this Public License
305 | terminate automatically.
306 |
307 | b. Where Your right to use the Licensed Material has terminated under
308 | Section 6(a), it reinstates:
309 |
310 | 1. automatically as of the date the violation is cured, provided
311 | it is cured within 30 days of Your discovery of the
312 | violation; or
313 |
314 | 2. upon express reinstatement by the Licensor.
315 |
316 | For the avoidance of doubt, this Section 6(b) does not affect any
317 | right the Licensor may have to seek remedies for Your violations
318 | of this Public License.
319 |
320 | c. For the avoidance of doubt, the Licensor may also offer the
321 | Licensed Material under separate terms or conditions or stop
322 | distributing the Licensed Material at any time; however, doing so
323 | will not terminate this Public License.
324 |
325 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
326 | License.
327 |
328 |
329 | Section 7 -- Other Terms and Conditions.
330 |
331 | a. The Licensor shall not be bound by any additional or different
332 | terms or conditions communicated by You unless expressly agreed.
333 |
334 | b. Any arrangements, understandings, or agreements regarding the
335 | Licensed Material not stated herein are separate from and
336 | independent of the terms and conditions of this Public License.
337 |
338 |
339 | Section 8 -- Interpretation.
340 |
341 | a. For the avoidance of doubt, this Public License does not, and
342 | shall not be interpreted to, reduce, limit, restrict, or impose
343 | conditions on any use of the Licensed Material that could lawfully
344 | be made without permission under this Public License.
345 |
346 | b. To the extent possible, if any provision of this Public License is
347 | deemed unenforceable, it shall be automatically reformed to the
348 | minimum extent necessary to make it enforceable. If the provision
349 | cannot be reformed, it shall be severed from this Public License
350 | without affecting the enforceability of the remaining terms and
351 | conditions.
352 |
353 | c. No term or condition of this Public License will be waived and no
354 | failure to comply consented to unless expressly agreed to by the
355 | Licensor.
356 |
357 | d. Nothing in this Public License constitutes or may be interpreted
358 | as a limitation upon, or waiver of, any privileges and immunities
359 | that apply to the Licensor or You, including from the legal
360 | processes of any jurisdiction or authority.
361 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Benchmark Dataset Creator
2 | Léa Bouffaut, Ph.D. -- K. Lisa Yang Center for Conservation Bioacoustics, Cornell University
3 |
4 | lea.bouffaut@cornell.edu
5 |
6 | ### Motivations and objectives
7 |
8 | Many bioacoustic projects are sitting on a goldmine of already annotated datasets. We want to create a standardized pipeline for creating, storing, sharing and
9 | using data that is flexible and repeatable to train and test AI models for different applications. More details in https://www.overleaf.com/read/yfcgvngmwfbs#e349e7
10 |
11 | This notebook aims to create a benchmark dataset and standardize the following:
12 | * Metadata following ASA standards
13 | * File duration
14 | * Sampling frequency
15 | * Mono channel
16 | * Bit depth
17 | * File name format
18 | * Selection table fields
19 |
20 | It also gives the option to change labels, e.g., to match our standardized label format.
21 |
22 |
23 | It does NOT:
24 | * Filter the audio input beyond what is needed for resampling
25 | * Normalize the audio file amplitude
26 |
27 |
28 | For example, this schematic view presents (top) a Raven Pro project with a selection table associated with several audio files of different lengths, (bottom) the standardized benchmark clips, and associated annotations. Note that annotations at the junction between two export files and those in the remaining audio, which are too short in comparison with the selected export audio file duration, are ignored.
29 | ![Schematic view of the benchmark dataset creation process](docs/illustrations/method_schematicV2.png)
30 |
31 | ### Necessary information in selection tables
32 | This project uses Raven Pro 1.6 selection tables. Selection tables can either be associated with (1) a single audio file or (2) multiple audio files.
33 | Selection tables, by default, contain the necessary information to draw a time-frequency box around a call, please make sure to have the required following fields, including an annotation column and variables that enable the code to retrieve the audio files:
34 | * 'Begin Time (s)'
35 | * 'End Time (s)'
36 | * 'Low Frequency (Hz)'
37 | * 'High Frequency (Hz)'
38 | * 'Begin Path'
39 | * 'File Offset (s)'
40 | * 'Label'/'Tags'/Other
41 |
42 | We will consider and test that all selection tables should contain all of the aforementioned fields, with a user-defined field for the label column. Note that 'Begin Path' should work from your current workstation (Unix and Windows mount servers and write paths differently)!
43 |
44 | ### Labels
45 | The following format is our suggested label format:
46 |
47 | `<species>.<location>.<calltype>`
48 |
49 | Where
50 | * `<species>` is a 6-letter combination of the first letters of each word,
51 | * `<location>` a 4-letter combination describing the geographical location of the recorder,
52 | - If underwater, give cardinal direction and abbreviation of the ocean/sea,
53 | - If on land, the first two letters specify the region, and the last two letters are the ISO 3166 country codes (see https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#ZZ),
54 |
55 | * `<calltype>` free-format vocalization descriptor.
56 |
57 |
58 | ### Outputs
59 | Please refer to the [User-defined parameters](#User-defined-parameters) section to see the details on the output folder architecture.
60 | > [!NOTE]
61 | >This notebook will assist you in creating:
62 | > * Benchmark sound files based on user-input specifications
63 | > * a corresponding Raven selection table for each sound file,
64 | > * a two-column file-matching CSV (as used for Koogu) and,
65 | > * a recap annotation CSV file that will match previous datasets, e.g., https://zenodo.org/records/7525805
66 |
67 | ## How to get started
68 | The Benchmark Dataset Creator is Python-based code that can be run as an app (supported by Streamlit) that opens in the browser. The dataset-creating functions can also run directly in Python code (currently in dev; see user-defined parameters below).
69 |
70 | ### General workflow to install the app
71 | To get started, we suggest the following steps:
72 | 1) Create a work folder
73 | 2) Create and activate a virtual environment in this folder with Python 3.9
74 | 3) Download this repository and unzip it in the work folder; move the contents of the unzipped folder to the same level as your Venv.
75 | 4) Install the packages listed in requirements.txt in the virtual environment
76 | 5) To run the app: streamlit run BenchmarkDatasetCreator_app/Home.py
77 | 6) To close the app, close the terminal or ctrl+c
78 |
79 | ### Install and run the app on a Mac
80 | On my Mac (Macbook Pro 2019, Intel) the series of commands to do these steps:
81 | 1) Open a terminal at the folder (right click > New Terminal at Folder)
82 | 2) Follow the instructions of https://mnzel.medium.com/how-to-activate-python-venv-on-a-mac-a8fa1c3cb511 to create a virtual environment.
83 | On my Mac, I need to use:
84 | > `pip3.9 install --user virtualenv`
85 | > `python3.9 -m venv venv`
86 | 3) Activate the virtual environment
87 | > `source venv/bin/activate`
88 | Now you should see `(venv)` on the left of your terminal (don't close it).
89 |
90 | 4) Download the BenchmarkDatasetCreator from this Github Repository and place its content in your work folder, at the same level as the venv folder
91 | https://github.com/leabouffaut/BenchmarkDatasetCreator/
92 |
93 | 5) In the `(venv)` environment, you'll need to install the required packages by entering the following:
94 | > `python3.9 -m pip install -r requirements.txt`
95 |
96 | 6) Now you're all set! Start the application using:
97 | > `streamlit run BenchmarkDatasetCreator_app/Home.py`
98 |
99 | To stop the app, close the terminal or ctrl+c
100 |
101 |
102 | After the first installation, to reopen the app:
103 | 1) Activate the virtual environment at the working folder
104 | > `source venv/bin/activate`
105 | 2) Launch the App:
106 | > `streamlit run BenchmarkDatasetCreator_app/Home.py`
107 |
108 |
109 |
110 |
111 | ## User-defined parameters (to run the Benchmark Dataset Creator in a .py file or Jupyter notebook (in dev))
112 | ```ruby
113 | export_settings = {
114 | 'Original project name': '2021_CLOCCB_BermudaPlantBank_S1105',
115 | 'Audio duration (s)': 300,
116 | 'fs (Hz)': 8000,
117 | 'Bit depth': 24,
118 | 'Export label': 'Tags',
119 | 'Split export selections': [True, 1],
120 | 'Export folder': 'benchmark_data'
121 | }
122 | ```
123 |
124 | The field illustrated above is a series of user-defined parameters in the form of a [Python dictionary](https://realpython.com/python-dicts/#defining-a-dictionary) (surrounded by curly braces, entries separated by commas, typical entry: `'key word': 'value'`) to create the Benchmark dataset, note that the following fields can be filled in any specific order but must all be present:
125 | * `Original project name`, helps you keep track of the origin of the data, should be written as a string of characters, which in Python is between quotes `'Project'`. This code will create the folder architecture, please do not end this entry by "/" or "\" and avoid spaces " ".
126 | * `Audio duration (s)` is the chosen export audio file duration for the Benchmark dataset in seconds. Our recommendation is to set it to encompass the vocalization(s) of interest but also some context. What is the minimum duration that would represent the signal's repetition or call/cue rate (with several annotations)?
127 | * `fs (Hz)` is the sampling frequency in Hz, to be set at minima at double the maximum frequency of the signals of interest. If relevant, BirdNET uses fs = 48 kHz (see: [BirdNET Analyzer technical details](https://github.com/kahst/BirdNET-Analyzer?tab=readme-ov-file#technical-details))
128 | * `Bit depth` determines the number of possible amplitude values we can record for each audio sample; for SWIFT units, it is set to 16 bits and for Rockhopper to 24 bits.
129 | * `Export label` defines the name of the label column for the created export Raven selection tables
130 | * `Split export selections` specifies the method when a selection is at the junction between two export audio files if it should be split (True) or not (False). In the case the split is selected, a second value should be entered to specify the minimum duration to report an annotation in the selection table in seconds, e.g., `[True, 3]` or `[False, ]`. If you have hundreds or even tens of selections of your target signals, we would recommend to set this parameter to false. This parameter can be handy if, for example, you selected "long" periods of background noise (long compared to the annotations of signals of interest) that could be split across two audio export files. In that case, you can set the minimum duration to something longer than your signals of interest or to 3 s if you plan to work with BirdNET. Another use case is if you have a very tight selection around your signal of interest (in time) and want even a very small portion of that signal to be labeled.
131 | * `Export folder` is where the data will be saved following this structure (example where `<Original project name>` is 2013_UnivMD_Maryland_71485_MD02)
132 | ```
133 | Export_folder/
134 | │
135 | └───2013_UnivMD_Maryland_71485_MD02/
136 | │ 2013_UnivMD_Maryland_71485_MD02_metadata.json
137 | │ 2013_UnivMD_Maryland_71485_MD02_annotations.csv
138 | │ 2013_UnivMD_Maryland_71485_MD02_audio_seltab_map.csv
139 | │
140 | └───audio/
141 | │ │ ____.flac
142 | │ │ 2013_UnivMD_Maryland_71485_MD02_71485MD02_002K_M11_multi_20150626_031500Z_2kHz_ch03_0600s.flac
143 | │ │ ...
144 | │
145 | └───annotations/
146 | │ ____.txt
147 | │ 2013_UnivMD_Maryland_71485_MD02_71485MD02_002K_M11_multi_20150626_031500Z_2kHz_ch03_0600s.txt
148 | │ ...
149 | ```
150 |
151 |
--------------------------------------------------------------------------------
/SelectionTable/MD02_truth_selections.txt:
--------------------------------------------------------------------------------
1 | Selection View Channel Begin Time (s) End Time (s) Low Freq (Hz) High Freq (Hz) Begin Date Begin Hour Begin Path Begin File File Offset (s) Delta Time (s) SNR NIST Quick (dB) Tag Notes VF2
2 | 1 Spectrogram 1 11 197575.630000000 197576.570000000 78.100 246.900 54 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_064500Z.aif 71485MD02_002K_M11_multi_20150902_064500Z.aif 475.6300 0.9400 16.19 NARW ? v
3 | 2 Spectrogram 1 11 197616.450000000 197617.100000000 80.500 201.100 54 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_064500Z.aif 71485MD02_002K_M11_multi_20150902_064500Z.aif 516.4500 0.6500 15.92 NARW added by KBH v
4 | 3 Spectrogram 1 11 197659.170000000 197660.190000000 109.400 303.100 54 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_064500Z.aif 71485MD02_002K_M11_multi_20150902_064500Z.aif 559.1700 1.0200 14.36 NARW ? v
5 | 4 Spectrogram 1 11 197679.540000000 197680.590000000 118.800 293.800 54 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_064500Z.aif 71485MD02_002K_M11_multi_20150902_064500Z.aif 579.5400 1.0500 14.90 NARW ? v
6 | 5 Spectrogram 1 11 198612.520000000 198613.660000000 96.900 278.100 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_070000Z.aif 71485MD02_002K_M11_multi_20150902_070000Z.aif 612.5200 1.1400 16.48 NARW ? v
7 | 6 Spectrogram 1 11 199420.070000000 199420.860000000 114.900 275.900 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_071500Z.aif 71485MD02_002K_M11_multi_20150902_071500Z.aif 520.0700 0.7900 12.39 NARW Added by KBH v
8 | 7 Spectrogram 1 11 199442.540000000 199443.350000000 97.700 241.400 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_071500Z.aif 71485MD02_002K_M11_multi_20150902_071500Z.aif 542.5400 0.8100 15.06 NARW Added by KBH v
9 | 8 Spectrogram 1 11 199463.930000000 199464.620000000 114.900 252.900 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_071500Z.aif 71485MD02_002K_M11_multi_20150902_071500Z.aif 563.9300 0.6900 13.09 NARW Added by KBH v
10 | 9 Spectrogram 1 11 200157.620000000 200158.620000000 120.700 287.400 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_073000Z.aif 71485MD02_002K_M11_multi_20150902_073000Z.aif 357.6200 1.0000 16.03 NARW Added by KBH v
11 | 10 Spectrogram 1 11 200181.930000000 200182.930000000 103.100 275.000 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_073000Z.aif 71485MD02_002K_M11_multi_20150902_073000Z.aif 381.9300 1.0000 18.49 NARW ? v
12 | 11 Spectrogram 1 11 200248.580000000 200249.490000000 78.100 253.100 55 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_073000Z.aif 71485MD02_002K_M11_multi_20150902_073000Z.aif 448.5800 0.9100 19.22 NARW ? v
13 | 12 Spectrogram 1 11 201797.360000000 201798.300000000 90.600 275.000 56 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_080000Z.aif 71485MD02_002K_M11_multi_20150902_080000Z.aif 197.3600 0.9400 17.03 NARW ? v
14 | 13 Spectrogram 1 11 239679.880000000 239681.500000000 98.000 223.000 66 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_183000Z.aif 71485MD02_002K_M11_multi_20150902_183000Z.aif 279.8800 1.6200 13.02 NARW added by KBH v
15 | 14 Spectrogram 1 11 239728.010000000 239729.120000000 114.900 296.900 66 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_183000Z.aif 71485MD02_002K_M11_multi_20150902_183000Z.aif 328.0100 1.1100 15.59 NARW ? v
16 | 15 Spectrogram 1 11 240391.100000000 240392.390000000 90.600 265.600 66 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150902/71485MD02_002K_M11_multi_20150902_184500Z.aif 71485MD02_002K_M11_multi_20150902_184500Z.aif 91.1000 1.2900 11.94 NARW ? v
17 | 16 Spectrogram 1 3 12422.497000000 12425.527000000 75.000 231.200 3 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150626/71485MD02_002K_M11_multi_20150626_031500Z.aif 71485MD02_002K_M11_multi_20150626_031500Z.aif 722.4970 3.0300 10.75 NARW v
18 | 17 Spectrogram 1 8 26923.640000000 26925.266000000 105.300 236.800 7 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150626/71485MD02_002K_M11_multi_20150626_071500Z.aif 71485MD02_002K_M11_multi_20150626_071500Z.aif 823.6400 1.6260 12.59 NARW v
19 | 18 Spectrogram 1 5 26923.751000000 26925.672000000 46.100 243.400 7 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150626/71485MD02_002K_M11_multi_20150626_071500Z.aif 71485MD02_002K_M11_multi_20150626_071500Z.aif 823.7510 1.9210 12.01 NARW v
20 | 19 Spectrogram 1 8 30970.751000000 30972.266000000 65.800 315.800 8 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150626/71485MD02_002K_M11_multi_20150626_083000Z.aif 71485MD02_002K_M11_multi_20150626_083000Z.aif 370.7510 1.5150 12.36 NARW v
21 | 20 Spectrogram 1 2 96267.465000000 96269.941000000 98.700 289.500 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_023000Z.aif 71485MD02_002K_M11_multi_20150724_023000Z.aif 867.4650 2.4760 16.05 NARW v
22 | 21 Spectrogram 1 9 96272.675000000 96274.152000000 92.100 250.000 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_023000Z.aif 71485MD02_002K_M11_multi_20150724_023000Z.aif 872.6750 1.4770 14.55 NARW v
23 | 22 Spectrogram 1 8 96277.219000000 96278.512000000 92.100 263.200 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_023000Z.aif 71485MD02_002K_M11_multi_20150724_023000Z.aif 877.2190 1.2930 10.94 NARW v
24 | 23 Spectrogram 1 2 96365.675000000 96367.226000000 105.300 223.700 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_024500Z.aif 71485MD02_002K_M11_multi_20150724_024500Z.aif 65.6750 1.5510 14.99 NARW v
25 | 24 Spectrogram 1 9 96370.108000000 96372.029000000 72.400 256.600 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_024500Z.aif 71485MD02_002K_M11_multi_20150724_024500Z.aif 70.1080 1.9210 13.36 NARW v
26 | 25 Spectrogram 1 6 96374.135000000 96375.613000000 98.700 236.800 26 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_024500Z.aif 71485MD02_002K_M11_multi_20150724_024500Z.aif 74.1350 1.4780 12.53 NARW v
27 | 26 Spectrogram 1 2 99651.621000000 99653.911000000 78.900 223.700 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 651.6210 2.2900 15.56 NARW v
28 | 27 Spectrogram 1 9 99657.162000000 99658.825000000 98.700 269.700 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 657.1620 1.6630 11.47 NARW v
29 | 28 Spectrogram 1 2 99674.527000000 99676.670000000 85.500 197.400 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 674.5270 2.1430 18.25 NARW v
30 | 29 Spectrogram 1 9 99679.404000000 99681.140000000 92.100 223.700 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 679.4040 1.7360 13.42 NARW v
31 | 30 Spectrogram 1 2 99730.226000000 99732.111000000 78.900 263.200 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 730.2260 1.8850 17.15 NARW v
32 | 31 Spectrogram 1 9 99735.177000000 99736.655000000 52.600 223.700 27 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_033000Z.aif 71485MD02_002K_M11_multi_20150724_033000Z.aif 735.1770 1.4780 14.68 NARW v
33 | 32 Spectrogram 1 2 101636.014000000 101637.086000000 59.200 210.500 28 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_040000Z.aif 71485MD02_002K_M11_multi_20150724_040000Z.aif 836.0140 1.0720 19.00 NARW v
34 | 33 Spectrogram 1 9 101640.596000000 101641.815000000 59.200 236.800 28 /Volumes/ag-clo-repnas5.ad.cornell.edu-1/projects/2013_UnivMD_Maryland_71485/Sounds/71485_MD02_002K_11CH_AIFF/002K_11CH_20150724/71485MD02_002K_M11_multi_20150724_040000Z.aif 71485MD02_002K_M11_multi_20150724_040000Z.aif 840.5960 1.2190 13.21 NARW v
35 |
--------------------------------------------------------------------------------
/docs/DependenciesMapping.txt:
--------------------------------------------------------------------------------
1 | Dependencies map:
2 |
3 | CreateBenchmarkDataset notebook:
4 | - check_export_settings
5 | - check_selection_tab
6 | - create_path
7 | - load_selection_table
8 | - benchmark_size_estimator
9 | - update_labels
10 | - benchmark_creator
11 |
12 | benchmark_creator function:
13 | - get_bitdepth(export_settings): This function is called to retrieve the bit depth from the export settings.
14 | - get_print_fs(fs_original): This function is called to format the original sampling frequency for file naming.
15 | - exports(export_settings, selection_table_af_df, save_sel_dict): This function is called to export audio and annotation files.
16 |
17 | benchmark_size_estimator function:
18 | - get_number_clips(unique_audiofiles, export_settings['Audio duration (s)']): This function is called to determine the number of clips based on the duration of audio files and export settings.
19 | - check_bitdepth(export_settings): This function is called to validate the bit depth specified in the export settings.
20 |
21 | exports function:
22 | - save_audioclip: This function is called to export the audio clip based on provided parameters.
23 | - write_selection_table: This function is called to write entries in the selection table file.
24 | - write_annotation_csv: This function is called to write annotations in a global CSV file.
25 | - map_audio_selection: This function is called to create a file association CSV.
26 |
27 |
28 | Modules Imported:
29 | librosa: Used for loading audio files.
30 | os.path: Used for manipulating file paths.
31 | numpy as np: Used for numerical operations.
32 | soundfile as sf: Used for writing audio files.
33 | pandas: Utilized for working with DataFrames.
--------------------------------------------------------------------------------
/docs/HowToInstall/HowToInstall_Mac.txt:
--------------------------------------------------------------------------------
1 |
2 | How to install on a Mac
3 |
 4 | 1) Open a terminal at the folder level where you want to work (right click > New Terminal at Folder)
5 | 2) Follow instructions of https://mnzel.medium.com/how-to-activate-python-venv-on-a-mac-a8fa1c3cb511 to create a virtual environment.
6 |
7 | On my Mac, I need to use:
8 | > pip3.9 install --user virtualenv
9 | > python3.9 -m venv venv
10 | 3) Activate the virtual environment
11 | > source venv/bin/activate
12 | Now you should see (venv) on the left of your terminal (don't close it).
13 |
14 | 4) Download the BenchmarkDatasetCreator from the Github Repository and place it in your work folder
15 | https://github.com/leabouffaut/BenchmarkDatasetCreator/
16 |
17 | 5) Move the content of BenchmarkDatasetCreator-main to the same level as your venv
 18 | 6) Back in the (venv) environment, install the required packages by entering the following (check that the folder name for the Benchmark Dataset Creator matches yours)
19 | > python3.9 -m pip install -r requirements.txt
20 |
 21 | 7) Now you're all set! Start the application using:
22 | > streamlit run BenchmarkDatasetCreator_app/Home.py
23 |
24 | To stop the app, close the terminal or ctrl+c
25 |
26 |
27 |
28 | After the first installation, to reopen the app:
29 | 1) Activate the virtual environment at the working folder
30 | > source venv/bin/activate
31 | 2) Launch the App:
32 | > streamlit run BenchmarkDatasetCreator_app/Home.py
33 |
--------------------------------------------------------------------------------
/docs/bioacoustics_species_list.txt:
--------------------------------------------------------------------------------
1 | Yang Center Bioacoustics species list:
2 | See here https://docs.google.com/spreadsheets/d/1ScxYST26QIGE2d_ovEI1NtyPDmpWeMHJJ2LEu4nFwOw/edit?usp=sharing (Cornell-restricted access, Editor privilege)
3 |
--------------------------------------------------------------------------------
/docs/illustrations/method_schematic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leabouffaut/BenchmarkDatasetCreator/bdb3f1f46056d9f8fe21e948d330a22506638d03/docs/illustrations/method_schematic.png
--------------------------------------------------------------------------------
/docs/illustrations/method_schematicV3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leabouffaut/BenchmarkDatasetCreator/bdb3f1f46056d9f8fe21e948d330a22506638d03/docs/illustrations/method_schematicV3.jpeg
--------------------------------------------------------------------------------
/docs/illustrations/method_schematicV2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leabouffaut/BenchmarkDatasetCreator/bdb3f1f46056d9f8fe21e948d330a22506638d03/docs/illustrations/method_schematicV2.png
--------------------------------------------------------------------------------
/docs/illustrations/method_schematicV2_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leabouffaut/BenchmarkDatasetCreator/bdb3f1f46056d9f8fe21e948d330a22506638d03/docs/illustrations/method_schematicV2_zoom.png
--------------------------------------------------------------------------------
/examples/CreateBenchmarkDataset.py:
--------------------------------------------------------------------------------
# Create Benchmark Dataset Python script
#
# Léa Bouffaut, Ph.D. -- K. Lisa Yang Center for Conservation Bioacoustics, Cornell University
# lea.bouffaut@cornell.edu
#
# e.g. runs in Pycharm
#
# End-to-end example: validates user settings, loads a Raven selection table,
# remaps its labels, and exports a benchmark dataset of audio clips +
# annotations via the BenchmarkDatasetCreator package.

import time

from BenchmarkDatasetCreator import dataset as bc

# User-defined export settings dictionary
export_settings = {
    'Original project name': '2021_CLOCCB_BermudaPlantBank_S1105',
    'Audio duration (s)': 300,
    'fs (Hz)': 8000,
    'Bit depth': 24,
    'Export label': 'Tags',
    'Split export selections': [True, 1],
    'Export folder': 'benchmark_data'
}

# Run checks on the user-defined entries
bc.check_export_settings(export_settings)

# User-defined path to selection table(s)
selection_table_path = '/Volumes/DCLDE/projects/2022_CLOCCB_IthacaNY_S1112/Atlantic_whales/2021_CLOCCB_BermudaPlantBank_S1105/annotations/'
bc.check_selection_tab(selection_table_path)

# Create output directories
bc.create_path(export_settings)

# Load the selection table
selection_table_df = bc.load_selection_table(selection_table_path)

if not selection_table_df.empty:
    print(selection_table_df)

# User-defined label key; should be one of the selection table keys displayed above
label_key = 'Call Type'

# Remove duplicates (e.g., if we have both the spectrogram and waveform view).
# NOTE(fix): DataFrame.drop_duplicates returns a new DataFrame — the result
# must be assigned back; the original call discarded it, so duplicates were
# silently kept.
selection_table_df = selection_table_df.drop_duplicates(
    subset='Begin Time (s)', keep='last')

# Estimate the size of the dataset
bc.benchmark_size_estimator(selection_table_df, export_settings, label_key)

# Check & update labels:
# get a list of unique labels from the selection table
unique_labels = selection_table_df[label_key].unique()

# Print the list of unique labels
print('Unique label list:')
for lab in unique_labels:
    print(lab)

# New label dictionary, mapping original labels to standardized codes.
# Yang Center species list: https://docs.google.com/spreadsheets/d/1ScxYST26QIGE2d_ovEI1NtyPDmpWeMHJJ2LEu4nFwOw/edit?usp=sharing
new_labels_dict = {
    'NARW': 'EUBGLA.NWAO.Upcall',
    'na': 'BALMUS.NWAO.Dcall',
}

# Swap the labels
selection_table_df = bc.update_labels(selection_table_df, new_labels_dict, label_key)

# Create the dataset, timing the run
start_time = time.time()
bc.benchmark_creator(selection_table_df, export_settings, label_key)
print(f'The Benchmark Dataset Creator took {time.time() - start_time} s to run')
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | altair==5.2.0
2 | attrs==23.2.0
3 | audioread==3.0.1
4 | blinker==1.7.0
5 | cachetools==5.3.3
6 | certifi==2024.2.2
7 | cffi==1.16.0
8 | charset-normalizer==3.3.2
9 | click==8.1.7
10 | decorator==5.1.1
11 | gitdb==4.0.11
12 | GitPython==3.1.42
13 | h3==3.7.7
14 | idna==3.6
15 | importlib-metadata==7.1.0
16 | importlib-resources==6.4.0
17 | Jinja2==3.1.3
18 | joblib==1.3.2
19 | jsonschema==4.21.1
20 | jsonschema-specifications==2023.12.1
21 | lazy-loader==0.3
22 | librosa==0.10.1
23 | llvmlite==0.41.1
24 | markdown-it-py==3.0.0
25 | MarkupSafe==2.1.5
26 | mdurl==0.1.2
27 | msgpack==1.0.8
28 | numba==0.58.1
29 | numpy==1.24.4
30 | packaging==23.2
31 | pandas==2.0.3
32 | pillow==10.2.0
33 | pkgutil-resolve-name==1.3.10
34 | platformdirs==4.2.0
35 | pooch==1.8.1
36 | protobuf==4.25.3
37 | pyarrow==15.0.2
38 | pycparser==2.21
39 | pydeck==0.8.1b0
40 | pygments==2.17.2
41 | python-dateutil==2.9.0.post0
42 | pytz==2024.1
43 | referencing==0.34.0
44 | requests==2.31.0
45 | rich==13.7.1
46 | rpds-py==0.18.0
47 | scikit-learn==1.3.2
48 | scipy==1.10.1
49 | six==1.16.0
50 | smmap==5.0.1
51 | soundfile==0.12.1
52 | soxr==0.3.7
53 | streamlit==1.32.2
54 | tenacity==8.2.3
55 | threadpoolctl==3.4.0
56 | timezonefinder==6.5.0
57 | toml==0.10.2
58 | toolz==0.12.1
59 | tornado==6.4
60 | tqdm==4.66.2
61 | typing-extensions==4.10.0
62 | tzdata==2024.1
63 | urllib3==2.2.1
64 | zipp==3.18.1
65 |
--------------------------------------------------------------------------------