├── images ├── mfcc1.png ├── english1.png ├── mfcc_flow.png ├── cnn_scores.png └── accent.gmu_sample.png ├── dataframes ├── df_japanese_male.csv ├── df_italian_female.csv ├── df_vietnamese_female.csv ├── df_german_male.csv ├── df_turkish_female.csv ├── df_vietnamese_male.csv ├── df_polish_female.csv ├── df_polish_male.csv ├── df_japanese_female.csv ├── df_dutch_male.csv ├── df_russian_male.csv ├── df_italian_male.csv ├── df_turkish_male.csv ├── df_german_female.csv ├── df_portuguese_female.csv ├── df_korean_male.csv ├── df_mandarin_male.csv ├── df_russian_female.csv ├── df_dutch_female.csv ├── df_portuguese_male.csv ├── df_korean_female.csv ├── df_french_female.csv ├── df_french_male.csv ├── df_mandarin_female.csv ├── df_arabic_female.csv ├── df_arabic_male.csv ├── df_spanish_female.csv ├── df_spanish_male.csv ├── df_usa_english_male.csv ├── df_usa_english_female.csv ├── df_usa_male.csv ├── df_usa_female.csv ├── df_english_female.csv └── df_english_male.csv ├── code ├── rnn_example.py ├── conv_1d_model.py ├── conv_1d_model_aws.py ├── testing.py └── mp3_getter.py └── README.md /images/mfcc1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/mfcc1.png -------------------------------------------------------------------------------- /images/english1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/english1.png -------------------------------------------------------------------------------- /images/mfcc_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/mfcc_flow.png -------------------------------------------------------------------------------- /images/cnn_scores.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/cnn_scores.png -------------------------------------------------------------------------------- /images/accent.gmu_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwww2012/Accent-Classifier/HEAD/images/accent.gmu_sample.png -------------------------------------------------------------------------------- /dataframes/df_japanese_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 223,223,20.0,15.0,"tokyo, japan",japanese4,japanese,male,224,japan 3 | 226,226,18.0,12.0,"osaka, japan",japanese7,japanese,male,227,japan 4 | 485,485,25.0,12.0,"tokyo, japan",japanese8,japanese,male,486,japan 5 | 542,542,24.0,13.0,"yokohama, japan",japanese9,japanese,male,543,japan 6 | 1364,1364,36.0,12.0,"tokyo, japan",japanese12,japanese,male,1365,japan 7 | 1380,1380,28.0,11.0,"gunma, japan",japanese13,japanese,male,1381,japan 8 | 1520,1520,45.0,12.0,"osaka, japan",japanese15,japanese,male,1521,japan 9 | 1832,1832,21.0,14.0,"yokosuka, japan",japanese20,japanese,male,1833,japan 10 | 1941,1941,24.0,5.0,"kawasaki, japan",japanese23,japanese,male,1942,japan 11 | -------------------------------------------------------------------------------- /dataframes/df_italian_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 216,216,29.0,22.0,"carbonia, italy",italian1,italian,female,217,italy 3 | 219,219,47.0,11.0,"cremona, italy",italian4,italian,female,220,italy 4 | 472,472,32.0,15.0,"naples, italy",italian6,italian,female,473,italy 5 | 680,680,38.0,14.0,"bellinzona, 
switzerland",italian12,italian,female,681,switzerland 6 | 1176,1176,34.0,14.0,"cuneo, italy",italian23,italian,female,1177,italy 7 | 1405,1405,23.0,6.0,"milan, italy",italian24,italian,female,1406,italy 8 | 1542,1542,33.0,10.0,"milan, italy",italian27,italian,female,1543,italy 9 | 1685,1685,21.0,8.0,"trieste, italy",italian29,italian,female,1686,italy 10 | 1975,1975,59.0,23.0,"naples, italy",italian31,italian,female,1976,italy 11 | 2019,2019,78.0,50.0,"bitonto, italy",italian33,italian,female,2020,italy 12 | -------------------------------------------------------------------------------- /dataframes/df_vietnamese_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 396,396,28.0,9.0,"vinh long, vietnam",vietnamese3,vietnamese,female,397,vietnam 3 | 397,397,25.0,15.0,"hanoi, vietnam",vietnamese4,vietnamese,female,398,vietnam 4 | 400,400,29.0,18.0,"hue, vietnam",vietnamese7,vietnamese,female,401,vietnam 5 | 1191,1191,57.0,18.0,"quang nam, vietnam",vietnamese10,vietnamese,female,1192,vietnam 6 | 1528,1528,24.0,9.0,"can tho, vietnam",vietnamese12,vietnamese,female,1529,vietnam 7 | 1939,1939,33.0,11.0,"ho chi minh city, vietnam",vietnamese17,vietnamese,female,1940,vietnam 8 | 2009,2009,19.0,11.0,"saigon, vietnam",vietnamese18,vietnamese,female,2010,vietnam 9 | 2012,2012,18.0,4.0,"ho chi minh city, vietnam",vietnamese19,vietnamese,female,2013,vietnam 10 | 2041,2041,46.0,25.0,"haiphong, vietnam",vietnamese20,vietnamese,female,2042,vietnam 11 | -------------------------------------------------------------------------------- /dataframes/df_german_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 192,192,47.0,12.0,"halle, germany",german5,german,male,193,germany 3 | 193,193,28.0,8.0,"vienna, 
austria",german6,german,male,194,austria 4 | 500,500,20.0,11.0,"berlin, germany",german7,german,male,501,germany 5 | 544,544,25.0,10.0,"bernburg, germany",german8,german,male,545,germany 6 | 615,615,22.0,8.0,"wuppertal, germany",german9,german,male,616,germany 7 | 634,634,24.0,10.0,"darmstadt, germany",german10,german,male,635,germany 8 | 675,675,35.0,11.0,"innsbruck, austria",german15,german,male,676,austria 9 | 892,892,29.0,6.0,"bad aussee, austria",german18,german,male,893,austria 10 | 935,935,41.0,16.0,"chur, switzerland",german20,german,male,936,switzerland 11 | 1254,1254,24.0,11.0,"herdecke, germany",german23,german,male,1255,germany 12 | 1382,1382,27.0,10.0,"niedersachsen, germany",german24,german,male,1383,germany 13 | 1493,1493,23.0,10.0,"bielefeld, germany",german27,german,male,1494,germany 14 | 1876,1876,39.0,10.0,"stuttgart, germany",german33,german,male,1877,germany 15 | 1878,1878,28.0,11.0,"bochum, germany",german34,german,male,1879,germany 16 | -------------------------------------------------------------------------------- /dataframes/df_turkish_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 380,380,27.0,26.0,"bingol, turkey",turkish1,turkish,female,381,turkey 3 | 384,384,30.0,11.0,"istanbul, turkey",turkish5,turkish,female,385,turkey 4 | 633,633,24.0,23.0,"kocaeli, turkey",turkish11,turkish,female,634,turkey 5 | 685,685,18.0,3.0,"istanbul, turkey",turkish13,turkish,female,686,turkey 6 | 734,734,23.0,6.0,"istanbul, turkey",turkish14,turkish,female,735,turkey 7 | 1143,1143,19.0,9.0,"kocaeli, turkey",turkish20,turkish,female,1144,turkey 8 | 1209,1209,18.0,4.0,"istanbul, turkey",turkish21,turkish,female,1210,turkey 9 | 1290,1290,37.0,11.0,"adana, turkey",turkish23,turkish,female,1291,turkey 10 | 1729,1729,18.0,7.0,"istanbul, turkey",turkish27,turkish,female,1730,turkey 11 | 1771,1771,24.0,4.0,"istanbul, 
turkey",turkish28,turkish,female,1772,turkey 12 | 1807,1807,21.0,11.0,"ankara, turkey",turkish29,turkish,female,1808,turkey 13 | 1928,1928,24.0,11.0,"izmir, turkey",turkish30,turkish,female,1929,turkey 14 | 2047,2047,24.0,11.0,"izmir, turkey",turkish35,turkish,female,2048,turkey 15 | 2167,2167,26.0,12.0,"istanbul, turkey",turkish36,turkish,female,2168,turkey 16 | -------------------------------------------------------------------------------- /dataframes/df_vietnamese_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 394,394,29.0,12.0,"can tho, vietnam",vietnamese1,vietnamese,male,395,vietnam 3 | 395,395,29.0,25.0,"ho chi minh city, vietnam",vietnamese2,vietnamese,male,396,vietnam 4 | 398,398,34.0,31.0,"cam ranh, vietnam",vietnamese5,vietnamese,male,399,vietnam 5 | 399,399,41.0,12.0,"ho chi minh city, vietnam",vietnamese6,vietnamese,male,400,vietnam 6 | 446,446,69.0,12.0,"ninh binh, vietnam",vietnamese8,vietnamese,male,447,vietnam 7 | 1133,1133,20.0,14.0,"ho chi minh city, vietnam",vietnamese9,vietnamese,male,1134,vietnam 8 | 1200,1200,67.0,15.0,"quang nam, vietnam",vietnamese11,vietnamese,male,1201,vietnam 9 | 1561,1561,23.0,6.0,"hanoi, vietnam",vietnamese13,vietnamese,male,1562,vietnam 10 | 1697,1697,51.0,20.0,"ho chi minh city, vietnam",vietnamese14,vietnamese,male,1698,vietnam 11 | 1808,1808,29.0,15.0,"ho chi minh city, vietnam",vietnamese15,vietnamese,male,1809,vietnam 12 | 1849,1849,52.0,17.0,"long xuyen, vietnam",vietnamese16,vietnamese,male,1850,vietnam 13 | 2098,2098,21.0,16.0,"hanoi, vietnam",vietnamese21,vietnamese,male,2099,vietnam 14 | 2150,2150,23.0,8.0,"bien hoa, vietnam",vietnamese22,vietnamese,male,2151,vietnam 15 | -------------------------------------------------------------------------------- /dataframes/df_polish_female.csv: -------------------------------------------------------------------------------- 1 | 
,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 276,276,26.0,23.0,"janow, poland",polish1,polish,female,277,poland 3 | 277,277,26.0,25.0,"mielec, poland",polish2,polish,female,278,poland 4 | 278,278,31.0,12.0,"nowy sacz, poland",polish3,polish,female,279,poland 5 | 279,279,47.0,15.0,"krakow, poland",polish4,polish,female,280,poland 6 | 625,625,26.0,8.0,"torun, poland",polish6,polish,female,626,poland 7 | 840,840,23.0,15.0,"ostroleka, poland",polish9,polish,female,841,poland 8 | 841,841,30.0,15.0,"wolanow, poland",polish10,polish,female,842,poland 9 | 842,842,56.0,10.0,"warsaw, poland",polish11,polish,female,843,poland 10 | 1244,1244,25.0,9.0,"ketrzyn, poland",polish15,polish,female,1245,poland 11 | 1452,1452,33.0,13.0,"krakow, poland",polish17,polish,female,1453,poland 12 | 1624,1624,53.0,4.0,"gdansk, poland",polish20,polish,female,1625,poland 13 | 1653,1653,20.0,10.0,"koszalin, poland",polish24,polish,female,1654,poland 14 | 1744,1744,20.0,10.0,"koszalin, poland",polish28,polish,female,1745,poland 15 | 1748,1748,21.0,9.0,"koszalin, poland",polish30,polish,female,1749,poland 16 | 1917,1917,34.0,16.0,"trzebnica, poland",polish31,polish,female,1918,poland 17 | 2084,2084,48.0,12.0,"wroclaw, poland",polish34,polish,female,2085,poland 18 | -------------------------------------------------------------------------------- /dataframes/df_polish_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 481,481,22.0,9.0,"ozarow maz, poland",polish5,polish,male,482,poland 3 | 727,727,20.0,6.0,"przemet, poland",polish7,polish,male,728,poland 4 | 768,768,20.0,11.0,"otwock, poland",polish8,polish,male,769,poland 5 | 882,882,40.0,20.0,"warsaw, poland",polish12,polish,male,883,poland 6 | 957,957,22.0,14.0,"gryfino, western pomerania, poland",polish13,polish,male,958,poland 7 | 1170,1170,97.0,35.0,"warsaw, 
poland",polish14,polish,male,1171,poland 8 | 1277,1277,19.0,8.0,"koszalin, poland",polish16,polish,male,1278,poland 9 | 1482,1482,18.0,5.0,"bialystok, poland",polish18,polish,male,1483,poland 10 | 1548,1548,24.0,7.0,"belchatow, poland",polish19,polish,male,1549,poland 11 | 1650,1650,30.0,10.0,"kolobrzeg, poland",polish21,polish,male,1651,poland 12 | 1651,1651,19.0,11.0,"koszalin, poland",polish22,polish,male,1652,poland 13 | 1652,1652,21.0,8.0,"lobez, poland",polish23,polish,male,1653,poland 14 | 1670,1670,20.0,11.0,"koszalin, poland",polish25,polish,male,1671,poland 15 | 1736,1736,19.0,16.0,"bytow, poland",polish26,polish,male,1737,poland 16 | 1737,1737,19.0,6.0,"koszalin, poland",polish27,polish,male,1738,poland 17 | 1747,1747,20.0,11.0,"sianow, poland",polish29,polish,male,1748,poland 18 | 2061,2061,42.0,14.0,"torun, poland",polish32,polish,male,2062,poland 19 | 2063,2063,21.0,7.0,"nowa ruda, poland",polish33,polish,male,2064,poland 20 | -------------------------------------------------------------------------------- /dataframes/df_japanese_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 220,220,25.0,12.0,"tokyo, japan",japanese1,japanese,female,221,japan 3 | 221,221,27.0,12.0,"tokyo, japan",japanese2,japanese,female,222,japan 4 | 222,222,49.0,12.0,"kofu, yamanashi prefecture, japan",japanese3,japanese,female,223,japan 5 | 224,224,25.0,12.0,"chiba, japan",japanese5,japanese,female,225,japan 6 | 225,225,30.0,10.0,"kawasaki, japan",japanese6,japanese,female,226,japan 7 | 826,826,26.0,12.0,"hakodate, japan",japanese10,japanese,female,827,japan 8 | 1045,1045,29.0,13.0,"toyama, japan",japanese11,japanese,female,1046,japan 9 | 1381,1381,37.0,12.0,"akashi, japan",japanese14,japanese,female,1382,japan 10 | 1536,1536,25.0,3.0,"clark field, philippines",japanese16,japanese,female,1537,philippines 11 | 1609,1609,44.0,13.0,"nara, 
japan",japanese17,japanese,female,1610,japan 12 | 1625,1625,52.0,13.0,"kyoto, japan",japanese18,japanese,female,1626,japan 13 | 1683,1683,57.0,13.0,"naha, japan",japanese19,japanese,female,1684,japan 14 | 1846,1846,53.0,13.0,"tokyo, japan",japanese21,japanese,female,1847,japan 15 | 1880,1880,69.0,12.0,"ashiya, japan",japanese22,japanese,female,1881,japan 16 | 1947,1947,40.0,13.0,"tokyo, japan",japanese24,japanese,female,1948,japan 17 | 1986,1986,52.0,13.0,"kyoto, japan",japanese25,japanese,female,1987,japan 18 | 1990,1990,44.0,13.0,"yokosuka, japan",japanese26,japanese,female,1991,japan 19 | 2122,2122,21.0,13.0,"tokyo, japan",japanese27,japanese,female,2123,japan 20 | -------------------------------------------------------------------------------- /dataframes/df_dutch_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 56,56,38.0,13.0,"nijmegen, netherlands",dutch1,dutch,male,57,netherlands 3 | 57,57,41.0,14.0,"amsterdam, netherlands",dutch2,dutch,male,58,netherlands 4 | 536,536,19.0,6.0,"alkmaar, netherlands",dutch3,dutch,male,537,netherlands 5 | 742,742,18.0,10.0,"bilzen, flanders, belgium",dutch4,dutch,male,743,belgium 6 | 894,894,68.0,10.0,"amsterdam, netherlands",dutch7,dutch,male,895,netherlands 7 | 912,912,39.0,12.0,"essen, belgium",dutch10,dutch,male,913,belgium 8 | 928,928,41.0,14.0,"brugge, belgium",dutch11,dutch,male,929,belgium 9 | 955,955,36.0,10.0,"oosterbeek, netherlands",dutch12,dutch,male,956,netherlands 10 | 1222,1222,23.0,10.0,"nunspeet, netherlands",dutch15,dutch,male,1223,netherlands 11 | 1261,1261,23.0,14.0,"antwerp, belgium",dutch18,dutch,male,1262,belgium 12 | 1267,1267,23.0,13.0,"antwerp, belgium",dutch23,dutch,male,1268,belgium 13 | 1274,1274,22.0,8.0,"antwerp, belgium",dutch28,dutch,male,1275,belgium 14 | 1299,1299,29.0,7.0,"ede, netherlands",dutch29,dutch,male,1300,netherlands 15 | 
1463,1463,21.0,14.0,"brasschaat, belgium",dutch32,dutch,male,1464,belgium 16 | 1483,1483,25.0,9.0,"dordrecht, netherlands",dutch40,dutch,male,1484,netherlands 17 | 1516,1516,41.0,13.0,"deventer, netherlands",dutch41,dutch,male,1517,netherlands 18 | 1550,1550,36.0,8.0,"schiedam, netherlands",dutch42,dutch,male,1551,netherlands 19 | 1704,1704,21.0,11.0,"rotterdam, netherlands",dutch43,dutch,male,1705,netherlands 20 | 1741,1741,22.0,2.0,"sint niklaas, belgium",dutch45,dutch,male,1742,belgium 21 | -------------------------------------------------------------------------------- /dataframes/df_russian_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 298,298,37.0,36.0,"nizhni novgorod, russia",russian1,russian,male,299,russia 3 | 300,300,54.0,13.0,"ola magadan, russia",russian11,russian,male,301,russia 4 | 307,307,66.0,12.0,"moscow, russia",russian8,russian,male,308,russia 5 | 308,308,23.0,7.0,"moscow, russia",russian9,russian,male,309,russia 6 | 459,459,62.0,53.0,"st. 
petersburg, russia",russian12,russian,male,460,russia 7 | 471,471,26.0,10.0,"moscow, russia",russian13,russian,male,472,russia 8 | 608,608,33.0,18.0,"pskov, russia",russian14,russian,male,609,russia 9 | 647,647,35.0,11.0,"minsk, belarus",russian16,russian,male,648,belarus 10 | 670,670,37.0,18.0,"pskov, russia",russian17,russian,male,671,russia 11 | 830,830,23.0,13.0,"komsomolsk-on-amur, russia",russian18,russian,male,831,russia 12 | 833,833,40.0,17.0,"zhezkazgan, kazakhstan",russian19,russian,male,834,kazakhstan 13 | 993,993,30.0,11.0,"riga, latvia",russian22,russian,male,994,latvia 14 | 1190,1190,21.0,14.0,"moscow, russia",russian27,russian,male,1191,russia 15 | 1253,1253,21.0,6.0,"chisinau, moldova",russian29,russian,male,1254,moldova 16 | 1278,1278,18.0,12.0,"stavropol, russia",russian30,russian,male,1279,russia 17 | 1310,1310,21.0,4.0,"tallinn, estonia",russian31,russian,male,1311,estonia 18 | 1406,1406,19.0,15.0,"vladivostok, russia",russian34,russian,male,1407,russia 19 | 1466,1466,27.0,16.0,"dresden, germany",russian35,russian,male,1467,germany 20 | 1517,1517,25.0,20.0,"moscow, russia",russian36,russian,male,1518,russia 21 | 1522,1522,31.0,10.0,"st. 
petersburg, russia",russian37,russian,male,1523,russia 22 | 1707,1707,84.0,18.0,"gomel, belarus",russian38,russian,male,1708,belarus 23 | -------------------------------------------------------------------------------- /dataframes/df_italian_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 217,217,46.0,18.0,"caltanissetta, sicily, italy",italian2,italian,male,218,italy 3 | 218,218,55.0,21.0,"florence, italy",italian3,italian,male,219,italy 4 | 439,439,52.0,25.0,"vicenza, veneto, italy",italian5,italian,male,440,italy 5 | 552,552,24.0,8.0,"naples, italy",italian7,italian,male,553,italy 6 | 588,588,43.0,10.0,"bari, italy",italian8,italian,male,589,italy 7 | 618,618,18.0,8.0,"turin, italy",italian9,italian,male,619,italy 8 | 626,626,48.0,12.0,"palermo, italy",italian10,italian,male,627,italy 9 | 640,640,30.0,18.0,"cagliari, italy",italian11,italian,male,641,italy 10 | 732,732,20.0,8.0,"enna, italy",italian13,italian,male,733,italy 11 | 760,760,20.0,8.0,"cosenza, italy",italian14,italian,male,761,italy 12 | 838,838,48.0,15.0,"forli, italy",italian15,italian,male,839,italy 13 | 941,941,40.0,14.0,"mantua, italy",italian16,italian,male,942,italy 14 | 994,994,28.0,14.0,"turin, italy",italian17,italian,male,995,italy 15 | 1042,1042,20.0,8.0,"grugliasco, italy",italian18,italian,male,1043,italy 16 | 1113,1113,19.0,11.0,"rome, italy",italian19,italian,male,1114,italy 17 | 1122,1122,32.0,15.0,"naples, italy",italian20,italian,male,1123,italy 18 | 1123,1123,42.0,20.0,"trento, italy",italian21,italian,male,1124,italy 19 | 1174,1174,23.0,6.0,"teramo, italy",italian22,italian,male,1175,italy 20 | 1321,1321,40.0,3.5,"wiesbaden, germany",italian25,italian,male,1322,germany 21 | 1488,1488,49.0,11.0,"siracusa, sicily, italy",italian26,italian,male,1489,italy 22 | 1678,1678,25.0,6.0,"rome, italy",italian28,italian,male,1679,italy 23 | 
1740,1740,23.0,11.0,"bologna, italy",italian30,italian,male,1741,italy 24 | 1983,1983,49.0,14.0,"sardinia, italy",italian32,italian,male,1984,italy 25 | -------------------------------------------------------------------------------- /dataframes/df_turkish_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 381,381,24.0,16.0,"ankara, turkey",turkish2,turkish,male,382,turkey 3 | 382,382,27.0,12.0,"adana, turkey",turkish3,turkish,male,383,turkey 4 | 383,383,20.0,14.0,"kayseri, turkey",turkish4,turkish,male,384,turkey 5 | 385,385,19.0,14.0,"istanbul, turkey",turkish6,turkish,male,386,turkey 6 | 386,386,19.0,13.0,"izmir, turkey",turkish7,turkish,male,387,turkey 7 | 477,477,37.0,17.0,"sivas, turkey",turkish8,turkish,male,478,turkey 8 | 531,531,25.0,11.0,"istanbul, turkey",turkish9,turkish,male,532,turkey 9 | 577,577,27.0,11.0,"ankara, turkey",turkish10,turkish,male,578,turkey 10 | 671,671,25.0,20.0,"giresun, turkey",turkish12,turkish,male,672,turkey 11 | 735,735,18.0,10.0,"ankara, turkey",turkish15,turkish,male,736,turkey 12 | 744,744,25.0,12.0,"kars, turkey",turkish16,turkish,male,745,turkey 13 | 762,762,31.0,6.0,"ankara, turkey",turkish17,turkish,male,763,turkey 14 | 925,925,27.0,9.0,"stuttgart, germany",turkish18,turkish,male,926,germany 15 | 970,970,25.0,18.0,"istanbul, turkey",turkish19,turkish,male,971,turkey 16 | 1281,1281,23.0,6.0,"istanbul, turkey",turkish22,turkish,male,1282,turkey 17 | 1328,1328,31.0,12.0,"istanbul, turkey",turkish24,turkish,male,1329,turkey 18 | 1569,1569,32.0,26.0,"kars, turkey",turkish25,turkish,male,1570,turkey 19 | 1703,1703,45.0,22.0,"sivas, turkey",turkish26,turkish,male,1704,turkey 20 | 2031,2031,24.0,11.0,"diyarbakir, turkey",turkish31,turkish,male,2032,turkey 21 | 2032,2032,24.0,11.0,"diyarbakir, turkey",turkish32,turkish,male,2033,turkey 22 | 2033,2033,24.0,11.0,"diyarbakir, 
turkey",turkish33,turkish,male,2034,turkey 23 | 2034,2034,24.0,11.0,"diyarbakir, turkey",turkish34,turkish,male,2035,turkey 24 | 2173,2173,19.0,17.0,"panjakent, tajikistan",turkish37,turkish,male,2174,tajikistan 25 | -------------------------------------------------------------------------------- /dataframes/df_german_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 188,188,48.0,12.0,"dusseldorf, germany",german1,german,female,189,germany 3 | 189,189,29.0,12.0,"bemberg, germany",german2,german,female,190,germany 4 | 190,190,19.0,12.0,"meissen, germany",german3,german,female,191,germany 5 | 191,191,19.0,11.0,"stuttgart, germany",german4,german,female,192,germany 6 | 639,639,20.0,11.0,"frankfurt, germany",german11,german,female,640,germany 7 | 650,650,29.0,14.0,"feldkirch, austria",german12,german,female,651,austria 8 | 651,651,54.0,13.0,"eschen, liechtenstein",german13,german,female,652,liechtenstein 9 | 674,674,30.0,10.0,"vienna, austria",german14,german,female,675,austria 10 | 880,880,31.0,11.0,"bremen, germany",german16,german,female,881,germany 11 | 881,881,53.0,10.0,"frankfurt, germany",german17,german,female,882,germany 12 | 927,927,19.0,11.0,"offenbach, germany",german19,german,female,928,germany 13 | 1063,1063,20.0,10.0,"villach, austria",german21,german,female,1064,austria 14 | 1091,1091,21.0,12.0,"elsterwerda, germany",german22,german,female,1092,germany 15 | 1442,1442,21.0,11.0,"geislingen, germany",german25,german,female,1443,germany 16 | 1451,1451,77.0,24.0,"stuttgart, germany",german26,german,female,1452,germany 17 | 1524,1524,19.0,10.0,"vienna, austria",german28,german,female,1525,austria 18 | 1618,1618,53.0,12.0,"coburg, germany",german29,german,female,1619,germany 19 | 1680,1680,32.0,8.0,"radstadt, austria",german30,german,female,1681,austria 20 | 1684,1684,21.0,10.0,"datteln, 
germany",german31,german,female,1685,germany 21 | 1706,1706,29.0,12.0,"niedersachsen, germany",german32,german,female,1707,germany 22 | 1971,1971,51.0,11.0,"ingolstadt, germany",german35,german,female,1972,germany 23 | 1993,1993,18.0,14.0,"hamburg, germany",german36,german,female,1994,germany 24 | -------------------------------------------------------------------------------- /dataframes/df_portuguese_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 280,280,37.0,16.0,"bahia, brazil",portuguese1,portuguese,female,281,brazil 3 | 282,282,18.0,10.0,"brasilia, brazil",portuguese2,portuguese,female,283,brazil 4 | 283,283,18.0,15.0,"blumenau, brazil",portuguese3,portuguese,female,284,brazil 5 | 494,494,40.0,7.0,"sao paulo, brazil",portuguese11,portuguese,female,495,brazil 6 | 811,811,50.0,12.0,"almada, portugal",portuguese18,portuguese,female,812,portugal 7 | 932,932,43.0,17.0,"luanda, angola",portuguese22,portuguese,female,933,angola 8 | 937,937,29.0,10.0,"curitiba, brazil",portuguese23,portuguese,female,938,brazil 9 | 959,959,22.0,11.0,"santos, brazil",portuguese24,portuguese,female,960,brazil 10 | 971,971,20.0,18.0,"rio de janeiro, brazil",portuguese25,portuguese,female,972,brazil 11 | 1108,1108,25.0,10.0,"campo grande, brazil",portuguese26,portuguese,female,1109,brazil 12 | 1127,1127,33.0,8.0,"sao paulo, brazil",portuguese27,portuguese,female,1128,brazil 13 | 1186,1186,36.0,10.0,"sao paulo, brazil",portuguese28,portuguese,female,1187,brazil 14 | 1284,1284,26.0,24.0,"juiz de fora, brazil",portuguese30,portuguese,female,1285,brazil 15 | 1388,1388,22.0,9.0,"sao paulo, brazil",portuguese35,portuguese,female,1389,brazil 16 | 1437,1437,39.0,15.0,"uberlandia, brazil",portuguese36,portuguese,female,1438,brazil 17 | 1453,1453,43.0,26.0,"sao paulo, brazil",portuguese37,portuguese,female,1454,brazil 18 | 1552,1552,26.0,8.0,"sao paulo, 
brazil",portuguese39,portuguese,female,1553,brazil 19 | 1921,1921,32.0,8.0,"coronel fabriciano, minas gerais, brazil",portuguese43,portuguese,female,1922,brazil 20 | 1929,1929,38.0,24.0,"rio de janeiro, brazil",portuguese44,portuguese,female,1930,brazil 21 | 1940,1940,19.0,7.0,"salvador, brazil",portuguese45,portuguese,female,1941,brazil 22 | 1988,1988,22.0,12.0,"rosario do sul, brazil",portuguese47,portuguese,female,1989,brazil 23 | -------------------------------------------------------------------------------- /dataframes/df_korean_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 237,237,29.0,13.0,"ui jong bu, south korea",korean1,korean,male,238,south korea 3 | 243,243,32.0,10.0,"seoul, south korea",korean7,korean,male,244,south korea 4 | 473,473,34.0,12.0,"taejon, south korea",korean9,korean,male,474,south korea 5 | 482,482,34.0,13.0,"kwangju, south korea",korean10,korean,male,483,south korea 6 | 558,558,26.0,14.0,"seoul, south korea",korean11,korean,male,559,south korea 7 | 607,607,31.0,31.0,"sokcho, south korea",korean12,korean,male,608,south korea 8 | 1004,1004,42.0,40.0,"pusan, south korea",korean15,korean,male,1005,south korea 9 | 1006,1006,21.0,8.0,"cheju-do, south korea",korean17,korean,male,1007,south korea 10 | 1116,1116,25.0,7.0,"seoul, south korea",korean18,korean,male,1117,south korea 11 | 1141,1141,21.0,12.0,"ulsan, south korea",korean21,korean,male,1142,south korea 12 | 1369,1369,42.0,13.0,"seoul, south korea",korean24,korean,male,1370,south korea 13 | 1506,1506,23.0,21.0,"seoul, south korea",korean26,korean,male,1507,south korea 14 | 1623,1623,51.0,14.0,"haenam, south korea",korean27,korean,male,1624,south korea 15 | 1814,1814,48.0,12.0,"kwangju, south korea",korean35,korean,male,1815,south korea 16 | 1847,1847,25.0,10.0,"inchon, south korea",korean38,korean,male,1848,south korea 17 | 1861,1861,55.0,14.0,"pusan, 
south korea",korean39,korean,male,1862,south korea 18 | 1949,1949,26.0,3.0,"bayside, new york, usa",korean42,korean,male,1950,usa 19 | 1951,1951,20.0,13.0,"taejon, south korea",korean43,korean,male,1952,south korea 20 | 1970,1970,49.0,14.0,"inchon, south korea",korean44,korean,male,1971,south korea 21 | 1981,1981,28.0,13.0,"inchon, south korea",korean45,korean,male,1982,south korea 22 | 2057,2057,23.0,10.0,"seoul, south korea",korean47,korean,male,2058,south korea 23 | 2074,2074,32.0,27.0,"seoul, south korea",korean48,korean,male,2075,south korea 24 | 2133,2133,25.0,16.0,"seoul, south korea",korean51,korean,male,2134,south korea 25 | 2157,2157,40.0,12.0,"seoul, south korea",korean52,korean,male,2158,south korea 26 | -------------------------------------------------------------------------------- /dataframes/df_mandarin_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 257,257,43.0,10.0,"jilin city, jilin, china",mandarin3,mandarin,male,258,china 3 | 261,261,22.0,5.0,"berkeley, california, usa",mandarin7,mandarin,male,262,usa 4 | 429,429,29.0,12.0,"jingmen, hubei, china",mandarin8,mandarin,male,430,china 5 | 450,450,38.0,12.0,"shanghai, china",mandarin9,mandarin,male,451,china 6 | 490,490,19.0,3.0,"beijing, china",mandarin10,mandarin,male,491,china 7 | 622,622,23.0,1.0,"singapore, singapore",mandarin12,mandarin,male,623,singapore 8 | 684,684,29.0,13.0,"nantou, taiwan",mandarin13,mandarin,male,685,taiwan 9 | 718,718,49.0,20.0,"dalian, liaoning, china",mandarin14,mandarin,male,719,china 10 | 749,749,32.0,10.0,"taipei, taiwan",mandarin16,mandarin,male,750,taiwan 11 | 795,795,26.0,13.0,"wuhan, hubei, china",mandarin17,mandarin,male,796,china 12 | 915,915,27.0,13.0,"qinzhou, guangxi, china",mandarin19,mandarin,male,916,china 13 | 1153,1153,18.0,15.0,"chengdu, sichuan, china",mandarin27,mandarin,male,1154,china 14 | 
1446,1446,45.0,15.0,"shanghai, china",mandarin28,mandarin,male,1447,china 15 | 1489,1489,24.0,4.0,"lanzhou, gansu, china",mandarin29,mandarin,male,1490,china 16 | 1490,1490,27.0,13.0,"taipei, taiwan",mandarin30,mandarin,male,1491,taiwan 17 | 1540,1540,25.0,6.0,"beijing, china",mandarin33,mandarin,male,1541,china 18 | 1613,1613,28.0,12.0,"chengdu, sichuan, china",mandarin40,mandarin,male,1614,china 19 | 1629,1629,21.0,16.0,"hangzhou, zhejiang, china",mandarin44,mandarin,male,1630,china 20 | 1644,1644,37.0,12.0,"pingdingshan, henan, china",mandarin48,mandarin,male,1645,china 21 | 1735,1735,21.0,10.0,"shanghai, china",mandarin49,mandarin,male,1736,china 22 | 1787,1787,25.0,9.0,"hsinchu, taiwan",mandarin50,mandarin,male,1788,taiwan 23 | 1791,1791,25.0,10.0,"tianjin, hebei, china",mandarin51,mandarin,male,1792,china 24 | 1989,1989,20.0,12.0,"shanghai, china",mandarin57,mandarin,male,1990,china 25 | 1999,1999,23.0,7.0,"superior, colorado, usa",mandarin58,mandarin,male,2000,usa 26 | 2116,2116,24.0,7.0,"shanghai, china",mandarin62,mandarin,male,2117,china 27 | 2118,2118,27.0,12.0,"jiaozuo, henan, china",mandarin63,mandarin,male,2119,china 28 | -------------------------------------------------------------------------------- /code/rnn_example.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | from keras.optimizers import SGD 5 | 6 | 7 | np.random.seed(1337) # for reproducibility 8 | from keras.preprocessing import sequence 9 | from keras.utils import np_utils 10 | from keras.models import Sequential 11 | from keras.layers.core import Dense, Dropout, Activation 12 | from keras.layers.recurrent import LSTM 13 | from sklearn.cross_validation import train_test_split 14 | from sklearn.metrics import classification_report 15 | 16 | batch_size = 25 17 | hidden_units = 10 18 | nb_classes = 3 19 | print('Loading data...') 20 | X = np.load('top_3_100_split_mfcc.npy') 21 | y = 
np.load('top_3_100_split_y.npy') 22 | 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) 24 | 25 | print(len(X_train), 'train sequences') 26 | print(len(X_test), 'test sequences') 27 | print('X_train shape:', X_train.shape) 28 | print('X_test shape:', X_test.shape) 29 | print('y_train shape:', y_train.shape) 30 | print('y_test shape:', y_test.shape) 31 | print('Build model...') 32 | 33 | Y_train = np_utils.to_categorical(y_train, nb_classes) 34 | Y_test = np_utils.to_categorical(y_test, nb_classes) 35 | 36 | model = Sequential() 37 | 38 | #batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2]) 39 | 40 | # note that it is necessary to pass in 3d batch_input_shape if stateful=True 41 | model.add(LSTM(64, return_sequences=True, stateful=False, 42 | batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2]))) 43 | model.add(LSTM(64, return_sequences=True, stateful=False)) 44 | model.add(LSTM(64, stateful=False)) 45 | 46 | 47 | # add dropout to control for overfitting 48 | model.add(Dropout(.25)) 49 | 50 | # squash output onto number of classes in probability space 51 | model.add(Dense(nb_classes, activation='softmax')) 52 | 53 | 54 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"]) 55 | 56 | print("Train...") 57 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=5, validation_data=(X_test, Y_test)) 58 | 59 | y_pred=model.predict_classes(X_test, batch_size=batch_size) 60 | print(classification_report(y_test, y_pred)) 61 | -------------------------------------------------------------------------------- /dataframes/df_russian_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 299,299,20.0,5.0,"moscow, russia",russian10,russian,female,300,russia 3 | 301,301,50.0,10.0,"izmail, ukraine",russian2,russian,female,302,ukraine 4 | 
302,302,35.0,6.0,"zaporizhzhya, ukraine",russian3,russian,female,303,ukraine 5 | 303,303,68.0,38.0,"moscow, russia",russian4,russian,female,304,russia 6 | 304,304,25.0,15.0,"khabarovsk, russia",russian5,russian,female,305,russia 7 | 305,305,26.0,11.0,"moscow, russia",russian6,russian,female,306,russia 8 | 306,306,54.0,13.0,"kiev, ukraine",russian7,russian,female,307,ukraine 9 | 646,646,34.0,12.0,"ayaguz, kazakhstan",russian15,russian,female,647,kazakhstan 10 | 910,910,28.0,17.0,"kostanai, kazakhstan",russian20,russian,female,911,kazakhstan 11 | 953,953,46.0,10.0,"moscow, russia",russian21,russian,female,954,russia 12 | 1056,1056,21.0,5.0,"zaporizhzhya, ukraine",russian23,russian,female,1057,ukraine 13 | 1072,1072,33.0,19.0,"sochi, russia",russian24,russian,female,1073,russia 14 | 1095,1095,58.0,41.0,"orhei, moldova",russian25,russian,female,1096,moldova 15 | 1096,1096,23.0,17.0,"chisinau, moldova",russian26,russian,female,1097,moldova 16 | 1197,1197,25.0,11.0,"minsk, belarus",russian28,russian,female,1198,belarus 17 | 1313,1313,31.0,14.0,"kiev, ukraine",russian32,russian,female,1314,ukraine 18 | 1316,1316,24.0,17.0,"karaganda, kazakhstan",russian33,russian,female,1317,kazakhstan 19 | 1749,1749,25.0,5.0,"brooklyn, new york, usa",russian39,russian,female,1750,usa 20 | 1773,1773,23.0,8.0,"bishkek, kyrgyzstan",russian40,russian,female,1774,kyrgyzstan 21 | 1843,1843,30.0,10.0,"nizhni novgorod, russia",russian41,russian,female,1844,russia 22 | 1923,1923,24.0,4.0,"moscow, russia",russian42,russian,female,1924,russia 23 | 1931,1931,27.0,8.0,"st. petersburg, russia",russian43,russian,female,1932,russia 24 | 1946,1946,29.0,6.0,"penza, russia",russian44,russian,female,1947,russia 25 | 2026,2026,68.0,14.0,"st. 
petersburg, russia",russian45,russian,female,2027,russia 26 | 2104,2104,24.0,7.0,"perm, russia",russian46,russian,female,2105,russia 27 | 2106,2106,26.0,6.0,"saransk, russia",russian47,russian,female,2107,russia 28 | 2136,2136,29.0,6.0,"fergana, uzbekistan",russian48,russian,female,2137,uzbekistan 29 | -------------------------------------------------------------------------------- /dataframes/df_dutch_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 769,769,54.0,13.0,"rotterdam, netherlands",dutch5,dutch,female,770,netherlands 3 | 846,846,46.0,10.0,"brussels, belgium",dutch6,dutch,female,847,belgium 4 | 895,895,35.0,12.0,"alphen aan de ryn, netherlands",dutch8,dutch,female,896,netherlands 5 | 896,896,68.0,12.0,"almelo, netherlands",dutch9,dutch,female,897,netherlands 6 | 1002,1002,33.0,10.0,"zwolle, netherlands",dutch13,dutch,female,1003,netherlands 7 | 1003,1003,35.0,12.0,"zwolle, netherlands",dutch14,dutch,female,1004,netherlands 8 | 1259,1259,23.0,12.0,"antwerp, belgium",dutch16,dutch,female,1260,belgium 9 | 1260,1260,21.0,12.0,"wilrijk, belgium",dutch17,dutch,female,1261,belgium 10 | 1262,1262,22.0,11.0,"heist-op-den-berg, belgium",dutch19,dutch,female,1263,belgium 11 | 1263,1263,21.0,13.0,"vlaams-brabant, belgium",dutch20,dutch,female,1264,belgium 12 | 1264,1264,21.0,13.0,"diest, belgium",dutch21,dutch,female,1265,belgium 13 | 1265,1265,23.0,13.0,"essen, belgium",dutch22,dutch,female,1266,belgium 14 | 1268,1268,23.0,14.0,"seoul, south korea",dutch24,dutch,female,1269,south korea 15 | 1269,1269,23.0,13.0,"wilrijk, belgium",dutch25,dutch,female,1270,belgium 16 | 1270,1270,23.0,13.0,"turnhout, belgium",dutch26,dutch,female,1271,belgium 17 | 1273,1273,21.0,13.0,"antwerp, belgium",dutch27,dutch,female,1274,belgium 18 | 1461,1461,21.0,14.0,"sint-niklaas, belgium",dutch30,dutch,female,1462,belgium 19 | 1462,1462,23.0,13.0,"antwerp, 
belgium",dutch31,dutch,female,1463,belgium 20 | 1465,1465,22.0,9.0,"mechelen, belgium",dutch33,dutch,female,1466,belgium 21 | 1467,1467,22.0,14.0,"bornem, belgium",dutch34,dutch,female,1468,belgium 22 | 1472,1472,21.0,13.0,"mortsel, belgium",dutch35,dutch,female,1473,belgium 23 | 1475,1475,21.0,12.0,"turnhout, belgium",dutch36,dutch,female,1476,belgium 24 | 1478,1478,21.0,11.0,"antwerp, belgium",dutch37,dutch,female,1479,belgium 25 | 1480,1480,21.0,12.0,"beveren, belgium",dutch38,dutch,female,1481,belgium 26 | 1481,1481,22.0,10.0,"hardinxveld-giessendam, netherlands",dutch39,dutch,female,1482,netherlands 27 | 1739,1739,21.0,12.0,"antwerp, belgium",dutch44,dutch,female,1740,belgium 28 | 1742,1742,22.0,12.0,"hulshout, belgium",dutch46,dutch,female,1743,belgium 29 | 2114,2114,37.0,11.0,"gouda, netherlands",dutch47,dutch,female,2115,netherlands 30 | -------------------------------------------------------------------------------- /dataframes/df_portuguese_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 281,281,21.0,13.0,"novo hamburgo, rio grande do sul, brazil",portuguese10,portuguese,male,282,brazil 3 | 284,284,20.0,15.0,"cabinda, angola",portuguese4,portuguese,male,285,angola 4 | 285,285,31.0,13.0,"lubango, angola",portuguese5,portuguese,male,286,angola 5 | 286,286,44.0,15.0,"sao paulo, brazil",portuguese6,portuguese,male,287,brazil 6 | 287,287,40.0,11.0,"vitoria, brazil",portuguese7,portuguese,male,288,brazil 7 | 288,288,26.0,7.0,"fortaleza, brazil",portuguese8,portuguese,male,289,brazil 8 | 289,289,18.0,14.0,"sao paulo, brazil",portuguese9,portuguese,male,290,brazil 9 | 512,512,36.0,12.0,"lisbon, portugal",portuguese12,portuguese,male,513,portugal 10 | 519,519,18.0,9.0,"santa maria, rio grande do sul, brazil",portuguese13,portuguese,male,520,brazil 11 | 557,557,25.0,8.0,"sao paulo, brazil",portuguese14,portuguese,male,558,brazil 12 
| 614,614,54.0,9.0,"sao paulo, brazil",portuguese15,portuguese,male,615,brazil 13 | 631,631,25.0,15.0,"porto alegre, brazil",portuguese16,portuguese,male,632,brazil 14 | 805,805,18.0,15.0,"salinas, brazil",portuguese17,portuguese,male,806,brazil 15 | 828,828,32.0,4.0,"florianopolis, brazil",portuguese19,portuguese,male,829,brazil 16 | 832,832,21.0,10.0,"campinas, brazil",portuguese20,portuguese,male,833,brazil 17 | 890,890,23.0,17.0,"sao paulo, brazil",portuguese21,portuguese,male,891,brazil 18 | 1283,1283,28.0,8.0,"vitoria, brazil",portuguese29,portuguese,male,1284,brazil 19 | 1285,1285,29.0,16.0,"belo horizonte, brazil",portuguese31,portuguese,male,1286,brazil 20 | 1304,1304,29.0,12.0,"volta redonda, brazil",portuguese32,portuguese,male,1305,brazil 21 | 1315,1315,38.0,28.0,"brasilia, brazil",portuguese33,portuguese,male,1316,brazil 22 | 1375,1375,18.0,14.0,"porto alegre, brazil",portuguese34,portuguese,male,1376,brazil 23 | 1469,1469,25.0,15.0,"rio de janeiro, brazil",portuguese38,portuguese,male,1470,brazil 24 | 1772,1772,23.0,0.0,"london, england, uk",portuguese40,portuguese,male,1773,uk 25 | 1848,1848,22.0,10.0,"mexico city, mexico",portuguese41,portuguese,male,1849,mexico 26 | 1882,1882,24.0,21.0,"luanda, angola",portuguese42,portuguese,male,1883,angola 27 | 1964,1964,65.0,11.0,"cascais, portugal",portuguese46,portuguese,male,1965,portugal 28 | 2119,2119,31.0,26.0,"sao paulo, brazil",portuguese48,portuguese,male,2120,brazil 29 | -------------------------------------------------------------------------------- /dataframes/df_korean_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 238,238,23.0,14.0,"taegu, south korea",korean2,korean,female,239,south korea 3 | 239,239,19.0,17.0,"seoul, south korea",korean3,korean,female,240,south korea 4 | 240,240,29.0,15.0,"seoul, south korea",korean4,korean,female,241,south korea 5 | 
241,241,50.0,12.0,"seoul, south korea",korean5,korean,female,242,south korea 6 | 242,242,21.0,12.0,"sunchun, south korea",korean6,korean,female,243,south korea 7 | 451,451,39.0,13.0,"seoul, south korea",korean8,korean,female,452,south korea 8 | 758,758,50.0,12.0,"pusan, south korea",korean13,korean,female,759,south korea 9 | 891,891,22.0,8.0,"kota kinabalu, malaysia",korean14,korean,female,892,malaysia 10 | 1005,1005,30.0,13.0,"nonsan, south korea",korean16,korean,female,1006,south korea 11 | 1117,1117,21.0,14.0,"seoul, south korea",korean19,korean,female,1118,south korea 12 | 1125,1125,58.0,13.0,"inchon, south korea",korean20,korean,female,1126,south korea 13 | 1144,1144,19.0,8.0,"inchon, south korea",korean22,korean,female,1145,south korea 14 | 1195,1195,49.0,12.0,"seoul, south korea",korean23,korean,female,1196,south korea 15 | 1385,1385,34.0,12.0,"anseong, kyong gi, south korea",korean25,korean,female,1386,south korea 16 | 1662,1662,62.0,13.0,"seoul, south korea",korean28,korean,female,1663,south korea 17 | 1686,1686,19.0,8.0,"masan, south korea",korean29,korean,female,1687,south korea 18 | 1689,1689,49.0,14.0,"seoul, south korea",korean30,korean,female,1690,south korea 19 | 1695,1695,18.0,7.0,"seoul, south korea",korean31,korean,female,1696,south korea 20 | 1714,1714,35.0,28.0,"seoul, south korea",korean32,korean,female,1715,south korea 21 | 1721,1721,30.0,13.0,"seongnam, south korea",korean33,korean,female,1722,south korea 22 | 1813,1813,20.0,11.0,"seoul, south korea",korean34,korean,female,1814,south korea 23 | 1837,1837,51.0,12.0,"taegu, south korea",korean36,korean,female,1838,south korea 24 | 1842,1842,27.0,14.0,"seoul, south korea",korean37,korean,female,1843,south korea 25 | 1942,1942,21.0,17.0,"seoul, south korea",korean40,korean,female,1943,south korea 26 | 1945,1945,50.0,12.0,"seoul, south korea",korean41,korean,female,1946,south korea 27 | 2014,2014,21.0,21.0,"seoul, south korea",korean46,korean,female,2015,south korea 28 | 2097,2097,22.0,7.0,"an 
yang, south korea",korean49,korean,female,2098,south korea 29 | 2132,2132,57.0,13.0,"taejon, south korea",korean50,korean,female,2133,south korea 30 | -------------------------------------------------------------------------------- /dataframes/df_french_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 178,178,20.0,12.0,"st. laurent d'onay, france",french1,french,female,179,france 3 | 180,180,22.0,11.0,"nice, france",french3,french,female,181,france 4 | 181,181,31.0,14.0,"abidjan, ivory coast",french4,french,female,182,ivory coast 5 | 182,182,36.0,11.0,"douala, cameroon",french5,french,female,183,cameroon 6 | 183,183,26.0,13.0,"booue, gabon",french6,french,female,184,gabon 7 | 523,523,19.0,9.0,"montreal, quebec, canada",french12,french,female,524,canada 8 | 628,628,23.0,14.0,"paris, france",french14,french,female,629,france 9 | 657,657,19.0,7.0,"montreal, quebec, canada",french16,french,female,658,canada 10 | 821,821,78.0,16.0,"ghent, belgium",french22,french,female,822,belgium 11 | 822,822,76.0,16.0,"ghent, belgium",french23,french,female,823,belgium 12 | 916,916,27.0,11.0,"montreal, quebec, canada",french26,french,female,917,canada 13 | 931,931,38.0,12.0,"algiers, algeria",french27,french,female,932,algeria 14 | 956,956,35.0,11.0,"port-au-prince, haiti",french28,french,female,957,haiti 15 | 1020,1020,54.0,10.0,"cannes, france",french29,french,female,1021,france 16 | 1303,1303,32.0,10.0,"reims, france",french36,french,female,1304,france 17 | 1428,1428,44.0,13.5,"rouen, france",french40,french,female,1429,france 18 | 1508,1508,22.0,20.0,"kinshasa, democratic republic of congo",french42,french,female,1509,democratic republic of congo 19 | 1544,1544,20.0,12.0,"la chaux-de-fonds, switzerland",french44,french,female,1545,switzerland 20 | 1661,1661,66.0,12.0,"landerneau, brittany, france",french47,french,female,1662,france 21 | 
1700,1700,19.0,3.0,"douala, cameroon",french48,french,female,1701,cameroon 22 | 1750,1750,67.0,14.0,"paris, france",french50,french,female,1751,france 23 | 1812,1812,56.0,11.0,"lisbon, portugal",french52,french,female,1813,portugal 24 | 1829,1829,19.0,10.0,"creteil, france",french53,french,female,1830,france 25 | 1845,1845,25.0,8.0,"vichy, france",french54,french,female,1846,france 26 | 1888,1888,62.0,12.0,"settat, morocco",french55,french,female,1889,morocco 27 | 1897,1897,21.0,5.0,"douala, cameroon",french56,french,female,1898,cameroon 28 | 1936,1936,25.0,18.0,"douala, cameroon",french58,french,female,1937,cameroon 29 | 1937,1937,20.0,5.0,"paris, france",french59,french,female,1938,france 30 | 1944,1944,20.0,11.0,"paris, france",french60,french,female,1945,france 31 | 1997,1997,41.0,10.0,"rawalpindi, pakistan",french61,french,female,1998,pakistan 32 | 2029,2029,21.0,20.0,"paris, france",french62,french,female,2030,france 33 | -------------------------------------------------------------------------------- /dataframes/df_french_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 179,179,19.0,14.0,"tunis, tunisia",french2,french,male,180,tunisia 3 | 184,184,18.0,5.0,"rimouski, quebec, canada",french7,french,male,185,canada 4 | 185,185,66.0,16.0,"ghent, belgium",french8,french,male,186,belgium 5 | 411,411,21.0,11.0,"lamentin, martinique",french9,french,male,412,martinique 6 | 499,499,31.0,10.0,"bordeaux, france",french10,french,male,500,france 7 | 513,513,31.0,11.0,"limoges, france",french11,french,male,514,france 8 | 627,627,19.0,12.0,"bordeaux, france",french13,french,male,628,france 9 | 632,632,32.0,12.0,"sens, france",french15,french,male,633,france 10 | 682,682,39.0,14.0,"fribourg, switzerland",french17,french,male,683,switzerland 11 | 740,740,22.0,6.0,"washington, dc, usa",french18,french,male,741,usa 12 | 
754,754,39.0,12.0,"grenoble, france",french19,french,male,755,france 13 | 803,803,23.0,12.0,"alma, quebec, canada",french20,french,male,804,canada 14 | 810,810,20.0,9.0,"amiens, france",french21,french,male,811,france 15 | 831,831,47.0,13.0,"mostaganem, algeria",french24,french,male,832,algeria 16 | 859,859,20.0,12.0,"st. louis fr, france",french25,french,male,860,france 17 | 1022,1022,37.0,12.0,"kinshasa, democratic republic of congo",french30,french,male,1023,democratic republic of congo 18 | 1061,1061,28.0,15.0,"paris, france",french31,french,male,1062,france 19 | 1079,1079,60.0,16.0,"kabinda, democratic republic of congo",french32,french,male,1080,democratic republic of congo 20 | 1126,1126,62.0,10.0,"montreal, quebec, canada",french33,french,male,1127,canada 21 | 1159,1159,56.0,16.0,"abidjan, ivory coast",french34,french,male,1160,ivory coast 22 | 1188,1188,27.0,10.0,"toulouse, france",french35,french,male,1189,france 23 | 1305,1305,42.0,14.0,"strasbourg, france",french37,french,male,1306,france 24 | 1423,1423,22.0,8.0,"montreal, quebec, canada",french38,french,male,1424,canada 25 | 1426,1426,28.0,13.0,"pezenas, france",french39,french,male,1427,france 26 | 1473,1473,24.0,12.0,"la massana, andorra",french41,french,male,1474,andorra 27 | 1534,1534,22.0,11.0,"paris, france",french43,french,male,1535,france 28 | 1566,1566,22.0,8.0,"montreal, quebec, canada",french45,french,male,1567,canada 29 | 1642,1642,22.0,10.0,"chatenay malabry, france",french46,french,male,1643,france 30 | 1728,1728,39.0,12.0,"paris, france",french49,french,male,1729,france 31 | 1764,1764,18.0,5.0,"liege, wallonia, belgium",french51,french,male,1765,belgium 32 | 1930,1930,23.0,12.0,"dakar, senegal",french57,french,male,1931,senegal 33 | 2110,2110,37.0,11.0,"ouagadougou, burkina faso",french63,french,male,2111,burkina faso 34 | -------------------------------------------------------------------------------- /dataframes/df_mandarin_female.csv: 
-------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 255,255,26.0,13.0,"shanxi province, shanxi, china",mandarin1,mandarin,female,256,china 3 | 256,256,38.0,14.0,"nanjing, china",mandarin2,mandarin,female,257,china 4 | 258,258,24.0,6.0,"shanghai, china",mandarin4,mandarin,female,259,china 5 | 259,259,31.0,12.0,"beijing, china",mandarin5,mandarin,female,260,china 6 | 260,260,28.0,12.0,"le shan, sichuan, china",mandarin6,mandarin,female,261,china 7 | 606,606,53.0,13.0,"kao-hsiung, taiwan",mandarin11,mandarin,female,607,taiwan 8 | 745,745,28.0,11.0,"tainan, taiwan",mandarin15,mandarin,female,746,taiwan 9 | 871,871,29.0,13.0,"taoyuan, taiwan",mandarin18,mandarin,female,872,taiwan 10 | 930,930,40.0,12.0,"ping tong, taiwan",mandarin20,mandarin,female,931,taiwan 11 | 983,983,38.0,9.0,"chengdu, sichuan, china",mandarin21,mandarin,female,984,china 12 | 1018,1018,39.0,11.0,"haikou, hainan, china",mandarin22,mandarin,female,1019,china 13 | 1043,1043,46.0,13.0,"kao-hsiung, taiwan",mandarin23,mandarin,female,1044,taiwan 14 | 1088,1088,21.0,10.0,"shanghai, china",mandarin24,mandarin,female,1089,china 15 | 1124,1124,28.0,13.0,"wuxi, jiangsu, china",mandarin25,mandarin,female,1125,china 16 | 1140,1140,31.0,12.0,"emei, sichuan, china",mandarin26,mandarin,female,1141,china 17 | 1518,1518,26.0,13.0,"wenzhou, zhejiang, china",mandarin31,mandarin,female,1519,china 18 | 1526,1526,23.0,12.0,"taipei, taiwan",mandarin32,mandarin,female,1527,taiwan 19 | 1577,1577,31.0,13.0,"shi jia zhuang, hebei, china",mandarin34,mandarin,female,1578,china 20 | 1578,1578,27.0,12.0,"yantai, shandong, china",mandarin35,mandarin,female,1579,china 21 | 1579,1579,32.0,10.0,"beijing, china",mandarin36,mandarin,female,1580,china 22 | 1580,1580,32.0,12.0,"huhot, nei meng gu, china",mandarin37,mandarin,female,1581,china 23 | 1581,1581,33.0,12.0,"changsha, hunan, 
china",mandarin38,mandarin,female,1582,china 24 | 1582,1582,24.0,12.0,"tie ling, liaoning, china",mandarin39,mandarin,female,1583,china 25 | 1614,1614,34.0,13.0,"songyuan, jilin, china",mandarin41,mandarin,female,1615,china 26 | 1615,1615,47.0,16.0,"yanbian, jilin, china",mandarin42,mandarin,female,1616,china 27 | 1616,1616,24.0,15.0,"datong, shanxi, china",mandarin43,mandarin,female,1617,china 28 | 1630,1630,42.0,12.0,"beijing, china",mandarin45,mandarin,female,1631,china 29 | 1635,1635,43.0,9.0,"jilin city, jilin, china",mandarin46,mandarin,female,1636,china 30 | 1643,1643,28.0,8.0,"beijing, china",mandarin47,mandarin,female,1644,china 31 | 1792,1792,26.0,6.0,"shaoxing, zhejiang, china",mandarin52,mandarin,female,1793,china 32 | 1816,1816,38.0,13.0,"ningbo, zhejiang, china",mandarin53,mandarin,female,1817,china 33 | 1899,1899,25.0,10.0,"chengdu, sichuan, china",mandarin54,mandarin,female,1900,china 34 | 1908,1908,25.0,13.0,"loudi, hunan, china",mandarin55,mandarin,female,1909,china 35 | 1909,1909,26.0,11.0,"baoding, hebei, china",mandarin56,mandarin,female,1910,china 36 | 2018,2018,20.0,6.0,"kunming, yunnan, china",mandarin59,mandarin,female,2019,china 37 | 2108,2108,41.0,13.0,"guiyang, guizhou, china",mandarin60,mandarin,female,2109,china 38 | 2115,2115,33.0,16.0,"shangrao, jiangxi, china",mandarin61,mandarin,female,2116,china 39 | 2128,2128,24.0,10.0,"fuzhou, fujian, china",mandarin64,mandarin,female,2129,china 40 | 2172,2172,20.0,5.0,"tianjin, hebei, china",mandarin65,mandarin,female,2173,china 41 | -------------------------------------------------------------------------------- /dataframes/df_arabic_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 10,10,38.0,12.0,"riyadh, saudi arabia",arabic1,arabic,female,11,saudi arabia 3 | 12,12,30.0,14.0,"monastir, tunisia",arabic11,arabic,female,13,tunisia 4 | 
18,18,19.0,15.0,"casablanca, morocco",arabic5,arabic,female,19,morocco 5 | 20,20,18.0,15.0,"casablanca, morocco",arabic7,arabic,female,21,morocco 6 | 22,22,28.0,4.0,"jerusalem, israel",arabic9,arabic,female,23,israel 7 | 669,669,46.0,30.0,"beirut, lebanon",arabic21,arabic,female,670,lebanon 8 | 919,919,23.0,10.0,"abu dhabi, united arab emirates",arabic22,arabic,female,920,united arab emirates 9 | 1011,1011,43.0,14.0,"amman, jordan",arabic26,arabic,female,1012,jordan 10 | 1132,1132,19.0,8.0,"riyadh, saudi arabia",arabic30,arabic,female,1133,saudi arabia 11 | 1180,1180,47.0,12.0,"baghdad, iraq",arabic31,arabic,female,1181,iraq 12 | 1203,1203,28.0,11.0,"baghdad, iraq",arabic33,arabic,female,1204,iraq 13 | 1252,1252,34.0,6.0,"kuwait city, kuwait",arabic34,arabic,female,1253,kuwait 14 | 1393,1393,19.0,9.0,"shady grove, maryland, usa",arabic38,arabic,female,1394,usa 15 | 1425,1425,29.0,8.0,"beirut, lebanon",arabic43,arabic,female,1426,lebanon 16 | 1454,1454,29.0,7.0,"jiddah, saudi arabia",arabic44,arabic,female,1455,saudi arabia 17 | 1455,1455,32.0,12.0,"fort collins, colorado, usa",arabic45,arabic,female,1456,usa 18 | 1486,1486,21.0,9.0,"jiddah, saudi arabia",arabic52,arabic,female,1487,saudi arabia 19 | 1698,1698,28.0,17.0,"beirut, lebanon",arabic57,arabic,female,1699,lebanon 20 | 1768,1768,28.0,10.0,"bani walid, libya",arabic63,arabic,female,1769,libya 21 | 1777,1777,32.0,10.0,"samail, oman",arabic65,arabic,female,1778,oman 22 | 1809,1809,55.0,15.0,"kerma, sudan",arabic69,arabic,female,1810,sudan 23 | 1827,1827,29.0,21.0,"kuwait city, kuwait",arabic71,arabic,female,1828,kuwait 24 | 1831,1831,20.0,11.0,"baghdad, iraq",arabic72,arabic,female,1832,iraq 25 | 1840,1840,47.0,7.0,"nazareth, israel",arabic73,arabic,female,1841,israel 26 | 1884,1884,26.0,2.0,"riyadh, saudi arabia",arabic75,arabic,female,1885,saudi arabia 27 | 1893,1893,18.0,4.0,"ad dammam, saudi arabia",arabic77,arabic,female,1894,saudi arabia 28 | 1896,1896,19.0,3.0,"washington, district of columbia, 
usa",arabic78,arabic,female,1897,usa 29 | 1898,1898,24.0,13.0,"medina, saudi arabia",arabic79,arabic,female,1899,saudi arabia 30 | 1926,1926,26.0,12.0,"medina, saudi arabia",arabic84,arabic,female,1927,saudi arabia 31 | 1962,1962,24.0,14.0,"ad dammam, saudi arabia",arabic85,arabic,female,1963,saudi arabia 32 | 2004,2004,49.0,10.0,"kuwait city, kuwait",arabic87,arabic,female,2005,kuwait 33 | 2007,2007,19.0,4.0,"beirut, lebanon",arabic88,arabic,female,2008,lebanon 34 | 2021,2021,35.0,19.0,"baghdad, iraq",arabic89,arabic,female,2022,iraq 35 | 2035,2035,23.0,12.0,"tripoli, libya",arabic90,arabic,female,2036,libya 36 | 2085,2085,57.0,12.0,"riyadh, saudi arabia",arabic91,arabic,female,2086,saudi arabia 37 | 2111,2111,27.0,10.0,"baghdad, iraq",arabic93,arabic,female,2112,iraq 38 | 2113,2113,24.0,9.0,"abu dhabi, united arab emirates",arabic94,arabic,female,2114,united arab emirates 39 | 2142,2142,31.0,10.0,"kuwait city, kuwait",arabic95,arabic,female,2143,kuwait 40 | 2153,2153,19.0,4.0,"sharjah, united arab emirates",arabic97,arabic,female,2154,united arab emirates 41 | 2154,2154,19.0,4.0,"ras al khaimah, united arab emirates",arabic98,arabic,female,2155,united arab emirates 42 | 2156,2156,25.0,16.0,"ad dammam, saudi arabia",arabic99,arabic,female,2157,saudi arabia 43 | 2158,2158,40.0,10.0,"beirut, lebanon",arabic100,arabic,female,2159,lebanon 44 | -------------------------------------------------------------------------------- /code/conv_1d_model.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.metrics import classification_report 6 | #np.random.seed(1337) # for reproducibility 7 | 8 | from keras.preprocessing import sequence 9 | from keras.models import Sequential 10 | from keras.layers.core import Dense, Dropout, Activation, Flatten 11 | from keras.layers.normalization import 
BatchNormalization 12 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 13 | from keras.utils import np_utils 14 | 15 | 16 | # set parameters: 17 | test_dim = 2999 18 | maxlen = 100 19 | batch_size = 100 20 | nb_filter = 64 21 | filter_length_1 = 50 22 | filter_length_2 = 25 23 | hidden_dims = 250 24 | nb_epoch = 8 25 | nb_classes = 2 26 | 27 | print('Loading data...') 28 | X = np.load('usa373_span162_mfcc_13.npy') 29 | y = np.append(np.ones(373), np.zeros(162)) 30 | 31 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) 32 | 33 | xts = X_train.shape 34 | #X_train = np.reshape(X_train, (xts[0], xts[1], 1)) 35 | xtss = X_test.shape 36 | #X_test = np.reshape(X_test, (xtss[0], xtss[1], 1)) 37 | yts = y_train.shape 38 | #y_train = np.reshape(y_train, (yts[0], 1)) 39 | ytss = y_test.shape 40 | #y_test = np.reshape(y_test, (ytss[0], 1)) 41 | 42 | print(len(X_train), 'train sequences') 43 | print(len(X_test), 'test sequences') 44 | 45 | Y_train = np_utils.to_categorical(y_train, nb_classes) 46 | Y_test = np_utils.to_categorical(y_test, nb_classes) 47 | 48 | # print('Pad sequences (samples x time)') 49 | # X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 50 | # X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 51 | # print('X_train shape:', X_train.shape) 52 | # print('X_test shape:', X_test.shape) 53 | 54 | print('Build model...') 55 | model = Sequential() 56 | 57 | # we start off with an efficient embedding layer which maps 58 | # our vocab indices into embedding_dims dimensions 59 | # model.add(Embedding(max_features, embedding_dims, input_length=maxlen)) 60 | # model.add(Dropout(0.25)) 61 | 62 | # we add a Convolution1D, which will learn nb_filter 63 | # word group filters of size filter_length: 64 | model.add(Convolution1D(nb_filter=nb_filter, 65 | filter_length=filter_length_1, 66 | input_shape=(test_dim, 13), 67 | border_mode='valid', 68 | activation='relu' 69 | )) 70 | # we use standard max pooling (halving 
the output of the previous layer): 71 | model.add(BatchNormalization()) 72 | 73 | model.add(Convolution1D(nb_filter=nb_filter, 74 | filter_length=filter_length_2, 75 | border_mode='same', 76 | activation='relu' 77 | )) 78 | 79 | model.add(BatchNormalization()) 80 | 81 | model.add(MaxPooling1D(pool_length=2)) 82 | 83 | model.add(Convolution1D(nb_filter=nb_filter, 84 | filter_length=filter_length_2, 85 | border_mode='same', 86 | activation='relu' 87 | )) 88 | 89 | model.add(BatchNormalization()) 90 | 91 | model.add(MaxPooling1D(pool_length=2)) 92 | 93 | # We flatten the output of the conv layer, 94 | # so that we can add a vanilla dense layer: 95 | model.add(Flatten()) 96 | 97 | # We add a vanilla hidden layer: 98 | # model.add(Dense(hidden_dims)) 99 | model.add(Dropout(0.25)) 100 | # model.add(Activation('relu')) 101 | 102 | # We project onto a single unit output layer, and squash it with a sigmoid: 103 | model.add(Dense(2)) 104 | model.add(Activation('softmax')) 105 | 106 | model.compile(loss='binary_crossentropy', 107 | optimizer='rmsprop') 108 | model.fit(X_train, Y_train, batch_size=batch_size, 109 | nb_epoch=nb_epoch, verbose=1, 110 | validation_data=(X_test, Y_test), show_accuracy=True) 111 | 112 | #y_preds = model.predict(X_test) 113 | 114 | score = model.evaluate(X_test, Y_test, verbose=1) 115 | 116 | #print(classification_report(y_test, y_preds)) 117 | -------------------------------------------------------------------------------- /dataframes/df_arabic_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 11,11,26.0,5.0,"cairo, egypt",arabic10,arabic,male,12,egypt 3 | 13,13,32.0,11.0,"baghdad, iraq",arabic12,arabic,male,14,iraq 4 | 14,14,25.0,15.0,"zabbougha, lebanon",arabic13,arabic,male,15,lebanon 5 | 15,15,18.0,2.5,"damascus, syria",arabic2,arabic,male,16,syria 6 | 16,16,24.0,9.0,"doha, qatar",arabic3,arabic,male,17,qatar 7 | 
17,17,19.0,6.0,"sharjah, united arab emirates",arabic4,arabic,male,18,united arab emirates 8 | 19,19,21.0,14.5,"sanaa, yemen",arabic6,arabic,male,20,yemen 9 | 21,21,21.0,17.0,"casablanca, morocco",arabic8,arabic,male,22,morocco 10 | 427,427,47.0,11.0,"amman, jordan",arabic14,arabic,male,428,jordan 11 | 436,436,40.0,33.0,"qayrawan, tunisia",arabic15,arabic,male,437,tunisia 12 | 470,470,22.0,16.0,"meknes, morocco",arabic16,arabic,male,471,morocco 13 | 511,511,35.0,15.0,"settat, morocco",arabic17,arabic,male,512,morocco 14 | 538,538,18.0,6.0,"kuwait city, kuwait",arabic18,arabic,male,539,kuwait 15 | 624,624,43.0,18.0,"khouribga, morocco",arabic19,arabic,male,625,morocco 16 | 642,642,36.0,12.0,"chtaura, beqa valley, lebanon",arabic20,arabic,male,643,lebanon 17 | 926,926,42.0,13.0,"al mahalla, egypt",arabic23,arabic,male,927,egypt 18 | 969,969,40.0,7.0,"cairo, egypt",arabic24,arabic,male,970,egypt 19 | 1010,1010,38.0,5.0,"ramun, israel (occupied territory)",arabic25,arabic,male,1011,israel (occupied territory) 20 | 1012,1012,70.0,10.0,"jerusalem, israel",arabic27,arabic,male,1013,israel 21 | 1114,1114,30.0,12.0,"al-ayn, united arab emirates",arabic28,arabic,male,1115,united arab emirates 22 | 1115,1115,19.0,17.0,"doha, qatar",arabic29,arabic,male,1116,qatar 23 | 1202,1202,42.0,12.0,"baghdad, iraq",arabic32,arabic,male,1203,iraq 24 | 1390,1390,56.0,16.0,"jiddah, saudi arabia",arabic35,arabic,male,1391,saudi arabia 25 | 1391,1391,36.0,6.0,"jiddah, saudi arabia",arabic36,arabic,male,1392,saudi arabia 26 | 1392,1392,18.0,16.0,"jiddah, saudi arabia",arabic37,arabic,male,1393,saudi arabia 27 | 1404,1404,22.0,5.0,"jiddah, saudi arabia",arabic39,arabic,male,1405,saudi arabia 28 | 1419,1419,19.0,13.0,"jiddah, saudi arabia",arabic40,arabic,male,1420,saudi arabia 29 | 1420,1420,25.0,7.0,"riyadh, saudi arabia",arabic41,arabic,male,1421,saudi arabia 30 | 1421,1421,21.0,13.0,"jiddah, saudi arabia",arabic42,arabic,male,1422,saudi arabia 31 | 1456,1456,28.0,13.0,"jiddah, saudi 
arabia",arabic46,arabic,male,1457,saudi arabia 32 | 1457,1457,39.0,16.0,"jiddah, saudi arabia",arabic47,arabic,male,1458,saudi arabia 33 | 1458,1458,29.0,12.0,"mecca, saudi arabia",arabic48,arabic,male,1459,saudi arabia 34 | 1459,1459,26.0,8.0,"medina, saudi arabia",arabic49,arabic,male,1460,saudi arabia 35 | 1460,1460,36.0,22.0,"jiddah, saudi arabia",arabic50,arabic,male,1461,saudi arabia 36 | 1485,1485,23.0,12.0,"jiddah, saudi arabia",arabic51,arabic,male,1486,saudi arabia 37 | 1525,1525,36.0,17.0,"rabat, morocco",arabic53,arabic,male,1526,morocco 38 | 1564,1564,22.0,7.0,"baghdad, iraq",arabic54,arabic,male,1565,iraq 39 | 1568,1568,70.0,33.0,"beirut, lebanon",arabic55,arabic,male,1569,lebanon 40 | 1676,1676,43.0,12.0,"ain defla, algeria",arabic56,arabic,male,1677,algeria 41 | 1699,1699,34.0,9.0,"baghdad, iraq",arabic58,arabic,male,1700,iraq 42 | 1701,1701,55.0,9.0,"irbid, jordan",arabic59,arabic,male,1702,jordan 43 | 1710,1710,60.0,14.0,"cairo, egypt",arabic60,arabic,male,1711,egypt 44 | 1752,1752,20.0,7.0,"manama, bahrain",arabic61,arabic,male,1753,bahrain 45 | 1767,1767,19.0,18.0,"ad dammam, saudi arabia",arabic62,arabic,male,1768,saudi arabia 46 | 1776,1776,23.0,6.0,"riyadh, saudi arabia",arabic64,arabic,male,1777,saudi arabia 47 | 1783,1783,50.0,14.0,"medina, saudi arabia",arabic66,arabic,male,1784,saudi arabia 48 | 1784,1784,43.0,22.0,"nasriah, iraq",arabic67,arabic,male,1785,iraq 49 | 1804,1804,20.0,6.0,"london, uk",arabic68,arabic,male,1805,uk 50 | 1826,1826,47.0,16.0,"casablanca, morocco",arabic70,arabic,male,1827,morocco 51 | 1869,1869,18.0,4.0,"alexandria, egypt",arabic74,arabic,male,1870,egypt 52 | 1891,1891,25.0,4.0,"jiddah, saudi arabia",arabic76,arabic,male,1892,saudi arabia 53 | 1900,1900,25.0,11.0,"najran, saudi arabia",arabic80,arabic,male,1901,saudi arabia 54 | 1913,1913,22.0,17.0,"riyadh, saudi arabia",arabic81,arabic,male,1914,saudi arabia 55 | 1916,1916,42.0,12.0,"damascus, syria",arabic82,arabic,male,1917,syria 56 | 
# conv_1d_model_aws.py
#
# Flat training script: loads pre-computed feature/label arrays from .npy
# files, holds out 15% of them, trains a stack of 1D convolutions with batch
# normalization and max pooling, and prints a per-class
# precision/recall/F1 report for the held-out samples.
#
# NOTE(review): written against the Keras 1.x API (nb_filter, filter_length,
# border_mode, init, nb_epoch, predict_classes) and sklearn.cross_validation;
# confirm the installed versions before running.

from __future__ import print_function
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.layers.noise import GaussianNoise
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Convolution1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils


# set parameters:
# test_dim is the first axis of the model's input_shape, i.e. the number of
# steps per input sample (presumably MFCC frames -- TODO confirm against the
# code that produced the .npy files)
test_dim = 999
maxlen = 100  # not referenced anywhere else in this script
batch_size = 50
nb_filter = 512  # number of filters in every convolution layer
filter_length_1 = 100  # kernel length of the first convolution
filter_length_2 = 30  # kernel length of the four middle convolutions
filter_length_3 = 15  # kernel length of the last convolution
hidden_dims = 10  # not referenced anywhere else in this script
nb_epoch = 5
nb_classes = 3  # width of the one-hot labels and of the softmax output

print('Loading data...')
# assumes X is (n_samples, test_dim, 13) and y holds integer class ids in
# [0, nb_classes) -- TODO confirm against the code that wrote these files
X = np.load('top_3_100_split_mfcc.npy')
y = np.load('top_3_100_split_y.npy')

# hold out 15% of the samples; the same held-out set is used below both as
# validation_data during fit and for the final classification report
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

# in case the passed in data is 2d and not 3d
# (the block below is disabled: it is a bare string literal, not executed code)
'''
xts = X_train.shape
X_train = np.reshape(X_train, (xts[0], xts[1], 1))
xtss = X_test.shape
X_test = np.reshape(X_test, (xtss[0], xtss[1], 1))
yts = y_train.shape
y_train = np.reshape(y_train, (yts[0], 1))
ytss = y_test.shape
y_test = np.reshape(y_test, (ytss[0], 1))
'''

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

# one-hot encode the integer labels for categorical_crossentropy; the integer
# y_test is kept for the classification report at the end
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)


print('Build model...')
model = Sequential()

# we add a Convolution1D, which will learn nb_filter mfcc groups:
# (first layer, so it also declares the per-sample input shape: test_dim
# steps with 13 features each)
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_1,
                        input_shape=(test_dim, 13),
                        init = 'glorot_normal',
                        border_mode='valid',
                        activation='relu'
                        ))

# batch normalization: normalizes the previous layer's activations over each
# batch (it does not constrain the weights to the 0 to 1 range)
model.add(BatchNormalization())

# add more layers
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='valid',
                        activation='relu'
                        ))

model.add(BatchNormalization())

# we use standard max pooling (halving the output of the previous layer)
model.add(MaxPooling1D(pool_length=2))


model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='valid',
                        activation='relu'
                        ))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='valid',
                        activation='relu'
                        ))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

# Dropout reduces overfitting
model.add(Dropout(.1))

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='valid',
                        activation='relu'
                        ))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

model.add(Dropout(.1))

# last convolution uses the shortest kernel (filter_length_3)
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_3,
                        border_mode='valid',
                        activation='relu'
                        ))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

# We flatten the output of the conv layer,
# so that we can add a vanilla dense layer:
model.add(Flatten())

# We project onto an nb_classes-unit output layer (one unit per class) and
# apply a softmax so the outputs form a probability distribution over classes:
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics = ["accuracy"])
model.fit(X_train, Y_train, batch_size=batch_size,
          nb_epoch=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))

# print report of recall, precision, f1 score
# (predict_classes returns integer class ids, so they are compared with the
# integer y_test rather than the one-hot Y_test)
y_pred = model.predict_classes(X_test)
print(classification_report(y_test, y_pred))
colombia",spanish5,spanish,female,346,colombia 12 | 347,347,21.0,20.0,"bogota, colombia",spanish7,spanish,female,348,colombia 13 | 448,448,31.0,7.0,"guayaquil, ecuador",spanish31,spanish,female,449,ecuador 14 | 574,574,50.0,17.0,"tegucigalpa, honduras",spanish37,spanish,female,575,honduras 15 | 711,711,80.0,18.0,"bogota, colombia",spanish42,spanish,female,712,colombia 16 | 714,714,47.0,12.0,"santiago-dr, dominican republic",spanish45,spanish,female,715,dominican republic 17 | 715,715,37.0,4.0,"la romana, dominican republic",spanish44,spanish,female,716,dominican republic 18 | 717,717,25.0,12.0,"santo domingo, dominican republic",spanish47,spanish,female,718,dominican republic 19 | 776,776,29.0,21.0,"popayan, cauca, colombia",spanish52,spanish,female,777,colombia 20 | 907,907,31.0,5.0,"mayaguez, puerto rico",spanish59,spanish,female,908,puerto rico 21 | 914,914,18.0,9.0,"santiago-dr, dominican republic",spanish60,spanish,female,915,dominican republic 22 | 924,924,63.0,19.0,"buenos aires, argentina",spanish62,spanish,female,925,argentina 23 | 958,958,32.0,10.0,"la coruna, spain",spanish63,spanish,female,959,spain 24 | 1110,1110,48.0,14.0,"santa marta, colombia",spanish67,spanish,female,1111,colombia 25 | 1228,1228,20.0,7.0,"caracas, venezuela",spanish69,spanish,female,1229,venezuela 26 | 1229,1229,30.0,10.0,"bogota, colombia",spanish70,spanish,female,1230,colombia 27 | 1237,1237,20.0,5.0,"miami, florida, usa",spanish72,spanish,female,1238,usa 28 | 1272,1272,28.0,5.0,"lima, peru",spanish75,spanish,female,1273,peru 29 | 1337,1337,55.0,5.0,"lima, peru",spanish78,spanish,female,1338,peru 30 | 1343,1343,70.0,22.0,"lamas, peru",spanish79,spanish,female,1344,peru 31 | 1345,1345,77.0,75.0,"bogota, colombia",spanish80,spanish,female,1346,colombia 32 | 1346,1346,63.0,7.0,"san juan, puerto rico",spanish81,spanish,female,1347,puerto rico 33 | 1397,1397,19.0,4.0,"buenos aires, argentina",spanish84,spanish,female,1398,argentina 34 | 1398,1398,44.0,6.0,"buenos aires, 
argentina",spanish85,spanish,female,1399,argentina 35 | 1430,1430,23.0,3.0,"caracas, venezuela",spanish90,spanish,female,1431,venezuela 36 | 1432,1432,20.0,7.0,"caracas, venezuela",spanish91,spanish,female,1433,venezuela 37 | 1435,1435,29.0,18.0,"santa rosa, honduras",spanish93,spanish,female,1436,honduras 38 | 1443,1443,31.0,21.0,"juarez, mexico",spanish95,spanish,female,1444,mexico 39 | 1448,1448,29.0,12.0,"arequipa, peru",spanish96,spanish,female,1449,peru 40 | 1449,1449,52.0,19.0,"bogota, colombia",spanish97,spanish,female,1450,colombia 41 | 1505,1505,20.0,4.0,"lima, peru",spanish99,spanish,female,1506,peru 42 | 1507,1507,30.0,26.0,"iquitos, peru",spanish100,spanish,female,1508,peru 43 | 1573,1573,39.0,19.0,"la union, el salvador",spanish101,spanish,female,1574,el salvador 44 | 1599,1599,28.0,20.0,"san miguel, el salvador",spanish104,spanish,female,1600,el salvador 45 | 1612,1612,41.0,10.0,"jerez de la frontera, spain",spanish105,spanish,female,1613,spain 46 | 1731,1731,22.0,8.0,"seville, spain",spanish114,spanish,female,1732,spain 47 | 1743,1743,58.0,8.0,"la paz, bolivia",spanish115,spanish,female,1744,bolivia 48 | 1805,1805,48.0,13.0,"monterrey, mexico",spanish118,spanish,female,1806,mexico 49 | 1806,1806,40.0,11.0,"madrid, spain",spanish119,spanish,female,1807,spain 50 | 1822,1822,25.0,2.0,"bogota, colombia",spanish120,spanish,female,1823,colombia 51 | 1830,1830,26.0,10.0,"toluca, mexico",spanish122,spanish,female,1831,mexico 52 | 1833,1833,57.0,14.0,"ilbague, colombia",spanish123,spanish,female,1834,colombia 53 | 1852,1852,54.0,5.0,"colon, panama",spanish126,spanish,female,1853,panama 54 | 1868,1868,30.0,7.0,"lima, peru",spanish128,spanish,female,1869,peru 55 | 1873,1873,47.0,30.0,"havana, cuba",spanish129,spanish,female,1874,cuba 56 | 1903,1903,50.0,15.0,"rio piedras, puerto rico",spanish132,spanish,female,1904,puerto rico 57 | 1920,1920,41.0,13.0,"la union, el salvador",spanish134,spanish,female,1921,el salvador 58 | 1922,1922,41.0,22.0,"santiago de cuba, 
'''
mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True)
'''

# languages that get their own <lang>_male / <lang>_female folders
_TOP_15_LANGS = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian',
                 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese']


def _wav_paths(folder):
    """Return the sorted full paths of the .wav files directly inside ``folder``."""
    return [os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name.lower().endswith('.wav')]


# read in wav file, get out signal (np array) and sampling rate (int)
def read_in_audio(filename):
    """Read a wav file.

    Returns:
        (sig, rate): the sample array and the sampling rate, in that order
        (note this is the reverse of ``scipy.io.wavfile.read``).
    """
    (rate, sig) = wav.read(filename)
    return sig, rate


# read in signal, take absolute value and slice seconds 1-3 from beginning
def get_two_secs(filename):
    """Return the absolute value of the samples between second 1 and second 3.

    Files shorter than three seconds yield a correspondingly shorter array.
    """
    sig, rate = read_in_audio(filename)
    abs_sig = np.abs(sig)
    return abs_sig[rate:3 * rate]


# calculates moving average for a specified window (number of samples)
def take_moving_average(sig, window_width):
    """Moving average of ``sig`` over ``window_width`` consecutive samples.

    Args:
        sig: 1-D sequence of numbers.
        window_width: window size in samples; must be a positive integer.

    Returns:
        Array of length ``len(sig) - window_width + 1`` (empty when the window
        is longer than the signal).

    Raises:
        ValueError: if ``window_width`` is smaller than 1.
    """
    if window_width < 1:
        raise ValueError('window_width must be a positive integer')
    # prepend a zero so a difference of two cumulative sums is a window sum
    cumsum_vec = np.cumsum(np.insert(sig, 0, 0))
    return (cumsum_vec[window_width:] - cumsum_vec[:-window_width]) / float(window_width)


# read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk
def downsample(filename, outrate=8000, write_wav=False):
    """Resample a wav file to ``outrate`` samples per second.

    Args:
        filename: path of the wav file to read.
        outrate: target sampling rate.
        write_wav: when true, also write the resampled signal to
            ``'<filename>_down_<outrate>.wav'``.

    Returns:
        (down_sig, outrate). The original returned None when ``write_wav`` was
        true; the tuple is now returned in both cases.
    """
    (rate, sig) = wav.read(filename)
    # keyword arguments are accepted by both old and current librosa releases;
    # current releases also reject integer PCM, hence the float conversion.
    # NOTE(review): multi-channel files are passed through unchanged -- confirm
    # the corpus is mono.
    down_sig = librosa.core.resample(sig.astype(float), orig_sr=rate, target_sr=outrate, scale=True)
    if write_wav:
        wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig)
    return down_sig, outrate


# change total number of samps for downsampled file to n_samps by trimming or zero-padding and standardize them
def make_standard_length(filename, n_samps=240000, outrate=8000):
    """Downsample a wav file, force it to ``n_samps`` samples and z-score it.

    Args:
        filename: path of the wav file.
        n_samps: output length; longer signals are trimmed, shorter ones are
            zero-padded.
        outrate: sampling rate to resample to (new, defaults to the rate that
            was previously hard-wired through ``downsample``).

    Returns:
        1-D float array of length ``n_samps`` with zero mean and, unless the
        signal is constant, unit standard deviation.
    """
    down_sig, _ = downsample(filename, outrate=outrate)
    fixed_sig = librosa.util.fix_length(down_sig, size=n_samps)
    centered = fixed_sig - np.mean(fixed_sig)
    spread = np.std(fixed_sig)
    if spread == 0:
        # constant (e.g. silent) signal: avoid dividing by zero
        return centered
    return centered / spread


# from a folder containing wav files, normalize each, cut num_splits-1 overlapping windows and write the resulting np.arrays to a single matrix
def make_split_audio_array(folder, num_splits=5):
    """Stack overlapping windows of every wav file in ``folder`` into a matrix.

    Each standardized signal is divided into ``num_splits`` equal chunks and
    ``num_splits - 1`` windows are emitted, each two chunks wide and starting
    one chunk after the previous one.

    Returns:
        2-D array with one window per row; an empty ``(0, 0)`` array when the
        folder holds no wav files.
    """
    windows = []
    for path in _wav_paths(folder):
        normed_sig = make_standard_length(path)
        # integer division: slice bounds must be ints on Python 3 as well
        chunk = normed_sig.shape[0] // num_splits
        for i in range(num_splits - 1):
            windows.append(normed_sig[i * chunk:(i + 2) * chunk])
    if not windows:
        return np.empty((0, 0))
    stacked = np.array(windows)
    return stacked.reshape(stacked.shape[0], -1)


# for input wav file outputs (13, 2999) mfcc np array
def make_normed_mfcc(filename, outrate=8000):
    """MFCCs of the standardized, fixed-length version of a wav file.

    ``outrate`` is now used for the resampling as well as for the MFCC
    computation, so the two can no longer disagree.

    Returns:
        Transposed ``mfcc`` output, i.e. coefficients along the first axis and
        frames along the second.
    """
    normed_sig = make_standard_length(filename, outrate=outrate)
    return mfcc(normed_sig, outrate).T


# make mfcc np array from wav file using librosa package
def make_librosa_mfcc(filename):
    """Compute 13 MFCCs for an audio file with librosa's own loader."""
    y, sr = librosa.load(filename)
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)


# make mfcc np array from wav file using speech features package
def make_mfcc(filename):
    """Compute MFCCs of a wav file with the speech-features ``mfcc`` and transpose them."""
    (rate, sig) = wav.read(filename)
    return mfcc(sig, rate).T


# for folder containing wav files, output numpy array of normed mfcc
def make_class_array(folder):
    """Stack the normalized MFCCs of every wav file in ``folder``.

    Returns:
        3-D array indexed (file, frame, coefficient); an empty ``(0, 0, 0)``
        array when the folder holds no wav files.
    """
    feats = [make_normed_mfcc(path) for path in _wav_paths(folder)]
    if not feats:
        return np.empty((0, 0, 0))
    class_array = np.array(feats)
    # swap the last two axes; the original used np.reshape here, which keeps
    # the flat element order and therefore mixed frames and coefficients
    return np.transpose(class_array, (0, 2, 1))


# read in wav file, output (1,13) numpy array of mean mfccs for each of 13 features
def make_mean_mfcc(filename):
    """Mean of each MFCC over all frames of a wav file.

    Returns:
        1-D array of per-coefficient means, or None when the file cannot be
        read or featurized (a warning is emitted instead of failing silently).
    """
    import warnings
    try:
        (rate, sig) = wav.read(filename)
        return np.mean(mfcc(sig, rate), axis=0)
    except Exception as exc:  # best effort: keep going, but say what was skipped
        warnings.warn('could not compute mean MFCC for {}: {}'.format(filename, exc))
        return None


# write new csv corresponding to dataframe of given language and gender
def make_df_language_gender(df, language, gender):
    """Write the rows of ``df`` matching ``language`` and ``gender`` to ``df_<language>_<gender>.csv``."""
    newdf = df.query("native_language == @language").query("sex == @gender")
    newdf.to_csv('df_{}_{}.csv'.format(language, gender))


# write new directories to disk containing the male and female speakers from the most common languages
def make_folders_from_csv(root='.'):
    """Create ``<root>/<lang>/<lang>_male`` and ``<lang>_female`` for each top language.

    Directories that already exist are left alone, so the function can be
    re-run. ``root`` defaults to the current directory, as before.
    NOTE(review): ``copy_files_from_csv`` expects these folders under
    ``big_langs`` -- confirm which root is intended.
    """
    for lang in _TOP_15_LANGS:
        for gender in ('male', 'female'):
            path = os.path.join(root, lang, '{}_{}'.format(lang, gender))
            if not os.path.isdir(path):
                os.makedirs(path)


# copy files to the corresponding directories
def copy_files_from_csv(root='big_langs'):
    """Copy each speaker's wav file into its ``<lang>_<gender>`` folder.

    Reads ``df_<lang>_male.csv`` and ``df_<lang>_female.csv`` from the current
    directory and copies ``<root>/<lang>/<filename>.wav`` into
    ``<root>/<lang>/<lang>_<gender>/``. The original read the male list but
    only copied the female files; both are copied now.
    """
    import shutil  # local import: this module's import block does not provide it
    for lang in _TOP_15_LANGS:
        for gender in ('male', 'female'):
            speakers = pd.read_csv('df_{}_{}.csv'.format(lang, gender))
            for filename in speakers['filename'].values:
                shutil.copy2('{}/{}/{}.wav'.format(root, lang, filename),
                             '{}/{}/{}_{}/{}.wav'.format(root, lang, lang, gender, filename))


# input folder of wav files, output pandas dataframe of mean mfcc values
def make_mean_mfcc_df(folder):
    """Build a DataFrame with one row of mean MFCCs per wav file in ``folder``.

    Returns:
        DataFrame with one column per coefficient; an empty DataFrame when the
        folder holds no wav files.
    """
    norms = []
    for path in _wav_paths(folder):
        (rate, sig) = wav.read(path)
        norms.append(np.mean(mfcc(sig, rate), axis=0).ravel())
    if not norms:
        return pd.DataFrame()
    return pd.DataFrame(np.vstack(norms))
334,334,45.0,30.0,"managua, nicaragua",spanish20,spanish,male,335,nicaragua 10 | 335,335,25.0,18.0,"santiago, chile",spanish21,spanish,male,336,chile 11 | 337,337,22.0,7.0,"santiago, chile",spanish23,spanish,male,338,chile 12 | 338,338,27.0,5.0,"mexico city, mexico",spanish24,spanish,male,339,mexico 13 | 339,339,26.0,21.0,"san jose, costa rica",spanish25,spanish,male,340,costa rica 14 | 340,340,18.0,6.0,"burgos, spain",spanish26,spanish,male,341,spain 15 | 341,341,34.0,14.0,"zaragoza, spain",spanish27,spanish,male,342,spain 16 | 346,346,20.0,19.0,"lima, peru",spanish6,spanish,male,347,peru 17 | 348,348,21.0,12.0,"san salvador, el salvador",spanish8,spanish,male,349,el salvador 18 | 349,349,20.0,17.0,"araure, venezuela",spanish9,spanish,male,350,venezuela 19 | 409,409,18.0,10.0,"maracaibo, zulia, venezuela",spanish29,spanish,male,410,venezuela 20 | 440,440,18.0,5.0,"buenos aires, argentina",spanish30,spanish,male,441,argentina 21 | 449,449,26.0,22.0,"santa cruz, bolivia",spanish32,spanish,male,450,bolivia 22 | 466,466,18.0,15.0,"caracas, venezuela",spanish33,spanish,male,467,venezuela 23 | 475,475,28.0,11.0,"cartagena, spain",spanish34,spanish,male,476,spain 24 | 478,478,28.0,17.0,"mexico city, mexico",spanish35,spanish,male,479,mexico 25 | 498,498,27.0,25.0,"montevideo, uruguay",spanish36,spanish,male,499,uruguay 26 | 575,575,53.0,13.0,"tegucigalpa, honduras",spanish38,spanish,male,576,honduras 27 | 613,613,20.0,3.0,"pamplona, spain",spanish39,spanish,male,614,spain 28 | 643,643,20.0,6.0,"cartago, costa rica",spanish40,spanish,male,644,costa rica 29 | 705,705,25.0,6.0,"bonao, dominican republic",spanish41,spanish,male,706,dominican republic 30 | 713,713,43.0,16.0,"santiago-dr, dominican republic",spanish43,spanish,male,714,dominican republic 31 | 716,716,33.0,14.0,"santo domingo, dominican republic",spanish46,spanish,male,717,dominican republic 32 | 730,730,21.0,6.0,"bilbao, spain",spanish48,spanish,male,731,spain 33 | 731,731,31.0,6.0,"tandil, 
argentina",spanish49,spanish,male,732,argentina 34 | 751,751,27.0,15.0,"santo domingo, dominican republic",spanish50,spanish,male,752,dominican republic 35 | 752,752,20.0,6.0,"caguas, puerto rico",spanish51,spanish,male,753,puerto rico 36 | 796,796,33.0,6.0,"bogota, colombia",spanish53,spanish,male,797,colombia 37 | 799,799,25.0,18.0,"vina del mar, chile",spanish54,spanish,male,800,chile 38 | 809,809,36.0,9.0,"tandil, argentina",spanish55,spanish,male,810,argentina 39 | 837,837,22.0,8.0,"monterrey, mexico",spanish56,spanish,male,838,mexico 40 | 885,885,33.0,11.0,"montevideo, uruguay",spanish57,spanish,male,886,uruguay 41 | 905,905,31.0,18.0,"bogota, colombia",spanish58,spanish,male,906,colombia 42 | 923,923,67.0,39.0,"azuaga, spain",spanish61,spanish,male,924,spain 43 | 961,961,28.0,12.0,"medellin, colombia",spanish64,spanish,male,962,colombia 44 | 1068,1068,32.0,12.0,"barranquilla, colombia",spanish65,spanish,male,1069,colombia 45 | 1076,1076,30.0,12.0,"san salvador, el salvador",spanish66,spanish,male,1077,el salvador 46 | 1226,1226,27.0,14.0,"san fernando, chile",spanish68,spanish,male,1227,chile 47 | 1230,1230,24.0,6.0,"rosario, argentina",spanish71,spanish,male,1231,argentina 48 | 1239,1239,19.0,15.0,"merida, venezuela",spanish73,spanish,male,1240,venezuela 49 | 1248,1248,44.0,14.0,"zaragoza, spain",spanish74,spanish,male,1249,spain 50 | 1289,1289,27.0,10.0,"la paz, bolivia",spanish76,spanish,male,1290,bolivia 51 | 1300,1300,45.0,13.0,"jalisco, mexico",spanish77,spanish,male,1301,mexico 52 | 1395,1395,49.0,14.0,"la paz, bolivia",spanish82,spanish,male,1396,bolivia 53 | 1396,1396,48.0,14.0,"la paz, bolivia",spanish83,spanish,male,1397,bolivia 54 | 1399,1399,23.0,4.0,"buenos aires, argentina",spanish86,spanish,male,1400,argentina 55 | 1400,1400,55.0,9.0,"la paz, bolivia",spanish87,spanish,male,1401,bolivia 56 | 1401,1401,53.0,25.0,"la paz, bolivia",spanish88,spanish,male,1402,bolivia 57 | 1424,1424,22.0,13.0,"monterrey, mexico",spanish89,spanish,male,1425,mexico 
58 | 1434,1434,54.0,9.0,"cardenas, cuba",spanish92,spanish,male,1435,cuba 59 | 1438,1438,19.0,6.0,"bogota, colombia",spanish94,spanish,male,1439,colombia 60 | 1450,1450,34.0,29.0,"veracruz, mexico",spanish98,spanish,male,1451,mexico 61 | 1574,1574,31.0,26.0,"cochabamba, bolivia",spanish102,spanish,male,1575,bolivia 62 | 1583,1583,46.0,15.0,"san miguel, el salvador",spanish103,spanish,male,1584,el salvador 63 | 1620,1620,21.0,5.0,"lima, peru",spanish106,spanish,male,1621,peru 64 | 1621,1621,36.0,14.0,"san salvador, el salvador",spanish107,spanish,male,1622,el salvador 65 | 1628,1628,36.0,24.0,"medellin, colombia",spanish108,spanish,male,1629,colombia 66 | 1659,1659,21.0,16.0,"maracay, venezuela",spanish109,spanish,male,1660,venezuela 67 | 1679,1679,20.0,18.0,"caracas, venezuela",spanish110,spanish,male,1680,venezuela 68 | 1688,1688,55.0,33.0,"santiago, chile",spanish111,spanish,male,1689,chile 69 | 1692,1692,25.0,15.0,"la paz, bolivia",spanish112,spanish,male,1693,bolivia 70 | 1694,1694,19.0,8.0,"cochabamba, bolivia",spanish113,spanish,male,1695,bolivia 71 | 1746,1746,37.0,6.0,"bogota, colombia",spanish116,spanish,male,1747,colombia 72 | 1796,1796,46.0,12.0,"madrid, spain",spanish117,spanish,male,1797,spain 73 | 1825,1825,34.0,21.0,"pasaquina, el salvador",spanish121,spanish,male,1826,el salvador 74 | 1834,1834,22.0,5.0,"santa cruz, bolivia",spanish124,spanish,male,1835,bolivia 75 | 1839,1839,23.0,9.0,"lima, peru",spanish125,spanish,male,1840,peru 76 | 1859,1859,34.0,19.0,"san salvador, el salvador",spanish127,spanish,male,1860,el salvador 77 | 1890,1890,27.0,21.0,"guayaquil, ecuador",spanish130,spanish,male,1891,ecuador 78 | 1902,1902,56.0,25.0,"havana, cuba",spanish131,spanish,male,1903,cuba 79 | 1915,1915,38.0,19.0,"cochabamba, bolivia",spanish133,spanish,male,1916,bolivia 80 | 1948,1948,49.0,8.0,"santiago, chile",spanish136,spanish,male,1949,chile 81 | 1980,1980,50.0,38.0,"guatemala city, guatemala",spanish139,spanish,male,1981,guatemala 82 | 
# Speech Accent Archive URL templates shared by the functions below
_SOUNDTRACK_URL = "http://accent.gmu.edu/soundtracks/{0}{1}.mp3"
_LANGUAGE_URL = 'http://accent.gmu.edu/browse_language.php?function=find&language={}'
_SPEAKER_URL = "http://accent.gmu.edu/browse_language.php?function=detail&speakerid={}"


def _parse_speaker_count(soup):
    """Read the speaker count from a parsed language page; 0 when it cannot be found.

    Takes the third whitespace-separated token of the first <h5> inside the
    first 'content' div, exactly as the original code did.
    """
    try:
        content = soup.find_all('div', attrs={'class': 'content'})
        return int(content[0].find('h5').text.split()[2])
    except (AttributeError, IndexError, ValueError):
        # missing div / missing <h5> / non-numeric token
        return 0


# from the accent.gmu website, pass in list of languages to scrape mp3 files and save them to disk
def mp3getter(lst, max_retries=5, delay=2):
    """Download ``<language><i>.mp3`` for i in 1..count into the current directory.

    Args:
        lst: sequence of ``(language, count)`` pairs, e.g. the output of
            ``get_formatted_languages``.
        max_retries: attempts per file before giving up on it (the original
            retried forever, which never returns for a file that does not
            exist on the server).
        delay: seconds to wait between attempts.

    Returns:
        List of the local file names that could not be downloaded (empty when
        everything succeeded).
    """
    try:  # Python 3
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve

    failed = []
    for language, count in lst:
        for i in range(1, count + 1):
            target = '{0}{1}.mp3'.format(language, i)
            for attempt in range(max_retries):
                try:
                    urlretrieve(_SOUNDTRACK_URL.format(language, i), target)
                except Exception:
                    # not a bare except: Ctrl-C must still stop the scrape
                    if attempt + 1 < max_retries:
                        time.sleep(delay)
                else:
                    break
            else:
                # every attempt raised
                failed.append(target)
    return failed


# from list of languages, return urls of each language landing page
def lang_pages(lst):
    """Return the landing-page URL of every language in ``lst`` (same order)."""
    return [_LANGUAGE_URL.format(lang) for lang in lst]

#output:
#
# ['http://accent.gmu.edu/browse_language.php?function=find&language=amharic',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=arabic',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=bengali',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=bulgarian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=cantonese',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=dutch',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=english',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=farsi',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=french',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=german',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=greek',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=hindi',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=italian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=japanese',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=korean',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=kurdish',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=macedonian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=mandarin',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=miskito',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=nepali',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=pashto',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=polish',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=portuguese',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=punjabi',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=romanian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=russian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=serbian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=spanish',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=swedish',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=tagalog',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=thai',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=turkish',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=ukrainian',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=urdu',
#  'http://accent.gmu.edu/browse_language.php?function=find&language=vietnamese']


# from http://accent.gmu.edu/browse_language.php, return list of languages
def get_languages():
    """Scrape the browse page and return the text of every <li> inside a 'languagelist' <ul>."""
    url = "http://accent.gmu.edu/browse_language.php"
    html = get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    languages = []
    for ul in soup.findAll('ul', attrs={'class': 'languagelist'}):
        languages.extend(li.text for li in ul.findAll('li'))
    return languages


# from list of languages, return list of urls
def get_language_urls(lst):
    """Same result as ``lang_pages``; kept as a separate name for existing callers."""
    return lang_pages(lst)


# from language, get the number of speakers of that language
def get_num(language):
    """Return the number of speakers listed for ``language``, or 0 when the page cannot be parsed."""
    html = get(_LANGUAGE_URL.format(language))
    soup = BeautifulSoup(html.content, 'html.parser')
    return _parse_speaker_count(soup)


# from list of languages, return list of tuples (LANGUAGE, LANGUAGE_NUM_SPEAKERS) for mp3getter, ignoring languages
# with 0 speakers
def get_formatted_languages(languages):
    """Pair every language that has at least one speaker with its speaker count."""
    formatted_languages = []
    for language in languages:
        num = get_num(language)
        if num != 0:
            formatted_languages.append((language, num))
    return formatted_languages


# from each language whose url is contained in the above list, save the number of speakers of that language to a list
def get_nums(lst):
    """Return the speaker count of every language page URL in ``lst`` (same order).

    A page that cannot be parsed contributes 0, matching ``get_num``, so the
    result always lines up with the input list.
    """
    nums = []
    for url in lst:
        html = get(url)
        soup = BeautifulSoup(html.content, 'html.parser')
        nums.append(_parse_speaker_count(soup))
    return nums


def get_speaker_info(start, stop):
    '''
    Inputs: two integers, the first speaker id to fetch and the id to stop before (stop itself is not fetched)
    Outputs: Pandas Dataframe containing speaker filename, birthplace, native_language, age, sex, age_onset of English

    Also writes the dataframe to 'speaker_info_<stop>.csv' in the current
    directory. Speakers whose page cannot be parsed keep their speakerid and
    get empty strings in every other field.
    '''
    fields = ('filename', 'birthplace', 'native_language', 'age', 'sex', 'age_onset')
    user_data = []
    for num in range(start, stop):
        info = {'speakerid': num}
        for field in fields:
            info[field] = ''
        html = get(_SPEAKER_URL.format(num))
        soup = BeautifulSoup(html.content, 'html.parser')
        try:
            body = soup.find_all('div', attrs={'class': 'content'})
            bio_items = soup.find_all('ul', attrs={'class': 'bio'})[0].find_all('li')
            age_sex = bio_items[3].text.split()
            parsed = {
                'filename': str(body[0].find('h5').text.split()[0]),
                # fixed offsets strip the label and trailing text of the
                # birthplace item -- presumably 'birth place: ' and ' (map)';
                # verify against a live page
                'birthplace': str(bio_items[0].text)[13:-6],
                'native_language': str(bio_items[1].text.split()[2]),
                'age': float(age_sex[2].strip(',')),
                'sex': str(age_sex[3].strip()),
                'age_onset': float(bio_items[4].text.split()[4].strip()),
            }
        except Exception:
            # unparseable page: keep the blank row so speaker ids stay contiguous
            parsed = {}
        info.update(parsed)
        user_data.append(info)
    df = pd.DataFrame(user_data)
    df.to_csv('speaker_info_{}.csv'.format(stop))
    return df


# copy files from one list of wav files to a specified location
def copy_files(lst, path):
    """Copy ``<name>.wav`` from the current directory into ``path`` for every name in ``lst``."""
    for filename in lst:
        shutil.copy2('{}.wav'.format(filename), '{}/{}.wav'.format(path, filename))
[Future Directions](#future-directions) 16 | 17 | ## Motivation 18 | 19 | The motivation behind developing a model to recognize accents in spoken English is primarily twofold. First, if it is possible to determine a speaker's geographic location or native language simply by their accent, then it might be possible, for instance in a call center, to more efficiently route that person to a regional representative or to a speaker of an appropriate language. Secondly, accent recognition is simply a necessary precursor to automatic speech recognition (ASR), such as is found in Siri--to understand what a person is saying, there must be a model in place that expects how they are going to say it. 20 | 21 | The decision to use neural networks to tackle this problem, which has frequently been approached with other methods in the past, was one done for the sake of novelty and because nets perform well for a variety of media classification tasks. 22 | 23 | ## Dataset 24 | 25 | All of the speech files used for this project come from the [Speech Accent Archive](http://accent.gmu.edu), a repository of spoken English hosted by George Mason University. Over 2000 speakers representing over 100 native languages read a common elicitation paragraph in English: 26 | 27 | >Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station. 28 | 29 | 30 | The common nature of the dataset makes it ideal for studying accent, being that the wording is provided and the recording quality is (nearly) uniform across all speakers. 
31 | 32 | Sample Speaker Page 33 | 34 | Figure 1: Example [speech accent archive](http://accent.gmu.edu) page for a speaker, including the audio file, a phonetic transcription of their reading, as well as biographical data. 35 | 36 | For each of the bigger languages (defined by me as having at least 10 speakers represented in the dataset), I scraped the site and saved the mp3 files to folders and the biographical data to [csv files](https://github.com/dwww2012/Accent-Classifier/tree/master/dataframes)--for manipulation using Pandas DataFrames. 37 | 38 | ## Signal Processing 39 | 40 | Having saved all the .mp3 files, I then converted them to .wav files, being that this is a more universal (and less compressed) format for audio processing. (Functions used in this process of audio retrieval and ordering can be found in my **code** directory [here](https://github.com/dwww2012/Accent-Classifier/tree/master/code/mp3_getter.py).) 41 | 42 | The .wav files themselves are routinely visualized as waveforms such as this: 43 | English1 Waveform 44 | 45 | Figure 2: Waveform for the 'english1' file, showing speech power over time (roughly 20 seconds). 46 | 47 | Depicted here is the graph of a numerical representation of a 21-second audio file, corresponding essentially to energy (i.e. pressure) on the y-axis and time on the x-axis. Being that the sample rate is set at 44100 samples/sec, this gives us a 1-dimensional vector of nearly 1 million values in less than 30 seconds! 48 | 49 | Attempts were made to process that raw data, but passing a vector with on the order of 1 million features into a machine learning algorithm seemed intractable. One attempt was to downsample the audio (i.e., reduce its sampling rate). Arriving at a reasonable number of features, however, meant losing almost all recognizable speech signal, let alone accent information, contained in the file.
Thus it was necessary to use a lower-dimensional but more highly significant feature space to represent the speech files. 50 | 51 | ## Features from MFCCs 52 | 53 | To featurize the audio files, I used mel frequency cepstral coefficients (MFCCs), which are a decades old tool for representing human speech as it is perceived. Quoting from [practical cryptography](http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/), 54 | > The main point to understand about speech is that the sounds generated by a human are filtered by the shape of the vocal tract including tongue, teeth etc. This shape determines what sound comes out. If we can determine the shape accurately, this should give us an accurate representation of the phoneme being produced. The shape of the vocal tract manifests itself in the envelope of the short time power spectrum, and the job of MFCCs is to accurately represent this envelope. 55 | 56 | This is achieved through six steps: 57 | 1. Frame the signal into short frames. 58 | 2. For each frame calculate the periodogram estimate of the power spectrum. 59 | 3. Apply the mel filterbank to the power spectra, sum the energy in each filter. 60 | 4. Take the logarithm of all filterbank energies. 61 | 5. Take the DCT of the log filterbank energies. 62 | 6. Keep DCT coefficients 2-13, discard the rest. 63 | 64 | MFCC Flow 65 | 66 | Figure 3: Schematic representation of the steps necessary to create a Mel frequency cepstral coefficient (MFCC) from an audio signal. 67 | 68 | The output of this process is a 13-dimensional vector, each dimension corresponding to a different band in human hearing. 69 | 70 | English1 MFCCs 71 | 72 | Figure 4: Visual representation of the 13 MFCCs for the 'english1' audio file over ~22s. 73 | 74 | In my data, being that every speech instance was different, I had to normalize all of them to some standard, so that the MFCCs had the same dimensions. 
I chose to trim/pad them all to be ~30 seconds in length. Thus, given that my MFCC window advances in steps of 0.01s, this yielded MFCCs of shape (2999, 13). 75 | 76 | Code necessary for the creation of these features and other pre-processing to feed into my model(s) can be found [here](https://github.com/dwww2012/Accent-Classifier/tree/master/code/testing.py). 77 | 78 | ## Neural Network Models 79 | 80 | The problem of machine learning accent recognition has traditionally been handled with some combination of support vector machines (SVMs), hidden Markov models (HMMs), Gaussian mixture models (GMMs) and dynamic time warping (DTW). These tools allow machines to arrive at the most important features of speech, controlling for inherent temporal variation. 81 | 82 | I found the prospect of a neural network appealing in that it can train itself to learn the important features without needing to control explicitly for time--that is, the difference in speaking pace between two speakers needn't necessarily be controlled for (as with DTW), because a convolutional or recurrent net will update its weights across a sequence of any length. 83 | 84 | ### 1D Convolutional Neural Net 85 | 86 | Two-dimensional convolutional neural networks (CNNs) are increasingly used as the go-to machine learning algorithm for computer vision--i.e., recognizing an image. Less used but no less powerful--in their domain of application--are 1D CNNs, which are especially suited for text or time-series data, such as mine. The key underlying trait of convolutional nets is that they are spatially (or temporally) invariant, and thus inherent variation in speech patterns is controlled for by the convolutions and pooling.
87 | 88 | My [CNN model](https://github.com/dwww2012/Accent-Classifier/blob/master/code/conv_1d_model_aws.py) was built on top of the [Keras](http://keras.io) deep learning Python package and largely based on a [text processing example](https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py). I performed many adjustments on my net depending on how many and which accents I was modeling. In its current form, it features six convolutional layers with batch normalization after each to control for weights veering too far from (0,1) and MaxPooling after all but the first convolution. 89 | 90 | The model was trained on (generally) 85% of my data, using either full (2999, 13)-dimensional MFCCs or else split (5, 999, 13)-dimensional MFCCs, where each 30 second file is split into 5 overlapping 10 second files. This latter split was used simply to give me 5 times more training samples in the case of small data. 91 | 92 | ### LSTM Recurrent Neural Net 93 | 94 | Another, potentially more promising, variety of neural network I was eager to try was the recurrent neural net (RNN). Specifically, I implemented a long short-term memory (LSTM) RNN. RNNs generally are nets wherein the weights are updated *between* hidden cells in the same layer, and these updates occur recurrently for passed in sequences (such as time-varying speech signal data!). An LSTM is a specific version of RNN that employs *memory cells* to preserve data throughout the sequence of unknown duration, such as ours. 95 | 96 | Again, my [LSTM RNN model](https://github.com/dwww2012/Accent-Classifier/blob/master/code/rnn_example.py) was built on top of Keras. It features three LSTM layers and again feeds in (n, 999, 13)-dimensional MFCCs as its inputs. 
The model can be run as either 'stateful' or not; when it is stateful, the last state for each sample at index i in a batch is used as the initial state for the sample at index i in the following batch--in this case, the 3-dimensional batch size must be passed explicitly into the model's first layer. 97 | 98 | ### Results 99 | 100 | After training both the CNN and RNN on the top 5 most common languages--those being English, Spanish, Arabic, Mandarin, and French--my best results were ~86% precision for US English classification. Other languages lagged behind, with Arabic, Spanish and Mandarin in the 65-75% range and French performing especially poorly. Focusing only on the top three languages, my RNN outperformed my CNN, yielding **accuracy and recall of 75%+ for English, Spanish and Arabic**. 101 | 102 | ## Future Directions 103 | 104 | Going forward, I would like to improve my net(s) so that they can distinguish more languages more reliably. That most likely will entail building deeper nets. Moreover, it is likely that I will simply need more and better data to do that. My training sets are on the order of hundreds of observations, which are very small numbers for a neural network. In addition, many speakers have only faint accents. To train a model well, I would need to manually select only those speakers with a strong accent. 105 | 106 | My ultimate goal is to build a live web app that takes 'wild' (i.e., unstructured) speech audio and makes a prediction of the age, gender and geographic location/language of the speaker.
107 | -------------------------------------------------------------------------------- /dataframes/df_usa_english_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state 2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa,pennsylvania 3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa,connecticut 4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa,new york 5 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa,virginia 6 | 73,73,79.0,0.0,"darwin va, virginia, usa",english17,english,male,74,usa,virginia 7 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa,kentucky 8 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa,tennessee 9 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa,georgia 10 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa,maryland 11 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa,california 12 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa,wisconsin 13 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa,arkansas 14 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa,north carolina 15 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa,california 16 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa,virginia 17 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa,michigan 18 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa,new york 19 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa,ohio 20 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa,virginia 21 | 
123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa,utah 22 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa,texas 23 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa,pennsylvania 24 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa,maryland 25 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa,indiana 26 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa,new york 27 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa,mississippi 28 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa,texas 29 | 133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa,california 30 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa,kansas 31 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa,idaho 32 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa,massachusetts 33 | 144,144,30.0,0.0,"st. 
paul, minnesota, usa",english81,english,male,145,usa,minnesota 34 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa,south carolina 35 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa,michigan 36 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa,massachusetts 37 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa,pennsylvania 38 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa,indiana 39 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa,new jersey 40 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa,north carolina 41 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa,washington 42 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa,illinois 43 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa,georgia 44 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa,ohio 45 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa,minnesota 46 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa,new york 47 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa,connecticut 48 | 496,496,23.0,0.0,"west palm beach, florida, usa",english131,english,male,497,usa,florida 49 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa,alabama 50 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa,west virginia 51 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa,rhode island 52 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa,new york 53 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa,minnesota 54 | 521,521,22.0,0.0,"blytheville, arkansas, 
usa",english146,english,male,522,usa,arkansas 55 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa,georgia 56 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa,kentucky 57 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa,maryland 58 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa,california 59 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa,texas 60 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa,ohio 61 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa,pennsylvania 62 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa,michigan 63 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa,california 64 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa,mississippi 65 | 562,562,21.0,0.0,"st. louis, missouri, usa",english171,english,male,563,usa,missouri 66 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa,illinois 67 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa,washington 68 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa,pennsylvania 69 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa,south dakota 70 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa,rhode island 71 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa,pennsylvania 72 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa,new york 73 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa,north dakota 74 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa,illinois 75 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa,new mexico 76 | 
636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa,hawaii 77 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa,iowa 78 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa,district of columbia 79 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa,new york 80 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa,maryland 81 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa,illinois 82 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa,dc 83 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa,massachusetts 84 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa,south carolina 85 | 733,733,38.0,0.0,"dodge city, kansas, usa",english213,english,male,734,usa,kansas 86 | 736,736,26.0,0.0,"myrtle beach, south carolina, usa",english214,english,male,737,usa,south carolina 87 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa,pennsylvania 88 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa,vermont 89 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa,florida 90 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa,indiana 91 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa,illinois 92 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa,california 93 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa,pennsylvania 94 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa,california 95 | 854,854,21.0,0.0,"mt. 
kisco, new york, usa",english251,english,male,855,usa,new york 96 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa,oregon 97 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa,north carolina 98 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa,california 99 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa,maryland 100 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa,illinois 101 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa,california 102 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa,washington 103 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa,california 104 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa,texas 105 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa,pennsylvania 106 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa,minnesota 107 | 938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa,florida 108 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa,new york 109 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa,florida 110 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa,georgia 111 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa,missouri 112 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa,new york 113 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa,va 114 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa,maryland 115 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa,california 116 | 1121,1121,33.0,0.0,"sacramento, california, 
usa",english316,english,male,1122,usa,california 117 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa,vermont 118 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa,new york 119 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa,pennsylvania 120 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa,ohio 121 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa,florida 122 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa,district of columbia 123 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa,illinois 124 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa,west virginia 125 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa,missouri 126 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa,maine 127 | 1232,1232,20.0,0.0,"kansas city, missouri, usa",english345,english,male,1233,usa,missouri 128 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa,connecticut 129 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa,missouri 130 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa,california 131 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa,georgia 132 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa,oregon 133 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa,virginia 134 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa,kentucky 135 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa,connecticut 136 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa,california 137 | 
1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa,new jersey 138 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa,kentucky 139 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa,virginia 140 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa,west virginia 141 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa,wisconsin 142 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa,virginia 143 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa,virginia 144 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa,california 145 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa,rhode island 146 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa,ohio 147 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa,new york 148 | 1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa,virginia 149 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa,virginia 150 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa,kansas 151 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa,ohio 152 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa,new york 153 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa,colorado 154 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa,alabama 155 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa,california 156 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa,connecticut 157 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa,vermont 158 | 
1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa,georgia 159 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa,new york 160 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa,north carolina 161 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa,kentucky 162 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa,maine 163 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, usa",english468,english,male,1640,usa,south carolina 164 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa,tennessee 165 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa,north carolina 166 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa,massachusetts 167 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa,connecticut 168 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa,florida 169 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa,ohio 170 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa,washington 171 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa,ohio 172 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa,florida 173 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa,ohio 174 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa,colorado 175 | 1889,1889,19.0,0.0,"downers grove, illinois, usa",english524,english,male,1890,usa,illinois 176 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa,virginia 177 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa,new hampshire 178 | 1994,1994,56.0,0.0,"tangier island, virginia, 
usa",english545,english,male,1995,usa,virginia 179 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa,virginia 180 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa,virginia 181 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa,tennessee 182 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa,new york 183 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa,new york 184 | 2073,2073,32.0,0.0,"manassas, virginia, usa",english563,english,male,2074,usa,virginia 185 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa,wyoming 186 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa,new york 187 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa,dc 188 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa,texas 189 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa,ohio 190 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa,virginia 191 | -------------------------------------------------------------------------------- /dataframes/df_usa_english_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state 2 | 61,61,35.0,0.0,"davenport, iowa, usa",english10,english,female,62,usa,iowa 3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa,florida 4 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa,ohio 5 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa,virginia 6 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa,virginia 7 | 77,77,27.0,0.0,"mt. 
holly, north carolina, usa",english20,english,female,78,usa,north carolina 8 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa,massachusetts 9 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa,maryland 10 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa,california 11 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa,new york 12 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa,wisconsin 13 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa,louisiana 14 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa,north carolina 15 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa,arkansas 16 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa,south dakota 17 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa,indiana 18 | 115,115,53.0,0.0,"st. 
louis, missouri, usa",english55,english,female,116,usa,missouri 19 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa,new york 20 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa,massachusetts 21 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa,maine 22 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa,illinois 23 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa,louisiana 24 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa,texas 25 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa,wisconsin 26 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa,louisiana 27 | 158,158,55.0,0.0,"pensacola, florida, usa",english94,english,female,159,usa,florida 28 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa,pennsylvania 29 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa,california 30 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa,new hampshire 31 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa,alabama 32 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa,alabama 33 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa,california 34 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa,ohio 35 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa,florida 36 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa,new york 37 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa,alaska 38 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa,california 39 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa,district of 
columbia 40 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa,minnesota 41 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa,ohio 42 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa,ohio 43 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa,michigan 44 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa,mississippi 45 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa,north carolina 46 | 573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa,louisiana 47 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa,oregon 48 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa,alabama 49 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa,iowa 50 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa,michigan 51 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa,wisconsin 52 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa,california 53 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa,michigan 54 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa,massachusetts 55 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa,minnesota 56 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa,pennsylvania 57 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa,illinois 58 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa,virginia 59 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa,california 60 | 748,748,18.0,0.0,"anaheim, california, usa",english220,english,female,749,usa,california 61 | 763,763,18.0,0.0,"auburn, indiana, 
usa",english222,english,female,764,usa,indiana 62 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa,virginia 63 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa,louisiana 64 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa,ohio 65 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa,mississippi 66 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa,pennsylvania 67 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa,massachusetts 68 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa,california 69 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa,arizona 70 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa,maryland 71 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa,new jersey 72 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa,tennessee 73 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa,new york 74 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa,georgia 75 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa,texas 76 | 981,981,26.0,0.0,"st. 
louis, missouri, usa",english288,english,female,982,usa,missouri 77 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa,virginia 78 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa,montana 79 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa,california 80 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa,michigan 81 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa,tennessee 82 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa,florida 83 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa,idaho 84 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa,georgia 85 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa,maryland 86 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa,washington 87 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa,maryland 88 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa,new jersey 89 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa,new york 90 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa,north carolina 91 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa,minnesota 92 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa,virginia 93 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa,pennsylvania 94 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa,minnesota 95 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa,colorado 96 | 1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa,michigan 97 | 
1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa,maryland 98 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa,virginia 99 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa,district of columbia 100 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa,kentucky 101 | 1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa,california 102 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa,california 103 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa,west virginia 104 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa,virginia 105 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa,florida 106 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa,district of columbia 107 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa,south carolina 108 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa,virginia 109 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa,north carolina 110 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa,north carolina 111 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa,georgia 112 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa,florida 113 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa,district of columbia 114 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa,district of columbia 115 | 1373,1373,30.0,0.0,"woonsocket, rhode island, usa",english414,english,female,1374,usa,rhode island 116 | 1394,1394,24.0,0.0,"baltimore, 
maryland, usa",english423,english,female,1395,usa,maryland 117 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa,south carolina 118 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa,pennsylvania 119 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa,south carolina 120 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa,pennsylvania 121 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa,wisconsin 122 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa,michigan 123 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa,california 124 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa,texas 125 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa,illinois 126 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa,minnesota 127 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa,illinois 128 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa,north carolina 129 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa,dc 130 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa,nevada 131 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa,new york 132 | 1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa,new york 133 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa,minnesota 134 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa,utah 135 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa,new hampshire 136 | 1658,1658,31.0,0.0,"san diego, california, 
usa",english477,english,female,1659,usa,california 137 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa,new york 138 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa,oklahoma 139 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa,pennsylvania 140 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa,nebraska 141 | 1727,1727,21.0,0.0,"st. paul, minnesota, usa",english499,english,female,1728,usa,minnesota 142 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa,pennsylvania 143 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa,michigan 144 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa,louisiana 145 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa,illinois 146 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa,pennsylvania 147 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa,pennsylvania 148 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa,alabama 149 | 1818,1818,54.0,0.0,"salina, kansas, usa",english512,english,female,1819,usa,kansas 150 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa,ohio 151 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa,texas 152 | 1883,1883,47.0,0.0,"st. 
paul, minnesota, usa",english520,english,female,1884,usa,minnesota 153 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa,maryland 154 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa,new york 155 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa,wisconsin 156 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa,new york 157 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa,pennsylvania 158 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa,wisconsin 159 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa,vermont 160 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa,michigan 161 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa,virginia 162 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa,connecticut 163 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa,virginia 164 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa,new york 165 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa,massachusetts 166 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa,virginia 167 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa,virginia 168 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa,virginia 169 | 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa,california 170 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa,virginia 171 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa,florida 172 | 1995,1995,43.0,0.0,"tangier island, virginia, 
usa",english546,english,female,1996,usa,virginia 173 | 2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa,virginia 174 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa,maryland 175 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa,illinois 176 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa,new york 177 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa,virginia 178 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa,connecticut 179 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa,virginia 180 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa,south carolina 181 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa,oregon 182 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa,florida 183 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa,ohio 184 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa,dc 185 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa,california 186 | -------------------------------------------------------------------------------- /dataframes/df_usa_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state 2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa,pennsylvania 3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa,connecticut 4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa,new york 5 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa,virginia 6 | 73,73,79.0,0.0,"darwin va, virginia, 
usa",english17,english,male,74,usa,virginia 7 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa,kentucky 8 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa,tennessee 9 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa,georgia 10 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa,maryland 11 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa,california 12 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa,wisconsin 13 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa,arkansas 14 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa,north carolina 15 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa,california 16 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa,virginia 17 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa,michigan 18 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa,new york 19 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa,ohio 20 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa,virginia 21 | 123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa,utah 22 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa,texas 23 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa,pennsylvania 24 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa,maryland 25 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa,indiana 26 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa,new york 27 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa,mississippi 28 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa,texas 29 | 
133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa,california 30 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa,kansas 31 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa,idaho 32 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa,massachusetts 33 | 144,144,30.0,0.0,"st. paul, minnesota, usa",english81,english,male,145,usa,minnesota 34 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa,south carolina 35 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa,michigan 36 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa,massachusetts 37 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa,pennsylvania 38 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa,indiana 39 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa,new jersey 40 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa,north carolina 41 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa,washington 42 | 261,261,22.0,5.0,"berkeley, california, usa",mandarin7,mandarin,male,262,usa,california 43 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa,illinois 44 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa,georgia 45 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa,ohio 46 | 463,463,19.0,16.0,"new orleans, louisiana, usa",kikongo1,kikongo,male,464,usa,louisiana 47 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa,minnesota 48 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa,new york 49 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa,connecticut 50 | 496,496,23.0,0.0,"west palm beach, florida, 
usa",english131,english,male,497,usa,florida 51 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa,alabama 52 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa,west virginia 53 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa,rhode island 54 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa,new york 55 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa,minnesota 56 | 521,521,22.0,0.0,"blytheville, arkansas, usa",english146,english,male,522,usa,arkansas 57 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa,georgia 58 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa,kentucky 59 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa,maryland 60 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa,california 61 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa,texas 62 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa,ohio 63 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa,pennsylvania 64 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa,michigan 65 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa,california 66 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa,mississippi 67 | 562,562,21.0,0.0,"st. 
louis, missouri, usa",english171,english,male,563,usa,missouri 68 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa,illinois 69 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa,washington 70 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa,pennsylvania 71 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa,south dakota 72 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa,rhode island 73 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa,pennsylvania 74 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa,new york 75 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa,north dakota 76 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa,illinois 77 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa,new mexico 78 | 636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa,hawaii 79 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa,iowa 80 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa,district of columbia 81 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa,new york 82 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa,maryland 83 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa,illinois 84 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa,dc 85 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa,massachusetts 86 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa,south carolina 87 | 733,733,38.0,0.0,"dodge city, kansas, usa",english213,english,male,734,usa,kansas 88 | 736,736,26.0,0.0,"myrtle beach, south carolina, 
usa",english214,english,male,737,usa,south carolina 89 | 740,740,22.0,6.0,"washington, dc, usa",french18,french,male,741,usa,dc 90 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa,pennsylvania 91 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa,vermont 92 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa,florida 93 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa,indiana 94 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa,illinois 95 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa,california 96 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa,pennsylvania 97 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa,california 98 | 854,854,21.0,0.0,"mt. kisco, new york, usa",english251,english,male,855,usa,new york 99 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa,oregon 100 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa,north carolina 101 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa,california 102 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa,maryland 103 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa,illinois 104 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa,california 105 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa,washington 106 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa,california 107 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa,texas 108 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa,pennsylvania 109 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa,minnesota 110 | 
938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa,florida 111 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa,new york 112 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa,florida 113 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa,georgia 114 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa,missouri 115 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa,new york 116 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa,va 117 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa,maryland 118 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa,california 119 | 1121,1121,33.0,0.0,"sacramento, california, usa",english316,english,male,1122,usa,california 120 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa,vermont 121 | 1160,1160,52.0,5.0,"brooklyn, new york, usa",yiddish3,yiddish,male,1161,usa,new york 122 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa,new york 123 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa,pennsylvania 124 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa,ohio 125 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa,florida 126 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa,district of columbia 127 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa,illinois 128 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa,west virginia 129 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa,missouri 130 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa,maine 131 | 1232,1232,20.0,0.0,"kansas 
city, missouri, usa",english345,english,male,1233,usa,missouri 132 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa,connecticut 133 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa,missouri 134 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa,california 135 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa,georgia 136 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa,oregon 137 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa,virginia 138 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa,kentucky 139 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa,connecticut 140 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa,california 141 | 1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa,new jersey 142 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa,kentucky 143 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa,virginia 144 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa,west virginia 145 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa,wisconsin 146 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa,virginia 147 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa,virginia 148 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa,california 149 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa,rhode island 150 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa,ohio 151 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa,new york 152 | 
1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa,virginia 153 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa,virginia 154 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa,kansas 155 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa,ohio 156 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa,new york 157 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa,colorado 158 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa,alabama 159 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa,california 160 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa,connecticut 161 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa,vermont 162 | 1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa,georgia 163 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa,new york 164 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa,north carolina 165 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa,kentucky 166 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa,maine 167 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, usa",english468,english,male,1640,usa,south carolina 168 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa,tennessee 169 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa,north carolina 170 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa,massachusetts 171 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa,connecticut 172 | 1674,1674,73.0,5.0,"pepeekeo, hawai'i, 
usa",hawai'i,hawai'i,male,1675,usa,hawai'i 173 | 1711,1711,24.0,4.0,"bethlehem, pennsylvania, usa",greek12,greek,male,1712,usa,pennsylvania 174 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa,florida 175 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa,ohio 176 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa,washington 177 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa,ohio 178 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa,florida 179 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa,ohio 180 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa,colorado 181 | 1889,1889,19.0,0.0,"downers grove, illinois, usa",english524,english,male,1890,usa,illinois 182 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa,virginia 183 | 1949,1949,26.0,3.0,"bayside, new york, usa",korean42,korean,male,1950,usa,new york 184 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa,new hampshire 185 | 1994,1994,56.0,0.0,"tangier island, virginia, usa",english545,english,male,1995,usa,virginia 186 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa,virginia 187 | 1999,1999,23.0,7.0,"superior, colorado, usa",mandarin58,mandarin,male,2000,usa,colorado 188 | 2000,2000,19.0,5.0,"woodbridge, virginia, usa",twi5,twi,male,2001,usa,virginia 189 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa,virginia 190 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa,tennessee 191 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa,new york 192 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa,new york 193 | 2073,2073,32.0,0.0,"manassas, virginia, 
usa",english563,english,male,2074,usa,virginia 194 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa,wyoming 195 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa,new york 196 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa,dc 197 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa,texas 198 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa,ohio 199 | 2145,2145,21.0,6.0,"alexandria, virginia, usa",tagalog17,tagalog,male,2146,usa,virginia 200 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa,virginia 201 | -------------------------------------------------------------------------------- /dataframes/df_usa_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,state 2 | 61,61,35.0,0.0,"davenport, iowa, usa",english10,english,female,62,usa,iowa 3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa,florida 4 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa,ohio 5 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa,virginia 6 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa,virginia 7 | 77,77,27.0,0.0,"mt. 
holly, north carolina, usa",english20,english,female,78,usa,north carolina 8 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa,massachusetts 9 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa,maryland 10 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa,california 11 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa,new york 12 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa,wisconsin 13 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa,louisiana 14 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa,north carolina 15 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa,arkansas 16 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa,south dakota 17 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa,indiana 18 | 115,115,53.0,0.0,"st. 
louis, missouri, usa",english55,english,female,116,usa,missouri 19 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa,new york 20 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa,massachusetts 21 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa,maine 22 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa,illinois 23 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa,louisiana 24 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa,texas 25 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa,wisconsin 26 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa,louisiana 27 | 158,158,55.0,0.0,"pensacola, florida, usa",english94,english,female,159,usa,florida 28 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa,pennsylvania 29 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa,california 30 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa,new hampshire 31 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa,alabama 32 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa,alabama 33 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa,california 34 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa,ohio 35 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa,florida 36 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa,new york 37 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa,alaska 38 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa,california 39 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa,district of 
columbia 40 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa,minnesota 41 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa,ohio 42 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa,ohio 43 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa,michigan 44 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa,mississippi 45 | 570,570,31.0,1.0,"bethel, alaska, usa",yupik1,yupik,female,571,usa,alaska 46 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa,north carolina 47 | 573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa,louisiana 48 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa,oregon 49 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa,alabama 50 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa,iowa 51 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa,michigan 52 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa,wisconsin 53 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa,california 54 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa,michigan 55 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa,massachusetts 56 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa,minnesota 57 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa,pennsylvania 58 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa,illinois 59 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa,virginia 60 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa,california 61 | 748,748,18.0,0.0,"anaheim, california, 
usa",english220,english,female,749,usa,california 62 | 763,763,18.0,0.0,"auburn, indiana, usa",english222,english,female,764,usa,indiana 63 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa,virginia 64 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa,louisiana 65 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa,ohio 66 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa,mississippi 67 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa,pennsylvania 68 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa,massachusetts 69 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa,california 70 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa,arizona 71 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa,maryland 72 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa,new jersey 73 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa,tennessee 74 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa,new york 75 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa,georgia 76 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa,texas 77 | 981,981,26.0,0.0,"st. 
louis, missouri, usa",english288,english,female,982,usa,missouri 78 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa,virginia 79 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa,montana 80 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa,california 81 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa,michigan 82 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa,tennessee 83 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa,florida 84 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa,idaho 85 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa,georgia 86 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa,maryland 87 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa,washington 88 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa,maryland 89 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa,new jersey 90 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa,new york 91 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa,north carolina 92 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa,minnesota 93 | 1237,1237,20.0,5.0,"miami, florida, usa",spanish72,spanish,female,1238,usa,florida 94 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa,virginia 95 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa,pennsylvania 96 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa,minnesota 97 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa,colorado 98 | 
1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa,michigan 99 | 1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa,maryland 100 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa,virginia 101 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa,district of columbia 102 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa,kentucky 103 | 1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa,california 104 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa,california 105 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa,west virginia 106 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa,virginia 107 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa,florida 108 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa,district of columbia 109 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa,south carolina 110 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa,virginia 111 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa,north carolina 112 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa,north carolina 113 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa,georgia 114 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa,florida 115 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa,district of columbia 116 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa,district of columbia 117 | 1373,1373,30.0,0.0,"woonsocket, rhode 
island, usa",english414,english,female,1374,usa,rhode island 118 | 1393,1393,19.0,9.0,"shady grove, maryland, usa",arabic38,arabic,female,1394,usa,maryland 119 | 1394,1394,24.0,0.0,"baltimore, maryland, usa",english423,english,female,1395,usa,maryland 120 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa,south carolina 121 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa,pennsylvania 122 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa,south carolina 123 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa,pennsylvania 124 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa,wisconsin 125 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa,michigan 126 | 1455,1455,32.0,12.0,"fort collins, colorado, usa",arabic45,arabic,female,1456,usa,colorado 127 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa,california 128 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa,texas 129 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa,illinois 130 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa,minnesota 131 | 1538,1538,20.0,5.0,"alexandria, virginia, usa",urdu9,urdu,female,1539,usa,virginia 132 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa,illinois 133 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa,north carolina 134 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa,dc 135 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa,nevada 136 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa,new york 137 | 1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa,new 
york 138 | 1571,1571,52.0,6.0,"bethesda, maryland, usa",farsi14,farsi,female,1572,usa,maryland 139 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa,minnesota 140 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa,utah 141 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa,new hampshire 142 | 1658,1658,31.0,0.0,"san diego, california, usa",english477,english,female,1659,usa,california 143 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa,new york 144 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa,oklahoma 145 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa,pennsylvania 146 | 1675,1675,69.0,4.0,"pa'ia, hawai'i, usa",hawai'i,hawai'i,female,1676,usa,hawai'i 147 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa,nebraska 148 | 1727,1727,21.0,0.0,"st. paul, minnesota, usa",english499,english,female,1728,usa,minnesota 149 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa,pennsylvania 150 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa,michigan 151 | 1749,1749,25.0,5.0,"brooklyn, new york, usa",russian39,russian,female,1750,usa,new york 152 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa,louisiana 153 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa,illinois 154 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa,pennsylvania 155 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa,pennsylvania 156 | 1815,1815,22.0,3.0,"lancaster, pennsylvania, usa",greek14,greek,female,1816,usa,pennsylvania 157 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa,alabama 158 | 1818,1818,54.0,0.0,"salina, kansas, 
usa",english512,english,female,1819,usa,kansas 159 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa,ohio 160 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa,texas 161 | 1883,1883,47.0,0.0,"st. paul, minnesota, usa",english520,english,female,1884,usa,minnesota 162 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa,maryland 163 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa,new york 164 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa,wisconsin 165 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa,new york 166 | 1896,1896,19.0,3.0,"washington, district of columbia, usa",arabic78,arabic,female,1897,usa,district of columbia 167 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa,pennsylvania 168 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa,wisconsin 169 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa,vermont 170 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa,michigan 171 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa,virginia 172 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa,connecticut 173 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa,virginia 174 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa,new york 175 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa,massachusetts 176 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa,virginia 177 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa,virginia 178 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa,virginia 179 
| 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa,california 180 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa,virginia 181 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa,florida 182 | 1995,1995,43.0,0.0,"tangier island, virginia, usa",english546,english,female,1996,usa,virginia 183 | 2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa,virginia 184 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa,maryland 185 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa,illinois 186 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa,new york 187 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa,virginia 188 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa,connecticut 189 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa,virginia 190 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa,south carolina 191 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa,oregon 192 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa,florida 193 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa,ohio 194 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa,dc 195 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa,california 196 | -------------------------------------------------------------------------------- /dataframes/df_english_female.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 61,61,35.0,0.0,"davenport, iowa, 
usa",english10,english,female,62,usa 3 | 62,62,23.0,0.0,"miami, florida, usa",english100,english,female,63,usa 4 | 63,63,18.0,0.0,"toronto, ontario, canada",english101,english,female,64,canada 5 | 66,66,18.0,0.0,"youngstown, ohio, usa",english104,english,female,67,usa 6 | 67,67,35.0,0.0,"staffordshire, uk",english11,english,female,68,uk 7 | 68,68,71.0,0.0,"leicester, uk",english12,english,female,69,uk 8 | 71,71,7.0,0.0,"norton, virginia, usa",english15,english,female,72,usa 9 | 74,74,39.0,0.0,"dickenson county, virginia, usa",english18,english,female,75,usa 10 | 76,76,30.0,0.0,"birmingham, uk",english2,english,female,77,uk 11 | 77,77,27.0,0.0,"mt. holly, north carolina, usa",english20,english,female,78,usa 12 | 78,78,37.0,0.0,"boston, massachusetts, usa",english21,english,female,79,usa 13 | 79,79,25.0,0.0,"panama city, panama",english22,english,female,80,panama 14 | 84,84,37.0,0.0,"smith island, maryland, usa",english27,english,female,85,usa 15 | 86,86,54.0,0.0,"lindsay, ontario, canada",english29,english,female,87,canada 16 | 87,87,26.0,0.0,"brisbane, australia",english3,english,female,88,australia 17 | 88,88,30.0,0.0,"st. mary's, ontario, canada",english30,english,female,89,canada 18 | 89,89,31.0,0.0,"st. 
augustine, trinidad",english31,english,female,90,trinidad 19 | 90,90,50.0,0.0,"fresno, california, usa",english32,english,female,91,usa 20 | 92,92,18.0,0.0,"ronkonkoma, new york, usa",english34,english,female,93,usa 21 | 93,93,60.0,0.0,"milwaukee, wisconsin, usa",english35,english,female,94,usa 22 | 95,95,28.0,0.0,"yole, india",english37,english,female,96,india 23 | 97,97,59.0,0.0,"new orleans, louisiana, usa",english39,english,female,98,usa 24 | 98,98,53.0,0.0,"saint anne's bay, jamaica",english4,english,female,99,jamaica 25 | 103,103,63.0,0.0,"winston salem, north carolina, usa",english44,english,female,104,usa 26 | 106,106,76.0,0.0,"plantersville, arkansas, usa",english47,english,female,107,usa 27 | 110,110,29.0,0.0,"baltic, south dakota, usa",english50,english,female,111,usa 28 | 114,114,40.0,0.0,"new albany, indiana, usa",english54,english,female,115,usa 29 | 115,115,53.0,0.0,"st. louis, missouri, usa",english55,english,female,116,usa 30 | 118,118,46.0,0.0,"southhampton, uk",english58,english,female,119,uk 31 | 120,120,45.0,0.0,"brooklyn, new york, usa",english6,english,female,121,usa 32 | 125,125,25.0,0.0,"montreal, quebec, canada",english64,english,female,126,canada 33 | 138,138,18.0,0.0,"boston, massachusetts, usa",english76,english,female,139,usa 34 | 140,140,20.0,0.0,"caribou, maine, usa",english78,english,female,141,usa 35 | 142,142,21.0,0.0,"perth, australia",english8,english,female,143,australia 36 | 145,145,50.0,0.0,"chicago, illinois, usa",english82,english,female,146,usa 37 | 146,146,42.0,0.0,"winnfield, louisiana, usa",english83,english,female,147,usa 38 | 150,150,18.0,0.0,"tamworth, australia",english87,english,female,151,australia 39 | 153,153,48.0,0.0,"carthage, texas, usa",english9,english,female,154,usa 40 | 156,156,22.0,0.0,"wisconsin rapids, wisconsin, usa",english92,english,female,157,usa 41 | 157,157,22.0,0.0,"mamou, louisiana, usa",english93,english,female,158,usa 42 | 158,158,55.0,0.0,"pensacola, florida, 
usa",english94,english,female,159,usa 43 | 163,163,52.0,0.0,"pittsburgh, pennsylvania, usa",english99,english,female,164,usa 44 | 407,407,39.0,0.0,"los angeles, california, usa",english106,english,female,408,usa 45 | 419,419,26.0,0.0,"concord, new hampshire, usa",english109,english,female,420,usa 46 | 441,441,38.0,0.0,"birmingham 2, alabama, usa",english114,english,female,442,usa 47 | 444,444,60.0,0.0,"elmore, alabama, usa",english117,english,female,445,usa 48 | 454,454,32.0,0.0,"saskatoon, saskatchewan, canada",english119,english,female,455,canada 49 | 467,467,20.0,0.0,"kuala lumpur, malaysia",english122,english,female,468,malaysia 50 | 468,468,28.0,0.0,"riverside, california, usa",english123,english,female,469,usa 51 | 486,486,34.0,0.0,"cincinnati, ohio, usa",english126,english,female,487,usa 52 | 489,489,20.0,0.0,"miami, florida, usa",english128,english,female,490,usa 53 | 502,502,18.0,0.0,"toronto, ontario, canada",english132,english,female,503,canada 54 | 503,503,36.0,0.0,"norwich, new york, usa",english133,english,female,504,usa 55 | 505,505,22.0,0.0,"belfast, northern ireland, uk",english134,english,female,506,uk 56 | 509,509,25.0,0.0,"palmer, alaska, usa",english138,english,female,510,usa 57 | 522,522,40.0,0.0,"merced, california, usa",english147,english,female,523,usa 58 | 539,539,18.0,0.0,"washington, district of columbia, usa",english158,english,female,540,usa 59 | 541,541,21.0,0.0,"redwood falls, minnesota, usa",english160,english,female,542,usa 60 | 545,545,75.0,0.0,"wakefield, ohio, usa",english161,english,female,546,usa 61 | 547,547,41.0,0.0,"delaware, ohio, usa",english162,english,female,548,usa 62 | 549,549,43.0,0.0,"detroit, michigan, usa",english165,english,female,550,usa 63 | 555,555,50.0,0.0,"belmont, mississippi, usa",english170,english,female,556,usa 64 | 567,567,30.0,0.0,"kuching, sarawak, malaysia",english174,english,female,568,malaysia 65 | 572,572,18.0,0.0,"elizabeth city, north carolina, usa",english176,english,female,573,usa 66 | 
573,573,23.0,0.0,"new orleans, louisiana, usa",english177,english,female,574,usa 67 | 594,594,55.0,0.0,"corriverton, guyana",english183,english,female,595,guyana 68 | 596,596,18.0,0.0,"hillsboro, oregon, usa",english184,english,female,597,usa 69 | 604,604,39.0,0.0,"gadsden, alabama, usa",english186,english,female,605,usa 70 | 605,605,42.0,0.0,"algona, iowa, usa",english187,english,female,606,usa 71 | 635,635,27.0,0.0,"east lansing, michigan, usa",english192,english,female,636,usa 72 | 638,638,21.0,0.0,"delavan, wisconsin, usa",english195,english,female,639,usa 73 | 653,653,50.0,0.0,"suva, fiji",english196,english,female,654,fiji 74 | 666,666,22.0,0.0,"berkeley, california, usa",english200,english,female,667,usa 75 | 667,667,26.0,0.0,"warren, michigan, usa",english201,english,female,668,usa 76 | 673,673,63.0,0.0,"boston, massachusetts, usa",english203,english,female,674,usa 77 | 678,678,45.0,0.0,"blue earth, minnesota, usa",english205,english,female,679,usa 78 | 679,679,35.0,0.0,"hucknall, nottinghamshire, england, uk",english206,english,female,680,uk 79 | 683,683,23.0,0.0,"wilkes-barre, pennsylvania, usa",english207,english,female,684,usa 80 | 701,701,28.0,0.0,"saint johns, antigua and barbuda",english209,english,female,702,antigua and barbuda 81 | 704,704,23.0,0.0,"trenton, nova scotia, canada",english210,english,female,705,canada 82 | 738,738,48.0,0.0,"chicago, illinois, usa",english216,english,female,739,usa 83 | 746,746,27.0,0.0,"abingdon, virginia, usa",english218,english,female,747,usa 84 | 747,747,38.0,0.0,"oakland, california, usa",english219,english,female,748,usa 85 | 748,748,18.0,0.0,"anaheim, california, usa",english220,english,female,749,usa 86 | 763,763,18.0,0.0,"auburn, indiana, usa",english222,english,female,764,usa 87 | 772,772,42.0,0.0,"camberley, surrey, uk",english227,english,female,773,uk 88 | 773,773,59.0,0.0,"glasgow, scotland, uk",english228,english,female,774,uk 89 | 792,792,26.0,0.0,"edmonton, alberta, 
canada",english233,english,female,793,canada 90 | 794,794,52.0,0.0,"richmond, virginia, usa",english235,english,female,795,usa 91 | 797,797,32.0,0.0,"metairie, louisiana, usa",english232,english,female,798,usa 92 | 800,800,22.0,0.0,"jeffersonville, ohio, usa",english237,english,female,801,usa 93 | 801,801,21.0,0.0,"vancouver, british columbia, canada",english238,english,female,802,canada 94 | 816,816,52.0,0.0,"wanganui, new zealand",english241,english,female,817,new zealand 95 | 817,817,77.0,0.0,"laurel, mississippi, usa",english242,english,female,818,usa 96 | 839,839,18.0,0.0,"philadelphia, pennsylvania, usa",english248,english,female,840,usa 97 | 849,849,29.0,0.0,"boston, massachusetts, usa",english249,english,female,850,usa 98 | 855,855,25.0,0.0,"san jose ca, california, usa",english252,english,female,856,usa 99 | 858,858,24.0,0.0,"oamaru, new zealand",english255,english,female,859,new zealand 100 | 870,870,18.0,0.0,"phoenix, arizona, usa",english260,english,female,871,usa 101 | 874,874,59.0,0.0,"forestville, maryland, usa",english261,english,female,875,usa 102 | 900,900,67.0,0.0,"leeds, uk",english268,english,female,901,uk 103 | 901,901,22.0,0.0,"adelaide, australia",english269,english,female,902,australia 104 | 903,903,32.0,0.0,"cape town, south africa",english271,english,female,904,south africa 105 | 908,908,25.0,0.0,"sydney, australia",english274,english,female,909,australia 106 | 911,911,20.0,0.0,"clifton, new jersey, usa",english275,english,female,912,usa 107 | 917,917,50.0,0.0,"chattanooga, tennessee, usa",english276,english,female,918,usa 108 | 918,918,48.0,0.0,"hudson, new york, usa",english277,english,female,919,usa 109 | 920,920,18.0,0.0,"augusta, georgia, usa",english278,english,female,921,usa 110 | 921,921,30.0,0.0,"singapore, singapore",english279,english,female,922,singapore 111 | 934,934,19.0,0.0,"ramsey, isle of man",english280,english,female,935,isle of man 112 | 976,976,21.0,0.0,"beaumont, texas, usa",english286,english,female,977,usa 113 | 
980,980,27.0,0.0,"singapore, singapore",english287,english,female,981,singapore 114 | 981,981,26.0,0.0,"st. louis, missouri, usa",english288,english,female,982,usa 115 | 982,982,19.0,0.0,"fairfax, virginia, usa",english289,english,female,983,usa 116 | 1051,1051,22.0,0.0,"billings, montana, usa",english293,english,female,1052,usa 117 | 1059,1059,19.0,0.0,"naracoorte, australia",english294,english,female,1060,australia 118 | 1069,1069,18.0,0.0,"edmonton, alberta, canada",english296,english,female,1070,canada 119 | 1085,1085,45.0,0.0,"anaheim, california, usa",english303,english,female,1086,usa 120 | 1087,1087,20.0,0.0,"clare, ireland",english304,english,female,1088,ireland 121 | 1092,1092,24.0,0.0,"london, uk",english306,english,female,1093,uk 122 | 1093,1093,24.0,0.0,"la paz, bolivia",english305,english,female,1094,bolivia 123 | 1104,1104,44.0,0.0,"dunedin, new zealand",english309,english,female,1105,new zealand 124 | 1107,1107,37.0,0.0,"oxford, uk",english310,english,female,1108,uk 125 | 1109,1109,38.0,0.0,"detroit, michigan, usa",english311,english,female,1110,usa 126 | 1112,1112,40.0,0.0,"edmonton, alberta, canada",english312,english,female,1113,canada 127 | 1119,1119,26.0,0.0,"memphis, tennessee, usa",english314,english,female,1120,usa 128 | 1161,1161,59.0,0.0,"maryland county, liberia",english320,english,female,1162,liberia 129 | 1172,1172,31.0,0.0,"regina, saskatchewan, canada",english323,english,female,1173,canada 130 | 1205,1205,23.0,0.0,"hollywood, florida, usa",english328,english,female,1206,usa 131 | 1206,1206,21.0,0.0,"boise, idaho, usa",english329,english,female,1207,usa 132 | 1207,1207,18.0,0.0,"augusta, georgia, usa",english330,english,female,1208,usa 133 | 1208,1208,28.0,0.0,"perth, australia",english331,english,female,1209,australia 134 | 1211,1211,20.0,0.0,"baltimore, maryland, usa",english332,english,female,1212,usa 135 | 1213,1213,32.0,0.0,"spokane, washington, usa",english333,english,female,1214,usa 136 | 1214,1214,18.0,0.0,"derby, england, 
uk",english334,english,female,1215,uk 137 | 1216,1216,46.0,0.0,"baltimore, maryland, usa",english336,english,female,1217,usa 138 | 1217,1217,84.0,0.0,"jersey city, new jersey, usa",english337,english,female,1218,usa 139 | 1218,1218,53.0,0.0,"sydney, australia",english338,english,female,1219,australia 140 | 1223,1223,23.0,0.0,"new york, new york, usa",english341,english,female,1224,usa 141 | 1231,1231,19.0,0.0,"lumberton, north carolina, usa",english344,english,female,1232,usa 142 | 1235,1235,22.0,0.0,"burnsville, minnesota, usa",english347,english,female,1236,usa 143 | 1236,1236,20.0,0.0,"las pinas, philippines",english348,english,female,1237,philippines 144 | 1238,1238,24.0,0.0,"warrenton, virginia, usa",english349,english,female,1239,usa 145 | 1240,1240,18.0,0.0,"polesworth, staffordshire, uk",english350,english,female,1241,uk 146 | 1249,1249,23.0,0.0,"christchurch, new zealand",english354,english,female,1250,new zealand 147 | 1256,1256,23.0,0.0,"morden, manitoba, canada",english356,english,female,1257,canada 148 | 1276,1276,19.0,0.0,"erie, pennsylvania, usa",english360,english,female,1277,usa 149 | 1279,1279,20.0,0.0,"burnsville, minnesota, usa",english361,english,female,1280,usa 150 | 1287,1287,28.0,0.0,"sydney, australia",english364,english,female,1288,australia 151 | 1292,1292,25.0,0.0,"bournesmouth, uk",english366,english,female,1293,uk 152 | 1293,1293,25.0,0.0,"birkenhead, uk",english367,english,female,1294,uk 153 | 1302,1302,27.0,0.0,"colorado springs, colorado, usa",english371,english,female,1303,usa 154 | 1318,1318,32.0,0.0,"trenton, michigan, usa",english377,english,female,1319,usa 155 | 1319,1319,38.0,0.0,"silver spring, maryland, usa",english378,english,female,1320,usa 156 | 1322,1322,34.0,0.0,"roanoke, virginia, usa",english380,english,female,1323,usa 157 | 1325,1325,32.0,0.0,"washington, district of columbia, usa",english383,english,female,1326,usa 158 | 1327,1327,77.0,0.0,"mcveigh, kentucky, usa",english385,english,female,1328,usa 159 | 
1329,1329,56.0,0.0,"los angeles, california, usa",english386,english,female,1330,usa 160 | 1330,1330,21.0,0.0,"lancaster, california, usa",english387,english,female,1331,usa 161 | 1331,1331,70.0,0.0,"bluefield, west virginia, usa",english388,english,female,1332,usa 162 | 1332,1332,71.0,0.0,"aldie, virginia, usa",english389,english,female,1333,usa 163 | 1334,1334,84.0,0.0,"milton, florida, usa",english391,english,female,1335,usa 164 | 1336,1336,58.0,0.0,"washington, district of columbia, usa",english393,english,female,1337,usa 165 | 1338,1338,82.0,0.0,"aiken, south carolina, usa",english394,english,female,1339,usa 166 | 1340,1340,48.0,0.0,"orange, virginia, usa",english396,english,female,1341,usa 167 | 1341,1341,76.0,0.0,"wadesboro, north carolina, usa",english397,english,female,1342,usa 168 | 1342,1342,83.0,0.0,"salisbury, north carolina, usa",english398,english,female,1343,usa 169 | 1344,1344,80.0,0.0,"la grange, georgia, usa",english399,english,female,1345,usa 170 | 1347,1347,59.0,0.0,"shanklin, isle of wight, uk",english400,english,female,1348,uk 171 | 1349,1349,26.0,0.0,"canberra, australia",english402,english,female,1350,australia 172 | 1350,1350,29.0,0.0,"sydney, australia",english403,english,female,1351,australia 173 | 1352,1352,32.0,0.0,"geelong, australia",english405,english,female,1353,australia 174 | 1357,1357,24.0,0.0,"pensacola, florida, usa",english406,english,female,1358,usa 175 | 1361,1361,60.0,0.0,"maitland, australia",english409,english,female,1362,australia 176 | 1370,1370,26.0,0.0,"washington, district of columbia, usa",english411,english,female,1371,usa 177 | 1371,1371,31.0,0.0,"washington, district of columbia, usa",english412,english,female,1372,usa 178 | 1372,1372,20.0,0.0,"manchester, england, uk",english413,english,female,1373,uk 179 | 1373,1373,30.0,0.0,"woonsocket, rhode island, usa",english414,english,female,1374,usa 180 | 1379,1379,20.0,0.0,"cardiff, wales, uk",english417,english,female,1380,uk 181 | 1394,1394,24.0,0.0,"baltimore, 
maryland, usa",english423,english,female,1395,usa 182 | 1410,1410,30.0,0.0,"florence, south carolina, usa",english426,english,female,1411,usa 183 | 1411,1411,68.0,0.0,"hanover, pennsylvania, usa",english427,english,female,1412,usa 184 | 1412,1412,55.0,0.0,"kingstree, south carolina, usa",english428,english,female,1413,usa 185 | 1415,1415,19.0,0.0,"philadelphia, pennsylvania, usa",english429,english,female,1416,usa 186 | 1418,1418,23.0,0.0,"melbourne, australia",english430,english,female,1419,australia 187 | 1422,1422,23.0,0.0,"green bay, wisconsin, usa",english431,english,female,1423,usa 188 | 1427,1427,22.0,0.0,"dublin, ireland",english432,english,female,1428,ireland 189 | 1429,1429,32.0,0.0,"gladwin, michigan, usa",english433,english,female,1430,usa 190 | 1470,1470,20.0,0.0,"palm springs, california, usa",english435,english,female,1471,usa 191 | 1474,1474,19.0,0.0,"freeport, the bahamas",english436,english,female,1475,the bahamas 192 | 1491,1491,20.0,0.0,"houston, texas, usa",english438,english,female,1492,usa 193 | 1492,1492,18.0,0.0,"richmond, british columbia, canada",english439,english,female,1493,canada 194 | 1501,1501,22.0,0.0,"glasgow, scotland, uk",english440,english,female,1502,uk 195 | 1523,1523,19.0,0.0,"oquawka, illinois, usa",english441,english,female,1524,usa 196 | 1532,1532,23.0,0.0,"moorhead, minnesota, usa",english446,english,female,1533,usa 197 | 1547,1547,21.0,0.0,"chicago, illinois, usa",english453,english,female,1548,usa 198 | 1549,1549,19.0,0.0,"raleigh, north carolina, usa",english454,english,female,1550,usa 199 | 1551,1551,29.0,0.0,"washington, dc, usa",english455,english,female,1552,usa 200 | 1553,1553,37.0,0.0,"lancashire, leyland, uk",english456,english,female,1554,uk 201 | 1558,1558,22.0,0.0,"reno, nevada, usa",english458,english,female,1559,usa 202 | 1560,1560,19.0,0.0,"rutland, england, uk",english460,english,female,1561,uk 203 | 1562,1562,44.0,0.0,"poughkeepsie, new york, usa",english461,english,female,1563,usa 204 | 
1570,1570,53.0,0.0,"brooklyn, new york, usa",english463,english,female,1571,usa 205 | 1647,1647,26.0,0.0,"calgary, alberta, canada",english471,english,female,1648,canada 206 | 1655,1655,33.0,0.0,"rochester, minnesota, usa",english474,english,female,1656,usa 207 | 1656,1656,29.0,0.0,"ogden, utah, usa",english475,english,female,1657,usa 208 | 1657,1657,64.0,0.0,"manchester, new hampshire, usa",english476,english,female,1658,usa 209 | 1658,1658,31.0,0.0,"san diego, california, usa",english477,english,female,1659,usa 210 | 1663,1663,64.0,0.0,"new york, new york, usa",english479,english,female,1664,usa 211 | 1665,1665,88.0,0.0,"stringtown, oklahoma, usa",english481,english,female,1666,usa 212 | 1666,1666,74.0,0.0,"yeadon, pennsylvania, usa",english482,english,female,1667,usa 213 | 1682,1682,53.0,0.0,"corentyne, guyana",english486,english,female,1683,guyana 214 | 1687,1687,22.0,0.0,"london, uk",english487,english,female,1688,uk 215 | 1691,1691,35.0,0.0,"irvine, scotland, uk",english488,english,female,1692,uk 216 | 1693,1693,46.0,0.0,"kingston, jamaica",english489,english,female,1694,jamaica 217 | 1712,1712,79.0,0.0,"westmoreland, jamaica",english491,english,female,1713,jamaica 218 | 1715,1715,22.0,0.0,"omaha, nebraska, usa",english492,english,female,1716,usa 219 | 1727,1727,21.0,0.0,"st. 
paul, minnesota, usa",english499,english,female,1728,usa 220 | 1733,1733,64.0,0.0,"pittsburgh, pennsylvania, usa",english501,english,female,1734,usa 221 | 1734,1734,30.0,0.0,"farmington hills, michigan, usa",english502,english,female,1735,usa 222 | 1738,1738,29.0,0.0,"benin city, nigeria",english503,english,female,1739,nigeria 223 | 1797,1797,50.0,0.0,"beirut, lebanon",english505,english,female,1798,lebanon 224 | 1798,1798,30.0,0.0,"new orleans, louisiana, usa",english506,english,female,1799,usa 225 | 1800,1800,20.0,0.0,"winfield, illinois, usa",english508,english,female,1801,usa 226 | 1801,1801,44.0,0.0,"coudersport, pennsylvania, usa",english509,english,female,1802,usa 227 | 1802,1802,66.0,0.0,"philadelphia, pennsylvania, usa",english510,english,female,1803,usa 228 | 1817,1817,31.0,0.0,"montgomery, alabama, usa",english511,english,female,1818,usa 229 | 1818,1818,54.0,0.0,"salina, kansas, usa",english512,english,female,1819,usa 230 | 1855,1855,52.0,0.0,"bocas del toro, panama",english513,english,female,1856,panama 231 | 1872,1872,55.0,0.0,"youngstown, ohio, usa",english515,english,female,1873,usa 232 | 1874,1874,32.0,0.0,"dripping springs, texas, usa",english516,english,female,1875,usa 233 | 1881,1881,18.0,1.0,"karachi, pakistan",english519,english,female,1882,pakistan 234 | 1883,1883,47.0,0.0,"st. 
paul, minnesota, usa",english520,english,female,1884,usa 235 | 1885,1885,25.0,0.0,"bethesda, maryland, usa",english521,english,female,1886,usa 236 | 1886,1886,22.0,0.0,"bethpage, new york, usa",english522,english,female,1887,usa 237 | 1887,1887,18.0,0.0,"madison, wisconsin, usa",english523,english,female,1888,usa 238 | 1892,1892,18.0,0.0,"albion, new york, usa",english525,english,female,1893,usa 239 | 1906,1906,25.0,0.0,"wynnewood, pennsylvania, usa",english527,english,female,1907,usa 240 | 1907,1907,52.0,0.0,"green bay, wisconsin, usa",english528,english,female,1908,usa 241 | 1924,1924,29.0,0.0,"burlington, vermont, usa",english529,english,female,1925,usa 242 | 1925,1925,50.0,0.0,"detroit, michigan, usa",english530,english,female,1926,usa 243 | 1927,1927,29.0,0.0,"nokesville, virginia, usa",english531,english,female,1928,usa 244 | 1932,1932,23.0,0.0,"bristol, connecticut, usa",english532,english,female,1933,usa 245 | 1933,1933,38.0,0.0,"fairfax, virginia, usa",english533,english,female,1934,usa 246 | 1950,1950,29.0,0.0,"mineola, new york, usa",english534,english,female,1951,usa 247 | 1954,1954,22.0,0.0,"plymouth, massachusetts, usa",english535,english,female,1955,usa 248 | 1955,1955,21.0,0.0,"richmond, virginia, usa",english536,english,female,1956,usa 249 | 1956,1956,21.0,0.0,"woodbridge, virginia, usa",english537,english,female,1957,usa 250 | 1957,1957,20.0,0.0,"lyndhurst, virginia, usa",english538,english,female,1958,usa 251 | 1958,1958,19.0,0.0,"orange county, california, usa",english539,english,female,1959,usa 252 | 1959,1959,18.0,0.0,"arlington, virginia, usa",english540,english,female,1960,usa 253 | 1960,1960,27.0,0.0,"fort lauderdale, florida, usa",english541,english,female,1961,usa 254 | 1968,1968,39.0,0.0,"vancouver, british columbia, canada",english543,english,female,1969,canada 255 | 1995,1995,43.0,0.0,"tangier island, virginia, usa",english546,english,female,1996,usa 256 | 2011,2011,19.0,0.0,"exminister, uk",english549,english,female,2012,uk 257 | 
2028,2028,81.0,0.0,"king george, virginia, usa",english551,english,female,2029,usa 258 | 2042,2042,23.0,0.0,"bethesda, maryland, usa",english552,english,female,2043,usa 259 | 2046,2046,22.0,0.0,"hoffman estates, illinois, usa",english554,english,female,2047,usa 260 | 2049,2049,84.0,0.0,"brooklyn, new york, usa",english556,english,female,2050,usa 261 | 2052,2052,25.0,2.0,"sharjah, united arab emirates",english557,english,female,2053,united arab emirates 262 | 2055,2055,19.0,0.0,"reston, virginia, usa",english559,english,female,2056,usa 263 | 2058,2058,22.0,0.0,"hartford, connecticut, usa",english560,english,female,2059,usa 264 | 2059,2059,20.0,0.0,"yorktown, virginia, usa",english561,english,female,2060,usa 265 | 2060,2060,30.0,0.0,"florence, south carolina, usa",english562,english,female,2061,usa 266 | 2079,2079,27.0,0.0,"portland, oregon, usa",english565,english,female,2080,usa 267 | 2102,2102,86.0,0.0,"quincy, florida, usa",english568,english,female,2103,usa 268 | 2105,2105,42.0,0.0,"parma, ohio, usa",english570,english,female,2106,usa 269 | 2161,2161,26.0,0.0,"toronto, ontario, canada",english574,english,female,2162,canada 270 | 2166,2166,63.0,0.0,"washington, dc, usa",english576,english,female,2167,usa 271 | 2169,2169,38.0,0.0,"san leandro, california, usa",english577,english,female,2170,usa 272 | -------------------------------------------------------------------------------- /dataframes/df_english_male.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country 2 | 60,60,42.0,0.0,"pittsburgh, pennsylvania, usa",english1,english,male,61,usa 3 | 64,64,22.0,0.0,"torrington, connecticut, usa",english102,english,male,65,usa 4 | 65,65,21.0,0.0,"staten island, new york, usa",english103,english,male,66,usa 5 | 69,69,69.0,0.0,"henley on thames, oxfordshire, uk",english13,english,male,70,uk 6 | 70,70,40.0,0.0,"belfast, northern ireland, 
uk",english14,english,male,71,uk 7 | 72,72,21.0,0.0,"wise, virginia, usa",english16,english,male,73,usa 8 | 73,73,79.0,0.0,"darwin va, virginia, usa",english17,english,male,74,usa 9 | 75,75,53.0,0.0,"louisville, kentucky, usa",english19,english,male,76,usa 10 | 80,80,43.0,0.0,"englewood, tennessee, usa",english23,english,male,81,usa 11 | 81,81,37.0,0.0,"glasgow, scotland, uk",english24,english,male,82,uk 12 | 82,82,57.0,0.0,"atlanta, georgia, usa",english25,english,male,83,usa 13 | 83,83,71.0,0.0,"smith island, maryland, usa",english26,english,male,84,usa 14 | 85,85,54.0,0.0,"toronto, ontario, canada",english28,english,male,86,canada 15 | 91,91,21.0,0.0,"san diego, california, usa",english33,english,male,92,usa 16 | 94,94,60.0,0.0,"arcadia, wisconsin, usa",english36,english,male,95,usa 17 | 96,96,27.0,0.0,"littlehampton, uk",english38,english,male,97,uk 18 | 99,99,53.0,0.0,"dudley, uk",english40,english,male,100,uk 19 | 100,100,44.0,0.0,"wellington, new zealand",english41,english,male,101,new zealand 20 | 101,101,65.0,0.0,"nigel, south africa",english42,english,male,102,south africa 21 | 102,102,47.0,0.0,"vancouver, british columbia, canada",english43,english,male,103,canada 22 | 104,104,52.0,0.0,"pine bluff, arkansas, usa",english45,english,male,105,usa 23 | 105,105,73.0,0.0,"creswell, north carolina, usa",english46,english,male,106,usa 24 | 107,107,26.0,0.0,"port moresby, national capital district, papua new guinea",english48,english,male,108,papua new guinea 25 | 108,108,43.0,0.0,"castro valley, california, usa",english49,english,male,109,usa 26 | 109,109,62.0,0.0,"fairfax, virginia, usa",english5,english,male,110,usa 27 | 111,111,67.0,0.0,"detroit, michigan, usa",english51,english,male,112,usa 28 | 112,112,20.0,0.0,"calgary, alberta, canada",english52,english,male,113,canada 29 | 113,113,63.0,0.0,"syracuse, new york, usa",english53,english,male,114,usa 30 | 116,116,20.0,0.0,"londonderry, northern ireland, uk",english56,english,male,117,uk 31 | 
117,117,34.0,0.0,"birmingham 1, uk",english57,english,male,118,uk 32 | 119,119,30.0,0.0,"columbus, ohio, usa",english59,english,male,120,usa 33 | 121,121,18.0,0.0,"chesapeake, virginia, usa",english60,english,male,122,usa 34 | 122,122,19.0,0.0,"toronto, ontario, canada",english61,english,male,123,canada 35 | 123,123,30.0,0.0,"west jordan, utah, usa",english62,english,male,124,usa 36 | 124,124,21.0,0.0,"lewisville, texas, usa",english63,english,male,125,usa 37 | 126,126,47.0,0.0,"lewistown, pennsylvania, usa",english65,english,male,127,usa 38 | 127,127,19.0,0.0,"baltimore, maryland, usa",english66,english,male,128,usa 39 | 128,128,37.0,0.0,"princeton, indiana, usa",english67,english,male,129,usa 40 | 129,129,52.0,0.0,"new york, new york, usa",english68,english,male,130,usa 41 | 130,130,23.0,0.0,"adelaide, australia",english69,english,male,131,australia 42 | 131,131,52.0,0.0,"macon, mississippi, usa",english7,english,male,132,usa 43 | 132,132,21.0,0.0,"beaumont, texas, usa",english70,english,male,133,usa 44 | 133,133,20.0,0.0,"west covina, california, usa",english71,english,male,134,usa 45 | 134,134,31.0,0.0,"kilkenny, ireland",english72,english,male,135,ireland 46 | 135,135,24.0,0.0,"darwin, australia",english73,english,male,136,australia 47 | 136,136,21.0,0.0,"wichita, kansas, usa",english74,english,male,137,usa 48 | 137,137,32.0,0.0,"idaho falls, idaho, usa",english75,english,male,138,usa 49 | 139,139,26.0,0.0,"melbourne, australia",english77,english,male,140,australia 50 | 141,141,73.0,0.0,"boston, massachusetts, usa",english79,english,male,142,usa 51 | 143,143,34.0,0.0,"glasgow, scotland, uk",english80,english,male,144,uk 52 | 144,144,30.0,0.0,"st. 
paul, minnesota, usa",english81,english,male,145,usa 53 | 147,147,45.0,0.0,"sydney, australia",english84,english,male,148,australia 54 | 148,148,31.0,0.0,"strabane, northern ireland, uk",english85,english,male,149,uk 55 | 149,149,50.0,0.0,"charleston, south carolina, usa",english86,english,male,150,usa 56 | 151,151,18.0,0.0,"grand rapids, michigan, usa",english88,english,male,152,usa 57 | 152,152,18.0,0.0,"kingston ma, massachusetts, usa",english89,english,male,153,usa 58 | 154,154,53.0,0.0,"pittsburgh, pennsylvania, usa",english90,english,male,155,usa 59 | 155,155,23.0,0.0,"kingston, jamaica",english91,english,male,156,jamaica 60 | 159,159,18.0,0.0,"mishawaka, indiana, usa",english95,english,male,160,usa 61 | 160,160,31.0,0.0,"point pleasant, new jersey, usa",english96,english,male,161,usa 62 | 161,161,42.0,0.0,"wilmington nc, north carolina, usa",english97,english,male,162,usa 63 | 162,162,22.0,0.0,"spokane, washington, usa",english98,english,male,163,usa 64 | 406,406,57.0,0.0,"windsor, ontario, canada",english105,english,male,407,canada 65 | 415,415,19.0,0.0,"chicago, illinois, usa",english107,english,male,416,usa 66 | 418,418,21.0,0.0,"strabane, northern ireland, uk",english108,english,male,419,uk 67 | 420,420,21.0,0.0,"great yarmouth, norfolk, uk",english110,english,male,421,uk 68 | 425,425,21.0,0.0,"bombay, india",english111,english,male,426,india 69 | 426,426,21.0,0.0,"st. thomas, us virgin islands",english112,english,male,427,us virgin islands 70 | 438,438,31.0,0.0,"wrexham, wales, uk",english113,english,male,439,uk 71 | 442,442,31.0,0.0,"st. 
albans, hertfordshire, uk",english115,english,male,443,uk 72 | 443,443,56.0,0.0,"atlanta, georgia, usa",english116,english,male,444,usa 73 | 445,445,19.0,0.0,"akron, ohio, usa",english118,english,male,446,usa 74 | 455,455,65.0,0.0,"saskatoon, saskatchewan, canada",english120,english,male,456,canada 75 | 464,464,21.0,0.0,"waconia, minnesota, usa",english121,english,male,465,usa 76 | 479,479,38.0,0.0,"brooklyn, new york, usa",english124,english,male,480,usa 77 | 484,484,28.0,0.0,"st. george, queensland, australia",english125,english,male,485,australia 78 | 488,488,39.0,0.0,"new britain, connecticut, usa",english127,english,male,489,usa 79 | 491,491,25.0,0.0,"blackpool, lancashire, uk",english129,english,male,492,uk 80 | 495,495,32.0,0.0,"port elizabeth, south africa",english130,english,male,496,south africa 81 | 496,496,23.0,0.0,"west palm beach, florida, usa",english131,english,male,497,usa 82 | 506,506,22.0,0.0,"orange beach, alabama, usa",english135,english,male,507,usa 83 | 507,507,25.0,0.0,"calgary, alberta, canada",english136,english,male,508,canada 84 | 508,508,26.0,0.0,"charleston, west virginia, usa",english137,english,male,509,usa 85 | 510,510,20.0,0.0,"providence, rhode island, usa",english139,english,male,511,usa 86 | 514,514,43.0,0.0,"quezon city, philippines",english140,english,male,515,philippines 87 | 515,515,19.0,0.0,"york, uk",english141,english,male,516,uk 88 | 516,516,19.0,0.0,"avon, new york, usa",english142,english,male,517,usa 89 | 517,517,42.0,0.0,"minneapolis, minnesota, usa",english143,english,male,518,usa 90 | 518,518,24.0,0.0,"barneys river, nova scotia, canada",english144,english,male,519,canada 91 | 520,520,18.0,0.0,"bury st. 
edmunds, uk",english145,english,male,521,uk 92 | 521,521,22.0,0.0,"blytheville, arkansas, usa",english146,english,male,522,usa 93 | 524,524,31.0,0.0,"sydney, australia",english148,english,male,525,australia 94 | 525,525,18.0,0.0,"lawrenceville, georgia, usa",english149,english,male,526,usa 95 | 526,526,31.0,0.0,"brownsville, kentucky, usa",english150,english,male,527,usa 96 | 527,527,18.0,0.0,"baltimore, maryland, usa",english151,english,male,528,usa 97 | 528,528,18.0,0.0,"adelaide, australia",english152,english,male,529,australia 98 | 532,532,35.0,0.0,"brisbane, australia",english153,english,male,533,australia 99 | 533,533,28.0,0.0,"glace bay, nova scotia, canada",english154,english,male,534,canada 100 | 534,534,35.0,0.0,"oakland, california, usa",english155,english,male,535,usa 101 | 535,535,33.0,0.0,"dublin, ireland",english156,english,male,536,ireland 102 | 537,537,35.0,0.0,"fort worth, texas, usa",english157,english,male,538,usa 103 | 540,540,18.0,0.0,"ottawa, ontario, canada",english159,english,male,541,canada 104 | 546,546,41.0,0.0,"fairview park, ohio, usa",english163,english,male,547,usa 105 | 548,548,38.0,0.0,"leeds, uk",english164,english,male,549,uk 106 | 550,550,18.0,0.0,"glenside, pennsylvania, usa",english166,english,male,551,usa 107 | 551,551,43.0,0.0,"lakeview, michigan, usa",english167,english,male,552,usa 108 | 553,553,40.0,0.0,"oceanside, california, usa",english168,english,male,554,usa 109 | 554,554,18.0,0.0,"cleveland, mississippi, usa",english169,english,male,555,usa 110 | 562,562,21.0,0.0,"st. 
louis, missouri, usa",english171,english,male,563,usa 111 | 563,563,49.0,0.0,"kingston, jamaica",english172,english,male,564,jamaica 112 | 566,566,19.0,0.0,"libertyville, illinois, usa",english173,english,male,567,usa 113 | 571,571,24.0,0.0,"seattle, washington, usa",english175,english,male,572,usa 114 | 582,582,30.0,0.0,"wilkes-barre, pennsylvania, usa",english178,english,male,583,usa 115 | 583,583,57.0,0.0,"huron, south dakota, usa",english179,english,male,584,usa 116 | 584,584,20.0,0.0,"newport, rhode island, usa",english180,english,male,585,usa 117 | 585,585,68.0,0.0,"new eagle, pennsylvania, usa",english181,english,male,586,usa 118 | 589,589,50.0,0.0,"troy, new york, usa",english182,english,male,590,usa 119 | 603,603,38.0,0.0,"oxford, uk",english185,english,male,604,uk 120 | 610,610,35.0,0.0,"edinburgh, scotland, uk",english188,english,male,611,uk 121 | 616,616,19.0,0.0,"grand forks, north dakota, usa",english189,english,male,617,usa 122 | 617,617,27.0,0.0,"oak park, illinois, usa",english190,english,male,618,usa 123 | 630,630,19.0,0.0,"las cruces, new mexico, usa",english191,english,male,631,usa 124 | 636,636,32.0,0.0,"honolulu, hawaii, usa",english193,english,male,637,usa 125 | 637,637,19.0,0.0,"manchester, england, uk",english194,english,male,638,uk 126 | 661,661,22.0,0.0,"iowa city, iowa, usa",english197,english,male,662,usa 127 | 662,662,56.0,0.0,"washington, district of columbia, usa",english198,english,male,663,usa 128 | 663,663,25.0,0.0,"bay shore, new york, usa",english199,english,male,664,usa 129 | 672,672,19.0,0.0,"crisfield, maryland, usa",english202,english,male,673,usa 130 | 677,677,53.0,0.0,"chicago, illinois, usa",english204,english,male,678,usa 131 | 694,694,46.0,0.0,"washington, dc, usa",english208,english,male,695,usa 132 | 719,719,30.0,0.0,"worcester, massachusetts, usa",english211,english,male,720,usa 133 | 728,728,24.0,0.0,"spartanburg, south carolina, usa",english212,english,male,729,usa 134 | 733,733,38.0,0.0,"dodge city, kansas, 
usa",english213,english,male,734,usa 135 | 736,736,26.0,0.0,"myrtle beach, south carolina, usa",english214,english,male,737,usa 136 | 737,737,20.0,0.0,"lemington spa, warwickshire, uk",english215,english,male,738,uk 137 | 739,739,66.0,0.0,"harrow, middlesex, uk",english217,english,male,740,uk 138 | 753,753,45.0,0.0,"london, uk",english221,english,male,754,uk 139 | 766,766,18.0,0.0,"erie, pennsylvania, usa",english223,english,male,767,usa 140 | 767,767,20.0,0.0,"bridgetown, barbados",english224,english,male,768,barbados 141 | 770,770,50.0,0.0,"dundee, scotland, uk",english225,english,male,771,uk 142 | 771,771,38.0,0.0,"walton-on-thames, surrey, uk",english226,english,male,772,uk 143 | 774,774,25.0,0.0,"burlington, vermont, usa",english229,english,male,775,usa 144 | 775,775,35.0,0.0,"launceston, tasmania, australia",english230,english,male,776,australia 145 | 790,790,22.0,0.0,"tampa, florida, usa",english231,english,male,791,usa 146 | 793,793,49.0,0.0,"bloomington, indiana, usa",english234,english,male,794,usa 147 | 798,798,44.0,0.0,"st. charles, illinois, usa",english236,english,male,799,usa 148 | 802,802,19.0,0.0,"fort kobbe, panama",english239,english,male,803,panama 149 | 814,814,26.0,0.0,"guelph, ontario, canada",english240,english,male,815,canada 150 | 819,819,27.0,0.0,"fredericton, new brunswick, canada",english243,english,male,820,canada 151 | 820,820,23.0,0.0,"san francisco, california, usa",english244,english,male,821,usa 152 | 823,823,25.0,0.0,"pittsburgh, pennsylvania, usa",english245,english,male,824,usa 153 | 824,824,32.0,0.0,"los angeles, california, usa",english246,english,male,825,usa 154 | 834,834,33.0,0.0,"prince rupert, british columbia, canada",english247,english,male,835,canada 155 | 851,851,19.0,0.0,"chester, england, uk",english250,english,male,852,uk 156 | 854,854,21.0,0.0,"mt. 
kisco, new york, usa",english251,english,male,855,usa 157 | 856,856,21.0,0.0,"dublin, ireland",english253,english,male,857,ireland 158 | 857,857,39.0,0.0,"eugene, oregon, usa",english254,english,male,858,usa 159 | 860,860,23.0,0.0,"laurinburg, north carolina, usa",english256,english,male,861,usa 160 | 862,862,24.0,0.0,"san diego, california, usa",english257,english,male,863,usa 161 | 868,868,24.0,0.0,"kilkenny, ireland",english258,english,male,869,ireland 162 | 869,869,23.0,0.0,"singapore, singapore",english259,english,male,870,singapore 163 | 875,875,60.0,0.0,"naylor, maryland, usa",english262,english,male,876,usa 164 | 883,883,21.0,0.0,"oak forest, illinois, usa",english263,english,male,884,usa 165 | 888,888,21.0,0.0,"freemont, california, usa",english264,english,male,889,usa 166 | 889,889,31.0,0.0,"vancouver wa, washington, usa",english265,english,male,890,usa 167 | 898,898,18.0,0.0,"san diego, california, usa",english266,english,male,899,usa 168 | 899,899,66.0,0.0,"leeds, uk",english267,english,male,900,uk 169 | 902,902,29.0,0.0,"cape town, south africa",english270,english,male,903,south africa 170 | 904,904,20.0,0.0,"dallas, texas, usa",english272,english,male,905,usa 171 | 906,906,46.0,0.0,"blue bell, pennsylvania, usa",english273,english,male,907,usa 172 | 936,936,23.0,0.0,"ramsey mn, minnesota, usa",english281,english,male,937,usa 173 | 938,938,20.0,0.0,"tampa, florida, usa",english282,english,male,939,usa 174 | 950,950,18.0,0.0,"pelham, new york, usa",english283,english,male,951,usa 175 | 951,951,33.0,0.0,"gainesville, florida, usa",english284,english,male,952,usa 176 | 960,960,50.0,0.0,"melbourne, australia",english285,english,male,961,australia 177 | 985,985,20.0,0.0,"duluth, georgia, usa",english290,english,male,986,usa 178 | 1025,1025,18.0,0.0,"belize city, belize",english291,english,male,1026,belize 179 | 1050,1050,38.0,0.0,"kansas city, missouri, usa",english292,english,male,1051,usa 180 | 1066,1066,25.0,0.0,"airdrie, alberta, 
canada",english295,english,male,1067,canada 181 | 1070,1070,19.0,0.0,"lindenhurst, new york, usa",english297,english,male,1071,usa 182 | 1071,1071,22.0,0.0,"lismore, new south wales, australia",english298,english,male,1072,australia 183 | 1074,1074,59.0,0.0,"windsor, ontario, canada",english299,english,male,1075,canada 184 | 1075,1075,18.0,0.0,"dublin, ireland",english300,english,male,1076,ireland 185 | 1077,1077,41.0,0.0,"toronto, ontario, canada",english301,english,male,1078,canada 186 | 1084,1084,22.0,0.0,"melbourne, australia",english302,english,male,1085,australia 187 | 1099,1099,48.0,0.0,"alexandria, va, usa",english307,english,male,1100,usa 188 | 1100,1100,34.0,0.0,"cardiff, wales, uk",english308,english,male,1101,uk 189 | 1118,1118,28.0,0.0,"silver spring, maryland, usa",english313,english,male,1119,usa 190 | 1120,1120,25.0,0.0,"los angeles, california, usa",english315,english,male,1121,usa 191 | 1121,1121,33.0,0.0,"sacramento, california, usa",english316,english,male,1122,usa 192 | 1131,1131,26.0,0.0,"burlington, vermont, usa",english317,english,male,1132,usa 193 | 1136,1136,46.0,0.0,"sarnia, ontario, canada",english318,english,male,1137,canada 194 | 1137,1137,29.0,0.0,"grimsby, lincolnshire, uk",english319,english,male,1138,uk 195 | 1162,1162,52.0,0.0,"brooklyn, new york, usa",english321,english,male,1163,usa 196 | 1171,1171,29.0,0.0,"yorkton, saskatchewan, canada",english322,english,male,1172,canada 197 | 1173,1173,28.0,0.0,"st. 
marys, ontario, canada",english324,english,male,1174,canada 198 | 1175,1175,32.0,0.0,"reading, pennsylvania, usa",english325,english,male,1176,usa 199 | 1187,1187,53.0,0.0,"fairborn, ohio, usa",english326,english,male,1188,usa 200 | 1204,1204,27.0,0.0,"dunedin, florida, usa",english327,english,male,1205,usa 201 | 1215,1215,6.0,0.0,"washington, district of columbia, usa",english335,english,male,1216,usa 202 | 1219,1219,54.0,0.0,"chicago, illinois, usa",english339,english,male,1220,usa 203 | 1221,1221,19.0,0.0,"charleston, west virginia, usa",english340,english,male,1222,usa 204 | 1224,1224,33.0,0.0,"kansas city, missouri, usa",english342,english,male,1225,usa 205 | 1225,1225,23.0,0.0,"portland, maine, usa",english343,english,male,1226,usa 206 | 1232,1232,20.0,0.0,"kansas city, missouri, usa",english345,english,male,1233,usa 207 | 1234,1234,60.0,0.0,"east hartford, connecticut, usa",english346,english,male,1235,usa 208 | 1241,1241,37.0,0.0,"carthage, missouri, usa",english351,english,male,1242,usa 209 | 1242,1242,25.0,0.0,"singapore, singapore",english352,english,male,1243,singapore 210 | 1245,1245,18.0,0.0,"saskatoon, saskatchewan, canada",english353,english,male,1246,canada 211 | 1255,1255,19.0,0.0,"los angeles, california, usa",english355,english,male,1256,usa 212 | 1257,1257,24.0,0.0,"alto, georgia, usa",english357,english,male,1258,usa 213 | 1258,1258,19.0,0.0,"fredericton, new brunswick, canada",english358,english,male,1259,canada 214 | 1275,1275,32.0,0.0,"sackville, new brunswick, canada",english359,english,male,1276,canada 215 | 1280,1280,21.0,0.0,"cork, ireland",english362,english,male,1281,ireland 216 | 1286,1286,20.0,0.0,"castle donington, uk",english363,english,male,1287,uk 217 | 1291,1291,52.0,0.0,"oxford, uk",english365,english,male,1292,uk 218 | 1294,1294,43.0,0.0,"strattford-on-avon, uk",english368,english,male,1295,uk 219 | 1296,1296,27.0,0.0,"mcminnville, oregon, usa",english369,english,male,1297,usa 220 | 1301,1301,22.0,0.0,"pondicherry, 
india",english370,english,male,1302,india 221 | 1306,1306,80.0,0.0,"christiansburg, virginia, usa",english372,english,male,1307,usa 222 | 1307,1307,22.0,0.0,"russellville, kentucky, usa",english373,english,male,1308,usa 223 | 1309,1309,36.0,0.0,"milford haven, wales, uk",english374,english,male,1310,uk 224 | 1311,1311,28.0,0.0,"manchester, connecticut, usa",english375,english,male,1312,usa 225 | 1314,1314,46.0,0.0,"pasadena, california, usa",english376,english,male,1315,usa 226 | 1320,1320,43.0,0.0,"summit, new jersey, usa",english379,english,male,1321,usa 227 | 1323,1323,85.0,0.0,"pike county, kentucky, usa",english381,english,male,1324,usa 228 | 1324,1324,34.0,0.0,"arlington, virginia, usa",english382,english,male,1325,usa 229 | 1326,1326,57.0,0.0,"williamson, west virginia, usa",english384,english,male,1327,usa 230 | 1333,1333,74.0,0.0,"milwaukee, wisconsin, usa",english390,english,male,1334,usa 231 | 1335,1335,62.0,0.0,"warrenton, virginia, usa",english392,english,male,1336,usa 232 | 1339,1339,39.0,0.0,"alexandria, virginia, usa",english395,english,male,1340,usa 233 | 1348,1348,26.0,0.0,"lucerne, switzerland",english401,english,male,1349,switzerland 234 | 1351,1351,29.0,0.0,"windsor, australia",english404,english,male,1352,australia 235 | 1359,1359,47.0,0.0,"windsor, ontario, canada",english407,english,male,1360,canada 236 | 1360,1360,64.0,0.0,"lynwood, california, usa",english408,english,male,1361,usa 237 | 1362,1362,35.0,0.0,"hamilton, new zealand",english410,english,male,1363,new zealand 238 | 1374,1374,32.0,0.0,"woonsocket, rhode island, usa",english415,english,male,1375,usa 239 | 1376,1376,34.0,0.0,"sydney, australia",english416,english,male,1377,australia 240 | 1383,1383,29.0,0.0,"arlington, virginia",english418,english,male,1384,virginia 241 | 1384,1384,27.0,0.0,"youngstown, ohio, usa",english419,english,male,1385,usa 242 | 1386,1386,21.0,0.0,"auckland, new zealand",english420,english,male,1387,new zealand 243 | 1387,1387,38.0,0.0,"landstuhl, 
germany",english421,english,male,1388,germany 244 | 1389,1389,31.0,0.0,"rochester, new york, usa",english422,english,male,1390,usa 245 | 1408,1408,75.0,0.0,"danville, virginia, usa",english424,english,male,1409,usa 246 | 1409,1409,31.0,0.0,"danville, virginia, usa",english425,english,male,1410,usa 247 | 1468,1468,24.0,0.0,"kansas city, kansas, usa",english434,english,male,1469,usa 248 | 1476,1476,21.0,0.0,"cleveland, ohio, usa",english437,english,male,1477,usa 249 | 1527,1527,23.0,0.0,"salisbury mills, new york, usa",english442,english,male,1528,usa 250 | 1529,1529,31.0,0.0,"fort collins, colorado, usa",english443,english,male,1530,usa 251 | 1530,1530,24.0,0.0,"anniston, alabama, usa",english444,english,male,1531,usa 252 | 1531,1531,60.0,0.0,"san francisco, california, usa",english445,english,male,1532,usa 253 | 1533,1533,21.0,0.0,"hartford, connecticut, usa",english447,english,male,1534,usa 254 | 1535,1535,20.0,0.0,"isle of arran, scotland, uk",english448,english,male,1536,uk 255 | 1537,1537,27.0,0.0,"barton, vermont, usa",english449,english,male,1538,usa 256 | 1543,1543,26.0,0.0,"windsor, uk",english450,english,male,1544,uk 257 | 1545,1545,44.0,0.0,"hazlehurst, georgia, usa",english451,english,male,1546,usa 258 | 1546,1546,24.0,0.0,"newcastle, australia",english452,english,male,1547,australia 259 | 1554,1554,43.0,0.0,"syracuse, new york, usa",english457,english,male,1555,usa 260 | 1559,1559,19.0,0.0,"charlotte, north carolina, usa",english459,english,male,1560,usa 261 | 1563,1563,20.0,0.0,"paducah, kentucky, usa",english462,english,male,1564,usa 262 | 1617,1617,27.0,0.0,"ottawa, ontario, canada",english464,english,male,1618,canada 263 | 1634,1634,20.0,0.0,"aylesbury, buckinghamshire, uk",english465,english,male,1635,uk 264 | 1636,1636,18.0,0.0,"portland, maine, usa",english466,english,male,1637,usa 265 | 1638,1638,23.0,1.0,"pozzuoli, italy",english467,english,male,1639,italy 266 | 1639,1639,30.0,0.0,"myrtle beach, south carolina, 
usa",english468,english,male,1640,usa 267 | 1640,1640,22.0,0.0,"knoxville, tennessee, usa",english469,english,male,1641,usa 268 | 1641,1641,21.0,0.0,"leighton buzzard, uk",english470,english,male,1642,uk 269 | 1648,1648,26.0,0.0,"rota, spain",english472,english,male,1649,spain 270 | 1649,1649,20.0,0.0,"nottingham, uk",english473,english,male,1650,uk 271 | 1660,1660,19.0,0.0,"sydney, nova scotia, canada",english478,english,male,1661,canada 272 | 1664,1664,58.0,0.0,"winston-salem, north carolina, usa",english480,english,male,1665,usa 273 | 1667,1667,19.0,0.0,"boston, massachusetts, usa",english483,english,male,1668,usa 274 | 1671,1671,24.0,0.0,"dublin, ireland",english484,english,male,1672,ireland 275 | 1673,1673,20.0,0.0,"cromwell, connecticut, usa",english485,english,male,1674,usa 276 | 1708,1708,18.0,0.0,"auckland, new zealand",english490,english,male,1709,new zealand 277 | 1717,1717,20.0,0.0,"miami, florida, usa",english493,english,male,1718,usa 278 | 1718,1718,27.5,0.0,"findlay, ohio, usa",english494,english,male,1719,usa 279 | 1719,1719,21.0,0.0,"kirkland, washington, usa",english495,english,male,1720,usa 280 | 1723,1723,20.0,0.0,"london, england, uk",english496,english,male,1724,uk 281 | 1725,1725,22.0,0.0,"cleveland, ohio, usa",english497,english,male,1726,usa 282 | 1726,1726,27.0,0.0,"prince george, british columbia, canada",english498,english,male,1727,canada 283 | 1732,1732,38.0,0.0,"albury, new south wales, australia",english500,english,male,1733,australia 284 | 1785,1785,25.0,0.0,"west palm beach, florida, usa",english504,english,male,1786,usa 285 | 1799,1799,21.0,0.0,"accra, ghana",english507,english,male,1800,ghana 286 | 1871,1871,57.0,0.0,"youngstown, ohio, usa",english514,english,male,1872,usa 287 | 1875,1875,33.0,0.0,"colorado springs, colorado, usa",english517,english,male,1876,usa 288 | 1877,1877,24.0,0.0,"kells, meath, ireland",english518,english,male,1878,ireland 289 | 1889,1889,19.0,0.0,"downers grove, illinois, 
usa",english524,english,male,1890,usa 290 | 1894,1894,20.0,0.0,"woodbridge, virginia, usa",english526,english,male,1895,usa 291 | 1961,1961,60.0,0.0,"toronto, ontario, canada",english542,english,male,1962,canada 292 | 1969,1969,31.0,0.0,"hanover, new hampshire, usa",english544,english,male,1970,usa 293 | 1994,1994,56.0,0.0,"tangier island, virginia, usa",english545,english,male,1995,usa 294 | 1996,1996,63.0,0.0,"tangier island, virginia, usa",english547,english,male,1997,usa 295 | 2002,2002,61.0,0.0,"dublin, ireland",english548,english,male,2003,ireland 296 | 2027,2027,82.0,0.0,"fredericksburg, virginia, usa",english550,english,male,2028,usa 297 | 2045,2045,21.0,0.0,"nashville, tennessee, usa",english553,english,male,2046,usa 298 | 2048,2048,90.0,0.0,"brooklyn, new york, usa",english555,english,male,2049,usa 299 | 2054,2054,23.0,0.0,"queens, new york, usa",english558,english,male,2055,usa 300 | 2073,2073,32.0,0.0,"manassas, virginia, usa",english563,english,male,2074,usa 301 | 2076,2076,52.0,0.0,"casper, wyoming, usa",english564,english,male,2077,usa 302 | 2080,2080,22.0,0.0,"derby, england, uk",english566,english,male,2081,uk 303 | 2100,2100,24.0,0.0,"new york, new york, usa",english567,english,male,2101,usa 304 | 2103,2103,24.0,0.0,"sydney, australia",english569,english,male,2104,australia 305 | 2112,2112,39.0,0.0,"washington, dc, usa",english571,english,male,2113,usa 306 | 2120,2120,51.0,0.0,"fort worth, texas, usa",english572,english,male,2121,usa 307 | 2123,2123,46.0,0.0,"painesville, ohio, usa",english573,english,male,2124,usa 308 | 2164,2164,24.0,0.0,"great falls, virginia, usa",english575,english,male,2165,usa 309 | 2170,2170,60.0,0.0,"salford, lancashire, uk",english578,english,male,2171,uk 310 | 2171,2171,22.0,0.0,"adelaide, australia",english579,english,male,2172,australia 311 | --------------------------------------------------------------------------------