├── Labor force participation ethnicity.csv ├── Gender STEM by State.csv ├── LICENSE ├── Labor force participation by Gender and state.csv ├── Gender demo numbers by state.csv ├── Race demo numbers by State.csv ├── README.md ├── Gender Across World.R ├── Technical Gender Across USA.R ├── Contributing.md ├── Gender Across USA.R └── International Gender.csv /Labor force participation ethnicity.csv: -------------------------------------------------------------------------------- 1 | Race,Labor Force Participation 2 | White,0.629 3 | Black or African American,0.616 4 | Asian,0.632 5 | American Indian and Alaskan Native,0.611 6 | Native Hawaiian and Other Pacific Islander,0.687 7 | Two or More Races,0.65 8 | Hispanic or Latino,0.658 -------------------------------------------------------------------------------- /Gender STEM by State.csv: -------------------------------------------------------------------------------- 1 | State,Percent_STEM_Workers_Female 2 | Alabama,0.27 3 | Alaska,0.25 4 | Arizona,0.27 5 | Arkansas,0.30 6 | California,0.29 7 | Colorado,0.29 8 | Connecticut,0.30 9 | Delaware,0.30 10 | District of Columbia,0.44 11 | Florida,0.29 12 | Georgia,0.28 13 | Hawaii,0.30 14 | Idaho,0.26 15 | Illinois,0.28 16 | Indiana,0.26 17 | Iowa,0.32 18 | Kansas,0.29 19 | Kentucky,0.27 20 | Louisiana,0.28 21 | Maine,0.29 22 | Maryland,0.34 23 | Massachusetts,0.32 24 | Michigan,0.27 25 | Minnesota,0.30 26 | Mississippi,0.33 27 | Missouri,0.31 28 | Montana,0.32 29 | Nebraska,0.26 30 | Nevada,0.31 31 | New Hampshire,0.25 32 | New Jersey,0.29 33 | New Mexico,0.25 34 | New York,0.31 35 | North Carolina,0.31 36 | North Dakota,0.32 37 | Ohio,0.29 38 | Oklahoma,0.25 39 | Oregon,0.28 40 | Pennsylvania,0.30 41 | Rhode Island,0.30 42 | South Carolina,0.28 43 | South Dakota,0.28 44 | Tennessee,0.30 45 | Texas,0.27 46 | Utah,0.24 47 | Vermont,0.34 48 | Virginia,0.30 49 | Washington,0.26 50 | West Virginia,0.26 51 | Wisconsin,0.28 52 | Wyoming,0.33 53 | , 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jared Valdron 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Labor force participation by Gender and state.csv: -------------------------------------------------------------------------------- 1 | State,Male_LFPR,Female_LFPR 2 | Alabama,0.64,0.53 3 | Alaska ,0.76,0.68 4 | Arizona ,0.64,0.54 5 | Arkansas,0.64,0.54 6 | California ,0.7,0.57 7 | Colorado,0.74,0.63 8 | Connecticut,0.73,0.63 9 | Delaware,0.68,0.59 10 | District of Columbia ,0.73,0.64 11 | Florida,0.64,0.54 12 | Georgia ,0.68,0.58 13 | Hawaii,0.71,0.59 14 | Idaho,0.7,0.56 15 | Illinois,0.71,0.61 16 | Indiana,0.69,0.59 17 | Iowa ,0.71,0.63 18 | Kansas ,0.72,0.61 19 | Kentucky,0.65,0.54 20 | Louisiana,0.66,0.56 21 | Maine ,0.66,0.6 22 | Maryland,0.73,0.65 23 | Massachusetts,0.72,0.63 24 | Michigan,0.65,0.58 25 | Minnesota,0.74,0.66 26 | Mississippi,0.62,0.54 27 | Missouri,0.68,0.6 28 | Montana,0.67,0.59 29 | Nebraska ,0.75,0.65 30 | Nevada,0.7,0.59 31 | New Hampshire,0.73,0.63 32 | New Jersey,0.72,0.61 33 | New Mexico,0.64,0.55 34 | New York,0.69,0.59 35 | North Carolina ,0.68,0.58 36 | North Dakota,0.76,0.65 37 | Ohio,0.68,0.59 38 | Oklahoma,0.69,0.56 39 | Oregon,0.67,0.58 40 | Pennsylvania,0.67,0.59 41 | Rhode Island ,0.71,0.62 42 | South Carolina ,0.66,0.57 43 | South Dakota,0.72,0.66 44 | Tennessee,0.67,0.56 45 | Texas ,0.72,0.58 46 | Utah,0.75,0.59 47 | Vermont ,0.68,0.63 48 | Virginia,0.71,0.61 49 | Washington,0.7,0.59 50 | West Virginia,0.61,0.49 51 | Wisconsin,0.71,0.63 52 | Wyoming,0.74,0.63 -------------------------------------------------------------------------------- /Gender demo numbers by state.csv: -------------------------------------------------------------------------------- 1 | Geography,Male,Female 2 | Alabama,2346193,2494971 3 | Alaska,385296,351559 4 | Arizona,3344106,3384471 5 | Arkansas,1456694,1511778 6 | California,19200970,19453236 7 | Colorado,2689636,2669659 8 | Connecticut,1750270,1838300 9 | Delaware,452416,482279 10 | District of 
Columbia,312629,346380 11 | Florida,9741262,10193189 12 | Georgia,4922471,5176849 13 | Hawaii,709870,703803 14 | Idaho,819845,815638 15 | Illinois,6310460,6541224 16 | Indiana,3245322,3344256 17 | Iowa,1542988,1563601 18 | Kansas,1441912,1456380 19 | Kentucky,2172745,2239244 20 | Louisiana,2271684,2373986 21 | Maine,651344,678579 22 | Maryland,2886734,3073168 23 | Massachusetts,3269371,3472772 24 | Michigan,4869885,5039715 25 | Minnesota,2710157,2740711 26 | Mississippi,1450269,1538923 27 | Missouri,2973317,3086334 28 | Montana,514191,509200 29 | Nebraska,935935,945324 30 | Nevada,1426322,1412850 31 | New Hampshire,656507,670996 32 | New Jersey,4350531,4564925 33 | New Mexico,1031440,1051229 34 | New York,9555130,10142327 35 | North Carolina,4834592,5106236 36 | North Dakota,376979,359183 37 | Ohio,5673893,5913048 38 | Oklahoma,1919995,1955594 39 | Oregon,1971512,2010755 40 | Pennsylvania,6255042,6528935 41 | Rhode Island,511297,543194 42 | South Carolina,2348338,2486267 43 | South Dakota,428134,422924 44 | Tennessee,3191977,3356032 45 | Texas,13379165,13577270 46 | Utah,1483055,1465372 47 | Vermont,308711,317538 48 | Virginia,4086283,4224018 49 | Washington,3529431,3543715 50 | West Virginia,912270,933822 51 | Wisconsin,2859055,2895743 52 | Wyoming,297691,285338 53 | Puerto Rico,1685787,1843598 -------------------------------------------------------------------------------- /Race demo numbers by State.csv: -------------------------------------------------------------------------------- 1 | Geography,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Two or more races 2 | Alabama,3325037,1282053,23919,60744,2008,85412 3 | Alaska,483518,24443,103574,44218,8862,62340 4 | Arizona,5235158,287110,296732,205229,12863,222579 5 | Arkansas,2307136,460638,18697,41539,6738,66079 6 | California,23680584,2261835,285512,5354608,150908,1787159 7 | Colorado,4517058,219349,50160,157523,7441,183735 8 | 
Connecticut,2768080,372696,9399,152782,1031,106988 9 | Delaware,647002,203027,3494,34428,461,25831 10 | District of Columbia,266035,318598,2174,24036,271,18245 11 | Florida,15130748,3216994,52904,521272,11288,495368 12 | Georgia,6039389,3148134,28950,373983,4916,222457 13 | Hawaii,353643,25871,2715,537363,143973,336629 14 | Idaho,1493155,10433,21454,21630,2003,42050 15 | Illinois,9270907,1837612,29399,655799,4186,300222 16 | Indiana,5534759,608226,15799,130232,2746,146316 17 | Iowa,2823890,103052,9951,66570,2422,61294 18 | Kansas,2468923,167706,23871,77605,1874,96030 19 | Kentucky,3859516,350242,9052,57084,2584,92837 20 | Louisiana,2909290,1494201,26403,78719,1464,85159 21 | Maine,1260476,16303,8013,14643,211,27126 22 | Maryland,3408240,1765926,15946,362259,2792,186153 23 | Massachusetts,5343665,489233,13931,411736,2002,203014 24 | Michigan,7817827,1376446,52891,276769,2492,271798 25 | Minnesota,4597525,310853,56904,246819,1969,148502 26 | Mississippi,1764038,1121327,13072,28562,492,35367 27 | Missouri,5000875,701896,25641,107953,6479,148041 28 | Montana,911907,4260,67222,7481,841,26522 29 | Nebraska,1655708,88388,15739,39794,1305,43653 30 | Nevada,1933057,243552,31927,222612,18334,129036 31 | New Hampshire,1243594,17483,2085,32094,229,24906 32 | New Jersey,6075710,1207221,18470,819208,2811,225713 33 | New Mexico,1530636,41957,193295,29168,1340,67797 34 | New York,12667413,3073278,75751,1599216,7528,576440 35 | North Carolina,6882915,2137131,117479,254550,6588,243370 36 | North Dakota,649730,14761,38369,9296,336,17979 37 | Ohio,9519506,1421943,21459,224520,3248,298177 38 | Oklahoma,2823497,281295,286231,77541,4969,299764 39 | Oregon,3387825,74012,45233,160155,14936,175541 40 | Pennsylvania,10402743,1410563,24947,401979,4463,285768 41 | Rhode Island,854026,68886,5130,34598,575,30058 42 | South Carolina,3252252,1322368,15417,68553,2784,100082 43 | South Dakota,721410,14078,74187,11351,313,22239 44 | Tennessee,5096733,1100577,18757,105368,3299,133210 45 | 
Texas,20174403,3221133,128145,1175423,22248,673400 46 | Utah,2572595,32512,31686,66039,26411,77810 47 | Vermont,592965,7404,1984,9212,206,12169 48 | Virginia,5712958,1596352,21948,502878,5494,279699 49 | Washington,5470566,256990,94026,552032,44870,377703 50 | West Virginia,1725411,63305,3281,13990,648,33726 51 | Wisconsin,4961193,361730,51459,148077,1378,125923 52 | Wyoming,531851,6435,13033,5385,391,15531 53 | Puerto Rico,2448085,320087,11814,9574,182,264764 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Diversity Across Geography 2 | This project aims to make it easier for HR/people analysts to compare the representation of different demographics in their company to the communities they have a presence in. It also helps identify if a company is getting the application numbers from different groups they would expect based on the same criteria. 3 | 4 | ## Why is this project useful? 5 | Especially with very distributed companies, it is often difficult to see which areas you have the biggest struggles in attracting and retaining talent from different backgrounds. This project helps make that process a bit easier, by creating a framework for these comparisons to be made. 6 | 7 | ## How can I use this project as an analyst? 8 | Download the project and unzip it. Open the R file of interest (USA, International, Technical USA), and import the data sources from your company as instructed. This project will calculate the representation numbers of a particular demographic (starting with gender) across different geographic regions (starting with states in the USA or Countries across the world). Then, it will take the labour market data I’ve compiled to estimate the representation of that demographic group in the labour force in that region. 
Finally, it calculates the difference between the expected and actual representations of underrepresented groups, and writes a CSV file that highlights areas where your company has a significant presence and has far fewer people from a particular group than one would expect. If you want to add the optional application rates piece, it goes through a similar process to help you see if you have issues attracting talent from underrepresented groups at the top of the funnel. 9 | 10 | ## Where did you get your Data? 11 | Labor force participation and demographics by state come from the American Community Survey for Gender Across USA (note: labor force participation rates are from 2013, demographics from 2017). For Gender Across World, the female share of the labor force comes from the International Labour Organization, ILOSTAT database (2017). For Technical Gender Across USA, the figures come from a specific aggregation of the American Community Survey and are from 2013. 12 | 13 | ## Why is there only Gender by state in the USA or Country in the World right now? 14 | This is the case because these are the only areas where I personally was able to find the data I wanted. This was always meant to be a starting place, and I’m looking forward to the open source process in helping this project advance. 15 | 16 | ## What is on the roadmap and how can I help? 17 | Check out the “Contributing” file; your help is much appreciated! 18 | 19 | ## You work at GitHub. Is this a GitHub-sponsored project? 20 | I do work at GitHub, and it’s awesome! While GitHub is supportive of me contributing to open source, this project is independent of my work there, and in no way reflects GitHub’s philosophy or projects on this topic. This is just something I find personally interesting and want to build with the community. 
21 | 
--------------------------------------------------------------------------------
/Gender Across World.R:
--------------------------------------------------------------------------------
1 | library(readr)
2 | library(dplyr)
3 | library(tidyr)
4 | 
5 | # Gender Across World: for every country, compares the female share of the company's
6 | # employees (and optionally its applicants) with the female share of that country's labour force.
7 | 
8 | # Puts gender labels into the exact "Male"/"Female" spelling that is used as column names below,
9 | # so inputs such as "male" or " FEMALE " are counted instead of breaking the calculations.
10 | # Call it only on values that are not NA (the NA rows are filtered out first).
11 | normalise_gender <- function(labels) {
12 |   labels <- trimws(as.character(labels))
13 |   paste0(toupper(substr(labels, 1, 1)), tolower(substring(labels, 2)))
14 | }
15 | 
16 | # Adds a zero-count Male and/or Female column when that gender never appears in the input,
17 | # so the percentage calculations below cannot fail on a missing column.
18 | ensure_gender_columns <- function(counts) {
19 |   for (gender in c("Male", "Female")) {
20 |     if (!(gender %in% names(counts))) counts[[gender]] <- 0
21 |   }
22 |   counts
23 | }
24 | 
25 | #Setting Up the right Working directory
26 | setwd("diversity across geography folder directory here")
27 | 
28 | #Reading in Company Gender info: INPUT A CSV FILE WITH "Gender" AND "Country" COLUMNS (named exactly like that) IN DIVERSITY ACROSS GEOGRAPHY FOLDER
29 | #(Gender must be male, female, or blank in any capitalisation; Country must be spelled the same way as in "International Gender.csv", because the two tables are joined on it)
30 | Company_demos <- read_csv("Your file here.csv")
31 | 
32 | #Reading in Percentage of Labor force that is Female
33 | Gender_LFPR <- read_csv("International Gender.csv")
34 | 
35 | #Creating by Country Company Gender Percentages (rows without a gender are left out of the counts)
36 | filtered_gend <- filter(Company_demos, !is.na(Gender))
37 | filtered_gend <- mutate(filtered_gend, Gender = normalise_gender(Gender))
38 | Company_gend_Country <- count(filtered_gend, Gender, Country)
39 | Company_gend_Country_spread <- ensure_gender_columns(spread(Company_gend_Country, Gender, n, fill=0))
40 | Company_gend_Country_spread2 <- mutate(Company_gend_Country_spread, Females_in_org_percent = Female / (Male+Female), Males_in_org_percent = Male / (Male+Female), total_emps = Male + Female )
41 | 
42 | #Joining Workforce Info to Company Info (countries missing from the labour force file get NA for the labour force columns)
43 | Gender_joined <- left_join( Company_gend_Country_spread2,Gender_LFPR, by = "Country" )
44 | Gender_joined2 <- mutate(Gender_joined, Female_difference = Females_in_org_percent - Female_percent_of_LF)
45 | Gender_summary <- select(Gender_joined2, Country, total_emps,Females_in_org_percent,Female_percent_of_LF, Female_difference)
46 | 
47 | #Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
48 | #Ascending order puts the countries where women are most under-represented relative to the labour force first
49 | Gender_summary_filt <- filter(Gender_summary, total_emps >= 10)
50 | Gender_summary_filt <- Gender_summary_filt[order(Gender_summary_filt$Female_difference),]
51 | 
52 | #Writing Summary Table to CSV
53 | write_csv(Gender_summary_filt,"Gender Summary Table.csv")
54 | 
55 | 
56 | 
57 | # (OPTIONAL) Reading in Applications Data: INPUT A CSV FILE WITH "Gender" AND "Country" COLUMNS IN DIVERSITY ACROSS GEOGRAPHY FOLDER (Gender must be male, female, or blank, Country just the Country name)
58 | applications <- read_csv("Applicant International Gender.csv")
59 | 
60 | # (OPTIONAL) Creating by Country Application Gender Percentages
61 | filtered_applications <- filter(applications, !is.na(Gender))
62 | filtered_applications <- mutate(filtered_applications, Gender = normalise_gender(Gender))
63 | apps_Country <- count(filtered_applications, Gender, Country)
64 | apps_spread <- ensure_gender_columns(spread(apps_Country, Gender, n, fill=0))
65 | apps_spread2 <- mutate(apps_spread, Female_app_percent = Female / (Male + Female))
66 | apps_spread3 <- select(apps_spread2, Country, Female_app_percent)
67 | 
68 | # (OPTIONAL) Join Apps with Summary Table
69 | Gender_Summary_Apps <- left_join(Gender_summary, apps_spread3, by="Country")
70 | 
71 | # (OPTIONAL) Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
72 | Gender_summary_apps_filt <- filter(Gender_Summary_Apps, total_emps >= 10)
73 | Gender_summary_apps_filt <- Gender_summary_apps_filt[order(Gender_summary_apps_filt$Female_difference),]
74 | 
75 | # (OPTIONAL) Writing Applications Summary Table to CSV
76 | write_csv(Gender_summary_apps_filt,"Gender and Apps Summary Table.csv")
77 | 
78 | 
79 | 
--------------------------------------------------------------------------------
/Technical Gender Across USA.R:
--------------------------------------------------------------------------------
1 | library(readr)
2 | library(dplyr)
3 | library(tidyr)
4 | 
5 | # Technical Gender Across USA: for every state, compares the female share of the company's
6 | # technical employees (and optionally technical applicants) with the female share of STEM workers in that state.
7 | 
8 | # Puts gender labels into the exact "Male"/"Female" spelling that is used as column names below,
9 | # so inputs such as "male" or " FEMALE " are counted instead of breaking the calculations.
10 | # Call it only on values that are not NA (the NA rows are filtered out first).
11 | normalise_gender <- function(labels) {
12 |   labels <- trimws(as.character(labels))
13 |   paste0(toupper(substr(labels, 1, 1)), tolower(substring(labels, 2)))
14 | }
15 | 
16 | # Adds a zero-count Male and/or Female column when that gender never appears in the input,
17 | # so the percentage calculations below cannot fail on a missing column.
18 | ensure_gender_columns <- function(counts) {
19 |   for (gender in c("Male", "Female")) {
20 |     if (!(gender %in% names(counts))) counts[[gender]] <- 0
21 |   }
22 |   counts
23 | }
24 | 
25 | #Setting Up the right Working directory
26 | setwd("diversity across geography folder directory here")
27 | 
28 | #Reading in Company Gender info: INPUT A CSV FILE WITH "Gender" AND "State" COLUMNS (named exactly like that) IN DIVERSITY ACROSS GEOGRAPHY FOLDER FILTERED SUCH THAT YOU ONLY HAVE TECHNICAL WORKERS
29 | #(Gender must be male, female, or blank in any capitalisation; State must be the full state name, because it is joined to the State column of "Gender STEM by State.csv")
30 | Company_demos <- read_csv("Your File 1.csv")
31 | 
32 | #Reading in Percentage of STEM workforce that is female by state
33 | Gender_STEM <- read_csv("Gender STEM by State.csv")
34 | 
35 | # Creating by State Company Gender Percentages (rows without a gender are left out of the counts)
36 | filtered_gend <- filter(Company_demos, !is.na(Gender))
37 | filtered_gend <- mutate(filtered_gend, Gender = normalise_gender(Gender))
38 | Company_gend_state <- count(filtered_gend, Gender, State)
39 | Company_gend_state_spread <- ensure_gender_columns(spread(Company_gend_state, Gender, n, fill=0))
40 | Company_gend_state_spread2 <- mutate(Company_gend_state_spread, Tech_Females_in_org_percent = Female / (Male+Female), Males_in_org_percent = Male / (Male+Female), total_emps = Male + Female )
41 | 
42 | #Joining Workforce Info to Company Info (states missing from the STEM file get NA for the STEM column)
43 | Gender_joined <- left_join( Company_gend_state_spread2,Gender_STEM, by = "State" )
44 | Gender_joined2 <- mutate(Gender_joined, Female_difference = Tech_Females_in_org_percent - Percent_STEM_Workers_Female)
45 | Gender_summary <- select(Gender_joined2, State, total_emps,Tech_Females_in_org_percent,Percent_STEM_Workers_Female, Female_difference)
46 | 
47 | #Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
48 | #Ascending order puts the states where women are most under-represented relative to the STEM workforce first
49 | Gender_summary_filt <- filter(Gender_summary, total_emps >= 5)
50 | Gender_summary_filt <- Gender_summary_filt[order(Gender_summary_filt$Female_difference),]
51 | 
52 | #Writing Summary Table to CSV
53 | write_csv(Gender_summary_filt,"Gender Summary Table.csv")
54 | 
55 | 
56 | 
57 | # (OPTIONAL) Reading in Applications Data: INPUT A CSV FILE WITH "Gender" AND "State" COLUMNS IN DIVERSITY ACROSS GEOGRAPHY FOLDER FILTERED SUCH THAT YOU ONLY HAVE APPLICATIONS FOR TECHNICAL ROLES (Gender must be male, female, or blank, State just the state name)
58 | applications <- read_csv("Your file here 2.csv")
59 | 
60 | # (OPTIONAL) Creating by State Technical Application Gender Percentages
61 | filtered_applications <- filter(applications, !is.na(Gender))
62 | filtered_applications <- mutate(filtered_applications, Gender = normalise_gender(Gender))
63 | apps_state <- count(filtered_applications, Gender, State)
64 | apps_spread <- ensure_gender_columns(spread(apps_state, Gender, n, fill=0))
65 | apps_spread2 <- mutate(apps_spread, Female_Tech_app_percent = Female / (Male + Female))
66 | apps_spread3 <- select(apps_spread2, State, Female_Tech_app_percent)
67 | 
68 | # (OPTIONAL) Join Apps with Summary Table
69 | Gender_Summary_Apps <- left_join(Gender_summary, apps_spread3, by="State")
70 | 
71 | # (OPTIONAL) Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
72 | Gender_summary_apps_filt <- filter(Gender_Summary_Apps, total_emps >= 5)
73 | Gender_summary_apps_filt <- Gender_summary_apps_filt[order(Gender_summary_apps_filt$Female_difference),]
74 | 
75 | # (OPTIONAL) Writing Applications Summary Table to CSV
76 | write_csv(Gender_summary_apps_filt,"Gender and Apps Summary Table.csv")
77 | 
78 | 
79 | 
--------------------------------------------------------------------------------
/Contributing.md:
--------------------------------------------------------------------------------
1 | # The Types of Contributions I'm looking for
2 | Essentially, I'd love contributions that add better (ie more granularity) datasets, and new functionality of the tool. If you want to alter the base code in some meaningful way, please get in touch with me via GitHub. When you add additional functionality (ie, Gender by country), please make a separate R or python file and specify what it can be used for / how to use it as comments in the code.
3 | 
4 | # Roadmap / Where you Can Help
5 | Here is a roadmap ordered from most to less impactful in my eye:
6 | 
7 | 1. Taking differential labour force participation rates across states in the USA for Gender
8 | ~~The script I created currently just applies USA-wide labour force participation rates to each individual state’s gender population breakdown, which is obviously imperfect. I could not find a good, free source of labour force participation rates by gender by state. If you have this, it could become a lot more powerful!~~
9 | 
10 | ^*SOLVED by @javaldro by finding another datset*^
11 | 
12 | 2. 
Adding functionality for comparisons of ethnicity in the USA by state 13 | I have actually already built most of the script for this, but I ran into a major roadblock. I have only seen companies collect ethnicity in a format that parses out Hispanic/Latino folks as a separate group, but the American Community Survey does not. Instead, they have a separate table of what percentage of white folks, black folks, etc. are Hispanic for the USA overall. I could make a correction myself, but the problem is that it would end up assuming that Hispanic/Latino folks are evenly distributed across the USA, which is obviously not the case. So if you know of some better data sets that could solve this problem, we could add this very important functionality. I have added the files I have access to for your reference. 14 | 15 | 3. Making comparisons across countries 16 | ~~Obviously, the USA is not the only place where people are thinking about these topics. I haven’t searched too hard for relevant data, but if someone could find by-country breakdowns of the labor force, we could apply this same approach to see if a company is appropriately mirroring the labour force composition of different countries they have a presence in. I think this would be very interesting, as naturally ethnic population base rates vary meaningfully across countries, as does the labour force participation of women. That said, I think the ethnic piece has an additional challenge in that the terminology and definitions of different groups vary meaningfully across countries.~~ 17 | 18 | ^*GENDER SOLVED by @javaldro with the help of Frank Mu*^ 19 | 20 | 4. Creating an easy conversion from state shorthand (NY) to state name (New York); same with countries 21 | Not all of our state data is in the same format, and the script currently only recognizes full state names. It would be great if someone could create a framework for this conversion to make all of our lives a bit easier. 22 | 23 | 5. 
Making python versions of these tools
24 | I have created this base script in R as it is my tool of preference. That said, I know others prefer python. So to help make the project inclusive, I think it would be nice to add python versions as well.
25 | 
--------------------------------------------------------------------------------
/Gender Across USA.R:
--------------------------------------------------------------------------------
1 | library(readr)
2 | library(dplyr)
3 | library(tidyr)
4 | 
5 | # Gender Across USA: for every state, compares the female share of the company's employees
6 | # (and optionally its applicants) with an estimate of the female share of that state's labour force.
7 | 
8 | # Puts gender labels into the exact "Male"/"Female" spelling that is used as column names below,
9 | # so inputs such as "male" or " FEMALE " are counted instead of breaking the calculations.
10 | # Call it only on values that are not NA (the NA rows are filtered out first).
11 | normalise_gender <- function(labels) {
12 |   labels <- trimws(as.character(labels))
13 |   paste0(toupper(substr(labels, 1, 1)), tolower(substring(labels, 2)))
14 | }
15 | 
16 | # Adds a zero-count Male and/or Female column when that gender never appears in the input,
17 | # so the percentage calculations below cannot fail on a missing column.
18 | ensure_gender_columns <- function(counts) {
19 |   for (gender in c("Male", "Female")) {
20 |     if (!(gender %in% names(counts))) counts[[gender]] <- 0
21 |   }
22 |   counts
23 | }
24 | 
25 | #Setting Up the right Working directory
26 | setwd("diversity across geography folder directory here")
27 | 
28 | #Reading in Company Gender info: INPUT A CSV FILE WITH "Gender" AND "State" COLUMNS (named exactly like that) IN DIVERSITY ACROSS GEOGRAPHY FOLDER
29 | #(Gender must be male, female, or blank in any capitalisation; State must be the full state name, because it is joined to the Geography names of "Gender demo numbers by state.csv")
30 | Company_demos <- read_csv("Your file Here 1.csv")
31 | 
32 | #Reading in Labor Force Participation Rates and Census Numbers
33 | Gender_LFPR <- read_csv("Labor force participation by Gender and state.csv")
34 | Gender_state <- read_csv("Gender demo numbers by state.csv")
35 | 
36 | # Creating by State Company Gender Percentages (rows without a gender are left out of the counts)
37 | filtered_gend <- filter(Company_demos, !is.na(Gender))
38 | filtered_gend <- mutate(filtered_gend, Gender = normalise_gender(Gender))
39 | Company_gend_state <- count(filtered_gend, Gender, State)
40 | Company_gend_state_spread <- ensure_gender_columns(spread(Company_gend_state, Gender, n, fill=0))
41 | Company_gend_state_spread2 <- mutate(Company_gend_state_spread, Females_in_org_percent = Female / (Male+Female), Males_in_org_percent = Male / (Male+Female), total_emps = Male + Female )
42 | 
43 | # Creating By State Labour force Gender Percentages
44 | # Each state's male/female population is weighted by that state's own participation rate.
45 | # (The earlier spread() of a single USA-wide rate was removed: Gender_LFPR has State, Male_LFPR and
46 | # Female_LFPR columns, so that call stopped the script and its result was never used.)
47 | Gender_labor_adjusted <- left_join(Gender_state, Gender_LFPR, by = c("Geography"="State"))
48 | Gender_labor_adjusted1.5 <- mutate(Gender_labor_adjusted, Male2 = Male*Male_LFPR, Female2= Female*Female_LFPR)
49 | # State is taken from the joined table's own Geography column so the label can never drift out of step with its row
50 | Gender_labor_adjusted2 <- mutate(Gender_labor_adjusted1.5, Females_in_workforce_percent = Female2 / (Male2+Female2), Males_in_workforce_percent = Male2 / (Male2+Female2), total_workforce = Male2 + Female2, State = Geography )
51 | 
52 | #Joining Workforce Info to Company Info
53 | #Both tables carry Male/Female columns; the join suffixes them (.x/.y), which is harmless because only the percentage columns are selected below
54 | Gender_joined <- left_join( Company_gend_state_spread2,Gender_labor_adjusted2, by = "State" )
55 | Gender_joined2 <- mutate(Gender_joined, Female_difference = Females_in_org_percent - Females_in_workforce_percent)
56 | Gender_summary <- select(Gender_joined2, State, total_emps,Females_in_org_percent,Females_in_workforce_percent, Female_difference)
57 | 
58 | #Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
59 | #Ascending order puts the states where women are most under-represented relative to the labour force first
60 | Gender_summary_filt <- filter(Gender_summary, total_emps >= 10)
61 | Gender_summary_filt <- Gender_summary_filt[order(Gender_summary_filt$Female_difference),]
62 | 
63 | #Writing Summary Table to CSV
64 | write_csv(Gender_summary_filt,"Gender Summary Table.csv")
65 | 
66 | 
67 | 
68 | # (OPTIONAL) Reading in Applications Data: INPUT A CSV FILE WITH "Gender" AND "State" COLUMNS IN DIVERSITY ACROSS GEOGRAPHY FOLDER (Gender must be male, female, or blank, State just the state name)
69 | applications <- read_csv("Your file Here 2.csv")
70 | 
71 | # (OPTIONAL) Creating by State Application Gender Percentages
72 | filtered_applications <- filter(applications, !is.na(Gender))
73 | filtered_applications <- mutate(filtered_applications, Gender = normalise_gender(Gender))
74 | apps_state <- count(filtered_applications, Gender, State)
75 | apps_spread <- ensure_gender_columns(spread(apps_state, Gender, n, fill=0))
76 | apps_spread2 <- mutate(apps_spread, Female_app_percent = Female / (Male + Female))
77 | apps_spread3 <- select(apps_spread2, State, Female_app_percent)
78 | 
79 | # (OPTIONAL) Join Apps with Summary Table
80 | Gender_Summary_Apps <- left_join(Gender_summary, apps_spread3, by="State")
81 | 
82 | # (OPTIONAL) Filtering and Ordering to Highlight Discrepancies (Filter min value on total employees is editable)
83 | Gender_summary_apps_filt <- filter(Gender_Summary_Apps, total_emps >= 10)
84 | Gender_summary_apps_filt <- Gender_summary_apps_filt[order(Gender_summary_apps_filt$Female_difference),]
85 | 
86 | # (OPTIONAL) Writing Applications Summary Table to CSV
87 | write_csv(Gender_summary_apps_filt,"Gender and Apps Summary Table.csv")
88 | 
89 | 
90 | 
--------------------------------------------------------------------------------
/International Gender.csv:
--------------------------------------------------------------------------------
1 | Country,Female_percent_of_LF
2 | "Yemen, Rep.",0.07881696
3 | United Arab Emirates,0.123875665
4 | Oman,0.128908874
5 | Qatar,0.140698521
6 | Syrian Arab Republic,0.144205221
7 | Saudi Arabia,0.161842124
8 | Afghanistan,0.17339948
9 | Jordan,0.176556842
10 | Algeria,0.182763058
11 | "Iran, Islamic Rep.",0.190430069
12 | Iraq,0.201137455
13 | Somalia,0.20448507
14 | Middle East & North Africa,0.204986411
15 | Arab World,0.205354211
16 | Middle East & North Africa (IDA & IBRD countries),0.206198132
17 | Middle East & North Africa (excluding high income),0.20624813
18 | Bahrain,0.209667799
19 | West Bank and Gaza,0.210739433
20 | Pakistan,0.223935801
21 | "Egypt, Arab Rep.",0.230703616
22 | Lebanon,0.244493769
23 | India,0.245429006
24 | Libya,0.245525722
25 | South Asia,0.254582019
26 | South Asia (IDA & IBRD),0.254582019
27 | Sudan,0.257165638
28 | Morocco,0.260520757
29 | Tunisia,0.264585494
30 | Maldives,0.270357122
31 | Kuwait,0.281660116
32 | Bangladesh,0.291405018
33 | Early-demographic dividend,0.312438314
34 | Mauritania,0.31298557
35 | Timor-Leste,0.317036948
36 | Lower middle income,0.320484834
37 | Turkey,0.322136109
38 | Guatemala,0.339294002
39 | Sri Lanka,0.34493834
40 | Fiji,0.346745763
41 | Guyana,0.351736907
42 | Sao Tome and Principe,0.359837382
43 | Mexico,0.364838223
44 | Samoa,0.36543015
45 | IDA blend,0.373530619
46 | Middle income,0.37396341
47 | IBRD only,0.376441965
48 | 
Honduras,0.37838727 49 | Other small states,0.3799778 50 | Malaysia,0.380536337 51 | Indonesia,0.382401023 52 | Low & middle income,0.382802446 53 | Costa Rica,0.382933549 54 | IDA & IBRD total,0.383159588 55 | Cuba,0.383389867 56 | Tonga,0.384473557 57 | Tajikistan,0.386477683 58 | Bosnia and Herzegovina,0.387145578 59 | "Macedonia, FYR",0.388414537 60 | Malta,0.388642139 61 | Small states,0.389064415 62 | Nicaragua,0.389291185 63 | Pacific island small states,0.390205732 64 | Fragile and conflict affected situations,0.390395694 65 | Mauritius,0.391279008 66 | World,0.392927821 67 | Equatorial Guinea,0.393178469 68 | Suriname,0.393740515 69 | Panama,0.396356036 70 | Paraguay,0.396782932 71 | Philippines,0.39898978 72 | Kyrgyz Republic,0.399960008 73 | Bhutan,0.40064719 74 | "Venezuela, RB",0.400832567 75 | Gabon,0.401128759 76 | Belize,0.401684953 77 | Guam,0.404657879 78 | IDA total,0.406787657 79 | Myanmar,0.407271093 80 | Argentina,0.408810924 81 | Ecuador,0.409818257 82 | Latin America & Caribbean (excluding high income),0.410177796 83 | Latin America & Caribbean,0.41049187 84 | Latin America & the Caribbean (IDA & IBRD countries),0.410809578 85 | Cote d'Ivoire,0.411211149 86 | Bolivia,0.411803419 87 | Chile,0.412614906 88 | Dominican Republic,0.41282307 89 | Swaziland,0.41360274 90 | Senegal,0.413822013 91 | Cabo Verde,0.413864069 92 | Comoros,0.415966068 93 | Uzbekistan,0.416292213 94 | El Salvador,0.417552171 95 | St. 
Vincent and the Grenadines,0.418290443 96 | "Korea, Rep.",0.418830391 97 | Turkmenistan,0.419132206 98 | Djibouti,0.419405582 99 | Albania,0.419659153 100 | Trinidad and Tobago,0.419997657 101 | Italy,0.420282294 102 | Puerto Rico,0.420808962 103 | IDA only,0.421190048 104 | Upper middle income,0.421920714 105 | Caribbean small states,0.424718994 106 | Least developed countries: UN classification,0.425670742 107 | Brunei Darussalam,0.426521468 108 | French Polynesia,0.428523428 109 | Colombia,0.429236431 110 | Brazil,0.42939521 111 | Mali,0.430655272 112 | East Asia & Pacific (IDA & IBRD countries),0.431254162 113 | Romania,0.431494853 114 | Jamaica,0.431645007 115 | East Asia & Pacific (excluding high income),0.431922905 116 | Japan,0.43219493 117 | East Asia & Pacific,0.432329901 118 | Solomon Islands,0.433786297 119 | Niger,0.434062455 120 | Late-demographic dividend,0.435568705 121 | Channel Islands,0.436385842 122 | China,0.436522942 123 | Vanuatu,0.437933409 124 | High income,0.438766242 125 | OECD members,0.438984369 126 | Greece,0.440240934 127 | Serbia,0.442197784 128 | "Gambia, The",0.443328172 129 | Europe & Central Asia (excluding high income),0.44387115 130 | Czech Republic,0.44427685 131 | Europe & Central Asia (IDA & IBRD countries),0.444594832 132 | Montenegro,0.445682158 133 | Burkina Faso,0.445779883 134 | Pre-demographic dividend,0.446070336 135 | Ireland,0.449242232 136 | Singapore,0.44966974 137 | South Africa,0.450067183 138 | Poland,0.450360861 139 | Luxembourg,0.451067499 140 | Central Europe and the Baltics,0.452105231 141 | Central African Republic,0.452476564 142 | Europe & Central Asia,0.452659133 143 | Mongolia,0.453303854 144 | Uruguay,0.453878952 145 | Heavily indebted poor countries (HIPC),0.454062652 146 | Peru,0.454217407 147 | Nigeria,0.454389915 148 | Post-demographic dividend,0.454860947 149 | Slovak Republic,0.454879789 150 | Low income,0.455342769 151 | Georgia,0.456418931 152 | Thailand,0.456737781 153 | New 
Caledonia,0.45679284 154 | Chad,0.457557795 155 | Hungary,0.457580147 156 | United States of America,0.458211792 157 | European Union,0.458387454 158 | Euro area,0.458751195 159 | Belgium,0.459550075 160 | North America,0.45970836 161 | St. Lucia,0.460196094 162 | Netherlands,0.461390571 163 | Australia,0.461705979 164 | Bulgaria,0.463502721 165 | Spain,0.463579883 166 | Croatia,0.464120148 167 | Germany,0.464650179 168 | Cyprus,0.4648692 169 | United Kingdom,0.465278356 170 | Slovenia,0.465744267 171 | Botswana,0.465795345 172 | Armenia,0.466396691 173 | Switzerland,0.466424585 174 | Sub-Saharan Africa (excluding high income),0.466722017 175 | Sub-Saharan Africa,0.466722017 176 | Sub-Saharan Africa (IDA & IBRD countries),0.466722017 177 | Lesotho,0.467220576 178 | Austria,0.467696705 179 | Cameroon,0.469565139 180 | France,0.469674294 181 | Guinea-Bissau,0.470751889 182 | Norway,0.47085046 183 | Iceland,0.471435619 184 | Israel,0.471598068 185 | Canada,0.471880825 186 | Ethiopia,0.473219475 187 | New Zealand,0.473815743 188 | Ukraine,0.473938864 189 | Denmark,0.474001715 190 | Sweden,0.476547715 191 | "Bahamas, The",0.476572464 192 | "Korea, Dem. 
People’s Rep.",0.477716488 193 | Zambia,0.478040491 194 | Uganda,0.479135002 195 | Haiti,0.480085384 196 | Finland,0.480514053 197 | Malawi,0.480545967 198 | Vietnam,0.480923305 199 | Liberia,0.48437343 200 | Kenya,0.48499295 201 | Estonia,0.48529235 202 | Kazakhstan,0.485328515 203 | "Congo, Rep.",0.485812314 204 | Russian Federation,0.486344101 205 | Portugal,0.488236156 206 | Azerbaijan,0.488274381 207 | Madagascar,0.488442814 208 | Tanzania,0.488765108 209 | Papua New Guinea,0.48902658 210 | Moldova,0.489596025 211 | "Hong Kong SAR, China",0.489852244 212 | "Macao SAR, China",0.490281766 213 | Guinea,0.49068653 214 | Zimbabwe,0.491609695 215 | Benin,0.491801203 216 | South Sudan,0.492196523 217 | Togo,0.492267523 218 | Ghana,0.495245607 219 | Namibia,0.49542078 220 | Belarus,0.497142805 221 | Lao PDR,0.49771289 222 | "Congo, Dem. Rep.",0.498313536 223 | Cambodia,0.498990786 224 | Barbados,0.499294791 225 | Virgin Islands (U.S.),0.501174703 226 | Angola,0.50121489 227 | Latvia,0.501310682 228 | Sierra Leone,0.501470617 229 | Lithuania,0.50619336 230 | Rwanda,0.515331255 231 | Nepal,0.517849823 232 | Burundi,0.52360573 233 | Mozambique,0.548169433 --------------------------------------------------------------------------------