"""Storage report automation (storage_automation_02_27_a.py).

Used internally by one team at BCM-HGSC; it is of little interest to others.
The main ideas are filtering heuristics applied to storage report files, and
merging several spreadsheets into a single storage report workbook.

Usage::

    python storage_automation_02_27_a.py BASE SHARES REPORT_TYPE

* ``mfts`` report: BASE is the orphans spreadsheet, SHARES is the shares
  spreadsheet and REPORT_TYPE is ``mfts``.  Writes ``analysis_mfts.xlsx``.
* ``submissions`` report: BASE is the base spreadsheet, SHARES is a
  placeholder that is not read (e.g. ``blank``) and REPORT_TYPE is
  ``submissions``.  Writes ``analysis_sub.xlsx``.
"""

# The wildcard imports are kept from the original script; nothing below
# relies on the names they inject.  pandas is asked for the 'xlsxwriter'
# engine by name when writing workbooks.
from sys import *
from openpyxl import *
import xlsxwriter
import os
import sys
import pandas as pd
import numpy as np

USAGE = ("usage: storage_automation_02_27_a.py BASE SHARES REPORT_TYPE"
         "  (REPORT_TYPE: mfts | submissions)")

MFTS_OUTPUT = 'analysis_mfts.xlsx'
SUBMISSIONS_OUTPUT = 'analysis_sub.xlsx'

# Rows of the merged mfts table are kept only above this share of all blocks.
MIN_PERCENT = 2.5
# Rows of the submissions table are kept only above this block count.
MIN_BLOCKS = 1000

# (substring looked for in the path, group label written to the sheet).
# Order matters: the first marker found in a path wins.
KNOWN_GROUPS = (
    ("SAS", "SAS"),
    ("venner", "venner"),
    ("TCRB", "TCRB"),
    ("CAfGEN", "Cafv"),
)

# NOTE(review): Series.str.contains treats this as a regular expression, so
# "/*" means "zero or more slashes", not a shell-style wildcard.  In effect
# every path containing "/stornext/snfs1/submissions/" matches.  The pattern
# is kept unchanged; confirm whether a project sub-directory was meant to be
# required.
SNFS1_SUBMISSIONS_PATTERN = "/stornext/snfs1/submissions/*/"


def _known_group(path):
    """Return the label of the first known marker found in *path*, else None."""
    if isinstance(path, str):
        for marker, group in KNOWN_GROUPS:
            if marker in path:
                return group
    return None


def _assign_groups(paths, ask=input):
    """Return one group label per entry of *paths*, in order.

    Paths that match no known marker are reported on stdout and the operator
    is asked (through *ask*) for a label.  Each distinct unknown path is asked
    about only once, so repeated paths no longer break the run.
    """
    answers = {}
    groups = []
    for path in paths:
        group = _known_group(path)
        if group is None:
            if path not in answers:
                print("----------------------")
                print(str(path) + " was not found")
                answers[path] = ask('Please enter group to assign to value: ')
            group = answers[path]
        groups.append(group)
    return groups


def _run_mfts(orphans_file, shares_file):
    """Build ``analysis_mfts.xlsx`` from the orphans and shares spreadsheets.

    Sheets written, in order: ``shares_mod`` (three columns of the shares
    file), ``orphans`` (the orphans file as read), ``merged`` (both stacked,
    plus a ``percentage`` column), ``trim_percent`` (rows above MIN_PERCENT)
    and ``groups`` (the trimmed rows plus a ``group`` column).
    """
    shares_mod = pd.read_excel(shares_file)[['blocks', 'last_accessed', 'path']]
    df_orphan = pd.read_excel(orphans_file)

    # pd.concat replaces the removed DataFrame.append.  The original row
    # labels are kept, so the index column of the sheets is unchanged.
    df_merged = pd.concat([shares_mod, df_orphan], sort=False)
    # Series.sum() skips empty cells; the previous hand-written loop turned
    # the total (and every percentage) into NaN as soon as one was present.
    total_blocks = df_merged['blocks'].sum()
    df_merged['percentage'] = df_merged['blocks'] * 100 / total_blocks

    df_trim_pct = df_merged[df_merged['percentage'] > MIN_PERCENT]

    # Work on a copy and assign by position: the merged frame has duplicate
    # row labels (one run per input sheet), so label-based writes would hit
    # the wrong rows.
    df_trim_pct_group = df_trim_pct.copy()
    df_trim_pct_group['group'] = _assign_groups(df_trim_pct_group['path'])

    # The context manager saves and closes the workbook even on error, and
    # replaces the removed ExcelWriter.save().
    with pd.ExcelWriter(MFTS_OUTPUT, engine='xlsxwriter') as writer:
        shares_mod.to_excel(writer, sheet_name='shares_mod')
        df_orphan.to_excel(writer, sheet_name='orphans')
        df_merged.to_excel(writer, sheet_name='merged')
        df_trim_pct.to_excel(writer, sheet_name='trim_percent')
        df_trim_pct_group.to_excel(writer, sheet_name='groups')


def _run_submissions(base_file):
    """Build ``analysis_sub.xlsx`` from the base spreadsheet.

    The sheet is read with its three columns named ``blocks``, ``ast`` and
    ``path``; rows above MIN_BLOCKS whose path matches
    SNFS1_SUBMISSIONS_PATTERN are written to ``snfs1_submissions_trim`` and
    echoed to stdout.
    """
    df_base = pd.read_excel(base_file, names=["blocks", 'ast', 'path'])
    df_base_mod = df_base[['blocks', 'path']]
    df_base_trim = df_base_mod[df_base_mod['blocks'] > MIN_BLOCKS]

    # na=False: rows with an empty path count as "no match" instead of
    # making the boolean mask invalid.
    in_snfs1 = df_base_trim['path'].str.contains(
        SNFS1_SUBMISSIONS_PATTERN, na=False)
    df_snfs1 = df_base_trim[in_snfs1]

    with pd.ExcelWriter(SUBMISSIONS_OUTPUT, engine='xlsxwriter') as writer:
        df_snfs1.to_excel(writer, sheet_name='snfs1_submissions_trim')
    print(df_snfs1)

    # TODO: the original script sketched (but never finished) a 'projects'
    # column taken from component 5 of each path split on "/".


def main(args):
    """Run the report selected on the command line.

    *args* is an argv-style list: program name, BASE, SHARES, REPORT_TYPE.
    Returns 0 on success and 2 when the arguments are missing or the report
    type is not recognised.
    """
    if len(args) < 4:
        print(USAGE, file=sys.stderr)
        return 2

    base, shares, report_type = args[1], args[2], args[3]
    if report_type == "mfts":
        _run_mfts(base, shares)
    elif report_type == "submissions":
        _run_submissions(base)
    else:
        print("unknown report type: " + repr(report_type), file=sys.stderr)
        print(USAGE, file=sys.stderr)
        return 2
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))