# Repository layout:
#   ├── GP_drugs_processing.R
#   └── GP_drugs_analysis.R
#
# /GP_drugs_processing.R:
# --------------------------------------------------------------------------------
# Reads the monthly GP prescribing extracts, totals spend and items for a list
# of "problem" drugs (statins, clopidogrel, sartans) overall and per practice,
# and derives median per-item prices for selected statins.

# NOTE(review): machine-specific working directory; every file name below is
# resolved relative to it.
setwd("/Users/francinebennett/Desktop/analysis/NHS_analysis/")

# Load packages
require(ggplot2)
require(plyr)
require(googleVis)
require(RSQLite)
require(gdata)

# List filenames (currently up to July 2012. The present analysis only includes
# files up to May, because after that the Atorva price changes.)
file.list <- c(
  "T201109PDP IEXT.csv",
  "T201110PDP IEXT.csv",
  "T201111PDP IEXT.csv",
  "T201112PDP IEXT.csv",
  "T201201PDP IEXT.csv",
  "T201202PDP IEXT.csv",
  "T201203PDP IEXT.csv",
  "T201204PDP IEXT.csv",
  "T201205PDP IEXT.CSV"
)
# ,"T201206PDP IEXT.csv","T201207PDP IEXT.csv")

# The analysis script reads this list back (column `x`) to count the months.
write.csv(file.list, "file_list.txt", row.names = FALSE)

# The address file has no header row, so columns are named V1, V2, ...
# Column V2 is used below as the practice code when joining.
addresses <- read.csv("T201204ADD REXT.CSV", header = FALSE)
short.addresses <- addresses[, c(2, 3, 6, 8)]

# Create list of potential problem drugs
GP.drugs <- read.csv("T201109PDP IEXT.csv", header = TRUE)
drug.list <- unique(GP.drugs$BNF.NAME)
drug.list <- drug.list[order(drug.list)]
# Logical masks instead of `x[-grep(...)]`: negative indexing with an empty
# grep() result would silently drop every element.
is_statin <- grepl("statin", drug.list) & !grepl("Nystatin", drug.list)
statins <- drug.list[is_statin]
clopidogrel <- c("Clopidogrel")
sartans <- c(
  "Azilsartan Medoxomil",
  "Candesartan Cilexetil",
  "Eprosartan",
  "Irbesartan",
  "Olmesartan Medoxomil",
  "Telmisartan",
  "Valsartan",
  "Losartan Potassium"
)

problem.drugs <- data.frame(
  Drug = c(as.character(statins), clopidogrel, sartans),
  category = rep(
    c("statin", "clopidogrel", "sartan"),
    times = c(length(statins), length(clopidogrel), length(sartans))
  )
)
# Names in the raw extract may carry padding; strip it so that the %in% match
# inside the loop (which trims BNF.NAME the same way) lines up.
problem.drugs$Drug <- trimws(as.character(problem.drugs$Drug))

# One slot per monthly file; the pieces are bound together once after the loop
# rather than growing a data frame with rbind() on every iteration.
monthly_problem_spend <- vector("list", length(file.list))
monthly_practice_spend <- vector("list", length(file.list))
value_columns <- c("ACT.COST", "ITEMS")

# Loop to load, analyse, and remove large data files
for (i in seq_along(file.list)) {
  file.name <- file.list[i]
  print(file.name)
  GP.drugs <- read.csv(file.name, header = TRUE)
  GP.drugs$BNF.NAME <- trimws(as.character(GP.drugs$BNF.NAME))

  # Total cost and items for ALL drugs, per practice and period
  surgery.subtotal <- aggregate(
    GP.drugs[, value_columns],
    by = list(GP.drugs$PRACTICE, GP.drugs$PERIOD),
    FUN = sum
  )
  names(surgery.subtotal) <- c(
    "Practice.code", "Month", "cost.alldrugs", "items.alldrugs"
  )

  # Rows for the problem drugs only
  problem_rows <- GP.drugs[GP.drugs$BNF.NAME %in% problem.drugs$Drug, ]

  # `problem.spend` deliberately stays a top-level variable: the value from the
  # last file is written to problem_spend.csv at the end of the script.
  problem.spend <- aggregate(
    problem_rows[, value_columns],
    by = list(problem_rows$BNF.NAME, problem_rows$PERIOD),
    FUN = sum
  )
  names(problem.spend) <- c("Drug", "Period", "Spend", "Items")
  problem.spend$Spend <- round(problem.spend$Spend, digits = 0)
  problem.spend$Drug <- as.character(problem.spend$Drug)
  monthly_problem_spend[[i]] <- problem.spend

  # Calculations by practice
  practice_spend <- aggregate(
    problem_rows[, value_columns],
    by = list(
      problem_rows$PRACTICE, problem_rows$PERIOD, problem_rows$BNF.NAME
    ),
    FUN = sum
  )
  names(practice_spend) <- c(
    "Practice.code", "Month", "Drug", "cost.thisdrug", "items.thisdrug"
  )
  # Attach the all-drug totals (shared Practice.code and Month columns), then
  # the practice address columns.
  practice_spend <- merge(practice_spend, surgery.subtotal, all.x = TRUE)
  practice_spend <- merge(
    practice_spend, short.addresses,
    by.x = "Practice.code", by.y = "V2", all.x = TRUE
  )
  monthly_practice_spend[[i]] <- practice_spend
}

total.problem.spend <- do.call(rbind, monthly_problem_spend)
spend.practice <- do.call(rbind, monthly_practice_spend)

## Calculate Simvastatin 40mg price and other median actual prices
preparation.level <- read.csv("T201206PDPI+BNFT.csv")
is_simva_40 <- grepl("Simvastatin_Tab 40mg", preparation.level$BNF.NAME)
simvastatin <- preparation.level[is_simva_40, ]
simva.price <- median(simvastatin$ACT.COST / simvastatin$ITEMS)
write.csv(simva.price, "simva_price.csv", row.names = FALSE)

# Median cost per item across all practice/month rows for each drug
atorvastatin <- spend.practice[grepl("Atorvastatin", spend.practice$Drug), ]
atorva.price <- median(atorvastatin$cost.thisdrug / atorvastatin$items.thisdrug)
rosuvastatin <- spend.practice[
  grepl("Rosuvastatin Calcium", spend.practice$Drug),
]
rosuva.price <- median(rosuvastatin$cost.thisdrug / rosuvastatin$items.thisdrug)

## Calculate and file savings figures
# Default: no saving assumed for a drug unless it is set explicitly below.
problem.drugs$saving <- 0
# saving = fraction of a drug's spend that would disappear if each item cost
# the median Simvastatin 40mg price instead of that drug's own median price.
# Indexing the column (rather than `df[rows, ]$saving <- ...`) does not error
# when no row matches.
problem.drugs$saving[problem.drugs$Drug == "Rosuvastatin Calcium"] <-
  1 - (simva.price / rosuva.price)
problem.drugs$saving[problem.drugs$Drug == "Atorvastatin"] <-
  1 - (simva.price / atorva.price)

## Write out summary files for main analysis process
# NOTE(review): quote=FALSE will corrupt the CSV if any practice name/address
# field contains a comma -- confirm against the address file.
write.csv(spend.practice, "spend_practice.csv", row.names = FALSE, quote = FALSE)
write.csv(problem.drugs, "problem_drugs.csv", row.names = FALSE, quote = FALSE)
# `problem.spend` holds only the last monthly file processed by the loop.
write.csv(problem.spend, "problem_spend.csv", row.names = FALSE, quote = FALSE)
write.csv(
  total.problem.spend, "total_problem_spend.csv",
  row.names = FALSE, quote = FALSE
)

# --------------------------------------------------------------------------------
# /GP_drugs_analysis.R:
# --------------------------------------------------------------------------------
# Reads the summary CSVs written by the processing script and produces the
# per-practice indicators, plots and time series for the statin analysis.

# NOTE(review): machine-specific working directory.
setwd("/Users/francinebennett/Desktop/analysis/NHS_analysis/")

# Load packages
require(ggplot2)
require(plyr)
require(googleVis)
require(gdata)

# load aggregate.data
file.list <- read.csv("file_list.txt")$x
spend.practice <- read.csv("spend_practice.csv")
problem.drugs <- read.csv("problem_drugs.csv")
problem.spend <- read.csv("problem_spend.csv")
total.problem.spend <- read.csv("total_problem_spend.csv")
# simva_price.csv holds a single value; pull it out as a scalar.
simva.price <- read.csv("simva_price.csv")
simva.price <- simva.price[1, 1]

spend.practice$item.pct <-
  spend.practice$items.thisdrug / spend.practice$items.alldrugs

# Attach category and saving to every drug/period row (shared column: Drug)
total.problem.spend <- merge(total.problem.spend, problem.drugs, all.x = TRUE)
total.problem.spend$amount.wasted <-
  total.problem.spend$Spend * total.problem.spend$saving
wasted.totals <- aggregate(
  total.problem.spend[, c("Spend", "Items", "amount.wasted")],
  by = list(
    "Drug" = total.problem.spend$Drug,
    "category" = total.problem.spend$category
  ),
  FUN = sum
)

## Calculate waste per practice
spend.practice <- merge(spend.practice, problem.drugs, all.x = TRUE)
spend.practice$amount.wasted <-
  spend.practice$cost.thisdrug * as.numeric(spend.practice$saving)

# Collapse the monthly rows to one row per drug and practice.
totals.headings <- list(
  "Drug" = spend.practice$Drug,
  "Practice.name" = spend.practice$V3,
  "Practice.code" = spend.practice$Practice.code,
  "Postcode" = spend.practice$V8,
  "category" = spend.practice$category
)
spend.practice.totals <- aggregate(
  spend.practice[, c("cost.thisdrug", "items.thisdrug", "amount.wasted")],
  by = totals.headings,
  FUN = sum
)

# All-drug totals are repeated on every drug row of a practice/month, so
# de-duplicate to one row per practice/month before summing over months.
temp.totals <- unique(
  spend.practice[, c("Practice.code", "Month", "cost.alldrugs", "items.alldrugs")]
)
temp.totals <- aggregate(
  temp.totals[, c("cost.alldrugs", "items.alldrugs")],
  by = list("Practice.code" = temp.totals$Practice.code),
  FUN = sum
)
spend.practice.totals <- merge(spend.practice.totals, temp.totals, all.x = TRUE)
spend.practice.totals$item.pct <-
  spend.practice.totals$items.thisdrug / spend.practice.totals$items.alldrugs

# Switching over time
# Simvastatin is excluded here, so `x` below is the number of non-Simvastatin
# statin items per practice and month.
statins <- subset(spend.practice, category == "statin")
statins <- subset(statins, Drug != "Simvastatin")
statin.totals <- aggregate(
  statins[, "items.thisdrug"],
  by = list(
    statins$V3, statins$V6, statins$V8, statins$Practice.code, statins$Month
  ),
  FUN = sum
)
names(statin.totals)[4] <- "Practice.code"
names(statin.totals)[5] <- "Month"
statins <- subset(statins, Drug == "Atorvastatin")
# Joined on the shared Practice.code and Month columns
statins <- merge(statins, statin.totals, all.x = TRUE)
statins <- statins[, c(
  "Practice.code", "Month", "items.thisdrug",
  "Group.1", "Group.2", "Group.3", "x"
)]
statins$pct.prop.atorva <- statins$items.thisdrug / statins$x
# Only include clinics with >50 statin prescriptions/month
statins <- subset(statins, x > 50)
# Month is a YYYYMM value; append the day with no separator so the string
# matches "%Y%m%d" exactly, as the time-series conversions below also do.
statins$Month <- as.Date(paste0(statins$Month, "01"), "%Y%m%d")
boxplot(
  statins$pct.prop.atorva ~ statins$Month,
  ylim = c(0.35, 1),
  main = "Percent statin items which are Atorvastatin, by month"
)
boxplot(
  statins$items.thisdrug ~ statins$Month,
  ylim = c(0, 300),
  main = "Number of Atorvastatin items prescribed, by month"
)

## Looking for connections between overprescription by drug

# Sum items.thisdrug per practice (name + code) over the given rows and call
# the resulting total column `value_name`.
practice_item_totals <- function(rows, value_name) {
  totals <- aggregate(
    rows$items.thisdrug,
    by = list(
      "Practice.name" = rows$Practice.name,
      "Practice.code" = rows$Practice.code
    ),
    FUN = sum
  )
  names(totals)[3] <- value_name
  totals
}

# Create data frame of % of category and % of total prescriptions that are
# potentially problematic
practice.indicator <- unique(
  spend.practice.totals[
    , c("Practice.name", "Practice.code", "Postcode", "items.alldrugs")
  ]
)
statin.total <- practice_item_totals(
  subset(spend.practice.totals, category == "statin"), "items.statins"
)
atorva <- practice_item_totals(
  subset(spend.practice.totals, Drug == "Atorvastatin"), "items.atorva"
)
rosuva <- practice_item_totals(
  subset(spend.practice.totals, Drug == "Rosuvastatin Calcium"), "items.rosuva"
)

# Left joins on the shared Practice.name and Practice.code columns; practices
# without the drug get NA, which is zero-filled after the ratios are computed.
practice.indicator <- merge(practice.indicator, statin.total, all.x = TRUE)
practice.indicator <- merge(practice.indicator, atorva, all.x = TRUE)
practice.indicator <- merge(practice.indicator, rosuva, all.x = TRUE)

practice.indicator$pct.statin.atorva <-
  practice.indicator$items.atorva / practice.indicator$items.statins
practice.indicator$pct.statin.rosuva <-
  practice.indicator$items.rosuva / practice.indicator$items.statins
practice.indicator[is.na(practice.indicator)] <- 0

# Check for correlations between bad prescribing
summary(
  lm(practice.indicator$pct.statin.rosuva ~ practice.indicator$pct.statin.atorva)
)

# Plot different types of bad prescribing
ggplot(practice.indicator, aes(pct.statin.rosuva, pct.statin.atorva)) +
  geom_point(aes(colour = log(items.alldrugs)))

# Basis for funnel plot
# ggtitle() replaces opts(title = ...), which current ggplot2 no longer has.
ggplot(practice.indicator, aes(items.alldrugs, pct.statin.atorva)) +
  geom_point() +
  ggtitle("Atorva funnel")
ggplot(practice.indicator, aes(items.alldrugs, pct.statin.rosuva)) +
  geom_point() +
  ggtitle("Rosuva funnel")

## Timeseries plots
main_statins <- subset(
  spend.practice,
  Drug %in% c("Atorvastatin", "Rosuvastatin Calcium", "Simvastatin")
)

# Monthly spend per drug
statin.timeseries <- aggregate(
  main_statins$cost.thisdrug,
  by = list(main_statins$Month, main_statins$Drug),
  FUN = sum
)
names(statin.timeseries) <- c("Month", "Drug", "Spend")
statin.timeseries$Month <-
  as.Date(paste0(statin.timeseries$Month, "01"), "%Y%m%d")
# theme()/element_text() replace the removed opts()/theme_text().
p <- ggplot(statin.timeseries, aes(Month, Spend)) +
  geom_line() +
  scale_y_continuous(limits = c(0, 3e7)) +
  theme(strip.text.y = element_text(size = 15, angle = 270))
p + facet_grid(Drug ~ .)

# Monthly items per drug
statin.item.timeseries <- aggregate(
  main_statins$items.thisdrug,
  by = list(main_statins$Month, main_statins$Drug),
  FUN = sum
)
names(statin.item.timeseries) <- c("Month", "Drug", "Items")
statin.item.timeseries$Month <-
  as.Date(paste0(statin.item.timeseries$Month, "01"), "%Y%m%d")
p <- ggplot(statin.item.timeseries, aes(Month, Items)) +
  geom_line()
p + facet_grid(Drug ~ .)
# Total statin items per area, split into "ok" and "problem" drugs.
#
# totals: per-drug, per-practice totals with columns `category`, `Drug`,
#         `items.thisdrug` and the column named by `area`.
# area:   name of the column holding the area code (e.g. "PCT.code").
#
# Returns a data frame with one row per area code and the columns `area`,
# `ok.drugs` and `problem.drugs`; a class with no items in an area is NA.
#
# Base aggregate()/merge() are used instead of cast(): cast() comes from the
# reshape package, which this script never loads, and renaming its output to
# three columns fails when only one of the two classes is present.
statin_items_by_area <- function(totals, area) {
  statin_rows <- totals[which(totals$category == "statin"), ]
  # Vectorised flag; `df[rows, ]$item.bad <- TRUE` errors when no row matches.
  is_problem <-
    statin_rows$Drug %in% c("Atorvastatin", "Rosuvastatin Calcium")
  item_sums <- aggregate(
    statin_rows$items.thisdrug,
    by = list(item.bad = is_problem, area = statin_rows[[area]]),
    FUN = sum
  )
  ok <- item_sums[!item_sums$item.bad, c("area", "x")]
  names(ok) <- c("area", "ok.drugs")
  problem <- item_sums[item_sums$item.bad, c("area", "x")]
  names(problem) <- c("area", "problem.drugs")
  # Full outer join so an area keeps its row even if one class is missing.
  merge(ok, problem, by = "area", all = TRUE)
}

## PCT mapping
pct.lookup <- read.csv("epcmem.csv", header = FALSE)
names(pct.lookup) <- c(
  "Practice.code", "PCT.code", "Org.type",
  "Join.date", "Leave.date", "Amended.record"
)
# Keep only rows without a leave date (presumably current memberships --
# confirm against the epcmem file specification).
pct.lookup <- subset(pct.lookup, is.na(Leave.date))
pct.totals <- statin_items_by_area(
  merge(spend.practice.totals, pct.lookup, all.x = TRUE),
  "PCT.code"
)
names(pct.totals) <- c("PCT.code", "ok.drugs", "problem.drugs")
pct.totals$pct.problem <-
  pct.totals$problem.drugs / (pct.totals$problem.drugs + pct.totals$ok.drugs)
# Average items per month: file.list has one entry per monthly extract.
pct.totals$total.items.month <-
  (pct.totals$ok.drugs + pct.totals$problem.drugs) / length(file.list)
pct.totals <- pct.totals[, c("PCT.code", "total.items.month", "pct.problem")]
pct.totals$pct.problem <- round(pct.totals$pct.problem, 3)
pct.totals$total.items.month <- round(pct.totals$total.items.month, 0)
write.csv(pct.totals, "pct_statin_totals.csv", row.names = FALSE)

# Roll up to CCG level for mapping
ccg.rollup <- read.xls(
  "list-of-proposed-practices-ccg.xls",
  sheet = "Practice list"
)
ccg.totals <- statin_items_by_area(
  merge(spend.practice.totals, ccg.rollup, by = "Practice.code", all.x = TRUE),
  "Interim.CCG.code"
)
names(ccg.totals) <- c("CCG.code", "ok.drugs", "problem.drugs")
ccg.totals$ccg.problem <-
  ccg.totals$problem.drugs / (ccg.totals$problem.drugs + ccg.totals$ok.drugs)
# Average items per month: file.list has one entry per monthly extract.
ccg.totals$total.items.month <-
  (ccg.totals$ok.drugs + ccg.totals$problem.drugs) / length(file.list)
ccg.totals <- ccg.totals[, c("CCG.code", "total.items.month", "ccg.problem")]
ccg.totals$ccg.problem <- round(ccg.totals$ccg.problem, 3)
ccg.totals$total.items.month <- round(ccg.totals$total.items.month, 0)
write.csv(ccg.totals, "ccg_statin_totals.csv", row.names = FALSE)

## Savings figures
# Median monthly spend on `drug` multiplied by its saving fraction.
median_monthly_saving <- function(drug) {
  monthly_spend <- statin.timeseries$Spend[statin.timeseries$Drug == drug]
  median(monthly_spend) * problem.drugs$saving[problem.drugs$Drug == drug]
}
median_monthly_saving("Atorvastatin")
median_monthly_saving("Rosuvastatin Calcium")

# JSON format for time series charts
# Nothing in the code below calls RJSONIO; the JSON text is built by hand.
require(RJSONIO)

# Serialise a data frame as a JSON array with one object per row.
#
# dtf: a data frame (or matrix with column names).
#
# Returns a single string of the form
#   [ { "col" : value, ... }, { ... } ]
# Numeric (integer or double) columns are written bare; every other column is
# wrapped in double quotes. String contents are NOT escaped, so values
# containing `"` or `\` produce invalid JSON.
toJSONarray <- function(dtf) {
  if (nrow(dtf) == 0) {
    # Same text the paste() calls below give for an empty set of objects.
    return("[  ]")
  }
  clnms <- colnames(dtf)
  name_value <- function(i) {
    column <- dtf[, i]
    # is.numeric() covers integer columns too; a class(x) != "numeric" test
    # would quote them and misbehaves on objects with several classes.
    quote <- if (is.numeric(column)) "" else "\""
    paste0("\"", i, "\" : ", quote, column, quote)
  }
  # One character vector per column, pasted together row by row. Unlike
  # sapply() + apply(), this also works for a single-row input.
  fields <- lapply(clnms, name_value)
  objs <- do.call(paste, c(fields, sep = ", "))
  objs <- paste("{", objs, "}")
  paste("[", paste(objs, collapse = ", "), "]")
}

# Monthly spend for one drug as chart points: x = month start expressed in
# seconds since 1970-01-01 (days * 24 * 60 * 60), y = spend.
spend_series <- function(drug) {
  series <- statin.timeseries[
    which(statin.timeseries$Drug == drug), c("Month", "Spend")
  ]
  names(series) <- c("x", "y")
  series$x <- as.numeric(as.Date(series$x)) * 24 * 60 * 60
  series
}

atorva <- spend_series("Atorvastatin")
toJSONarray(atorva)

simva <- spend_series("Simvastatin")
toJSONarray(simva)

rosuva <- spend_series("Rosuvastatin Calcium")
toJSONarray(rosuva)

# --------------------------------------------------------------------------------