├── LICENSE ├── Machine Learning └── randomForest.R ├── README.md ├── _config.yml ├── clustering ├── bin │ └── kmeans.R ├── how_to_run ├── input │ ├── example.FS │ ├── example.mx │ └── input_file_format └── output │ ├── example.kmeans.3.mx │ ├── example.kmeans.3.pdf │ ├── example.kmeans.4.mx │ └── example.kmeans.4.pdf ├── feature_selection ├── bin │ ├── Delete_SBS_combine.R │ ├── Delete_by_rank_combine.R │ ├── cv.IMP.R │ ├── cv.idx.R │ ├── cv.test.R │ └── lasso.R ├── how_to_run_lasso ├── how_to_run_wrapper_step0 ├── how_to_run_wrapper_step1_IMP ├── how_to_run_wrapper_step1_IMP_SBS ├── input │ ├── example.initial.FS │ ├── example.train.mx │ └── input_file_format ├── output │ ├── example.selected.FS.IMP │ ├── example.selected.FS.IMP_SBS │ └── example.selected.FS.lasso └── tmp │ ├── IMP │ ├── delete │ │ ├── feat │ │ │ ├── feat.0 │ │ │ ├── feat.1 │ │ │ ├── feat.10 │ │ │ ├── feat.11 │ │ │ ├── feat.12 │ │ │ ├── feat.13 │ │ │ ├── feat.14 │ │ │ ├── feat.15 │ │ │ ├── feat.16 │ │ │ ├── feat.17 │ │ │ ├── feat.18 │ │ │ ├── feat.19 │ │ │ ├── feat.2 │ │ │ ├── feat.20 │ │ │ ├── feat.21 │ │ │ ├── feat.22 │ │ │ ├── feat.3 │ │ │ ├── feat.4 │ │ │ ├── feat.5 │ │ │ ├── feat.6 │ │ │ ├── feat.7 │ │ │ ├── feat.8 │ │ │ └── feat.9 │ │ ├── haha.R │ │ ├── haha.Rout │ │ ├── log │ │ │ ├── log_del.0 │ │ │ ├── log_del.1 │ │ │ ├── log_del.10 │ │ │ ├── log_del.11 │ │ │ ├── log_del.12 │ │ │ ├── log_del.13 │ │ │ ├── log_del.14 │ │ │ ├── log_del.15 │ │ │ ├── log_del.16 │ │ │ ├── log_del.17 │ │ │ ├── log_del.18 │ │ │ ├── log_del.19 │ │ │ ├── log_del.2 │ │ │ ├── log_del.20 │ │ │ ├── log_del.21 │ │ │ ├── log_del.22 │ │ │ ├── log_del.3 │ │ │ ├── log_del.4 │ │ │ ├── log_del.5 │ │ │ ├── log_del.6 │ │ │ ├── log_del.7 │ │ │ ├── log_del.8 │ │ │ └── log_del.9 │ │ ├── perf │ │ │ ├── perf_del.0 │ │ │ ├── perf_del.1 │ │ │ ├── perf_del.10 │ │ │ ├── perf_del.11 │ │ │ ├── perf_del.12 │ │ │ ├── perf_del.13 │ │ │ ├── perf_del.14 │ │ │ ├── perf_del.15 │ │ │ ├── perf_del.16 │ │ │ ├── perf_del.17 │ │ │ ├── perf_del.18 │ │ 
│ ├── perf_del.19 │ │ │ ├── perf_del.2 │ │ │ ├── perf_del.20 │ │ │ ├── perf_del.21 │ │ │ ├── perf_del.22 │ │ │ ├── perf_del.3 │ │ │ ├── perf_del.4 │ │ │ ├── perf_del.5 │ │ │ ├── perf_del.6 │ │ │ ├── perf_del.7 │ │ │ ├── perf_del.8 │ │ │ └── perf_del.9 │ │ ├── perf_del_sum.pdf │ │ ├── perf_del_sum.txt │ │ └── qsub │ │ │ ├── qsub_del.0 │ │ │ ├── qsub_del.1 │ │ │ ├── qsub_del.10 │ │ │ ├── qsub_del.11 │ │ │ ├── qsub_del.12 │ │ │ ├── qsub_del.13 │ │ │ ├── qsub_del.14 │ │ │ ├── qsub_del.15 │ │ │ ├── qsub_del.16 │ │ │ ├── qsub_del.17 │ │ │ ├── qsub_del.18 │ │ │ ├── qsub_del.19 │ │ │ ├── qsub_del.2 │ │ │ ├── qsub_del.20 │ │ │ ├── qsub_del.21 │ │ │ ├── qsub_del.22 │ │ │ ├── qsub_del.3 │ │ │ ├── qsub_del.4 │ │ │ ├── qsub_del.5 │ │ │ ├── qsub_del.6 │ │ │ ├── qsub_del.7 │ │ │ ├── qsub_del.8 │ │ │ └── qsub_del.9 │ └── rank │ │ ├── IMP │ │ └── rank │ ├── IMP_SBS │ └── delete │ │ ├── feat │ │ ├── baseF.0 │ │ ├── baseF.1.1 │ │ ├── baseF.1.2 │ │ ├── baseF.1.3 │ │ ├── baseF.1.4 │ │ ├── baseF.1.5 │ │ ├── baseF.1.6 │ │ ├── baseF.1.7 │ │ ├── baseF.2.1 │ │ ├── baseF.2.2 │ │ ├── baseF.2.4 │ │ ├── baseF.2.5 │ │ ├── baseF.2.6 │ │ ├── baseF.2.7 │ │ ├── baseF.3.1 │ │ ├── baseF.3.2 │ │ ├── baseF.3.4 │ │ ├── baseF.3.6 │ │ ├── baseF.3.7 │ │ ├── baseF.4.1 │ │ ├── baseF.4.2 │ │ ├── baseF.4.4 │ │ ├── baseF.4.7 │ │ ├── baseF.5.1 │ │ ├── baseF.5.2 │ │ ├── baseF.5.4 │ │ └── delF.0 │ │ ├── foo1 │ │ ├── foo2 │ │ ├── foo3 │ │ ├── foo4 │ │ ├── foo5 │ │ ├── haha.R │ │ ├── haha.Rout │ │ ├── log │ │ ├── log_del.0 │ │ ├── log_del.1.1 │ │ ├── log_del.1.2 │ │ ├── log_del.1.3 │ │ ├── log_del.1.4 │ │ ├── log_del.1.5 │ │ ├── log_del.1.6 │ │ ├── log_del.1.7 │ │ ├── log_del.2.1 │ │ ├── log_del.2.2 │ │ ├── log_del.2.4 │ │ ├── log_del.2.5 │ │ ├── log_del.2.6 │ │ ├── log_del.2.7 │ │ ├── log_del.3.1 │ │ ├── log_del.3.2 │ │ ├── log_del.3.4 │ │ ├── log_del.3.6 │ │ ├── log_del.3.7 │ │ ├── log_del.4.1 │ │ ├── log_del.4.2 │ │ ├── log_del.4.4 │ │ ├── log_del.4.7 │ │ ├── log_del.5.1 │ │ ├── log_del.5.2 │ │ └── log_del.5.4 │ │ 
├── perf │ │ ├── perf_del.0 │ │ ├── perf_del.1.1 │ │ ├── perf_del.1.2 │ │ ├── perf_del.1.3 │ │ ├── perf_del.1.4 │ │ ├── perf_del.1.5 │ │ ├── perf_del.1.6 │ │ ├── perf_del.1.7 │ │ ├── perf_del.2.1 │ │ ├── perf_del.2.2 │ │ ├── perf_del.2.4 │ │ ├── perf_del.2.5 │ │ ├── perf_del.2.6 │ │ ├── perf_del.2.7 │ │ ├── perf_del.3.1 │ │ ├── perf_del.3.2 │ │ ├── perf_del.3.4 │ │ ├── perf_del.3.6 │ │ ├── perf_del.3.7 │ │ ├── perf_del.4.1 │ │ ├── perf_del.4.2 │ │ ├── perf_del.4.4 │ │ ├── perf_del.4.7 │ │ ├── perf_del.5.1 │ │ ├── perf_del.5.2 │ │ └── perf_del.5.4 │ │ ├── qsub │ │ ├── qsub_del.0 │ │ ├── qsub_del.1.1 │ │ ├── qsub_del.1.2 │ │ ├── qsub_del.1.3 │ │ ├── qsub_del.1.4 │ │ ├── qsub_del.1.5 │ │ ├── qsub_del.1.6 │ │ ├── qsub_del.1.7 │ │ ├── qsub_del.2.1 │ │ ├── qsub_del.2.2 │ │ ├── qsub_del.2.4 │ │ ├── qsub_del.2.5 │ │ ├── qsub_del.2.6 │ │ ├── qsub_del.2.7 │ │ ├── qsub_del.3.1 │ │ ├── qsub_del.3.2 │ │ ├── qsub_del.3.4 │ │ ├── qsub_del.3.6 │ │ ├── qsub_del.3.7 │ │ ├── qsub_del.4.1 │ │ ├── qsub_del.4.2 │ │ ├── qsub_del.4.4 │ │ ├── qsub_del.4.7 │ │ ├── qsub_del.5.1 │ │ ├── qsub_del.5.2 │ │ └── qsub_del.5.4 │ │ └── summary │ │ ├── bestFeat │ │ └── bestPerf │ └── idx │ └── cv.idx ├── plots ├── 1.boxplots.R ├── 10.ballonplots.R ├── 11.vennpieplots.R ├── 2.violinplots.R ├── 3.histogramplots.R ├── 4.densityplots.R ├── 5.dotplots.R ├── 6.scatterplots.R ├── 7.volcanoplots.R ├── 8.manhattanplots.R ├── 9.heatmaps.R ├── README.md ├── ballon_plots_GO.txt ├── box_plots_mtcars.txt ├── heatmaps.txt ├── histogram_plots.txt ├── manhattan_plots_gwasResults.txt └── volcano_plots.txt └── pre-processGff ├── bin ├── getlncRNAexon.py ├── makeIntron.py ├── specify5-3UTR.R └── specify5-3UTR.py ├── output └── clean_gtf │ ├── 3prime_overlapping_ncrna.gtf │ ├── Mt_rRNA.gtf │ ├── Mt_tRNA.gtf │ ├── antisense.gtf │ ├── lincRNA.gtf │ ├── miRNA.gtf │ ├── misc_RNA.gtf │ ├── non_coding.gtf │ ├── rRNA.gtf │ ├── sense_intronic.gtf │ ├── sense_overlapping.gtf │ ├── snRNA.gtf │ └── snoRNA.gtf ├── processGFF.sh └── 
readme /Machine Learning/randomForest.R: -------------------------------------------------------------------------------- 1 | # this file introduces how to do machine learning using random forest in R 2 | # 3 | # The script is written by 2018 rotation student, Zhuoer Dong 4 | # 5 | # The script is last edited on 2018/11/07 6 | 7 | 8 | # Here we use **Random Forest** classifier as an example. 9 | 10 | 11 | # 1) preparation --------------- 12 | 13 | # we need the following packages 14 | # 1. magrittr enable one style of writing R code. For why I use that style, 15 | # please refer to https://r4ds.had.co.nz/pipes.html#piping-alternatives 16 | # 2. dplyr: manipulate data frame 17 | # 3. randomForest: build random forest model 18 | # 4. ROCR: ROC analysis 19 | # 5. GGally: plot correlation between features 20 | 21 | 22 | # specify needed packages 23 | cran_pkg <- c('magrittr', 'dplyr', 'randomForest', 'ROCR', 'GGally') 24 | # install packages which doesn’t exist in your system 25 | # (`.packages(T)` will list all existing packages in the system.) 26 | lapply(cran_pkg, function(x) {if (!(x %in% .packages(T))) install.packages(x)}) 27 | 28 | # To avoid conflict of function name, in the following code, 29 | # I prefer `pkg::fun()` instead of `library(pkg)`. 30 | library(magrittr) 31 | 32 | 33 | # Before we start, let set the random seed to make our results reproducible: 34 | 35 | set.seed(0) 36 | 37 | # 2) Generate/Read a data set ------------- 38 | 39 | # We use one of R’s built-in data set, `iris`, Edgar Anderson’s Iris Data set. 40 | 41 | # The original data set contains observations for four features 42 | # (sepal length and width, and petal length and width — all in cm) of 43 | # 150 flowers of three species (each 50). 44 | 45 | # To make things simple, here we only choose two species, 46 | # `versicolor` and `virginica`. 47 | 48 | # The frist line turn iris into a tibble, a modern reimagining of the data.frame. 
49 | # The second line selects rows whose species is not setosa, 50 | # so only versicolor and virginica are left. 51 | # The third line drops the unused factor level of the Species variable, 52 | # randomForest::randomForest() would complain if you don’t do this. 53 | # (This is a little technical, the original Species contains three levels, 54 | # setosa, versicolor and virginica. Although we remove all setosa values, 55 | # the setosa level still exists, and now this level contains no values, 56 | # that would cause randomForest::randomForest() to fail. 57 | # After we call factor(), Species only contains two levels, 58 | # both do have values.) 59 | df <- iris %>% tibble::as_tibble() %>% 60 | dplyr::filter(Species != 'setosa') %>% 61 | dplyr::mutate(Species = factor(Species)) 62 | 63 | 64 | # Let’s have a look at our data (only part of it is shown): 65 | df %>% {dplyr::bind_rows(head(., 3), tail(., 3))} 66 | 67 | 68 | # 3) Divide the data into training and test sets -------------- 69 | 70 | # Before we build the model, 71 | # we need to divide the data set into training set and testing set. 72 | # So we can train our model using data in training set, 73 | # and evaluate the model using data in testing set. 74 | 75 | # Here we randomly assign 80 percent of the samples to the training set, 76 | # and the remaining 20 percent to the testing set. 77 | 78 | 79 | # The following code seems a little complicated, 80 | # and it requires you to be familiar with the R language. 81 | 82 | # Anyway, I will try to use a simple example to explain the core idea: 83 | # - Imagine your data contains only 5 rows, then 80 percent is 5 * 0.8 = 4 84 | # (here `nrow_training` is `4`). 
85 | # - Imagine you decide to choose the 1st, 2nd, 3rd and 5th rows for training 86 | # (here `indexes` is `c(1, 2, 3, 5)`) 87 | # - Now `training` contains the 1st, 2nd, 3rd and 5th rows of `df` 88 | # (`[indexes, ]` means to choose these rows) 89 | # - And `testing` contains the 4th row of `df` 90 | # (`[-indexes, ]` means not to choose these rows, 91 | # so only the 4th row is left) 92 | 93 | nrow_training <- floor(nrow(df) * 0.8) # Calculate the size of the training set 94 | indexes <- sample(1:nrow(df), nrow_training) # these rows will be selected for training 95 | 96 | training <- df[indexes, ] 97 | testing <- df[-indexes, ] 98 | 99 | # 4) Build the model on training set ----------------- 100 | 101 | # Then we can build a random forest model. 102 | 103 | # The code is fairly easy and straightforward: 104 | # - `Species` is the response variable 105 | # - `.` tells that all other variables are features 106 | # - `training` is the data to train the model 107 | rf_classifier = randomForest::randomForest(Species ~ ., training) 108 | 109 | 110 | # Let’s have a look at our model 111 | rf_classifier 112 | 113 | 114 | # 5) Evaluate the model on test set -------------------- 115 | 116 | # After we build the model, we can make predictions on the testing set: 117 | 118 | # `predict()` needs two arguments, the model and a `data.frame` of features. 119 | # (`-ncol(testing)` means to drop the last column, 120 | # so `testing[, -ncol(testing)]` only contains features) 121 | predicted_value <- predict(rf_classifier, testing[, -ncol(testing)]) 122 | 123 | # we use `testing[[ncol(testing)]]` to get the last column, i.e., 124 | # the real value of `Species` in the testing set 125 | real_value <- testing[[ncol(testing)]] 126 | 127 | # As you can see, `predicted_value` and `real_value` both contain 20 values, 128 | # corresponding to the 20 rows of testing data. 129 | # Each value tells which species a row belongs to, 130 | # the former is the model’s prediction, the latter is the real case. 
131 | predicted_value 132 | real_value 133 | 134 | # We can reformat the result to make it more clear: 135 | tibble::tibble(predicted_value, real_value) %>% 136 | tibble::add_column(correct = predicted_value == real_value) 137 | 138 | # You can summarise the result into a confusion matrix, 139 | # and calculate sensitivity, specificity, accuracy, etc. 140 | 141 | 142 | # 6) ROC ------------------------------ 143 | 144 | # Finally, let’s draw a ROC curve. 145 | 146 | 147 | # for each row, 148 | # we use the model to predict the probability of it belongs to each species 149 | probability <- predict(rf_classifier, testing[, -ncol(testing)], type = 'prob') 150 | 151 | # we flag the first level (`versicolor` here, notice the `[1]`) as `1`, 152 | # the second level (`virginica` here) as `0`. 153 | # (`as.integer()` will convert `T` to `1`, `F` to `0`.) 154 | label <- as.integer(testing[[5]] == colnames(probability)[1]) 155 | 156 | 157 | # we calculate the ROC statistics. 158 | # `[ , 1]` chooses the first column, corresponds to the `[1]` above. 159 | prediction <- ROCR::prediction(probability[ , 1], label) 160 | 161 | 162 | 163 | # Plot the ROC using false positive rate (`'fpr'`) as x axis, 164 | # true positive rate (`'tpr'`) as y axis. 
165 | prediction %>% 166 | ROCR::performance('tpr', 'fpr') %>% 167 | ROCR::plot(main = 'ROC Curve') 168 | 169 | 170 | # Finally, we can cauculate the AUC 171 | 172 | 173 | ROCR::performance(prediction, 'auc')@y.values[[1]] 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Shared Scripts 2 | 3 | by [Lu Lab @ Tsinghua University](http://bioinfo.life.tsinghua.edu.cn) 4 | 5 | * **local dir**: /BioII/lulab_b/shared/shared_scripts/ 6 | * **github link**: https://github.com/lulab/shared_scripts 7 | 8 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /clustering/bin/kmeans.R: -------------------------------------------------------------------------------- 1 | Args = commandArgs(TRUE); 2 | inputF1 = Args[1];#input the matrix; with header; each column is a feature and each row is a sample 3 | inputF2 = Args[2];#input features that used to perform clustering 4 | inputF3 = as.numeric(Args[3]);#input k as parameter 5 | output1 = Args[4];#output the matrix with clusterID; 6 | output2 = Args[5];#output the heatmap file; 7 | library("gplots") 8 | 9 | #preprocessing matrix; 10 | Matrix = read.table(inputF1,sep="\t",head=T); 11 | FN = colnames(Matrix); 12 | FI = as.vector(as.matrix(read.table(inputF2,head=F,sep="\t"))); 13 | ID = which(is.element(FN,FI)); 14 | Matrix1 = as.matrix(Matrix[,ID]); 15 | #kmeans clustering 16 | km = kmeans(Matrix1,inputF3,iter.max=1000); 17 | cluster = (km$cluster); 18 | #output matrix with cluster ID for each row; 19 | Matrix2 = cbind(cluster,Matrix); 20 | write.table(Matrix2,file=output1,sep="\t",quote=F,col.name=T,row.name=F); 21 | 22 | #draw heatmap with sampled 5000 samples; 23 | 
mycolhc = rainbow(length(unique(cluster)), start=0.1, end=0.9); 24 | mycolhc = mycolhc[cluster]; 25 | ID3 = order(cluster); 26 | Matrix3 = Matrix1[ID3,]; 27 | mycolhc3= mycolhc[ID3]; 28 | if(nrow(Matrix3)>5000){ 29 | ID4 = sort(sample(1:nrow(Matrix3),size=5000,replace=F)); 30 | Matrix4 = Matrix3[ID4,]; 31 | mycolhc4= mycolhc3[ID4]; 32 | }else{ 33 | Matrix4 = Matrix3; 34 | mycolhc4= mycolhc3; 35 | } 36 | pdf(output2,width=4,height=5); 37 | heatmap.2(Matrix4,Rowv = FALSE, Colv=FALSE,margins = c(8, 1),labRow=F,dendrogram="none",scale = "none",trace="none",RowSideColors=mycolhc4); 38 | dev.off() 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /clustering/how_to_run: -------------------------------------------------------------------------------- 1 | ######## this is the manual for clustering methods and generate correponding heatmaps 2 | #### set up the path variables; 3 | path0=/home/hulong/module/clustering/bin 4 | path1=/home/hulong/module/clustering/input 5 | path2=/home/hulong/module/clustering/output 6 | #### K-means clustering with k=3 or k=4; 7 | ## It requires a matirx file: $path1/example.mx 8 | ## It requires a feature name file: $path1/example.FS 9 | ## It requires a user-defined parameter k: $k 10 | ## It outputs a matrix file with each sample assigned a clusterID in the first column: $path2/example.kmeans.$k.mx 11 | ## It outputs a heatmap showing how each clusters are like: $path2/example.kmeans.$k.pdf 12 | for k in $(seq 3 4);do echo $k; 13 | Rscript $path0/kmeans.R $path1/example.mx $path1/example.FS $k $path2/example.kmeans.$k.mx $path2/example.kmeans.$k.pdf; 14 | done 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /clustering/input/example.FS: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | infernal.lo 3 | polya.lo 4 | smallrna.lo 5 | rnacode.lo 6 | gc.lo 7 | nonpolya.lo 8 | blastn.lo 9 | 
h3k36me3.lo 10 | h3k4me3.up 11 | -------------------------------------------------------------------------------- /clustering/input/input_file_format: -------------------------------------------------------------------------------- 1 | ######## this is a readme about the input files 2 | 3 | #### K-means; 4 | 5 | # Two files are needed: 6 | 7 | # 1. input.example.mx 8 | # It is a matrix file. Each row is a sample and each column is a feature. The first row is a header, recording each feature names; 9 | # For example: 10 | # X1 X2 X3 X4 X5 #this is the header, not numeric, indicating each feature's name 11 | # x1,1 x1,2 x1,3 x1,4 x1,5 #this is the first sample, a vector with 5 features 12 | # x2,1 x2,2 x2,3 x2,4 x2,5 #this is the second sample, a vector with 5 features 13 | 14 | # 2. input.example.FS 15 | # It is a feature name file. Each row is a feature name; These features in the matrix file would be in used when clustering 16 | # For example: 17 | # X1 18 | # X3 19 | # X4 20 | # In this case, only X1, X3 and X4 would be used to perform the clustering. 
namely, only the following part of the original matrix is used in clustering: 21 | # X1 X3 X4 22 | # x1,1 x1,3 x1,4 23 | # x2,1 x2,3 x2,4 24 | -------------------------------------------------------------------------------- /clustering/output/example.kmeans.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lulab/shared_scripts/a49e08b6a09097a26baaa669bcf0ec2c1e71c741/clustering/output/example.kmeans.3.pdf -------------------------------------------------------------------------------- /clustering/output/example.kmeans.4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lulab/shared_scripts/a49e08b6a09097a26baaa669bcf0ec2c1e71c741/clustering/output/example.kmeans.4.pdf -------------------------------------------------------------------------------- /feature_selection/bin/Delete_SBS_combine.R: -------------------------------------------------------------------------------- 1 | #Args <-commandArgs(TRUE); 2 | #inputF1= Args[1];#input example.train.mx ; first column is bin second is label, seperated by tab, with header; 3 | #inputF2= Args[2];#input the cv.idx, no header; 4 | #inputF3= Args[3];#input CV fold; 5 | #inputF4= Args[4];#the base feature file; no header;one column; contains only features name 6 | #inputF5= Args[5];#the deleting feature file; no header;tow columns; first is feature names; second is the rank. 
features with the same rank will be delete together 7 | 8 | 9 | delta=-0.01; 10 | my_qsub<-function(inputF1,inputF2,inputF3,inputF4,outputF,qsubF,logF){ 11 | str=paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N SBS_c\nRscript","/home/hulong/module/feature_selection/bin/cv.test.R",inputF1,inputF2,inputF3,inputF4,outputF) 12 | write(str,file=qsubF); 13 | system(paste("rm",outputF)); 14 | system(paste("rm",logF)); 15 | system(paste("qsub -V -o",logF,qsubF)); 16 | } 17 | my_check<-function(file_path,FileList){ 18 | while(1==1){ 19 | filelist_all=list.files(path=file_path,full.names=T); 20 | tmp=is.element(FileList,filelist_all); 21 | if(all(tmp)){break;} 22 | Sys.sleep(30); 23 | } 24 | } 25 | ######## calculate the base feature set performance. 26 | ROLL=0; 27 | featF=inputF4; 28 | qsubF=paste("./qsub/qsub_del",ROLL,sep="."); 29 | logF= paste("./log/log_del",ROLL,sep="."); 30 | perfF=paste("./perf/perf_del",ROLL,sep="."); 31 | my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 32 | file_path="./perf"; 33 | my_check(file_path,perfF); 34 | Sys.sleep(2); 35 | tmp=as.vector(as.matrix(read.table(perfF,sep="\t",head=F))) 36 | all.acc=tmp[1]; 37 | print(paste("base feature set's performance is: ",all.acc)); 38 | print("start deleting features............................"); 39 | best_performance= all.acc; 40 | baseFeat = as.vector(as.matrix(read.table(inputF4,sep="\t",head=F))); 41 | 42 | 43 | RANK = read.table(inputF5,head=F,sep="\t"); 44 | RANK_groups = RANK[,2]; 45 | RANK_group = as.vector(unique(RANK_groups)); 46 | deleFeat = as.vector(RANK[,1]); 47 | RANK_group_0 = RANK_group; 48 | 49 | for (ROLL in 1:length(RANK_group_0)){ 50 | bad_rank = c(); 51 | bad_feature = c(); 52 | BP = c(); 53 | FileList = c(); 54 | for (j in sort(RANK_group,decreasing=T)){ 55 | DeleFeat = deleFeat[which(RANK_groups==j)]; 56 | remain_features_tmp = setdiff(baseFeat,DeleFeat); 57 | print(paste("deleting this feature now: ",paste(DeleFeat,collapse=" "),sep="")); 58 | featF = 
paste("./feat/baseF",ROLL,j,sep="."); 59 | qsubF = paste("./qsub/qsub_del",ROLL,j,sep="."); 60 | logF = paste("./log/log_del",ROLL,j,sep="."); 61 | perfF = paste("./perf/perf_del",ROLL,j,sep="."); 62 | write.table(remain_features_tmp,file=featF,quote=F,sep="\t",col.name=F,row.name=F); 63 | my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 64 | FileList = c(FileList,perfF); 65 | } 66 | my_check(file_path,FileList);Sys.sleep(2); 67 | for (j in sort(RANK_group,decreasing=T)){ 68 | perfF = paste("./perf/perf_del",ROLL,j,sep="."); 69 | Perf = as.vector(as.matrix(read.table(perfF,head=F,sep="\t"))); 70 | MYPERFORMANCE = c(Perf[1]); 71 | names(MYPERFORMANCE) = j; 72 | print(paste("performance summary of three trial is: ",MYPERFORMANCE,sep="")); 73 | BP = c(BP,MYPERFORMANCE); 74 | } 75 | BPB = max(BP,na.rm=T); 76 | if(best_performance<(BPB-delta)){ 77 | best_performance = BPB; 78 | bad_rank = sort(RANK_group,decreasing=T)[(which(BP==BPB)[1])]; 79 | bad_feature = deleFeat[which(RANK_groups==bad_rank)]; 80 | } 81 | if (length(bad_feature)==0){ 82 | print("no bad feature can be deleted, break here");break; 83 | }else if(length(baseFeat)<=5){ 84 | print("less than five features");break; 85 | }else{ 86 | print(paste("find the bad feature: ",paste(bad_feature,collapse=" "),sep="")); 87 | print(paste("the best performance is now: ",best_performance,sep="")); 88 | baseFeat = setdiff(baseFeat,bad_feature); 89 | ID = which(!(is.element(RANK[,2],bad_rank))); 90 | RANK = RANK[ID,]; 91 | RANK_groups = RANK[,2]; 92 | RANK_group = as.vector(unique(RANK_groups)); 93 | deleFeat = as.vector(RANK[,1]); 94 | print(paste("now the good features are: ",paste(baseFeat,collapse=" "),sep="")); 95 | } 96 | } 97 | write.table(baseFeat,file=paste("./summary/bestFeat",sep="."),quote=F,col.name=F,row.name=F,sep="\t"); 98 | write.table(best_performance,file=paste("./summary/bestPerf",sep="."),quote=F,col.name=F,row.name=F,sep="\t") 99 | 100 | rm(list=ls()); 101 | 102 | 
-------------------------------------------------------------------------------- /feature_selection/bin/Delete_by_rank_combine.R: -------------------------------------------------------------------------------- 1 | #Args <-commandArgs(TRUE); 2 | #inputF1= Args[1];#input TR.b.m ; first column is bin,second is label, seperated by tab, with header; 3 | #inputF2= Args[2];#input the cv.idx, no header; 4 | #inputF3= Args[3];#input CV fold; 5 | #inputF4= Args[4];#the rank file; the most important ones are on the top. two columns. the first is the features' names, the second is the rank of features. features have the same will be deleted together in one time. 6 | 7 | 8 | my_qsub<-function(inputF1,inputF2,inputF3,inputF4,outputF,qsubF,logF){ 9 | str=paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N DEL_rank_c\nRscript","/home/hulong/module/feature_selection/bin/cv.test.R",inputF1,inputF2,inputF3,inputF4,outputF) 10 | write(str,file=qsubF); 11 | system(paste("rm",outputF)); 12 | system(paste("rm",logF)); 13 | system(paste("qsub -V -o",logF,qsubF)); 14 | } 15 | my_check<-function(file_path,FileList){ 16 | while(1==1){ 17 | filelist_all=list.files(path=file_path,full.names=T); 18 | tmp=is.element(FileList,filelist_all); 19 | if(all(tmp)){break;} 20 | Sys.sleep(30); 21 | } 22 | } 23 | 24 | RANK = read.table(inputF4,head=F,sep="\t"); 25 | RANK_groups = RANK[,2]; 26 | RANK_group = as.vector(unique(RANK_groups)); 27 | Feat = as.vector(RANK[,1]); 28 | 29 | FileList = c(); 30 | for (i in sort(RANK_group,decreasing=T)){ 31 | ID1 = which(RANK_groups<=i); 32 | Feat_i = Feat[ID1]; 33 | tmp = max(RANK_group)-i; 34 | featF = paste("./feat/feat",tmp,sep="."); 35 | write.table(Feat_i,file=featF,sep="\t",col.name=F,row.name=F,quote=F); 36 | qsubF = paste("./qsub/qsub_del",tmp,sep="."); 37 | logF = paste("./log/log_del",tmp,sep="."); 38 | perfF = paste("./perf/perf_del",tmp,sep="."); 39 | my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 40 | FileList = c(FileList,perfF); 41 
| } 42 | file_path="./perf" 43 | my_check(file_path,FileList); 44 | Sys.sleep(2); 45 | Perf_summary=matrix(0,nrow=0,ncol=2); 46 | for (i in sort(RANK_group,decreasing=T)){ 47 | tmp = max(RANK_group)-i; 48 | perfF = paste("./perf/perf_del",tmp,sep="."); 49 | Perf = as.vector(as.matrix(read.table(perfF,head=F,sep="\t"))); 50 | Perf_summary = rbind(Perf_summary,Perf) 51 | } 52 | colnames(Perf_summary)<-c("acc","nvar"); 53 | write.table(Perf_summary,file=paste("perf_del_sum","txt",sep="."),sep="\t",col.name=T,row.name=F,quote=F); 54 | pdf(paste("perf_del_sum","pdf",sep=".")) 55 | plot(Perf_summary[,"nvar"],Perf_summary[,"acc"],type="o",lwd=2); 56 | dev.off() 57 | 58 | rm(list=ls()); 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /feature_selection/bin/cv.IMP.R: -------------------------------------------------------------------------------- 1 | Args <-commandArgs(TRUE); 2 | inputF1= Args[1];#input train matrix; first column is bin, second is label, seperated by tab, with header; 3 | inputF2= Args[2];#input the cv.idx, no header; 4 | inputF3= Args[3];#input cross-validation fold;should be consistent with the cv.idx 5 | outputF2=Args[4];#output the IMP 6 | library(randomForest); 7 | cv.fold=as.numeric(inputF3); 8 | Matrix1=read.table(inputF1,sep="\t",head=T,check.name=F); 9 | #first column is bin, remove it when learning; 10 | Matrix1=Matrix1[,-1]; 11 | trainx=as.matrix(Matrix1[,-1]); 12 | trainy=as.factor(as.matrix(Matrix1[,1])); 13 | rm("Matrix1"); 14 | idx=as.vector(as.matrix(read.table(inputF2,sep="\t",head=F))); 15 | my_fuc<-function(trainx,trainy,idx,cv.fold){ 16 | cv.pred <- trainy; 17 | p=ncol(trainx); 18 | MTRY=max(1,floor(sqrt(p))); 19 | for (i in 1:cv.fold) { 20 | all.rf <- randomForest(trainx[idx != i, , drop = FALSE], trainy[idx != i], trainx[idx == i, , drop = FALSE], trainy[idx == i],mtry=MTRY,importance=T,ntree=1000,proximity=F); 21 | cv.pred[idx == i] <- all.rf$test$predicted; 22 | if(i==1){ 23 | 
IMP=all.rf$importance; 24 | }else{ 25 | IMP=IMP+all.rf$importance; 26 | } 27 | } 28 | acc=mean(cv.pred==trainy); 29 | out=list(acc=acc,IMP=IMP); 30 | } 31 | 32 | all.acc=my_fuc(trainx,trainy,idx,cv.fold); 33 | IMP=all.acc$IMP; 34 | IMP2=IMP[order(IMP[,"MeanDecreaseAccuracy"], decreasing = F),]; 35 | write.table(IMP2,file=outputF2,sep="\t",quote=F,row.name=T,col.name=T); 36 | 37 | rm(list=ls()); 38 | 39 | 40 | -------------------------------------------------------------------------------- /feature_selection/bin/cv.idx.R: -------------------------------------------------------------------------------- 1 | Args <-commandArgs(TRUE); 2 | inputF1= Args[1];#input label column, with header; 3 | inputF2= Args[2];#input CV fold 4 | outputF= Args[3];#output index 5 | 6 | trainy=as.factor(as.matrix(read.table(inputF1,sep="\t",head=T))); 7 | cv.fold=as.numeric(inputF2); 8 | n <- length(trainy); 9 | nlvl <- table(trainy); 10 | idx <- numeric(n); 11 | for (i in seq_along(nlvl)) { 12 | idx[which(trainy == levels(trainy)[i])] <- rep(1:cv.fold,each=max(1,floor(nlvl[i]/cv.fold)),length.out = nlvl[i]);#each is clamped to >=1 so a class with fewer samples than cv.fold still gets valid fold IDs instead of NA 13 | } 14 | write.table(idx,file=outputF,sep="\t",quote=F,col.name=F,row.name=F); 15 | rm(list=ls()); 16 | 17 | -------------------------------------------------------------------------------- /feature_selection/bin/cv.test.R: -------------------------------------------------------------------------------- 1 | Args <-commandArgs(TRUE); 2 | inputF1= Args[1];#input TR.b.m ; first column is bin second is label, separated by tab, with header; 3 | inputF2= Args[2];#input the cv.idx, no header; 4 | inputF3= Args[3];#input CV fold; 5 | inputF4= Args[4];#input features, with no header; 6 | outputF= Args[5];#output the performance and how many features used in this round. 
7 | 8 | library(randomForest); 9 | cv.fold=as.numeric(inputF3); 10 | Matrix1=read.table(inputF1,sep="\t",head=T,check.name=F); 11 | #first column is bin, remove it when learning; 12 | Matrix1=Matrix1[,-1]; 13 | trainx=as.matrix(Matrix1[,-1]); 14 | trainy=as.factor(as.matrix(Matrix1[,1])); 15 | rm("Matrix1"); 16 | idx=as.vector(as.matrix(read.table(inputF2,sep="\t",head=F))); 17 | FI=as.vector(as.matrix(read.table(inputF4,head=F,sep="\t"))); 18 | FN=colnames(trainx); 19 | ID=which(is.element(FN,FI)); 20 | trainx=trainx[,ID]; 21 | my_fuc<-function(trainx,trainy,idx,cv.fold){ 22 | trainx=as.matrix(trainx); 23 | cv.pred <- trainy; 24 | p=ncol(trainx); 25 | MTRY=max(1,floor(sqrt(p))); 26 | for (i in 1:cv.fold) { 27 | all.rf <- randomForest(trainx[idx != i, , drop = FALSE], trainy[idx != i], trainx[idx == i, , drop = FALSE], trainy[idx == i],mtry=MTRY,importance=F,ntree=1000,proximity=F); 28 | cv.pred[idx == i] <- all.rf$test$predicted; 29 | } 30 | acc=mean(cv.pred==trainy); 31 | acc; 32 | } 33 | 34 | all.acc=my_fuc(trainx,trainy,idx,cv.fold); 35 | out=c(all.acc,length(FI)); 36 | write.table(out,file=outputF,sep="\t",quote=F,row.name=F,col.name=F); 37 | 38 | rm(list=ls()); 39 | 40 | 41 | -------------------------------------------------------------------------------- /feature_selection/bin/lasso.R: -------------------------------------------------------------------------------- 1 | ######## lasso algorithm 2 | Args = commandArgs(TRUE); 3 | inputF1 = Args[1];#with header; bin, anno, features 4 | inputF2 = Args[2];#input the initial feature names, with no header; 5 | outputF = Args[3];#output features coeffiecents 6 | 7 | mx = read.table(inputF1,sep="\t",header=T);#the matrix in the fixed format 8 | mx = mx[,-1]; 9 | dataall = as.matrix(mx[,-1]); 10 | classesall = as.matrix(mx[,1]); 11 | FI = as.vector(as.matrix(read.table(inputF2,head=F,sep="\t"))); 12 | FN = colnames(dataall); 13 | ID = which(is.element(FN,FI)); 14 | 15 | dataall = dataall[,ID]; 16 | 17 | 
library("glmnet") 18 | cvob1 = cv.glmnet(dataall,as.factor(classesall),family="multinomial"); 19 | coefs <- coef(cvob1,s="lambda.1se"); 20 | classesID = levels(as.factor(classesall));#class labels in factor-level order, matching the order of the per-class coefficient list read below (order of first appearance could mislabel columns) 21 | 22 | for(i in 1:length(classesID)){ 23 | coefs_i = as.matrix(coefs[[i]]); 24 | if(i==1){ 25 | out = coefs_i; 26 | }else{ 27 | out = cbind(out,coefs_i); 28 | } 29 | } 30 | colnames(out) = classesID; 31 | out = data.frame(out); 32 | coef = rownames(out); 33 | coef = data.frame(coef); 34 | out2 = cbind(coef,out); 35 | 36 | write.table(out2,file=outputF,quote=F,row.names=F,col.names=T,sep="\t"); 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /feature_selection/how_to_run_lasso: -------------------------------------------------------------------------------- 1 | ######## this is the manual for feature selection methods 2 | #### set up the path variables; 3 | path0=/home/hulong/module/feature_selection/bin 4 | path1=/home/hulong/module/feature_selection/input 5 | path2=/home/hulong/module/feature_selection/output 6 | path3=/home/hulong/module/feature_selection/tmp 7 | #### lasso algorithm to perform regression and shrinkage 8 | ## It requires a matrix file: $path1/example.train.mx 9 | ## It requires an initial feature file: $path1/example.initial.FS; 10 | ## It outputs a matrix file in which each element is a coefficient for each feature (row) on each class (column) 11 | 12 | /data/apps/R-2.15/bin/Rscript $path0/lasso.R $path1/example.train.mx $path1/example.initial.FS $path3/foo; 13 | awk 'NR!=1&&$2!=0&&$3!=0&&$4!=0&&$5!=0{print $1}' $path3/foo |grep -v "Intercept" > $path2/example.selected.FS.lasso 14 | rm -f $path3/foo; 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /feature_selection/how_to_run_wrapper_step0: -------------------------------------------------------------------------------- 1 | ######## Preparation for feature selection by wrapper method, using Random Forest 
algorithm as classifier. 2 | path0=/home/hulong/module/feature_selection/bin 3 | path1=/home/hulong/module/feature_selection/input 4 | path2=/home/hulong/module/feature_selection/output 5 | path3=/home/hulong/module/feature_selection/tmp 6 | cd $path3;mkdir idx IMP IMP_SBS 7 | 8 | #### generate the sample index for cross-validation 9 | ######## idx 10 | cd $path3/idx 11 | cut -f 2 $path1/example.train.mx > $path3/foo; 12 | Rscript $path0/cv.idx.R $path3/foo 5 $path3/idx/cv.idx; 13 | rm -f $path3/foo; # dropped -e: it is not an rm option in POSIX/GNU coreutils 14 | 15 | -------------------------------------------------------------------------------- /feature_selection/how_to_run_wrapper_step1_IMP: -------------------------------------------------------------------------------- 1 | ######## use randomForest importance to rank features then delete features in this order 2 | path0=/home/hulong/module/feature_selection/bin 3 | path1=/home/hulong/module/feature_selection/input 4 | path2=/home/hulong/module/feature_selection/output 5 | path3=/home/hulong/module/feature_selection/tmp 6 | cd $path3/IMP; 7 | mkdir rank delete 8 | 9 | ######## IMP to rank 10 | Rscript $path0/cv.IMP.R $path1/example.train.mx $path3/idx/cv.idx 5 $path3/IMP/rank/IMP; 11 | ######################################## delete multiple features per time. 
12 | ######## rank features by importance and group them (up-stream, local and down-stream as one group and share one rank) 13 | awk 'NR>1{print $0}' $path3/IMP/rank/IMP |sort -k6,6gr |cut -f 1 |awk 'BEGIN{FS=".";OFS="\t"}{print $1,$0,NR}' |sort -k1,1 -k3,3n > $path3/IMP/foo; 14 | awk 'BEGIN{OFS="\t";foo="";rank=0;foo1=""}{if(foo!=$1){if(foo!=""){print rank,foo1;}foo=$1;foo1=$2;rank=$3;}else{foo1=(foo1"\t"$2)}}END{print rank,foo1}' $path3/IMP/foo |sort -k1,1n > $path3/IMP/foo1 15 | awk 'BEGIN{OFS="\t"}{for(i=2;i<=NF;i++){print $i,NR}}' $path3/IMP/foo1 > $path3/IMP/rank/rank; 16 | rm -f $path3/IMP/foo* # dropped -e: it is not an rm option in POSIX/GNU coreutils 17 | ######## delete 18 | cd $path3/IMP/delete; 19 | mkdir qsub log perf feat 20 | echo " inputF1=\"$path1/example.train.mx\"; 21 | inputF2=\"$path3/idx/cv.idx\"; 22 | inputF3=\"5\"; 23 | inputF4=\"$path3/IMP/rank/rank\";" > $path3/IMP/delete/haha 24 | cat $path3/IMP/delete/haha $path0/Delete_by_rank_combine.R > $path3/IMP/delete/haha.R; 25 | rm -f $path3/IMP/delete/haha # dropped -e: it is not an rm option in POSIX/GNU coreutils 26 | R CMD BATCH $path3/IMP/delete/haha.R 27 | 28 | ######## summary selected features, select best+suboptimal performance as turnover point. 
29 | nvar=`awk 'NR>1{printf "%0.5f\t%d\n",$1,$2}' $path3/IMP/delete/perf_del_sum.txt|sort -k1,1gr -k2,2g|awk '{if(NR==1){max=$1}if($1>max-0.002){print $2}}'|sort -k1,1n|head -1` 30 | echo $nvar; 31 | head -n $nvar $path3/IMP/rank/rank |cut -f 1 > $path2/example.selected.FS.IMP; 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /feature_selection/how_to_run_wrapper_step1_IMP_SBS: -------------------------------------------------------------------------------- 1 | ######## use SBS algorithm after IMP algorithm 2 | path0=/home/hulong/module/feature_selection/bin 3 | path1=/home/hulong/module/feature_selection/input 4 | path2=/home/hulong/module/feature_selection/output 5 | path3=/home/hulong/module/feature_selection/tmp 6 | cd $path3/IMP_SBS; 7 | mkdir delete 8 | cd $path3/IMP_SBS/delete; 9 | mkdir qsub log perf feat summary 10 | ### get the initial feature; 11 | awk 'BEGIN{FS=".";OFS="\t"}{print $1,$0}' $path2/example.selected.FS.IMP | awk 'BEGIN{OFS="\t";foo="";count=0;}{if($1!=foo){count=count+1;foo=$1}print $2}' > $path3/IMP_SBS/delete/feat/baseF.0; 12 | awk 'BEGIN{FS=".";OFS="\t"}{print $1,$0}' $path2/example.selected.FS.IMP | awk 'BEGIN{OFS="\t";foo="";count=0;}{if($1!=foo){count=count+1;foo=$1}print $2,count}' > $path3/IMP_SBS/delete/feat/delF.0; 13 | ### get performance 14 | echo "inputF1=\"$path1/example.train.mx\"; 15 | inputF2=\"$path3/idx/cv.idx\"; 16 | inputF3=\"5\"; 17 | inputF4=\"$path3/IMP_SBS/delete/feat/baseF.0\"; 18 | inputF5=\"$path3/IMP_SBS/delete/feat/delF.0\";" > $path3/IMP_SBS/delete/haha 19 | cat $path3/IMP_SBS/delete/haha $path0/Delete_SBS_combine.R > $path3/IMP_SBS/delete/haha.R; 20 | rm -f $path3/IMP_SBS/delete/haha # dropped -e: it is not an rm option in POSIX/GNU coreutils 21 | R CMD BATCH $path3/IMP_SBS/delete/haha.R 22 | 23 | 24 | ######## summary features, best performance as turnover point. 
25 | egrep "\[1\] \"base feature set's performance is: |\[1\] \"find the bad feature: |\[1\] \"the best performance is now: " $path3/IMP_SBS/delete/haha.Rout | cut -d ':' -f 2 |sed 's/"//g; s/^ //g;s/ / /g' > $path3/IMP_SBS/delete/foo1 26 | grep -i "\[1\] \"now the good features are: " $path3/IMP_SBS/delete/haha.Rout | tail -1 | cut -d ':' -f 2 |sed 's/"//g ; s/^ //g; s/ / /g' > $path3/IMP_SBS/delete/foo2 27 | cat $path3/IMP_SBS/delete/foo1 $path3/IMP_SBS/delete/foo2 > $path3/IMP_SBS/delete/foo3; 28 | awk '{if(NR%2==1){performance=$0;}else{feature=$0;print NR/2"\t"performance"\t"feature}}' $path3/IMP_SBS/delete/foo3 > $path3/IMP_SBS/delete/foo4 29 | BP=`sort -k2,2gr -k1,1gr $path3/IMP_SBS/delete/foo4 |head -1 |cut -f 2`; 30 | BP_NR=`sort -k2,2gr -k1,1gr $path3/IMP_SBS/delete/foo4 |head -1 |cut -f 1`; 31 | nvar=`awk -v BP_NR="$BP_NR" -v BP="$BP" '((NR>=BP_NR)&&($2>=BP)){print $1}' $path3/IMP_SBS/delete/foo4 |tail -n 1` 32 | echo $nvar; 33 | awk -v nvar="$nvar" 'BEGIN{FS="\t"} NR>=nvar {print $3}' $path3/IMP_SBS/delete/foo4 |sed 's/ /\n/g' > $path2/example.selected.FS.IMP_SBS # overwrite (was >>) so that re-running this step does not append duplicate feature names 34 | rm -f $path3/IMP_SBS/delete/foo* # dropped -e: it is not an rm option in POSIX/GNU coreutils 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /feature_selection/input/example.initial.FS: -------------------------------------------------------------------------------- 1 | blastn.up 2 | blastn.lo 3 | blastn.dn 4 | blastx.up 5 | blastx.lo 6 | blastx.dn 7 | gc.up 8 | gc.lo 9 | gc.dn 10 | infernal.up 11 | infernal.lo 12 | infernal.dn 13 | randfold.up 14 | randfold.lo 15 | randfold.dn 16 | rnacode.up 17 | rnacode.lo 18 | rnacode.dn 19 | sci.up 20 | sci.lo 21 | sci.dn 22 | nonpolya.up 23 | nonpolya.lo 24 | nonpolya.dn 25 | polya.up 26 | polya.lo 27 | polya.dn 28 | total.up 29 | total.lo 30 | total.dn 31 | smallrna.up 32 | smallrna.lo 33 | smallrna.dn 34 | ctcf.up 35 | ctcf.lo 36 | ctcf.dn 37 | h3k27ac.up 38 | h3k27ac.lo 39 | h3k27ac.dn 40 | h3k27me3.up 41 | h3k27me3.lo 42 | h3k27me3.dn 43 | 
h3k36me3.up 44 | h3k36me3.lo 45 | h3k36me3.dn 46 | h3k4me1.up 47 | h3k4me1.lo 48 | h3k4me1.dn 49 | h3k4me3.up 50 | h3k4me3.lo 51 | h3k4me3.dn 52 | h3k9ac.up 53 | h3k9ac.lo 54 | h3k9ac.dn 55 | h3k9me3.up 56 | h3k9me3.lo 57 | h3k9me3.dn 58 | h3k4me2.up 59 | h3k4me2.lo 60 | h3k4me2.dn 61 | h3k79me2.up 62 | h3k79me2.lo 63 | h3k79me2.dn 64 | h4k20me1.up 65 | h4k20me1.lo 66 | h4k20me1.dn 67 | pol2.up 68 | pol2.lo 69 | pol2.dn 70 | -------------------------------------------------------------------------------- /feature_selection/input/input_file_format: -------------------------------------------------------------------------------- 1 | ######## this is a readme about the input files 2 | 3 | # Two files are needed for machine-learning: 4 | 5 | # 1. example.train.mx 6 | # It is a matrix file. Each row is a sample and each column is a feature. The first row is a header recording each feature's name; the first column is a unique sample ID, here we used bin; the second column is the label, here we used anno 7 | # For example: 8 | # bin anno X1 X2 X3 X4 X5 #this is the header, not numeric, indicating each feature's name 9 | # bin1 CDS x1,1 x1,2 x1,3 x1,4 x1,5 #this is the first sample, a vector with 5 features, a unique ID (bin1) and a label (CDS) 10 | # bin2 NC1 x2,1 x2,2 x2,3 x2,4 x2,5 #this is the second sample, a vector with 5 features, a unique ID (bin2) and a label (NC1) 11 | 12 | # 2. example.initial.FS 13 | # It is a feature name file. Each row is a feature name; these features (and the sample ID and label columns) in the matrix file are used during training, testing and prediction 14 | # For example: 15 | # X1 16 | # X3 17 | # X4 18 | # In this case, only X1, X3 and X4 would be used to perform the machine-learning. 
Namely, only the following part of the original matrix is used in machine-learning: 19 | # bin anno X1 X3 X4 20 | # bin1 CDS x1,1 x1,3 x1,4 21 | # bin2 NC1 x2,1 x2,3 x2,4 22 | 23 | -------------------------------------------------------------------------------- /feature_selection/output/example.selected.FS.IMP: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | -------------------------------------------------------------------------------- /feature_selection/output/example.selected.FS.IMP_SBS: -------------------------------------------------------------------------------- 1 | gc.lo 2 | gc.dn 3 | gc.up 4 | infernal.lo 5 | infernal.up 6 | infernal.dn 7 | polya.lo 8 | polya.up 9 | polya.dn 10 | blastx.lo 11 | blastx.dn 12 | blastx.up 13 | total.lo 14 | total.dn 15 | total.up 16 | -------------------------------------------------------------------------------- /feature_selection/output/example.selected.FS.lasso: -------------------------------------------------------------------------------- 1 | blastn.up 2 | blastx.lo 3 | gc.up 4 | rnacode.up 5 | nonpolya.lo 6 | polya.lo 7 | total.up 8 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.0: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | 
h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | h3k4me2.up 56 | h3k4me2.dn 57 | h3k4me2.lo 58 | h3k79me2.dn 59 | h3k79me2.up 60 | h3k79me2.lo 61 | ctcf.lo 62 | ctcf.up 63 | ctcf.dn 64 | randfold.dn 65 | randfold.up 66 | randfold.lo 67 | sci.up 68 | sci.dn 69 | sci.lo 70 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.1: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | h3k4me2.up 56 | h3k4me2.dn 57 | h3k4me2.lo 58 | h3k79me2.dn 59 | h3k79me2.up 60 | h3k79me2.lo 61 | ctcf.lo 62 | ctcf.up 63 | ctcf.dn 64 | randfold.dn 65 | randfold.up 66 | randfold.lo 67 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.10: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.11: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.12: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | 
nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.13: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.14: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.15: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | 
nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.16: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.17: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.18: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.19: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | h3k4me2.up 56 | h3k4me2.dn 57 | h3k4me2.lo 58 | h3k79me2.dn 59 | h3k79me2.up 60 | h3k79me2.lo 61 | ctcf.lo 62 | ctcf.up 63 | ctcf.dn 64 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.20: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.21: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.22: 
-------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.3: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | h3k4me2.up 56 | h3k4me2.dn 57 | h3k4me2.lo 58 | h3k79me2.dn 59 | h3k79me2.up 60 | h3k79me2.lo 61 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | 
h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | h3k4me2.up 56 | h3k4me2.dn 57 | h3k4me2.lo 58 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.5: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | pol2.lo 53 | pol2.up 54 | pol2.dn 55 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.6: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | 
h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | h3k4me3.dn 50 | h3k4me3.lo 51 | h3k4me3.up 52 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.7: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | h3k4me1.lo 47 | h3k4me1.up 48 | h3k4me1.dn 49 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.8: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | 
rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | h3k9ac.lo 44 | h3k9ac.up 45 | h3k9ac.dn 46 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/feat/feat.9: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | h3k27me3.lo 23 | h3k27me3.dn 24 | h3k27me3.up 25 | h3k36me3.lo 26 | h3k36me3.dn 27 | h3k36me3.up 28 | rnacode.lo 29 | rnacode.dn 30 | rnacode.up 31 | blastn.lo 32 | blastn.up 33 | blastn.dn 34 | h3k9me3.lo 35 | h3k9me3.dn 36 | h3k9me3.up 37 | h4k20me1.dn 38 | h4k20me1.up 39 | h4k20me1.lo 40 | h3k27ac.lo 41 | h3k27ac.up 42 | h3k27ac.dn 43 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/haha.R: -------------------------------------------------------------------------------- 1 | inputF1="/home/hulong/module/feature_selection/input/example.train.mx"; 2 | inputF2="/home/hulong/module/feature_selection/tmp/idx/cv.idx"; 3 | inputF3="5"; 4 | inputF4="/home/hulong/module/feature_selection/tmp/IMP/rank/rank"; 5 | #Args <-commandArgs(TRUE); 6 | #inputF1= Args[1];#input TR.b.m ; first column is bin,second is label, seperated by tab, with header; 7 | #inputF2= Args[2];#input the cv.idx, no header; 8 | #inputF3= Args[3];#input CV fold; 9 | #inputF4= Args[4];#the rank file; the most important ones are on the top. two columns. the first is the features' names, the second is the rank of features. 
features that share the same rank are deleted together in one step. 10 | 11 | #my_qsub: writes a job script that runs cv.test.R on one feature file, clears any stale perf/log file, then submits the script with qsub 12 | my_qsub<-function(inputF1,inputF2,inputF3,inputF4,outputF,qsubF,logF){ 13 | str=paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N DEL_rank_c\nRscript","/home/hulong/module/feature_selection/bin/cv.test.R",inputF1,inputF2,inputF3,inputF4,outputF) 14 | write(str,file=qsubF); 15 | unlink(outputF); #unlink() is silent when the file does not exist yet, unlike system("rm ...") which prints "cannot remove" on a first run 16 | unlink(logF); 17 | system(paste("qsub -V -o",logF,qsubF)); 18 | } 19 | my_check<-function(file_path,FileList){ #blocks, polling every 30 s, until every path in FileList is listed under file_path 20 | while(1==1){ 21 | filelist_all=list.files(path=file_path,full.names=T); 22 | tmp=is.element(FileList,filelist_all); 23 | if(all(tmp)){break;} 24 | Sys.sleep(30); 25 | } 26 | } 27 | 28 | RANK = read.table(inputF4,head=F,sep="\t"); 29 | RANK_groups = RANK[,2]; 30 | RANK_group = as.vector(unique(RANK_groups)); 31 | Feat = as.vector(RANK[,1]); 32 | 33 | FileList = c(); 34 | for (i in sort(RANK_group,decreasing=T)){ 35 | ID1 = which(RANK_groups<=i); 36 | Feat_i = Feat[ID1]; 37 | tmp = max(RANK_group)-i; 38 | featF = paste("./feat/feat",tmp,sep="."); 39 | write.table(Feat_i,file=featF,sep="\t",col.name=F,row.name=F,quote=F); 40 | qsubF = paste("./qsub/qsub_del",tmp,sep="."); 41 | logF = paste("./log/log_del",tmp,sep="."); 42 | perfF = paste("./perf/perf_del",tmp,sep="."); 43 | my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 44 | FileList = c(FileList,perfF); 45 | } 46 | file_path="./perf" 47 | my_check(file_path,FileList); 48 | Sys.sleep(2); 49 | Perf_summary=matrix(0,nrow=0,ncol=2); 50 | for (i in sort(RANK_group,decreasing=T)){ 51 | tmp = max(RANK_group)-i; 52 | perfF = paste("./perf/perf_del",tmp,sep="."); 53 | Perf = as.vector(as.matrix(read.table(perfF,head=F,sep="\t"))); 54 | Perf_summary = rbind(Perf_summary,Perf) 55 | } 56 | colnames(Perf_summary)<-c("acc","nvar"); 57 | write.table(Perf_summary,file=paste("perf_del_sum","txt",sep="."),sep="\t",col.name=T,row.name=F,quote=F); 58 | pdf(paste("perf_del_sum","pdf",sep=".")) 59 | 
plot(Perf_summary[,"nvar"],Perf_summary[,"acc"],type="o",lwd=2); 60 | dev.off() 61 | 62 | rm(list=ls()); 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/haha.Rout: -------------------------------------------------------------------------------- 1 | 2 | R version 2.13.1 (2011-07-08) 3 | Copyright (C) 2011 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: x86_64-redhat-linux-gnu (64-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | Natural language support but running in an English locale 12 | 13 | R is a collaborative project with many contributors. 14 | Type 'contributors()' for more information and 15 | 'citation()' on how to cite R or R packages in publications. 16 | 17 | Type 'demo()' for some demos, 'help()' for on-line help, or 18 | 'help.start()' for an HTML browser interface to help. 19 | Type 'q()' to quit R. 20 | 21 | > inputF1="/home/hulong/module/feature_selection/input/example.train.mx"; 22 | > inputF2="/home/hulong/module/feature_selection/tmp/idx/cv.idx"; 23 | > inputF3="5"; 24 | > inputF4="/home/hulong/module/feature_selection/tmp/IMP/rank/rank"; 25 | > #Args <-commandArgs(TRUE); 26 | > #inputF1= Args[1];#input TR.b.m ; first column is bin,second is label, seperated by tab, with header; 27 | > #inputF2= Args[2];#input the cv.idx, no header; 28 | > #inputF3= Args[3];#input CV fold; 29 | > #inputF4= Args[4];#the rank file; the most important ones are on the top. two columns. the first is the features' names, the second is the rank of features. features have the same will be deleted together in one time. 
30 | > 31 | > 32 | > my_qsub<-function(inputF1,inputF2,inputF3,inputF4,outputF,qsubF,logF){ 33 | + str=paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N DEL_rank_c\nRscript","/home/hulong/module/feature_selection/bin/cv.test.R",inputF1,inputF2,inputF3,inputF4,outputF) 34 | + write(str,file=qsubF); 35 | + system(paste("rm",outputF)); 36 | + system(paste("rm",logF)); 37 | + system(paste("qsub -V -o",logF,qsubF)); 38 | + } 39 | > my_check<-function(file_path,FileList){ 40 | + while(1==1){ 41 | + filelist_all=list.files(path=file_path,full.names=T); 42 | + tmp=is.element(FileList,filelist_all); 43 | + if(all(tmp)){break;} 44 | + Sys.sleep(30); 45 | + } 46 | + } 47 | > 48 | > RANK = read.table(inputF4,head=F,sep="\t"); 49 | > RANK_groups = RANK[,2]; 50 | > RANK_group = as.vector(unique(RANK_groups)); 51 | > Feat = as.vector(RANK[,1]); 52 | > 53 | > FileList = c(); 54 | > for (i in sort(RANK_group,decreasing=T)){ 55 | + ID1 = which(RANK_groups<=i); 56 | + Feat_i = Feat[ID1]; 57 | + tmp = max(RANK_group)-i; 58 | + featF = paste("./feat/feat",tmp,sep="."); 59 | + write.table(Feat_i,file=featF,sep="\t",col.name=F,row.name=F,quote=F); 60 | + qsubF = paste("./qsub/qsub_del",tmp,sep="."); 61 | + logF = paste("./log/log_del",tmp,sep="."); 62 | + perfF = paste("./perf/perf_del",tmp,sep="."); 63 | + my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 64 | + FileList = c(FileList,perfF); 65 | + } 66 | rm: cannot remove `./perf/perf_del.0': No such file or directory 67 | rm: cannot remove `./log/log_del.0': No such file or directory 68 | Your job 2166990 ("DEL_rank_c") has been submitted 69 | rm: cannot remove `./perf/perf_del.1': No such file or directory 70 | rm: cannot remove `./log/log_del.1': No such file or directory 71 | Your job 2166991 ("DEL_rank_c") has been submitted 72 | rm: cannot remove `./perf/perf_del.2': No such file or directory 73 | rm: cannot remove `./log/log_del.2': No such file or directory 74 | Your job 2166992 ("DEL_rank_c") has been 
submitted 75 | rm: cannot remove `./perf/perf_del.3': No such file or directory 76 | rm: cannot remove `./log/log_del.3': No such file or directory 77 | Your job 2166993 ("DEL_rank_c") has been submitted 78 | rm: cannot remove `./perf/perf_del.4': No such file or directory 79 | rm: cannot remove `./log/log_del.4': No such file or directory 80 | Your job 2166994 ("DEL_rank_c") has been submitted 81 | rm: cannot remove `./perf/perf_del.5': No such file or directory 82 | rm: cannot remove `./log/log_del.5': No such file or directory 83 | Your job 2166995 ("DEL_rank_c") has been submitted 84 | rm: cannot remove `./perf/perf_del.6': No such file or directory 85 | rm: cannot remove `./log/log_del.6': No such file or directory 86 | Your job 2166996 ("DEL_rank_c") has been submitted 87 | rm: cannot remove `./perf/perf_del.7': No such file or directory 88 | rm: cannot remove `./log/log_del.7': No such file or directory 89 | Your job 2166997 ("DEL_rank_c") has been submitted 90 | rm: cannot remove `./perf/perf_del.8': No such file or directory 91 | rm: cannot remove `./log/log_del.8': No such file or directory 92 | Your job 2166998 ("DEL_rank_c") has been submitted 93 | rm: cannot remove `./perf/perf_del.9': No such file or directory 94 | rm: cannot remove `./log/log_del.9': No such file or directory 95 | Your job 2166999 ("DEL_rank_c") has been submitted 96 | rm: cannot remove `./perf/perf_del.10': No such file or directory 97 | rm: cannot remove `./log/log_del.10': No such file or directory 98 | Your job 2167000 ("DEL_rank_c") has been submitted 99 | rm: cannot remove `./perf/perf_del.11': No such file or directory 100 | rm: cannot remove `./log/log_del.11': No such file or directory 101 | Your job 2167001 ("DEL_rank_c") has been submitted 102 | rm: cannot remove `./perf/perf_del.12': No such file or directory 103 | rm: cannot remove `./log/log_del.12': No such file or directory 104 | Your job 2167002 ("DEL_rank_c") has been submitted 105 | rm: cannot remove 
`./perf/perf_del.13': No such file or directory 106 | rm: cannot remove `./log/log_del.13': No such file or directory 107 | Your job 2167003 ("DEL_rank_c") has been submitted 108 | rm: cannot remove `./perf/perf_del.14': No such file or directory 109 | rm: cannot remove `./log/log_del.14': No such file or directory 110 | Your job 2167004 ("DEL_rank_c") has been submitted 111 | rm: cannot remove `./perf/perf_del.15': No such file or directory 112 | rm: cannot remove `./log/log_del.15': No such file or directory 113 | Your job 2167005 ("DEL_rank_c") has been submitted 114 | rm: cannot remove `./perf/perf_del.16': No such file or directory 115 | rm: cannot remove `./log/log_del.16': No such file or directory 116 | Your job 2167006 ("DEL_rank_c") has been submitted 117 | rm: cannot remove `./perf/perf_del.17': No such file or directory 118 | rm: cannot remove `./log/log_del.17': No such file or directory 119 | Your job 2167007 ("DEL_rank_c") has been submitted 120 | rm: cannot remove `./perf/perf_del.18': No such file or directory 121 | rm: cannot remove `./log/log_del.18': No such file or directory 122 | Your job 2167008 ("DEL_rank_c") has been submitted 123 | rm: cannot remove `./perf/perf_del.19': No such file or directory 124 | rm: cannot remove `./log/log_del.19': No such file or directory 125 | Your job 2167009 ("DEL_rank_c") has been submitted 126 | rm: cannot remove `./perf/perf_del.20': No such file or directory 127 | rm: cannot remove `./log/log_del.20': No such file or directory 128 | Your job 2167010 ("DEL_rank_c") has been submitted 129 | rm: cannot remove `./perf/perf_del.21': No such file or directory 130 | rm: cannot remove `./log/log_del.21': No such file or directory 131 | Your job 2167011 ("DEL_rank_c") has been submitted 132 | rm: cannot remove `./perf/perf_del.22': No such file or directory 133 | rm: cannot remove `./log/log_del.22': No such file or directory 134 | Your job 2167012 ("DEL_rank_c") has been submitted 135 | > file_path="./perf" 136 | 
> my_check(file_path,FileList); 137 | > Sys.sleep(2); 138 | > Perf_summary=matrix(0,nrow=0,ncol=2); 139 | > for (i in sort(RANK_group,decreasing=T)){ 140 | + tmp = max(RANK_group)-i; 141 | + perfF = paste("./perf/perf_del",tmp,sep="."); 142 | + Perf = as.vector(as.matrix(read.table(perfF,head=F,sep="\t"))); 143 | + Perf_summary = rbind(Perf_summary,Perf) 144 | + } 145 | > colnames(Perf_summary)<-c("acc","nvar"); 146 | > write.table(Perf_summary,file=paste("perf_del_sum","txt",sep="."),sep="\t",col.name=T,row.name=F,quote=F); 147 | > pdf(paste("perf_del_sum","pdf",sep=".")) 148 | > plot(Perf_summary[,"nvar"],Perf_summary[,"acc"],type="o",lwd=2); 149 | > dev.off() 150 | null device 151 | 1 152 | > 153 | > rm(list=ls()); 154 | > 155 | > 156 | > 157 | > 158 | > proc.time() 159 | user system elapsed 160 | 0.868 0.544 184.199 161 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.0: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.10: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.11: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.12: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.13: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.14: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.15: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.16: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.17: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.18: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.19: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.20: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.21: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.22: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.3: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.5: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.6: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.7: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.8: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/log/log_del.9: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.0: -------------------------------------------------------------------------------- 1 | 0.923527190332326 2 | 69 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.1: -------------------------------------------------------------------------------- 1 | 0.925793051359517 2 | 66 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.10: -------------------------------------------------------------------------------- 1 | 0.926925981873112 2 | 39 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.11: -------------------------------------------------------------------------------- 1 | 0.926359516616314 2 | 36 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.12: -------------------------------------------------------------------------------- 1 | 0.925981873111782 2 | 33 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.13: -------------------------------------------------------------------------------- 1 | 0.925415407854985 2 | 30 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.14: -------------------------------------------------------------------------------- 1 | 0.924848942598187 2 | 27 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.15: 
-------------------------------------------------------------------------------- 1 | 0.923904833836858 2 | 24 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.16: -------------------------------------------------------------------------------- 1 | 0.926359516616314 2 | 21 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.17: -------------------------------------------------------------------------------- 1 | 0.921638972809668 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.18: -------------------------------------------------------------------------------- 1 | 0.921450151057402 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.19: -------------------------------------------------------------------------------- 1 | 0.921450151057402 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.2: -------------------------------------------------------------------------------- 1 | 0.924660120845921 2 | 63 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.20: -------------------------------------------------------------------------------- 1 | 0.870468277945619 2 | 9 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.21: -------------------------------------------------------------------------------- 1 | 0.83440332326284 2 | 6 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.22: 
-------------------------------------------------------------------------------- 1 | 0.658799093655589 2 | 3 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.3: -------------------------------------------------------------------------------- 1 | 0.924660120845921 2 | 60 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.4: -------------------------------------------------------------------------------- 1 | 0.924848942598187 2 | 57 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.5: -------------------------------------------------------------------------------- 1 | 0.925037764350453 2 | 54 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.6: -------------------------------------------------------------------------------- 1 | 0.925793051359517 2 | 51 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.7: -------------------------------------------------------------------------------- 1 | 0.925037764350453 2 | 48 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.8: -------------------------------------------------------------------------------- 1 | 0.926170694864048 2 | 45 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf/perf_del.9: -------------------------------------------------------------------------------- 1 | 0.925981873111782 2 | 42 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf_del_sum.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lulab/shared_scripts/a49e08b6a09097a26baaa669bcf0ec2c1e71c741/feature_selection/tmp/IMP/delete/perf_del_sum.pdf -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/perf_del_sum.txt: -------------------------------------------------------------------------------- 1 | acc nvar 2 | 0.923527190332326 69 3 | 0.925793051359517 66 4 | 0.924660120845921 63 5 | 0.924660120845921 60 6 | 0.924848942598187 57 7 | 0.925037764350453 54 8 | 0.925793051359517 51 9 | 0.925037764350453 48 10 | 0.926170694864048 45 11 | 0.925981873111782 42 12 | 0.926925981873112 39 13 | 0.926359516616314 36 14 | 0.925981873111782 33 15 | 0.925415407854985 30 16 | 0.924848942598187 27 17 | 0.923904833836858 24 18 | 0.926359516616314 21 19 | 0.921638972809668 18 20 | 0.921450151057402 15 21 | 0.921450151057402 12 22 | 0.870468277945619 9 23 | 0.83440332326284 6 24 | 0.658799093655589 3 25 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.0: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.0 ./perf/perf_del.0 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx 
/home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.1 ./perf/perf_del.1 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.10: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.10 ./perf/perf_del.10 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.11: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.11 ./perf/perf_del.11 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.12: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.12 ./perf/perf_del.12 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.13: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R 
/home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.13 ./perf/perf_del.13 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.14: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.14 ./perf/perf_del.14 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.15: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.15 ./perf/perf_del.15 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.16: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.16 ./perf/perf_del.16 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.17: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript 
/home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.17 ./perf/perf_del.17 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.18: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.18 ./perf/perf_del.18 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.19: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.19 ./perf/perf_del.19 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.2 ./perf/perf_del.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.20: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 
| #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.20 ./perf/perf_del.20 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.21: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.21 ./perf/perf_del.21 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.22: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.22 ./perf/perf_del.22 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.3: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.3 ./perf/perf_del.3 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.4: -------------------------------------------------------------------------------- 1 
| #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.4 ./perf/perf_del.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.5: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.5 ./perf/perf_del.5 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.6: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.6 ./perf/perf_del.6 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.7: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.7 ./perf/perf_del.7 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.8: 
-------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.8 ./perf/perf_del.8 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/delete/qsub/qsub_del.9: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N DEL_rank_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/feat.9 ./perf/perf_del.9 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP/rank/IMP: -------------------------------------------------------------------------------- 1 | CDS NC1 NegControl UTR MeanDecreaseAccuracy MeanDecreaseGini 2 | sci.lo 0.000758224765213094 0.00151467815765277 0.000287133338040294 -4.56286208066757e-05 0.00062721005095592 15.8206206288731 3 | randfold.lo 0.0012074957056647 0.00305021922735016 0.000172514672416137 0.00226816184075575 0.00167179182182291 33.5315022310431 4 | sci.dn -0.000428653262427885 0.00736766967475331 0.000866051370518752 0.000259803720134815 0.00201751170408565 38.6323177356816 5 | sci.up 0.000775906587450985 0.00805069095107785 0.000694538128791004 4.52037764917289e-05 0.00238994524706526 39.3502991987404 6 | randfold.up 0.000386067734366369 0.00876484429009533 0.000487053729462037 0.00113127043886297 0.00269444006996397 37.522047329254 7 | randfold.dn 0.00024233966358726 0.0082198967865063 0.000577125971850167 0.00226771329852623 0.00281971511290993 42.0535313098857 8 | ctcf.dn 0.00187493071192701 0.00846222831354345 
0.00106847328875677 9.73401603737975e-05 0.00287738980485347 40.1635880985683 9 | ctcf.up 0.00189252864136364 0.0111948638600745 0.00112390867083546 0.000895815212586513 0.00377673648630051 41.0213912775619 10 | ctcf.lo 0.00156841285895068 0.00849150488132 0.00205680494061145 0.00405211079386279 0.00404601098510895 43.7386282060694 11 | h3k9ac.dn 0.00796043785960299 0.0122796862749671 0.00357995788747143 0.0150632348828093 0.00973741971372823 48.463808807981 12 | h3k9me3.up 0.0118111650117086 0.0194956781384146 0.00222287587241785 0.00583129588939111 0.00985692498372286 53.4049556753742 13 | h3k4me1.dn 0.00875029137954668 0.01559935383171 0.00325796205460569 0.0123450093567363 0.00997765201564062 46.1949913067488 14 | h3k9me3.dn 0.0089333453753129 0.0218822042483741 0.00218225804012325 0.0101864812733645 0.0107975153764233 61.0277130796174 15 | blastn.dn 0.00735703177493243 0.0258421649182679 0.00401418302192041 0.00733318496702696 0.0111369523918437 70.1618623915319 16 | h3k4me1.up 0.0150133760809652 0.0134908186611308 0.00721077263762086 0.00881009110340893 0.0111476783163997 45.0831309744441 17 | h3k27me3.up 0.0114962178307964 0.0187705836170781 0.00643084548679017 0.00806987285492806 0.0111843627973113 55.7126456320842 18 | h3k27ac.dn 0.0110858042560051 0.0155468437267624 0.00494734382771664 0.0139019030593339 0.0114004094094406 48.9133964150354 19 | blastn.up 0.0037407652542255 0.025942328132723 0.0050189177701509 0.0117819671436587 0.0116056734338033 71.7276901619478 20 | h3k79me2.lo 0.01158652511388 0.0192287501060728 0.00379999456008109 0.012179582512519 0.0116935203075123 51.5357288546566 21 | pol2.dn 0.0123577722263898 0.0203587165115402 0.00447294620477334 0.0113959065189435 0.0121368501978756 61.2849075787616 22 | h3k4me2.lo 0.0127401405763049 0.0170348811331469 0.00687271134573802 0.0119906721013071 0.0121752843981727 53.0622291135033 23 | h3k79me2.up 0.00797250102410814 0.020592553117335 0.0132174648062651 0.00737190561041967 0.0122938595921003 
61.2726165840913 24 | h3k79me2.dn 0.011257460284594 0.0233114903734816 0.0123084757165588 0.00788772613064076 0.0136805147674898 63.1411899201374 25 | pol2.up 0.0127771140978292 0.0216742900110738 0.00858145312458281 0.0122372233215532 0.0138178419258427 61.1419994918533 26 | h3k4me2.dn 0.00965870150457092 0.0207499124283763 0.00812952183322137 0.0170984676356018 0.0138908913979221 57.6684264515665 27 | h3k27me3.dn 0.0170540299656717 0.0258273749738332 0.00471008167412901 0.0109160390433366 0.0146157746771155 64.2109193149814 28 | h3k4me2.up 0.00951443514912493 0.0242029025729765 0.0122559468153219 0.0128138026470257 0.0146932101647354 68.0846988977421 29 | h3k27ac.up 0.0199840584309743 0.0176800070930506 0.0122192216600417 0.00886781918420011 0.0147032457286879 54.834496885541 30 | h3k9ac.up 0.0246318527054456 0.0174699999694542 0.0118532152587103 0.00806393438766758 0.0155318916072849 50.5074254655216 31 | h3k4me3.up 0.0143461316533887 0.0182543846621166 0.0177167382451535 0.012620438733075 0.015746370114648 55.5638549211639 32 | rnacode.up 0.0134331324483146 0.0229215144744617 0.0226661343516796 0.00666822078598466 0.0164339379113172 63.1406490311125 33 | pol2.lo 0.0217581080603862 0.0175773740777308 0.0182705436390051 0.00975523551304025 0.0168291105723708 69.7670899407104 34 | h3k4me3.lo 0.0340765737859138 0.0165714377832273 0.0129769350468245 0.0104199148370023 0.0184813225196491 60.6148727794367 35 | h3k4me3.dn 0.0122729559060351 0.0165972412721326 0.0261306265133894 0.0202755401677381 0.018815429445347 58.0856950060787 36 | rnacode.dn 0.0344120142573583 0.0281562371234768 0.0145958060399198 0.00993086742684578 0.0217870161899523 93.300053579504 37 | h4k20me1.lo 0.0236668293272375 0.0316752759090312 0.0174501429527791 0.0191137802389535 0.0229826950601125 92.5497635874241 38 | h4k20me1.up 0.00736190415679509 0.0240852328595239 0.0573473509546824 0.00668137380070612 0.0238973150507964 72.2397361962027 39 | gc.up 0.016619675482784 0.0302152692626111 
0.0463446668664734 0.00648844019481606 0.0249192605889576 74.1156143283547 40 | h3k36me3.up 0.0262335309544728 0.0222223343041852 0.0445125296529822 0.0106455870405803 0.0259143880298114 65.8666065579805 41 | h3k4me1.lo 0.0710021328447974 0.0200300183927823 0.00409203926052653 0.0160029685252538 0.0277581392764546 92.7279742826287 42 | h3k36me3.dn 0.0473359075435942 0.0245926795165032 0.0304345602503658 0.0179297457663956 0.0300500816238509 77.6134883617488 43 | h3k9ac.lo 0.0782219854918802 0.0204723216295043 0.00965671170021368 0.0131963855302227 0.03034754444944 89.6318594013586 44 | h3k27ac.lo 0.088863173387409 0.0200095475515612 0.00813295532604891 0.0184889381618652 0.0338714921008948 96.8038466222149 45 | h4k20me1.dn 0.0128133896193019 0.0328224247806958 0.0808990691009245 0.0166084212811613 0.0358265060076012 89.8894494216049 46 | gc.dn 0.0508973085336651 0.0434004819313401 0.0183644081097427 0.0312884520609089 0.0359906861191396 125.179122046643 47 | h3k9me3.lo 0.0955900370021323 0.0370509778988546 0.00285323333654877 0.0292055121971065 0.0411748571181848 185.103834971288 48 | blastn.lo 0.022193242775948 0.079396133565665 0.021894821500038 0.0518703039659003 0.0438193830575133 203.742710686141 49 | rnacode.lo 0.101676638107649 0.0531944832847241 0.0193127287711737 0.0305339101765196 0.0511922621873319 323.37104316171 50 | nonpolya.dn 0.0145175513627951 0.054114410361785 0.10438344818647 0.0564449979727016 0.0573982568810656 219.638290614704 51 | nonpolya.up 0.0146912631854329 0.0540100565239385 0.106026659828944 0.0680718327458232 0.0606614431705106 217.349557719175 52 | h3k36me3.lo 0.148809815392367 0.0364989086162681 0.027798382103764 0.0324169421666009 0.0613259789192189 172.853194406865 53 | infernal.dn 0.0637560305787189 0.084705808847546 0.0683672269580123 0.0538367387836115 0.0676224197497583 218.677022851805 54 | infernal.up 0.0655854266061187 0.0867818935609511 0.0768932802901222 0.0492226436973979 0.0696152338216685 225.144012170089 55 | 
h3k27me3.lo 0.169494139915623 0.0588850365121583 0.00540814044710273 0.0568055036133844 0.0726164119020114 283.689147878148 56 | smallrna.dn 0.0692376968515217 0.0617006532807132 0.153850840552734 0.0899469722658372 0.0936080740488373 355.688867223128 57 | infernal.lo 0.105261680830671 0.070518932149416 0.143560553844374 0.111812975027071 0.107774205100419 368.740754423071 58 | polya.dn 0.18655331296414 0.0440845211889089 0.0886294132926679 0.128047465433952 0.111729238779849 499.896385970479 59 | smallrna.up 0.0596673443539155 0.0727628402039991 0.278221415939589 0.0881662159628457 0.124733335870035 394.693337285387 60 | gc.lo 0.322031201692482 0.069312445463171 0.0215970247997473 0.107054310155975 0.129980663657469 485.086366967407 61 | blastx.up 0.233955377765154 0.0815012694131685 0.330642832760609 0.0289504381049744 0.168758218264322 360.386263912987 62 | polya.up 0.179176462806194 0.0653843818878738 0.190099680621047 0.243252039146172 0.169300824472416 624.578762029473 63 | blastx.dn 0.285284273164123 0.102570518672325 0.258542139080843 0.0899555065368901 0.183952292147685 483.130759325448 64 | total.up 0.130550526966673 0.175805388631015 0.348037540755596 0.0990130726528098 0.188285372778576 693.132093737724 65 | total.dn 0.165832192372329 0.246184370448431 0.229223273630701 0.144867979067385 0.196405355775575 801.411402134345 66 | nonpolya.lo 0.0230607923774587 0.0863420147182593 0.499599966783902 0.259400428225594 0.217055228528926 409.141609554668 67 | total.lo 0.312755055393951 0.716574924355139 1.01002216735645 0.34228439416522 0.595305319309522 1340.72255835329 68 | smallrna.lo 0.097811944109505 0.110338624681846 2.08678910444075 0.142523656044829 0.609511070982832 995.906264087865 69 | blastx.lo 1.41627563363411 0.154514391953697 0.541755511053136 0.832152707370328 0.735772119647352 1940.41997378308 70 | polya.lo 0.404488983699047 0.0882903912689981 2.14115314591612 0.566788093336889 0.800268709419897 1325.33943177703 71 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP/rank/rank: -------------------------------------------------------------------------------- 1 | polya.lo 1 2 | polya.up 1 3 | polya.dn 1 4 | blastx.lo 2 5 | blastx.dn 2 6 | blastx.up 2 7 | smallrna.lo 3 8 | smallrna.up 3 9 | smallrna.dn 3 10 | total.lo 4 11 | total.dn 4 12 | total.up 4 13 | nonpolya.lo 5 14 | nonpolya.up 5 15 | nonpolya.dn 5 16 | gc.lo 6 17 | gc.dn 6 18 | gc.up 6 19 | infernal.lo 7 20 | infernal.up 7 21 | infernal.dn 7 22 | h3k27me3.lo 8 23 | h3k27me3.dn 8 24 | h3k27me3.up 8 25 | h3k36me3.lo 9 26 | h3k36me3.dn 9 27 | h3k36me3.up 9 28 | rnacode.lo 10 29 | rnacode.dn 10 30 | rnacode.up 10 31 | blastn.lo 11 32 | blastn.up 11 33 | blastn.dn 11 34 | h3k9me3.lo 12 35 | h3k9me3.dn 12 36 | h3k9me3.up 12 37 | h4k20me1.dn 13 38 | h4k20me1.up 13 39 | h4k20me1.lo 13 40 | h3k27ac.lo 14 41 | h3k27ac.up 14 42 | h3k27ac.dn 14 43 | h3k9ac.lo 15 44 | h3k9ac.up 15 45 | h3k9ac.dn 15 46 | h3k4me1.lo 16 47 | h3k4me1.up 16 48 | h3k4me1.dn 16 49 | h3k4me3.dn 17 50 | h3k4me3.lo 17 51 | h3k4me3.up 17 52 | pol2.lo 18 53 | pol2.up 18 54 | pol2.dn 18 55 | h3k4me2.up 19 56 | h3k4me2.dn 19 57 | h3k4me2.lo 19 58 | h3k79me2.dn 20 59 | h3k79me2.up 20 60 | h3k79me2.lo 20 61 | ctcf.lo 21 62 | ctcf.up 21 63 | ctcf.dn 21 64 | randfold.dn 22 65 | randfold.up 22 66 | randfold.lo 22 67 | sci.up 23 68 | sci.dn 23 69 | sci.lo 23 70 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.0: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | infernal.lo 20 | infernal.up 21 | infernal.dn 22 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.1: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | blastx.dn 3 | blastx.up 4 | smallrna.lo 5 | smallrna.up 6 | smallrna.dn 7 | total.lo 8 | total.dn 9 | total.up 10 | nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | smallrna.lo 5 | smallrna.up 6 | smallrna.dn 7 | total.lo 8 | total.dn 9 | total.up 10 | nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.3: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.5: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.6: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | infernal.lo 17 | infernal.up 18 | infernal.dn 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.1.7: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | smallrna.lo 8 | smallrna.up 9 | smallrna.dn 10 | total.lo 11 | total.dn 12 | total.up 13 | nonpolya.lo 14 | nonpolya.up 15 | nonpolya.dn 16 | gc.lo 17 | gc.dn 18 | gc.up 19 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.1: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | blastx.dn 3 | blastx.up 4 | total.lo 5 | total.dn 6 | total.up 7 | nonpolya.lo 8 | nonpolya.up 9 | nonpolya.dn 10 | gc.lo 11 | gc.dn 12 | gc.up 13 | infernal.lo 14 | infernal.up 15 | infernal.dn 16 | -------------------------------------------------------------------------------- 
/feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | total.lo 5 | total.dn 6 | total.up 7 | nonpolya.lo 8 | nonpolya.up 9 | nonpolya.dn 10 | gc.lo 11 | gc.dn 12 | gc.up 13 | infernal.lo 14 | infernal.up 15 | infernal.dn 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | nonpolya.lo 8 | nonpolya.up 9 | nonpolya.dn 10 | gc.lo 11 | gc.dn 12 | gc.up 13 | infernal.lo 14 | infernal.up 15 | infernal.dn 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.5: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | gc.lo 11 | gc.dn 12 | gc.up 13 | infernal.lo 14 | infernal.up 15 | infernal.dn 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.6: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | infernal.lo 14 | infernal.up 15 | infernal.dn 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.2.7: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | 
nonpolya.lo 11 | nonpolya.up 12 | nonpolya.dn 13 | gc.lo 14 | gc.dn 15 | gc.up 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.3.1: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | blastx.dn 3 | blastx.up 4 | total.lo 5 | total.dn 6 | total.up 7 | gc.lo 8 | gc.dn 9 | gc.up 10 | infernal.lo 11 | infernal.up 12 | infernal.dn 13 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.3.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | total.lo 5 | total.dn 6 | total.up 7 | gc.lo 8 | gc.dn 9 | gc.up 10 | infernal.lo 11 | infernal.up 12 | infernal.dn 13 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.3.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | gc.lo 8 | gc.dn 9 | gc.up 10 | infernal.lo 11 | infernal.up 12 | infernal.dn 13 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.3.6: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | infernal.lo 11 | infernal.up 12 | infernal.dn 13 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.3.7: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | gc.lo 11 | gc.dn 12 | gc.up 13 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.4.1: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | blastx.dn 3 | blastx.up 4 | total.lo 5 | total.dn 6 | total.up 7 | infernal.lo 8 | infernal.up 9 | infernal.dn 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.4.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | total.lo 5 | total.dn 6 | total.up 7 | infernal.lo 8 | infernal.up 9 | infernal.dn 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.4.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | infernal.lo 8 | infernal.up 9 | infernal.dn 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.4.7: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.5.1: -------------------------------------------------------------------------------- 1 | blastx.lo 2 | blastx.dn 3 | blastx.up 4 | total.lo 5 | total.dn 6 | total.up 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.5.2: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | total.lo 5 | total.dn 6 | total.up 7 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/baseF.5.4: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/feat/delF.0: -------------------------------------------------------------------------------- 1 | polya.lo 1 2 | polya.up 1 3 | polya.dn 1 4 | blastx.lo 2 5 | blastx.dn 2 6 | blastx.up 2 7 | smallrna.lo 3 8 | smallrna.up 3 9 | smallrna.dn 3 10 | total.lo 4 11 | total.dn 4 12 | total.up 4 13 | nonpolya.lo 5 14 | nonpolya.up 5 15 | nonpolya.dn 5 16 | gc.lo 6 17 | gc.dn 6 18 | gc.up 6 19 | infernal.lo 7 20 | infernal.up 7 21 | infernal.dn 7 22 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/foo1: -------------------------------------------------------------------------------- 1 | 0.924848942598187 2 | smallrna.lo smallrna.up smallrna.dn 3 | 0.928058912386707 4 | nonpolya.lo nonpolya.up nonpolya.dn 5 | 0.928625377643505 6 | gc.lo gc.dn gc.up 7 | 0.925981873111782 8 | infernal.lo infernal.up infernal.dn 9 | 0.919373111782477 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/foo2: -------------------------------------------------------------------------------- 1 | polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up 2 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/foo3: -------------------------------------------------------------------------------- 1 | 0.924848942598187 2 | smallrna.lo smallrna.up smallrna.dn 3 | 0.928058912386707 4 | nonpolya.lo nonpolya.up nonpolya.dn 5 | 0.928625377643505 6 | gc.lo gc.dn 
gc.up 7 | 0.925981873111782 8 | infernal.lo infernal.up infernal.dn 9 | 0.919373111782477 10 | polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up 11 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/foo4: -------------------------------------------------------------------------------- 1 | 1 0.924848942598187 smallrna.lo smallrna.up smallrna.dn 2 | 2 0.928058912386707 nonpolya.lo nonpolya.up nonpolya.dn 3 | 3 0.928625377643505 gc.lo gc.dn gc.up 4 | 4 0.925981873111782 infernal.lo infernal.up infernal.dn 5 | 5 0.919373111782477 polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up 6 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/foo5: -------------------------------------------------------------------------------- 1 | gc.lo 2 | gc.dn 3 | gc.up 4 | infernal.lo 5 | infernal.up 6 | infernal.dn 7 | polya.lo 8 | polya.up 9 | polya.dn 10 | blastx.lo 11 | blastx.dn 12 | blastx.up 13 | total.lo 14 | total.dn 15 | total.up 16 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/haha.R: --------------------------------------------------------------------------------
# Sequential backward selection (SBS) over groups of features, using SGE jobs.
#
# Workflow:
#   1. Submit one cv.test.R job for the base feature set (inputF4) and read its
#      performance from ./perf/perf_del.0.
#   2. In every round, for each remaining rank group in inputF5, write the base
#      set minus that group to ./feat/baseF.<round>.<rank>, submit a cv.test.R
#      job for it, and wait until all ./perf/perf_del.<round>.<rank> files exist.
#   3. Permanently drop the group whose removal gave the highest performance,
#      provided it passes the `delta` test below; otherwise stop.
#   4. Write the surviving features and the matching performance to ./summary/.
#
# The script expects ./feat, ./qsub, ./log, ./perf and ./summary to exist in
# the working directory.
inputF1 <- "/home/hulong/module/feature_selection/input/example.train.mx"
inputF2 <- "/home/hulong/module/feature_selection/tmp/idx/cv.idx"
inputF3 <- "5"
inputF4 <- "/home/hulong/module/feature_selection/tmp/IMP_SBS/delete/feat/baseF.0"
inputF5 <- "/home/hulong/module/feature_selection/tmp/IMP_SBS/delete/feat/delF.0"
#Args <-commandArgs(TRUE);
#inputF1= Args[1];#input example.train.mx ; first column is bin, second is label; separated by tab, with header;
#inputF2= Args[2];#input the cv.idx, no header;
#inputF3= Args[3];#input CV fold;
#inputF4= Args[4];#the base feature file; no header; one column; contains only feature names
#inputF5= Args[5];#the deleting feature file; no header; two columns; first is feature names; second is the rank. Features with the same rank will be deleted together

# A group is removed when best_performance < (candidate - delta). With a
# negative delta this means the candidate may be up to |delta| *worse* than the
# current best and still be accepted.
delta <- -0.01

# Write an SGE job script that runs cv.test.R and submit it with qsub.
#
# inputF1..inputF4  arguments forwarded verbatim to cv.test.R
# outputF           performance file cv.test.R is asked to write
# qsubF             path of the job script to generate
# logF              path handed to `qsub -o` for the job log
#
# Stale outputF/logF files are removed first so that my_check() cannot mistake
# the result of an earlier run for the result of this job.
my_qsub <- function(inputF1, inputF2, inputF3, inputF4, outputF, qsubF, logF) {
  str <- paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N SBS_c\nRscript",
               "/home/hulong/module/feature_selection/bin/cv.test.R",
               inputF1, inputF2, inputF3, inputF4, outputF)
  write(str, file = qsubF)
  # unlink() is silent when the file is absent (the old `rm` printed
  # "cannot remove ..." on every first submission) and never goes through a shell.
  unlink(c(outputF, logF))
  system(paste("qsub -V -o", shQuote(logF), shQuote(qsubF)))
}

# Block until every file in FileList exists.
#
# file_path  kept for backward compatibility with existing callers; existence
#            is now tested on the FileList paths directly.
# FileList   character vector of paths to wait for (empty/NULL returns at once)
# interval   seconds to sleep between polls (default 30, as before)
# timeout    maximum seconds to wait before stopping with an error; the default
#            Inf keeps the original wait-forever behaviour
my_check <- function(file_path, FileList, interval = 30, timeout = Inf) {
  wanted <- as.character(FileList)
  started <- Sys.time()
  while (!all(file.exists(wanted))) {
    waited <- as.numeric(difftime(Sys.time(), started, units = "secs"))
    if (waited > timeout) {
      stop(paste("timed out waiting for:",
                 paste(wanted[!file.exists(wanted)], collapse = " ")))
    }
    Sys.sleep(interval)
  }
}

######## calculate the base feature set performance.
ROLL <- 0
featF <- inputF4
qsubF <- paste("./qsub/qsub_del", ROLL, sep = ".")
logF <- paste("./log/log_del", ROLL, sep = ".")
perfF <- paste("./perf/perf_del", ROLL, sep = ".")
my_qsub(inputF1, inputF2, inputF3, featF, perfF, qsubF, logF)
file_path <- "./perf"
my_check(file_path, perfF)
Sys.sleep(2)  # short grace period after the file shows up before reading it
tmp <- as.vector(as.matrix(read.table(perfF, sep = "\t", header = FALSE)))
all.acc <- tmp[1]
print(paste("base feature set's performance is: ", all.acc))
print("start deleting features............................")
best_performance <- all.acc
baseFeat <- as.vector(as.matrix(read.table(inputF4, sep = "\t", header = FALSE)))

# RANK: column 1 = feature name, column 2 = rank group id.
RANK <- read.table(inputF5, header = FALSE, sep = "\t")
RANK_groups <- RANK[, 2]
RANK_group <- as.vector(unique(RANK_groups))
deleFeat <- as.vector(RANK[, 1])
RANK_group_0 <- RANK_group

# At most one round per initial rank group; each committed round removes one group.
for (ROLL in seq_along(RANK_group_0)) {
  BP <- c()
  FileList <- c()
  ranks_desc <- sort(RANK_group, decreasing = TRUE)

  # Submit one job per remaining group, each evaluating the base set without it.
  for (j in ranks_desc) {
    DeleFeat <- deleFeat[which(RANK_groups == j)]
    remain_features_tmp <- setdiff(baseFeat, DeleFeat)
    print(paste("deleting this feature now: ", paste(DeleFeat, collapse = " "), sep = ""))
    featF <- paste("./feat/baseF", ROLL, j, sep = ".")
    qsubF <- paste("./qsub/qsub_del", ROLL, j, sep = ".")
    logF <- paste("./log/log_del", ROLL, j, sep = ".")
    perfF <- paste("./perf/perf_del", ROLL, j, sep = ".")
    write.table(remain_features_tmp, file = featF, quote = FALSE, sep = "\t",
                col.names = FALSE, row.names = FALSE)
    my_qsub(inputF1, inputF2, inputF3, featF, perfF, qsubF, logF)
    FileList <- c(FileList, perfF)
  }
  my_check(file_path, FileList)
  Sys.sleep(2)

  # Collect the first value of every performance file, in the same group order.
  for (j in ranks_desc) {
    perfF <- paste("./perf/perf_del", ROLL, j, sep = ".")
    Perf <- as.vector(as.matrix(read.table(perfF, header = FALSE, sep = "\t")))
    MYPERFORMANCE <- c(Perf[1])
    names(MYPERFORMANCE) <- j
    print(paste("performance summary of three trial is: ", MYPERFORMANCE, sep = ""))
    BP <- c(BP, MYPERFORMANCE)
  }

  BPB <- max(BP, na.rm = TRUE)
  if (!(best_performance < (BPB - delta))) {
    print("no bad feature can be deleted, break here")
    break
  }
  if (length(baseFeat) <= 5) {
    # Checked before best_performance is touched, so that the value written to
    # ./summary/bestPerf always belongs to the feature set written to bestFeat.
    print("less than five features")
    break
  }

  # Commit the removal of the best-scoring group (first one on ties).
  best_performance <- BPB
  bad_rank <- ranks_desc[which(BP == BPB)[1]]
  bad_feature <- deleFeat[which(RANK_groups == bad_rank)]
  print(paste("find the bad feature: ", paste(bad_feature, collapse = " "), sep = ""))
  print(paste("the best performance is now: ", best_performance, sep = ""))
  baseFeat <- setdiff(baseFeat, bad_feature)
  ID <- which(!(is.element(RANK[, 2], bad_rank)))
  RANK <- RANK[ID, ]
  RANK_groups <- RANK[, 2]
  RANK_group <- as.vector(unique(RANK_groups))
  deleFeat <- as.vector(RANK[, 1])
  print(paste("now the good features are: ", paste(baseFeat, collapse = " "), sep = ""))
}

write.table(baseFeat, file = "./summary/bestFeat", quote = FALSE,
            col.names = FALSE, row.names = FALSE, sep = "\t")
write.table(best_performance, file = "./summary/bestPerf", quote = FALSE,
            col.names = FALSE, row.names = FALSE, sep = "\t")

rm(list=ls());

-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/haha.Rout: -------------------------------------------------------------------------------- 1 | 2 | R version 2.13.1 (2011-07-08) 3 | Copyright (C) 2011 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: x86_64-redhat-linux-gnu (64-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | Natural language support but running in an English locale 12 | 13 | R is a collaborative project with many contributors. 14 | Type 'contributors()' for more information and 15 | 'citation()' on how to cite R or R packages in publications. 16 | 17 | Type 'demo()' for some demos, 'help()' for on-line help, or 18 | 'help.start()' for an HTML browser interface to help. 19 | Type 'q()' to quit R. 20 | 21 | > inputF1="/home/hulong/module/feature_selection/input/example.train.mx"; 22 | > inputF2="/home/hulong/module/feature_selection/tmp/idx/cv.idx"; 23 | > inputF3="5"; 24 | > inputF4="/home/hulong/module/feature_selection/tmp/IMP_SBS/delete/feat/baseF.0"; 25 | > inputF5="/home/hulong/module/feature_selection/tmp/IMP_SBS/delete/feat/delF.0"; 26 | > #Args <-commandArgs(TRUE); 27 | > #inputF1= Args[1];#input example.train.mx ; first column is bin second is label, seperated by tab, with header; 28 | > #inputF2= Args[2];#input the cv.idx, no header; 29 | > #inputF3= Args[3];#input CV fold; 30 | > #inputF4= Args[4];#the base feature file; no header;one column; contains only features name 31 | > #inputF5= Args[5];#the deleting feature file; no header;tow columns; first is feature names; second is the rank.
features with the same rank will be delete together 32 | > 33 | > 34 | > delta=-0.01; 35 | > my_qsub<-function(inputF1,inputF2,inputF3,inputF4,outputF,qsubF,logF){ 36 | + str=paste("#$ -S /bin/bash\n#$ -cwd\n#$ -j y\n#$ -q q1.* \n#$ -N SBS_c\nRscript","/home/hulong/module/feature_selection/bin/cv.test.R",inputF1,inputF2,inputF3,inputF4,outputF) 37 | + write(str,file=qsubF); 38 | + system(paste("rm",outputF)); 39 | + system(paste("rm",logF)); 40 | + system(paste("qsub -V -o",logF,qsubF)); 41 | + } 42 | > my_check<-function(file_path,FileList){ 43 | + while(1==1){ 44 | + filelist_all=list.files(path=file_path,full.names=T); 45 | + tmp=is.element(FileList,filelist_all); 46 | + if(all(tmp)){break;} 47 | + Sys.sleep(30); 48 | + } 49 | + } 50 | > ######## calculate the base feature set performance. 51 | > ROLL=0; 52 | > featF=inputF4; 53 | > qsubF=paste("./qsub/qsub_del",ROLL,sep="."); 54 | > logF= paste("./log/log_del",ROLL,sep="."); 55 | > perfF=paste("./perf/perf_del",ROLL,sep="."); 56 | > my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 57 | rm: cannot remove `./perf/perf_del.0': No such file or directory 58 | rm: cannot remove `./log/log_del.0': No such file or directory 59 | Your job 2167013 ("SBS_c") has been submitted 60 | > file_path="./perf"; 61 | > my_check(file_path,perfF); 62 | > Sys.sleep(2); 63 | > tmp=as.vector(as.matrix(read.table(perfF,sep="\t",head=F))) 64 | > all.acc=tmp[1]; 65 | > print(paste("base feature set's performance is: ",all.acc)); 66 | [1] "base feature set's performance is: 0.924848942598187" 67 | > print("start deleting features............................"); 68 | [1] "start deleting features............................" 
69 | > best_performance= all.acc; 70 | > baseFeat = as.vector(as.matrix(read.table(inputF4,sep="\t",head=F))); 71 | > 72 | > 73 | > RANK = read.table(inputF5,head=F,sep="\t"); 74 | > RANK_groups = RANK[,2]; 75 | > RANK_group = as.vector(unique(RANK_groups)); 76 | > deleFeat = as.vector(RANK[,1]); 77 | > RANK_group_0 = RANK_group; 78 | > 79 | > for (ROLL in 1:length(RANK_group_0)){ 80 | + bad_rank = c(); 81 | + bad_feature = c(); 82 | + BP = c(); 83 | + FileList = c(); 84 | + for (j in sort(RANK_group,decreasing=T)){ 85 | + DeleFeat = deleFeat[which(RANK_groups==j)]; 86 | + remain_features_tmp = setdiff(baseFeat,DeleFeat); 87 | + print(paste("deleting this feature now: ",paste(DeleFeat,collapse=" "),sep="")); 88 | + featF = paste("./feat/baseF",ROLL,j,sep="."); 89 | + qsubF = paste("./qsub/qsub_del",ROLL,j,sep="."); 90 | + logF = paste("./log/log_del",ROLL,j,sep="."); 91 | + perfF = paste("./perf/perf_del",ROLL,j,sep="."); 92 | + write.table(remain_features_tmp,file=featF,quote=F,sep="\t",col.name=F,row.name=F); 93 | + my_qsub(inputF1,inputF2,inputF3,featF,perfF,qsubF,logF); 94 | + FileList = c(FileList,perfF); 95 | + } 96 | + my_check(file_path,FileList);Sys.sleep(2); 97 | + for (j in sort(RANK_group,decreasing=T)){ 98 | + perfF = paste("./perf/perf_del",ROLL,j,sep="."); 99 | + Perf = as.vector(as.matrix(read.table(perfF,head=F,sep="\t"))); 100 | + MYPERFORMANCE = c(Perf[1]); 101 | + names(MYPERFORMANCE) = j; 102 | + print(paste("performance summary of three trial is: ",MYPERFORMANCE,sep="")); 103 | + BP = c(BP,MYPERFORMANCE); 104 | + } 105 | + BPB = max(BP,na.rm=T); 106 | + if(best_performance<(BPB-delta)){ 107 | + best_performance = BPB; 108 | + bad_rank = sort(RANK_group,decreasing=T)[(which(BP==BPB)[1])]; 109 | + bad_feature = deleFeat[which(RANK_groups==bad_rank)]; 110 | + } 111 | + if (length(bad_feature)==0){ 112 | + print("no bad feature can be deleted, break here");break; 113 | + }else if(length(baseFeat)<=5){ 114 | + print("less than five 
features");break; 115 | + }else{ 116 | + print(paste("find the bad feature: ",paste(bad_feature,collapse=" "),sep="")); 117 | + print(paste("the best performance is now: ",best_performance,sep="")); 118 | + baseFeat = setdiff(baseFeat,bad_feature); 119 | + ID = which(!(is.element(RANK[,2],bad_rank))); 120 | + RANK = RANK[ID,]; 121 | + RANK_groups = RANK[,2]; 122 | + RANK_group = as.vector(unique(RANK_groups)); 123 | + deleFeat = as.vector(RANK[,1]); 124 | + print(paste("now the good features are: ",paste(baseFeat,collapse=" "),sep="")); 125 | + } 126 | + } 127 | [1] "deleting this feature now: infernal.lo infernal.up infernal.dn" 128 | rm: cannot remove `./perf/perf_del.1.7': No such file or directory 129 | rm: cannot remove `./log/log_del.1.7': No such file or directory 130 | Your job 2167014 ("SBS_c") has been submitted 131 | [1] "deleting this feature now: gc.lo gc.dn gc.up" 132 | rm: cannot remove `./perf/perf_del.1.6': No such file or directory 133 | rm: cannot remove `./log/log_del.1.6': No such file or directory 134 | Your job 2167015 ("SBS_c") has been submitted 135 | [1] "deleting this feature now: nonpolya.lo nonpolya.up nonpolya.dn" 136 | rm: cannot remove `./perf/perf_del.1.5': No such file or directory 137 | rm: cannot remove `./log/log_del.1.5': No such file or directory 138 | Your job 2167016 ("SBS_c") has been submitted 139 | [1] "deleting this feature now: total.lo total.dn total.up" 140 | rm: cannot remove `./perf/perf_del.1.4': No such file or directory 141 | rm: cannot remove `./log/log_del.1.4': No such file or directory 142 | Your job 2167017 ("SBS_c") has been submitted 143 | [1] "deleting this feature now: smallrna.lo smallrna.up smallrna.dn" 144 | rm: cannot remove `./perf/perf_del.1.3': No such file or directory 145 | rm: cannot remove `./log/log_del.1.3': No such file or directory 146 | Your job 2167018 ("SBS_c") has been submitted 147 | [1] "deleting this feature now: blastx.lo blastx.dn blastx.up" 148 | rm: cannot remove 
`./perf/perf_del.1.2': No such file or directory 149 | rm: cannot remove `./log/log_del.1.2': No such file or directory 150 | Your job 2167019 ("SBS_c") has been submitted 151 | [1] "deleting this feature now: polya.lo polya.up polya.dn" 152 | rm: cannot remove `./perf/perf_del.1.1': No such file or directory 153 | rm: cannot remove `./log/log_del.1.1': No such file or directory 154 | Your job 2167020 ("SBS_c") has been submitted 155 | [1] "performance summary of three trial is: 0.922016616314199" 156 | [1] "performance summary of three trial is: 0.925037764350453" 157 | [1] "performance summary of three trial is: 0.923904833836858" 158 | [1] "performance summary of three trial is: 0.91559667673716" 159 | [1] "performance summary of three trial is: 0.928058912386707" 160 | [1] "performance summary of three trial is: 0.881986404833837" 161 | [1] "performance summary of three trial is: 0.911442598187311" 162 | [1] "find the bad feature: smallrna.lo smallrna.up smallrna.dn" 163 | [1] "the best performance is now: 0.928058912386707" 164 | [1] "now the good features are: polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up nonpolya.lo nonpolya.up nonpolya.dn gc.lo gc.dn gc.up infernal.lo infernal.up infernal.dn" 165 | [1] "deleting this feature now: infernal.lo infernal.up infernal.dn" 166 | rm: cannot remove `./perf/perf_del.2.7': No such file or directory 167 | rm: cannot remove `./log/log_del.2.7': No such file or directory 168 | Your job 2167021 ("SBS_c") has been submitted 169 | [1] "deleting this feature now: gc.lo gc.dn gc.up" 170 | rm: cannot remove `./perf/perf_del.2.6': No such file or directory 171 | rm: cannot remove `./log/log_del.2.6': No such file or directory 172 | Your job 2167022 ("SBS_c") has been submitted 173 | [1] "deleting this feature now: nonpolya.lo nonpolya.up nonpolya.dn" 174 | rm: cannot remove `./perf/perf_del.2.5': No such file or directory 175 | rm: cannot remove `./log/log_del.2.5': No such file or 
directory 176 | Your job 2167023 ("SBS_c") has been submitted 177 | [1] "deleting this feature now: total.lo total.dn total.up" 178 | rm: cannot remove `./perf/perf_del.2.4': No such file or directory 179 | rm: cannot remove `./log/log_del.2.4': No such file or directory 180 | Your job 2167024 ("SBS_c") has been submitted 181 | [1] "deleting this feature now: blastx.lo blastx.dn blastx.up" 182 | rm: cannot remove `./perf/perf_del.2.2': No such file or directory 183 | rm: cannot remove `./log/log_del.2.2': No such file or directory 184 | Your job 2167025 ("SBS_c") has been submitted 185 | [1] "deleting this feature now: polya.lo polya.up polya.dn" 186 | rm: cannot remove `./perf/perf_del.2.1': No such file or directory 187 | rm: cannot remove `./log/log_del.2.1': No such file or directory 188 | Your job 2167026 ("SBS_c") has been submitted 189 | [1] "performance summary of three trial is: 0.920694864048338" 190 | [1] "performance summary of three trial is: 0.927492447129909" 191 | [1] "performance summary of three trial is: 0.928625377643505" 192 | [1] "performance summary of three trial is: 0.901812688821752" 193 | [1] "performance summary of three trial is: 0.879531722054381" 194 | [1] "performance summary of three trial is: 0.908043806646526" 195 | [1] "find the bad feature: nonpolya.lo nonpolya.up nonpolya.dn" 196 | [1] "the best performance is now: 0.928625377643505" 197 | [1] "now the good features are: polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up gc.lo gc.dn gc.up infernal.lo infernal.up infernal.dn" 198 | [1] "deleting this feature now: infernal.lo infernal.up infernal.dn" 199 | rm: cannot remove `./perf/perf_del.3.7': No such file or directory 200 | rm: cannot remove `./log/log_del.3.7': No such file or directory 201 | Your job 2167027 ("SBS_c") has been submitted 202 | [1] "deleting this feature now: gc.lo gc.dn gc.up" 203 | rm: cannot remove `./perf/perf_del.3.6': No such file or directory 204 | rm: cannot remove 
`./log/log_del.3.6': No such file or directory 205 | Your job 2167028 ("SBS_c") has been submitted 206 | [1] "deleting this feature now: total.lo total.dn total.up" 207 | rm: cannot remove `./perf/perf_del.3.4': No such file or directory 208 | rm: cannot remove `./log/log_del.3.4': No such file or directory 209 | Your job 2167029 ("SBS_c") has been submitted 210 | [1] "deleting this feature now: blastx.lo blastx.dn blastx.up" 211 | rm: cannot remove `./perf/perf_del.3.2': No such file or directory 212 | rm: cannot remove `./log/log_del.3.2': No such file or directory 213 | Your job 2167030 ("SBS_c") has been submitted 214 | [1] "deleting this feature now: polya.lo polya.up polya.dn" 215 | rm: cannot remove `./perf/perf_del.3.1': No such file or directory 216 | rm: cannot remove `./log/log_del.3.1': No such file or directory 217 | Your job 2167031 ("SBS_c") has been submitted 218 | [1] "performance summary of three trial is: 0.917673716012085" 219 | [1] "performance summary of three trial is: 0.925981873111782" 220 | [1] "performance summary of three trial is: 0.893882175226586" 221 | [1] "performance summary of three trial is: 0.869335347432024" 222 | [1] "performance summary of three trial is: 0.873300604229607" 223 | [1] "find the bad feature: gc.lo gc.dn gc.up" 224 | [1] "the best performance is now: 0.925981873111782" 225 | [1] "now the good features are: polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up infernal.lo infernal.up infernal.dn" 226 | [1] "deleting this feature now: infernal.lo infernal.up infernal.dn" 227 | rm: cannot remove `./perf/perf_del.4.7': No such file or directory 228 | rm: cannot remove `./log/log_del.4.7': No such file or directory 229 | Your job 2167032 ("SBS_c") has been submitted 230 | [1] "deleting this feature now: total.lo total.dn total.up" 231 | rm: cannot remove `./perf/perf_del.4.4': No such file or directory 232 | rm: cannot remove `./log/log_del.4.4': No such file or directory 233 | Your job 
2167033 ("SBS_c") has been submitted 234 | [1] "deleting this feature now: blastx.lo blastx.dn blastx.up" 235 | rm: cannot remove `./perf/perf_del.4.2': No such file or directory 236 | rm: cannot remove `./log/log_del.4.2': No such file or directory 237 | Your job 2167034 ("SBS_c") has been submitted 238 | [1] "deleting this feature now: polya.lo polya.up polya.dn" 239 | rm: cannot remove `./perf/perf_del.4.1': No such file or directory 240 | rm: cannot remove `./log/log_del.4.1': No such file or directory 241 | Your job 2167035 ("SBS_c") has been submitted 242 | [1] "performance summary of three trial is: 0.919373111782477" 243 | [1] "performance summary of three trial is: 0.889728096676737" 244 | [1] "performance summary of three trial is: 0.786631419939577" 245 | [1] "performance summary of three trial is: 0.867069486404834" 246 | [1] "find the bad feature: infernal.lo infernal.up infernal.dn" 247 | [1] "the best performance is now: 0.919373111782477" 248 | [1] "now the good features are: polya.lo polya.up polya.dn blastx.lo blastx.dn blastx.up total.lo total.dn total.up" 249 | [1] "deleting this feature now: total.lo total.dn total.up" 250 | rm: cannot remove `./perf/perf_del.5.4': No such file or directory 251 | rm: cannot remove `./log/log_del.5.4': No such file or directory 252 | Your job 2167036 ("SBS_c") has been submitted 253 | [1] "deleting this feature now: blastx.lo blastx.dn blastx.up" 254 | rm: cannot remove `./perf/perf_del.5.2': No such file or directory 255 | rm: cannot remove `./log/log_del.5.2': No such file or directory 256 | Your job 2167037 ("SBS_c") has been submitted 257 | [1] "deleting this feature now: polya.lo polya.up polya.dn" 258 | rm: cannot remove `./perf/perf_del.5.1': No such file or directory 259 | rm: cannot remove `./log/log_del.5.1': No such file or directory 260 | Your job 2167038 ("SBS_c") has been submitted 261 | [1] "performance summary of three trial is: 0.832892749244713" 262 | [1] "performance summary of three trial is: 
0.773413897280967" 263 | [1] "performance summary of three trial is: 0.849509063444109" 264 | [1] "no bad feature can be deleted, break here" 265 | > write.table(baseFeat,file=paste("./summary/bestFeat",sep="."),quote=F,col.name=F,row.name=F,sep="\t"); 266 | > write.table(best_performance,file=paste("./summary/bestPerf",sep="."),quote=F,col.name=F,row.name=F,sep="\t") 267 | > 268 | > rm(list=ls()); 269 | > 270 | > 271 | > proc.time() 272 | user system elapsed 273 | 0.879 0.623 403.903 274 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.0: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.3: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.5: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.6: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.1.7: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.5: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.6: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.2.7: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.3.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.3.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.3.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.3.6: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.3.7: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.4.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.4.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.4.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.4.7: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.5.1: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 
3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.5.2: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/log/log_del.5.4: -------------------------------------------------------------------------------- 1 | randomForest 4.6-2 2 | Type rfNews() to see new features/changes/bug fixes. 3 | Warning message: 4 | package 'randomForest' was built under R version 2.13.1 5 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.0: -------------------------------------------------------------------------------- 1 | 0.924848942598187 2 | 21 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.1: -------------------------------------------------------------------------------- 1 | 0.911442598187311 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.2: -------------------------------------------------------------------------------- 1 | 0.881986404833837 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.3: -------------------------------------------------------------------------------- 1 | 0.928058912386707 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.4: 
-------------------------------------------------------------------------------- 1 | 0.91559667673716 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.5: -------------------------------------------------------------------------------- 1 | 0.923904833836858 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.6: -------------------------------------------------------------------------------- 1 | 0.925037764350453 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.1.7: -------------------------------------------------------------------------------- 1 | 0.922016616314199 2 | 18 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.1: -------------------------------------------------------------------------------- 1 | 0.908043806646526 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.2: -------------------------------------------------------------------------------- 1 | 0.879531722054381 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.4: -------------------------------------------------------------------------------- 1 | 0.901812688821752 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.5: -------------------------------------------------------------------------------- 1 | 0.928625377643505 2 | 15 3 | -------------------------------------------------------------------------------- 
/feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.6: -------------------------------------------------------------------------------- 1 | 0.927492447129909 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.2.7: -------------------------------------------------------------------------------- 1 | 0.920694864048338 2 | 15 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.3.1: -------------------------------------------------------------------------------- 1 | 0.873300604229607 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.3.2: -------------------------------------------------------------------------------- 1 | 0.869335347432024 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.3.4: -------------------------------------------------------------------------------- 1 | 0.893882175226586 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.3.6: -------------------------------------------------------------------------------- 1 | 0.925981873111782 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.3.7: -------------------------------------------------------------------------------- 1 | 0.917673716012085 2 | 12 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.4.1: -------------------------------------------------------------------------------- 1 | 0.867069486404834 2 | 9 3 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.4.2: -------------------------------------------------------------------------------- 1 | 0.786631419939577 2 | 9 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.4.4: -------------------------------------------------------------------------------- 1 | 0.889728096676737 2 | 9 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.4.7: -------------------------------------------------------------------------------- 1 | 0.919373111782477 2 | 9 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.5.1: -------------------------------------------------------------------------------- 1 | 0.849509063444109 2 | 6 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.5.2: -------------------------------------------------------------------------------- 1 | 0.773413897280967 2 | 6 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/perf/perf_del.5.4: -------------------------------------------------------------------------------- 1 | 0.832892749244713 2 | 6 3 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.0: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 
/home/hulong/module/feature_selection/tmp/IMP_SBS/delete/feat/baseF.0 ./perf/perf_del.0 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.1 ./perf/perf_del.1.1 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.2 ./perf/perf_del.1.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.3: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.3 ./perf/perf_del.1.3 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.4: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R 
/home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.4 ./perf/perf_del.1.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.5: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.5 ./perf/perf_del.1.5 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.6: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.6 ./perf/perf_del.1.6 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.1.7: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.1.7 ./perf/perf_del.1.7 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | 
Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.1 ./perf/perf_del.2.1 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.2 ./perf/perf_del.2.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.4: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.4 ./perf/perf_del.2.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.5: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.5 ./perf/perf_del.2.5 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.6: -------------------------------------------------------------------------------- 1 | #$ -S 
/bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.6 ./perf/perf_del.2.6 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.2.7: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.2.7 ./perf/perf_del.2.7 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.3.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.3.1 ./perf/perf_del.3.1 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.3.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.3.2 ./perf/perf_del.3.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.3.4: 
-------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.3.4 ./perf/perf_del.3.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.3.6: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.3.6 ./perf/perf_del.3.6 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.3.7: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.3.7 ./perf/perf_del.3.7 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.4.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.4.1 ./perf/perf_del.4.1 7 | 
-------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.4.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.4.2 ./perf/perf_del.4.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.4.4: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.4.4 ./perf/perf_del.4.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.4.7: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.4.7 ./perf/perf_del.4.7 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.5.1: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx 
/home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.5.1 ./perf/perf_del.5.1 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.5.2: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.5.2 ./perf/perf_del.5.2 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/qsub/qsub_del.5.4: -------------------------------------------------------------------------------- 1 | #$ -S /bin/bash 2 | #$ -cwd 3 | #$ -j y 4 | #$ -q q1.* 5 | #$ -N SBS_c 6 | Rscript /home/hulong/module/feature_selection/bin/cv.test.R /home/hulong/module/feature_selection/input/example.train.mx /home/hulong/module/feature_selection/tmp/idx/cv.idx 5 ./feat/baseF.5.4 ./perf/perf_del.5.4 7 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/summary/bestFeat: -------------------------------------------------------------------------------- 1 | polya.lo 2 | polya.up 3 | polya.dn 4 | blastx.lo 5 | blastx.dn 6 | blastx.up 7 | total.lo 8 | total.dn 9 | total.up 10 | -------------------------------------------------------------------------------- /feature_selection/tmp/IMP_SBS/delete/summary/bestPerf: -------------------------------------------------------------------------------- 1 | 0.919373111782477 2 | -------------------------------------------------------------------------------- /plots/1.boxplots.R: -------------------------------------------------------------------------------- 1 | ####1.Box plots 2 | library(ggplot2) 3 | # Read the input files 4 | # “header=T” means that the 
data has a header row, and sep="\t" is used as the separator 5 | data <-read.table("box_plots_mtcars.txt",header=T,sep="\t") 6 | # c(...) creates a vector; here it selects three columns by name 7 | df <- data[, c("mpg", "cyl", "wt")] 8 | df$cyl <- as.factor(df$cyl) 9 | df7 <- read.table("box_plots_David_GO.txt",header=T,sep="\t") 10 | df7 <- head(df7, 10) # keep at most the first 10 rows; unlike df7[1:10,], no all-NA rows are added when the table is shorter 11 | df7$Term <- sapply(strsplit(as.vector(df7$Term),'~'),'[',2) # keep the text after the first '~' in each Term 12 | #1.1 Basic box plot 13 | pdf("1.1.Basic_boxplot.pdf", height = 3, width = 3) 14 | ggplot(df, aes(x=cyl, y=mpg)) + 15 | geom_boxplot(fill="gray")+ 16 | labs(title="Plot of mpg per cyl",x="Cyl", y = "Mpg")+ 17 | theme_classic() 18 | dev.off() 19 | #1.2 Change box fill color by groups 20 | pdf("1.2.Customized_boxplot.pdf", height = 3, width = 3) 21 | ggplot(df, aes(x=cyl, y=mpg, fill=cyl)) + 22 | geom_boxplot()+ 23 | labs(title="Plot of mpg per cyl",x="Cyl", y = "Mpg") + 24 | scale_fill_brewer(palette="Blues") + 25 | theme_bw() 26 | dev.off() 27 | #1.3 Horizontal bar plot of GO results (fold enrichment per term, filled by -log10(PValue)) 28 | pdf("1.3.Customized_boxplot2.pdf", height = 5, width = 10) 29 | ggplot(df7) + geom_bar(stat="identity", width=0.6, aes(Term,Fold.Enrichment, fill=-1*log10(PValue)),colour="#1d2a33") + 30 | coord_flip() + 31 | scale_fill_gradient(low="#e8f3f7",high="#236eba")+ 32 | labs(fill=expression(-log10_Pvalue), x="GO Terms",y="foldEnrichment", title="GO Biological Process") + 33 | theme_bw() + 34 | theme(plot.title = element_text(hjust = 0.5)) + 35 | theme(axis.title.x =element_text(size=16), 36 | axis.title.y=element_text(size=14)) + 37 | theme(axis.text.y = element_text(size = 10,face="bold"), 38 | axis.text.x = element_text(size = 12,face="bold")) 39 | dev.off() 40 | #1.4 Same bar plot with a yellow-to-red fill gradient 41 | pdf("1.4.Customized_boxplot3.pdf", height = 5, width = 10) 42 | ggplot(df7) + geom_bar(stat="identity", width=0.6, aes(Term,Fold.Enrichment, fill=-1*log10(PValue)),colour="#1d2a33") + 43 | coord_flip() + 44 | scale_fill_gradient(low="#feff2b",high="#fe0100")+ 45 | labs(fill=expression(-log10_Pvalue), x="GO
Terms",y="foldEnrichment", title="GO Biological Process") + 46 | theme_bw() + 47 | theme(plot.title = element_text(hjust = 0.5)) + 48 | theme(axis.title.x =element_text(size=16), 49 | axis.title.y=element_text(size=14)) + 50 | theme(axis.text.y = element_text(size = 10,face="bold"), 51 | axis.text.x = element_text(size = 12,face="bold")) 52 | dev.off() 53 | -------------------------------------------------------------------------------- /plots/10.ballonplots.R: -------------------------------------------------------------------------------- 1 | ####10.Balloon plots (GO enrichment results drawn as sized dots) 2 | df6 <- read.table("ballon_plots_GO.txt", header=T, sep="\t") 3 | library(ggplot2) 4 | #10.1.Basic balloon plot: x is fold enrichment, dot size encodes -log10(Pvalue) 5 | pdf("10.1.Basic_ballonplot.pdf", height=6, width=8) 6 | ggplot(df6, aes(x=Fold.enrichment, y=Biological.process)) + 7 | geom_point(aes(size = X.log10.Pvalue.)) + 8 | scale_x_continuous(limits=c(0,7),breaks=0:7) + 9 | scale_size(breaks=c(1,5,10,15,20,25)) + 10 | theme_light() + 11 | theme(panel.border=element_rect(fill='transparent', color='black', size=1), 12 | plot.title = element_text(color="black", size=14, hjust=0.5, face="bold", lineheight=1), 13 | axis.title.x = element_text(color="black", size=12, face="bold"), 14 | axis.title.y = element_text(color="black", size=12, vjust=1.5, face="bold"), 15 | axis.text.x = element_text(size=12,color="black",face="bold"), 16 | axis.text.y = element_text(size=12,color="black",face="bold"), 17 | legend.text = element_text(color="black", size=10, hjust=-2), 18 | legend.position="bottom") + 19 | labs(x="Fold Enrichment",y="Biological Process",size="-log10(Pvalue)", title="GO Enrichment",face="bold") 20 | dev.off() 21 | #10.2.Change the dot colors: all dots share one x position (column col), color encodes -log10(Pvalue), size encodes fold enrichment 22 | pdf("10.2.Customized_ballonplot.pdf") 23 | ggplot(df6, aes(x=col, y=Biological.process,color=X.log10.Pvalue.)) + 24 | geom_point(aes(size = Fold.enrichment)) + 25 | scale_x_discrete(limits=c("1")) + 26 | scale_size(breaks=c(1,2,4,6)) + 27 | scale_color_gradient(low="#fcbba1", high="#a50f15") + 28 |
theme_classic() + 29 | theme(panel.border=element_rect(fill='transparent', color='black', size=1), 30 | plot.title = element_text(color="black", size=14, hjust=0.5, face="bold", lineheight=1), 31 | axis.title.x = element_blank(), 32 | axis.title.y = element_text(color="black", size=12, face="bold"), 33 | axis.text.x = element_blank(), 34 | axis.ticks = element_blank(), 35 | axis.text.y = element_text(size=12,color="black",face="bold"), 36 | legend.text = element_text(color="black", size=10)) + 37 | labs(y="Biological Process",size="Fold Enrichment", color="-Log10(Pvalue)",title="GO Enrichment",face="bold") 38 | dev.off() 39 | -------------------------------------------------------------------------------- /plots/11.vennpieplots.R: -------------------------------------------------------------------------------- 1 | ####11.Vennpie plots 2 | if (!requireNamespace("plotrix", quietly = TRUE)) install.packages("plotrix") # install plotrix only when it is missing, instead of on every run 3 | #The vennpie plot is the combination of a venn diagram and a pie chart. 4 | #11.1.data input (number of reads mapped to each category) 5 | total=100 6 | rRNA=5 7 | mtRNA=7 8 | intergenic=48 9 | introns=12 10 | exons=30 11 | upstream=3 12 | downstream=6 13 | not_near_genes=40 14 | # derived counts 15 | rest=total-rRNA-mtRNA 16 | genic=rest-intergenic 17 | introns_and_exons=introns+exons-genic # not referenced again in this script 18 | #11.2 draw the plot 19 | # plotrix provides floating.pie(), used for every ring except the blank centre 20 | library(plotrix) 21 | pdf("11.Customized_vennpieplot.pdf", height=8, width=6) 22 | # parameter for pie chart 23 | iniR=0.2 # initial radius 24 | colors=list(NO='white',total='black',mtRNA='#e5f5e0',rRNA='#a1d99b', 25 | genic='#3182bd',intergenic='#fec44f',introns='#fc9272', 26 | exons='#9ecae1',upstream='#ffeda0',downstream='#fee0d2', 27 | not_near_genes='#d95f0e') 28 | 29 | # from outer circle to inner circle 30 | #0 circle: blank 31 | pie(1, radius=iniR, init.angle=90, col=c('white'), border = NA, labels='') 32 | #4 circle: show genic:exons and intergenic:downstream 33 | floating.pie(0,0, 34 | c(exons, genic-exons+not_near_genes, downstream,
mtRNA+rRNA+intergenic-not_near_genes-downstream), 35 | radius=5*iniR, 36 | startpos=pi/2, 37 | col=as.character(colors[c('exons','NO','downstream','NO')]), 38 | border=NA) 39 | #3 circle: show genic:introns and intergenic:not_near_genes | upstream 40 | floating.pie(0,0, 41 | c(genic-introns, introns, not_near_genes, intergenic-upstream-not_near_genes, upstream, mtRNA+rRNA), 42 | radius=4*iniR, 43 | startpos=pi/2, 44 | col=as.character(colors[c('NO','introns','not_near_genes','NO','upstream','NO')]), 45 | border=NA) 46 | #2 circle: divide the rest into genic and intergenic 47 | floating.pie(0,0, 48 | c(genic, intergenic, mtRNA+rRNA), 49 | radius=3*iniR, 50 | startpos=pi/2, 51 | col=as.character(colors[c('genic','intergenic','NO')]), 52 | border=NA) 53 | #1 circle: for rRNA+mtRNA+rest 54 | floating.pie(0,0, 55 | c(rest, rRNA,mtRNA), 56 | radius=2*iniR, 57 | startpos=pi/2, 58 | col=as.character(colors[c('NO','rRNA','mtRNA')]), 59 | border = NA) 60 | legend(0, 6*iniR, gsub("_"," ",names(colors)[-1]), 61 | col=as.character(colors[-1]), 62 | pch=19, bty='n', ncol=2) # [-1] drops the 'NO' placeholder so only real categories are listed 63 | dev.off() 64 | ## Alternative legend (disabled): one column with read counts and percentages 65 | #names=gsub("_"," ",names(colors)[-1]) 66 | #values = sapply(names(colors)[-1], get) 67 | #percent=format(100*values/total, digits=2, trim=T) 68 | #values = format(values, big.mark=",", scientific=FALSE, trim=T) 69 | #cl=as.character(colors[-1]) 70 | #pchs=rep(19, length(cl)); pchs[1]=1; 71 | #legend(0, 5*iniR, paste(names," (",values,", ", percent,"%)", sep=""), 72 | # col=cl, pch=pchs,bty='n', ncol=1, cex=0.6) 73 | 74 | -------------------------------------------------------------------------------- /plots/2.violinplots.R: -------------------------------------------------------------------------------- 1 | ####2.Violin plots 2 | data <-read.table("box_plots_mtcars.txt",header=T,sep="\t") 3 | df <- data[, c("mpg", "cyl", "wt")] 4 | df$cyl <- as.factor(df$cyl) # treat cyl as a categorical variable 5 | library(ggplot2) 6 | #2.1 Basic violin plot 7 |
pdf("2.1.Basic_violinplot.pdf", height = 3, width = 3) 8 | ggplot(df, aes(x=cyl, y=mpg)) + 9 | geom_violin(trim=FALSE) + 10 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") 11 | dev.off() 12 | #2.2 Add summary statistics on a violin plot 13 | #2.2.1 Add median and quartile (first variant: mark each group's median with a red diamond) 14 | pdf("2.2.1.Add_median&quartile1_violinplot.pdf", height = 3, width = 3) 15 | ggplot(df, aes(x=cyl, y=mpg)) + 16 | geom_violin(trim=FALSE) + 17 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") + 18 | stat_summary(fun.y=median, geom="point", shape=23, size=2, color="red") 19 | dev.off() 20 | #or (second variant: overlay a narrow box plot showing median and quartiles) 21 | pdf("2.2.1.Add_median&quartile2_violinplot.pdf", height = 3, width = 3) 22 | ggplot(df, aes(x=cyl, y=mpg)) + 23 | geom_violin(trim=FALSE) + 24 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") + 25 | geom_boxplot(width=0.1) 26 | dev.off() 27 | #2.2.2 Add mean and standard deviation (mean_sdl wraps Hmisc::smean.sdl, so the Hmisc package must be installed; mult = 1 gives mean +/- 1 SD) 28 | pdf("2.2.2.Add_mean&sd_violinplot1.pdf", height = 3, width = 3) 29 | ggplot(df, aes(x=cyl, y=mpg)) + 30 | geom_violin(trim=FALSE) + 31 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") + 32 | stat_summary(fun.data="mean_sdl", fun.args = list(mult = 1), geom="crossbar", width=0.1 ) 33 | dev.off() 34 | #or (same statistic drawn as a red point range) 35 | pdf("2.2.2.Add_mean&sd_violinplot2.pdf", height = 3, width = 3) 36 | ggplot(df, aes(x=cyl, y=mpg)) + 37 | geom_violin(trim=FALSE) + 38 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") + 39 | stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1), geom="pointrange", color="red") 40 | dev.off() 41 | #2.3 Change violin plot fill colors 42 | pdf("2.3.Customized_violinplot.pdf", height = 3, width = 3) 43 | ggplot(df, aes(x=cyl, y=mpg, fill=cyl)) + 44 | geom_violin(trim=FALSE) + 45 | geom_boxplot(width=0.1, fill="white") + 46 | labs(title="Plot of mpg per cyl", x="Cyl", y = "Mpg") + 47 | scale_fill_brewer(palette="Blues") + 48 | theme_classic() 49 | dev.off() -------------------------------------------------------------------------------- /plots/3.histogramplots.R:
-------------------------------------------------------------------------------- 1 | ####3.Histogram plots 2 | df2 <-read.table("histogram_plots.txt",header=T,sep="\t") 3 | library(ggplot2) 4 | library(plyr) 5 | #3.1 Basic histogram plot 6 | pdf("3.1.Basic_histogramplot.pdf", height = 3, width = 3) 7 | ggplot(df2, aes(x=weight)) + geom_histogram(binwidth=1) 8 | dev.off() 9 | #3.2 Add mean line on a histogram plot 10 | pdf("3.2.Add_meanline_histogramplot.pdf", height = 3, width = 3) 11 | ggplot(df2, aes(x=weight)) + 12 | geom_histogram(binwidth=1, color="black", fill="white") + 13 | geom_vline(aes(xintercept=mean(weight)),color="black", linetype="dashed", size=0.5) 14 | dev.off() 15 | #3.3 Change histogram outline colors by group (sex); bars keep a white fill 16 | #Use the plyr package to calculate the mean weight of each group (sex): 17 | mu <- ddply(df2, "sex", summarise, grp.mean=mean(weight)) 18 | head(mu) 19 | #draw the plot, with one dashed mean line per group 20 | pdf("3.3.Customized_histogramplot.pdf", height = 3, width = 3) 21 | ggplot(df2, aes(x=weight, color=sex)) + 22 | geom_histogram(binwidth=1, fill="white", position="dodge")+ 23 | geom_vline(data=mu, aes(xintercept=grp.mean, color=sex), linetype="dashed") + 24 | scale_color_brewer(palette="Paired") + 25 | theme_classic()+ 26 | theme(legend.position="top") 27 | dev.off() 28 | -------------------------------------------------------------------------------- /plots/4.densityplots.R: -------------------------------------------------------------------------------- 1 | ####4.Density plots 2 | df2 <-read.table("histogram_plots.txt",header=T,sep="\t") 3 | library(ggplot2) 4 | library(plyr) 5 | #4.1 Basic density plot 6 | pdf("4.1.Basic_densityplot.pdf", height = 3, width = 3) 7 | ggplot(df2, aes(x=weight)) + 8 | geom_density() 9 | dev.off() 10 | #4.2 Add mean line on a density plot 11 | pdf("4.2.Add_meanline_densityplot.pdf", height = 3, width = 3) 12 | ggplot(df2, aes(x=weight)) + 13 | geom_density() + 14 | geom_vline(aes(xintercept=mean(weight)), color="black", linetype="dashed",
size=0.5) 15 | dev.off() 16 | #4.3 Change density plot colors by group (sex) 17 | #Use the plyr package to calculate the mean weight of each group (sex): 18 | mu <- ddply(df2, "sex", summarise, grp.mean=mean(weight)) 19 | head(mu) 20 | #draw the plots (note: the output files of section 4.3 are named "...histogramplot...") 21 | #4.3.1 Change fill colors 22 | pdf("4.3.1.Customized_histogramplot1.pdf", height = 3, width = 3) 23 | ggplot(df2, aes(x=weight, fill=sex)) + 24 | geom_density(alpha=0.7)+ 25 | geom_vline(data=mu, aes(xintercept=grp.mean, color=sex), linetype="dashed")+ 26 | labs(title="Weight density curve",x="Weight(kg)", y = "Density") + 27 | scale_color_brewer(palette="Paired") + 28 | scale_fill_brewer(palette="Blues") + 29 | theme_classic() 30 | dev.off() 31 | #4.3.2 Change line colors 32 | pdf("4.3.2.Customized_histogramplot2.pdf", height = 3, width = 3) 33 | ggplot(df2, aes(x=weight, color=sex)) + 34 | geom_density()+ 35 | geom_vline(data=mu, aes(xintercept=grp.mean, color=sex), linetype="dashed")+ 36 | labs(title="Weight density curve",x="Weight(kg)", y = "Density") + 37 | scale_color_brewer(palette="Paired") + 38 | theme_classic() 39 | dev.off() 40 | #4.3.3 Combine histogram and density plots (the histogram is drawn on the density scale via y=..density..) 41 | pdf("4.3.3.Customized_histogramplot3.pdf", height = 3, width = 3) 42 | ggplot(df2, aes(x=weight, color=sex, fill=sex)) + 43 | geom_histogram(binwidth=1, aes(y=..density..), alpha=0.5, position="identity") + 44 | geom_density(alpha=.2) + 45 | labs(title="Weight density curve",x="Weight(kg)", y = "Density") + 46 | scale_color_brewer(palette="Paired") + 47 | scale_fill_brewer(palette="Blues") + 48 | theme_classic() 49 | dev.off() 50 | -------------------------------------------------------------------------------- /plots/5.dotplots.R: -------------------------------------------------------------------------------- 1 | ####5.Dot plots 2 | data <-read.table("box_plots_mtcars.txt",header=T,sep="\t") 3 | df <- data[, c("mpg", "cyl", "wt")] 4 | df$cyl <- as.factor(df$cyl) 5 | library(ggplot2) 6 | #5.1 Basic dot plots 7 |
pdf("5.1.Basic_dotplot.pdf", height = 3, width = 3) 8 | ggplot(df, aes(x=cyl, y=mpg)) + 9 | geom_dotplot(binaxis='y', stackdir='center', binwidth=1) 10 | dev.off() 11 | #5.2 Add mean and standard deviation (crossbar variant; mult=1 is the SD multiplier passed to mean_sdl) 12 | pdf("5.2.Add_mean&sd1_dotplot.pdf", height = 3, width = 3) 13 | ggplot(df, aes(x=cyl, y=mpg)) + 14 | geom_dotplot(binaxis='y', stackdir='center', binwidth=1) + 15 | stat_summary(fun.data="mean_sdl", fun.args = list(mult=1), geom="crossbar", width=0.5) 16 | dev.off() 17 | #or (same statistic drawn as a red point range) 18 | pdf("5.2.Add_mean&sd2_dotplot.pdf", height = 3, width = 3) 19 | ggplot(df, aes(x=cyl, y=mpg)) + 20 | geom_dotplot(binaxis='y', stackdir='center', binwidth=1) + 21 | stat_summary(fun.data="mean_sdl", fun.args = list(mult=1), geom="pointrange", color="red") 22 | dev.off() 23 | #5.3 Change dot fill colors by group (cyl) 24 | pdf("5.3.Customized_dotplot.pdf", height = 3, width = 3) 25 | ggplot(df, aes(x=cyl, y=mpg, fill=cyl, shape=cyl)) + 26 | geom_dotplot(binaxis='y', stackdir='center', binwidth=1, dotsize=0.8) + 27 | labs(title="Plot of mpg per cyl",x="Cyl", y = "Mpg") + 28 | #stat_summary(fun.data="mean_sdl", fun.args = list(mult=1), geom="crossbar", width=0.5) + 29 | scale_fill_brewer(palette="Blues") + 30 | #scale_color_brewer(palette="Blues") + 31 | theme_classic() 32 | dev.off() 33 | #5.4 Change dot colors, shapes and align types (jittered points instead of stacked dots) 34 | pdf("5.4.Customized_dotplot.pdf", height = 3, width = 3) 35 | ggplot(df, aes(x=cyl, y=mpg, color=cyl, shape=cyl)) + 36 | geom_jitter(position=position_jitter(0.1), cex=2)+ 37 | labs(title="Plot of mpg per cyl",x="Cyl", y = "Mpg") + 38 | scale_color_brewer(palette="Blues") + 39 | theme_classic() 40 | dev.off() 41 | -------------------------------------------------------------------------------- /plots/6.scatterplots.R: -------------------------------------------------------------------------------- 1 | ####6.Scatter plots 2 | data <-read.table("box_plots_mtcars.txt",header=T,sep="\t") 3 | df <- data[, c("mpg", "cyl", "wt")] 4 | df$cyl <- as.factor(df$cyl) 5 |
library(ggplot2) 6 | #6.1 Basic scatter plots 7 | pdf("6.1.Basic_scatterplot.pdf", height = 3, width = 3) 8 | ggplot(df, aes(x=wt, y=mpg)) + 9 | geom_point(size=1.5) 10 | dev.off() 11 | #6.2 Add regression lines and change the point colors, shapes and sizes 12 | pdf("6.2.Customized_scatterplot.pdf", height = 3, width = 3) 13 | ggplot(df, aes(x=wt, y=mpg, color=cyl, shape=cyl)) + 14 | geom_point(size=1.5) + 15 | geom_smooth(method=lm, se=FALSE, fullrange=TRUE) + 16 | theme_classic() 17 | dev.off() 18 | -------------------------------------------------------------------------------- /plots/7.volcanoplots.R: -------------------------------------------------------------------------------- 1 | ####7.Volcano plots 2 | df3 <- read.table("volcano_plots.txt", header=TRUE) 3 | library(ggplot2) 4 | # Classify each gene: 'Up'/'Down' when padj < 0.05 and |log2FoldChange| >= 1 (direction taken from the sign), otherwise 'Not' 5 | df3$threshold <- as.factor(ifelse(df3$padj < 0.05 & abs(df3$log2FoldChange) >=1,ifelse(df3$log2FoldChange > 0 ,'Up','Down'),'Not')) 6 | pdf("7.Customized_volcanoplot.pdf", height = 5, width = 5) 7 | ggplot(data=df3, aes(x=log2FoldChange, y =-log10(padj), color=threshold,fill=threshold)) + 8 | scale_color_manual(values=c(Down="blue", Not="grey", Up="red"))+ 9 | geom_point(size=1) + 10 | xlim(c(-3, 3)) + 11 | theme_bw(base_size = 12, base_family = "Times") + 12 | geom_vline(xintercept=c(-1,1),lty=4,col="grey",lwd=0.6)+ 13 | geom_hline(yintercept = -log10(0.05),lty=4,col="grey",lwd=0.6)+ 14 | theme(legend.position="right", 15 | panel.grid=element_blank(), 16 | legend.title = element_blank(), 17 | legend.text= element_text(face="bold", color="black",family = "Times", size=8), 18 | plot.title = element_text(hjust = 0.5), 19 | axis.text.x = element_text(face="bold", color="black", size=12), 20 | axis.text.y = element_text(face="bold", color="black", size=12), 21 | axis.title.x = element_text(face="bold", color="black", size=12), 22 | axis.title.y = element_text(face="bold",color="black", size=12))+ 23 | labs(x="log2FoldChange",y="-log10 (adjusted p-value)",title="Volcano plot of DEG",
face="bold") 24 | dev.off() -------------------------------------------------------------------------------- /plots/8.manhattanplots.R: -------------------------------------------------------------------------------- 1 | ####8.Manhattan plots 2 | df4 <- read.table("manhattan_plots_gwasResults.txt",header=T,sep="\t") 3 | library(qqman) 4 | # Draw the Manhattan plot with manhattan() from qqman; chromosomes are labelled 1 to 22 5 | pdf("8.Customized_manhattanplot.pdf", height = 5, width = 10) 6 | manhattan(df4, main = "GWAS results", ylim = c(0, 8), 7 | cex = 0.5, cex.axis=0.8, col=c("dodgerblue4","deepskyblue"), 8 | #suggestiveline = F, genomewideline = F, #remove the suggestive and genome-wide significance lines 9 | chrlabs = as.character(c(1:22))) 10 | dev.off() -------------------------------------------------------------------------------- /plots/9.heatmaps.R: -------------------------------------------------------------------------------- 1 | ####9.Heatmaps 2 | df5 <-read.table("heatmaps.txt",header=T,sep="\t") 3 | # Convert the data into matrix format: every column except the first becomes a numeric matrix 4 | # nrow(df5) and ncol(df5) return the number of rows and columns of data frame df5 respectively.
5 | dm <- data.matrix(df5[1:nrow(df5),2:ncol(df5)]) 6 | # Use the first column of df5 as the row names 7 | row.names(dm) <- df5[,1] 8 | 9 | library(gplots) 10 | library(pheatmap) 11 | library(scales) 12 | library(reshape2) 13 | library(RColorBrewer) 14 | library(ggplot2) #required by section 9.3 below, which calls ggplot() and geom_tile() 15 | #9.1.Draw the heatmap with the gplots package, heatmap.2 function 16 | pdf("9.1.Customized_heatmap.pdf") 17 | #to draw high expression value in red, we use colorRampPalette instead of redblue in heatmap.2 18 | #colorRampPalette is a function in the grDevices package (part of base R); it returns a function that generates n interpolated colors 19 | cr <- colorRampPalette(c("blue","white","red")) 20 | heatmap.2(dm, 21 | scale="row", #scale the rows, scale each gene's expression value 22 | key=T, keysize=1.1, 23 | cexCol=0.9,cexRow=0.8, 24 | col=cr(1000), 25 | ColSideColors=c(rep(c("blue","red"),5)), 26 | density.info="none",trace="none", 27 | #dendrogram='none', #if you want to remove dendrogram 28 | Colv = T,Rowv = T #clusters by both row and col 29 | ) 30 | dev.off() 31 | #9.2.Draw the heatmap with the pheatmap package, pheatmap function 32 | #add column and row annotations (the hard-coded sizes assume dm has 10 columns and 20 rows named Gene1..Gene20 - verify against heatmaps.txt) 33 | annotation_col = data.frame(CellType = factor(rep(c("Control", "Tumor"), 5)), Time = 1:5) 34 | rownames(annotation_col) = colnames(dm) 35 | annotation_row = data.frame(GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6)))) 36 | rownames(annotation_row) = paste("Gene", 1:20, sep = "") 37 | #set colors of each group 38 | ann_colors = list(Time = c("white", "springgreen4"), 39 | CellType = c(Control = "#7FBC41", Tumor = "#DE77AE"), 40 | GeneClass = c(Path1 = "#807DBA", Path2 = "#9E9AC8", Path3 = "#BCBDDC")) 41 | pdf("9.2.Customized_heatmap.pdf") 42 | pheatmap(dm, 43 | cutree_col = 2, cutree_row = 3, #break up the heatmap by clusters you define 44 | cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, #by default, pheatmap clusters by both row and col 45 | annotation_col = annotation_col, annotation_row = annotation_row,annotation_colors = ann_colors) 46 | dev.off() 47 | #9.3.Draw the heatmap with the ggplot2 package 48 |
#9.3.1.cluster by row and col
#cluster and re-order rows
rowclust = hclust(dist(dm))
reordered = dm[rowclust$order,]
#cluster and re-order columns
colclust = hclust(dist(t(dm)))
dm.reordered = reordered[, colclust$order]
#9.3.2.scale each row value in [0,1]
dm.reordered=apply(dm.reordered,1,rescale) #rescale is a function in the scales package
dm.reordered=t(dm.reordered) #apply() over rows returns the rows as columns, so transpose back
#9.3.3.save col and row names before changing the matrix format
col_name=colnames(dm.reordered)
row_name=rownames(dm.reordered)
#9.3.4.change data format for geom_tile
#numeric dimnames give melt() numeric Var1/Var2, which the continuous x/y scales below need
colnames(dm.reordered)=1:ncol(dm.reordered)
rownames(dm.reordered)=1:nrow(dm.reordered)
dm.reordered=melt(dm.reordered) #melt is a function in the reshape2 package
head(dm.reordered)
#9.3.5.draw the heatmap
#ggplot2 is not among the packages loaded in the visible part of this script, so load it here
#(harmless if it is already attached)
library(ggplot2)
pdf("9.3.Customized_heatmap.pdf")
#a ggplot object is only drawn when printed; print it explicitly so the PDF is not
#left empty when the script is run through source()
print(
  ggplot(dm.reordered, aes(Var2, Var1)) +
    geom_tile(aes(fill = value), color = "white") +
    scale_fill_gradient(low = "white", high = "steelblue") +
    theme_grey(base_size = 10) +
    labs(x = "", y = "") +
    scale_x_continuous(expand = c(0, 0),labels=col_name,breaks=1:length(col_name)) +
    scale_y_continuous(expand = c(0, 0),labels=row_name,breaks=1:length(row_name))
)
dev.off()

-------------------------------------------------------------------------------- /plots/README.md: -------------------------------------------------------------------------------- 1 | # Scripts of plots 2 | -------------------------------------------------------------------------------- /plots/ballon_plots_GO.txt: -------------------------------------------------------------------------------- 1 | Biological process Fold enrichment -log10(Pvalue) col 2 | Small molecule metabolic process 1 16 1 3 | Single-organism catabolic process 1.5 12 1 4 | Oxoacid metabolic process 2 23 1 5 | Small molecule biosynthetic process 2.5 6 1 6 | Carboxylic acid metabolic process 2.7 24 1 7 | 
Organic acid metabolic process 2.7 25 1 8 | Monocarboxylic acid metabolic process 3 20 1 9 | Organonitrogen compound catabolic process 3.2 8 1 10 | Organic acid biosynthetic process 3.4 5 1 11 | Steroid metabolic process 3.5 14 1 12 | Small molecule catabolic process 3.8 18 1 13 | Alpha-amino acid metabolic process 4 10 1 14 | Carboxylic acid catabolic process 4.5 16 1 15 | Organic acid catabolic process 5.5 19 1 16 | Triglyceride metabolic process 6.75 15 1 17 | -------------------------------------------------------------------------------- /plots/box_plots_mtcars.txt: -------------------------------------------------------------------------------- 1 | "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb" 2 | "Mazda RX4" 21 6 160 110 3.9 2.62 16.46 0 1 4 4 3 | "Mazda RX4 Wag" 21 6 160 110 3.9 2.875 17.02 0 1 4 4 4 | "Datsun 710" 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1 5 | "Hornet 4 Drive" 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 6 | "Hornet Sportabout" 18.7 8 360 175 3.15 3.44 17.02 0 0 3 2 7 | "Valiant" 18.1 6 225 105 2.76 3.46 20.22 1 0 3 1 8 | "Duster 360" 14.3 8 360 245 3.21 3.57 15.84 0 0 3 4 9 | "Merc 240D" 24.4 4 146.7 62 3.69 3.19 20 1 0 4 2 10 | "Merc 230" 22.8 4 140.8 95 3.92 3.15 22.9 1 0 4 2 11 | "Merc 280" 19.2 6 167.6 123 3.92 3.44 18.3 1 0 4 4 12 | "Merc 280C" 17.8 6 167.6 123 3.92 3.44 18.9 1 0 4 4 13 | "Merc 450SE" 16.4 8 275.8 180 3.07 4.07 17.4 0 0 3 3 14 | "Merc 450SL" 17.3 8 275.8 180 3.07 3.73 17.6 0 0 3 3 15 | "Merc 450SLC" 15.2 8 275.8 180 3.07 3.78 18 0 0 3 3 16 | "Cadillac Fleetwood" 10.4 8 472 205 2.93 5.25 17.98 0 0 3 4 17 | "Lincoln Continental" 10.4 8 460 215 3 5.424 17.82 0 0 3 4 18 | "Chrysler Imperial" 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 19 | "Fiat 128" 32.4 4 78.7 66 4.08 2.2 19.47 1 1 4 1 20 | "Honda Civic" 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 21 | "Toyota Corolla" 33.9 4 71.1 65 4.22 1.835 19.9 1 1 4 1 22 | "Toyota Corona" 21.5 4 120.1 97 3.7 2.465 20.01 1 0 3 1 23 | "Dodge Challenger" 15.5 8 318 150 2.76 3.52 
16.87 0 0 3 2 24 | "AMC Javelin" 15.2 8 304 150 3.15 3.435 17.3 0 0 3 2 25 | "Camaro Z28" 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4 26 | "Pontiac Firebird" 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 27 | "Fiat X1-9" 27.3 4 79 66 4.08 1.935 18.9 1 1 4 1 28 | "Porsche 914-2" 26 4 120.3 91 4.43 2.14 16.7 0 1 5 2 29 | "Lotus Europa" 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2 30 | "Ford Pantera L" 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 31 | "Ferrari Dino" 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 32 | "Maserati Bora" 15 8 301 335 3.54 3.57 14.6 0 1 5 8 33 | "Volvo 142E" 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 34 | -------------------------------------------------------------------------------- /plots/heatmaps.txt: -------------------------------------------------------------------------------- 1 | "Gene_name" "Control1" "Tumor2" "Control3" "Tumor4" "Control5" "Tumor1" "Control2" "Tumor3" "Control4" "Tumor5" 2 | "Gene1" 3.64605770284245 -0.989902484025789 2.21040409145792 -0.206304980860121 2.85974410397997 1.33042841525814 2.69037625197628 0.613594315459147 2.47041319997306 0.515824550372639 3 | "Gene2" 4.27117208852717 -1.16217764717461 2.73411886218634 -2.47821731114131 3.75201305790091 0.0255638999964919 4.47179536686732 1.6516242241333 2.7355075766796 -0.583778396977907 4 | "Gene3" 3.53044799733284 1.11451101193961 1.63548452888795 -0.424121515422879 3.70142692278906 1.22633124237489 3.58878653397491 -0.634965559964648 1.99984447367284 0.14173487588181 5 | "Gene4" 3.0611222403098 -1.18791027226494 4.33122898924391 0.873331409488203 2.34935152957398 0.482547916074253 1.85443253138361 -1.22376835000928 1.15437690988942 -0.9301261494233 6 | "Gene5" 1.95681668546221 0.254310418019742 1.98443800371317 1.27138446830405 1.68591661042142 1.45547391118914 2.44583005685769 0.331690926453787 2.71516332682116 0.186639970826385 7 | "Gene6" 2.00091941443115 0.0601597235713407 4.48090119958985 0.978068229260669 3.06347532108749 -0.422299375776831 3.58536601640637 1.06889999980548 2.56342211741441 
1.34658304495794 8 | "Gene7" 2.57026682270886 0.281997080771017 2.88514936193807 -0.833096351432355 4.02852364951572 -1.0544813243986 3.18474229290156 -0.0553177643295568 2.29431468921884 -1.11600545536687 9 | "Gene8" 2.04286722330107 -0.143951079770308 3.52312844334888 1.15449085459854 1.68144401610709 1.30714831480423 3.83283337706375 -0.560312811857328 3.09421908611858 -1.32255759135237 10 | "Gene9" 3.20640154898874 -0.492425099239335 2.42976202463454 -0.980646283007113 3.73230350466544 -0.933593245436004 3.35123443221584 -1.04893278222907 3.20880605355573 0.418536028501479 11 | "Gene10" 3.01324929722122 0.0831220884552451 2.71621230935996 0.264136630733601 3.00315539796384 -1.48891326149852 1.89371778095603 0.241823626446297 2.75874772290355 0.992774713099798 12 | "Gene11" 0.525245335516337 -0.125281815397116 1.98258801323099 1.25587304330484 1.72130657528429 3.33443440097141 -1.14671997991542 2.79531695730023 0.230179278745857 1.87567547912315 13 | "Gene12" 0.64135873685689 1.08074383695762 -0.41434472716078 2.03200523936978 1.63076146002238 0.594641641199243 -0.385276575773152 3.09480549944123 0.89946266818841 2.13953572986591 14 | "Gene13" 0.778467289623196 1.39397888234478 -0.641416834434727 2.58302186431346 -0.760241941379953 1.11448120561976 0.0147965421059807 1.72148599648951 0.582310073603496 3.95199000440402 15 | "Gene14" -1.31211856524232 0.929559725738129 -0.192162399713495 2.21999353695336 0.0416833996198677 3.04316212173241 -0.232447312057876 2.06434835994836 0.272114877479298 2.35771608111461 16 | "Gene15" -0.450299963274045 6.1817611985431 -0.769247278991498 6.01558532444451 -1.11681617334806 5.04220751099756 -0.86980073722054 5.86912996563008 0.317280150902204 6.93394317613965 17 | "Gene16" -0.980681254892518 5.59943311435535 0.648858582130001 4.94696867458044 0.964588124184688 4.8888037478231 -1.43675781049529 6.10711379894097 0.533081706576278 6.9532811858812 18 | "Gene17" 0.461840735291481 4.88727254034774 0.264777657556204 5.72889541823275 
-0.792803842730288 5.98420158556085 -0.334034636686585 6.04522662186325 -1.09721194924943 5.0488847428813 19 | "Gene18" 0.184399611963377 6.42377185710288 -0.634704692232601 6.17761050224891 -0.424493866389451 5.8012474805791 0.196534171669003 7.04571882418232 -0.320011422067175 4.45596626986551 20 | "Gene19" 0.216633814153591 7.36334499151954 -0.701505779056565 7.67986995313643 0.342106410436628 4.35765331289268 -0.625395703857065 4.78555386167032 0.570860379149449 6.60833752283822 21 | "Gene20" -1.49041967634056 3.02488322276605 -0.604289731843943 5.87370324649717 1.26157381504875 7.24630380161124 -0.271302342568583 5.20837547184471 -0.870032809024178 4.38640133939197 22 | -------------------------------------------------------------------------------- /plots/histogram_plots.txt: -------------------------------------------------------------------------------- 1 | "sex" "weight" 2 | "1" "F" 49 3 | "2" "F" 56 4 | "3" "F" 60 5 | "4" "F" 43 6 | "5" "F" 57 7 | "6" "F" 58 8 | "7" "F" 52 9 | "8" "F" 52 10 | "9" "F" 52 11 | "10" "F" 51 12 | "11" "F" 53 13 | "12" "F" 50 14 | "13" "F" 51 15 | "14" "F" 55 16 | "15" "F" 60 17 | "16" "F" 54 18 | "17" "F" 52 19 | "18" "F" 50 20 | "19" "F" 51 21 | "20" "F" 67 22 | "21" "F" 56 23 | "22" "F" 53 24 | "23" "F" 53 25 | "24" "F" 57 26 | "25" "F" 52 27 | "26" "F" 48 28 | "27" "F" 58 29 | "28" "F" 50 30 | "29" "F" 55 31 | "30" "F" 50 32 | "31" "F" 61 33 | "32" "F" 53 34 | "33" "F" 51 35 | "34" "F" 52 36 | "35" "F" 47 37 | "36" "F" 49 38 | "37" "F" 44 39 | "38" "F" 48 40 | "39" "F" 54 41 | "40" "F" 53 42 | "41" "F" 62 43 | "42" "F" 50 44 | "43" "F" 51 45 | "44" "F" 54 46 | "45" "F" 50 47 | "46" "F" 50 48 | "47" "F" 49 49 | "48" "F" 49 50 | "49" "F" 52 51 | "50" "F" 53 52 | "51" "F" 46 53 | "52" "F" 52 54 | "53" "F" 49 55 | "54" "F" 50 56 | "55" "F" 54 57 | "56" "F" 58 58 | "57" "F" 63 59 | "58" "F" 51 60 | "59" "F" 63 61 | "60" "F" 49 62 | "61" "F" 58 63 | "62" "F" 68 64 | "63" "F" 55 65 | "64" "F" 52 66 | "65" "F" 55 67 | "66" "F" 64 
68 | "67" "F" 49 69 | "68" "F" 62 70 | "69" "F" 62 71 | "70" "F" 57 72 | "71" "F" 55 73 | "72" "F" 53 74 | "73" "F" 53 75 | "74" "F" 58 76 | "75" "F" 65 77 | "76" "F" 54 78 | "77" "F" 48 79 | "78" "F" 51 80 | "79" "F" 56 81 | "80" "F" 53 82 | "81" "F" 54 83 | "82" "F" 54 84 | "83" "F" 48 85 | "84" "F" 54 86 | "85" "F" 59 87 | "86" "F" 58 88 | "87" "F" 58 89 | "88" "F" 53 90 | "89" "F" 54 91 | "90" "F" 49 92 | "91" "F" 55 93 | "92" "F" 56 94 | "93" "F" 64 95 | "94" "F" 60 96 | "95" "F" 53 97 | "96" "F" 57 98 | "97" "F" 49 99 | "98" "F" 59 100 | "99" "F" 60 101 | "100" "F" 66 102 | "101" "F" 57 103 | "102" "F" 53 104 | "103" "F" 55 105 | "104" "F" 52 106 | "105" "F" 51 107 | "106" "F" 56 108 | "107" "F" 51 109 | "108" "F" 56 110 | "109" "F" 57 111 | "110" "F" 55 112 | "111" "F" 54 113 | "112" "F" 52 114 | "113" "F" 49 115 | "114" "F" 59 116 | "115" "F" 55 117 | "116" "F" 59 118 | "117" "F" 49 119 | "118" "F" 56 120 | "119" "F" 58 121 | "120" "F" 55 122 | "121" "F" 54 123 | "122" "F" 51 124 | "123" "F" 65 125 | "124" "F" 59 126 | "125" "F" 64 127 | "126" "F" 55 128 | "127" "F" 52 129 | "128" "F" 47 130 | "129" "F" 52 131 | "130" "F" 56 132 | "131" "F" 60 133 | "132" "F" 56 134 | "133" "F" 49 135 | "134" "F" 58 136 | "135" "F" 47 137 | "136" "F" 53 138 | "137" "F" 53 139 | "138" "F" 45 140 | "139" "F" 60 141 | "140" "F" 52 142 | "141" "F" 53 143 | "142" "F" 62 144 | "143" "F" 58 145 | "144" "F" 54 146 | "145" "F" 58 147 | "146" "F" 57 148 | "147" "F" 63 149 | "148" "F" 56 150 | "149" "F" 58 151 | "150" "F" 57 152 | "151" "F" 53 153 | "152" "F" 55 154 | "153" "F" 63 155 | "154" "F" 51 156 | "155" "F" 56 157 | "156" "F" 62 158 | "157" "F" 54 159 | "158" "F" 50 160 | "159" "F" 51 161 | "160" "F" 53 162 | "161" "F" 51 163 | "162" "F" 54 164 | "163" "F" 53 165 | "164" "F" 54 166 | "165" "F" 57 167 | "166" "F" 58 168 | "167" "F" 63 169 | "168" "F" 55 170 | "169" "F" 53 171 | "170" "F" 62 172 | "171" "F" 64 173 | "172" "F" 55 174 | "173" "F" 53 175 | "174" "F" 46 176 | "175" 
"F" 62 177 | "176" "F" 51 178 | "177" "F" 49 179 | "178" "F" 70 180 | "179" "F" 56 181 | "180" "F" 55 182 | "181" "F" 41 183 | "182" "F" 55 184 | "183" "F" 60 185 | "184" "F" 57 186 | "185" "F" 60 187 | "186" "F" 65 188 | "187" "F" 61 189 | "188" "F" 52 190 | "189" "F" 59 191 | "190" "F" 54 192 | "191" "F" 52 193 | "192" "F" 41 194 | "193" "F" 51 195 | "194" "F" 57 196 | "195" "F" 66 197 | "196" "F" 58 198 | "197" "F" 58 199 | "198" "F" 50 200 | "199" "F" 56 201 | "200" "F" 45 202 | "201" "M" 67 203 | "202" "M" 68 204 | "203" "M" 66 205 | "204" "M" 69 206 | "205" "M" 67 207 | "206" "M" 69 208 | "207" "M" 74 209 | "208" "M" 71 210 | "209" "M" 65 211 | "210" "M" 59 212 | "211" "M" 67 213 | "212" "M" 63 214 | "213" "M" 72 215 | "214" "M" 57 216 | "215" "M" 63 217 | "216" "M" 67 218 | "217" "M" 64 219 | "218" "M" 62 220 | "219" "M" 63 221 | "220" "M" 68 222 | "221" "M" 69 223 | "222" "M" 68 224 | "223" "M" 76 225 | "224" "M" 71 226 | "225" "M" 66 227 | "226" "M" 62 228 | "227" "M" 80 229 | "228" "M" 68 230 | "229" "M" 62 231 | "230" "M" 66 232 | "231" "M" 63 233 | "232" "M" 64 234 | "233" "M" 60 235 | "234" "M" 66 236 | "235" "M" 67 237 | "236" "M" 60 238 | "237" "M" 49 239 | "238" "M" 64 240 | "239" "M" 65 241 | "240" "M" 68 242 | "241" "M" 65 243 | "242" "M" 67 244 | "243" "M" 60 245 | "244" "M" 69 246 | "245" "M" 69 247 | "246" "M" 66 248 | "247" "M" 72 249 | "248" "M" 67 250 | "249" "M" 66 251 | "250" "M" 66 252 | "251" "M" 67 253 | "252" "M" 70 254 | "253" "M" 67 255 | "254" "M" 68 256 | "255" "M" 59 257 | "256" "M" 63 258 | "257" "M" 72 259 | "258" "M" 59 260 | "259" "M" 66 261 | "260" "M" 67 262 | "261" "M" 70 263 | "262" "M" 63 264 | "263" "M" 66 265 | "264" "M" 56 266 | "265" "M" 67 267 | "266" "M" 62 268 | "267" "M" 64 269 | "268" "M" 59 270 | "269" "M" 67 271 | "270" "M" 68 272 | "271" "M" 63 273 | "272" "M" 74 274 | "273" "M" 68 275 | "274" "M" 70 276 | "275" "M" 75 277 | "276" "M" 62 278 | "277" "M" 69 279 | "278" "M" 70 280 | "279" "M" 65 281 | "280" "M" 
67 282 | "281" "M" 60 283 | "282" "M" 67 284 | "283" "M" 61 285 | "284" "M" 69 286 | "285" "M" 61 287 | "286" "M" 67 288 | "287" "M" 61 289 | "288" "M" 64 290 | "289" "M" 57 291 | "290" "M" 66 292 | "291" "M" 70 293 | "292" "M" 66 294 | "293" "M" 56 295 | "294" "M" 62 296 | "295" "M" 73 297 | "296" "M" 74 298 | "297" "M" 59 299 | "298" "M" 63 300 | "299" "M" 67 301 | "300" "M" 67 302 | "301" "M" 62 303 | "302" "M" 60 304 | "303" "M" 64 305 | "304" "M" 70 306 | "305" "M" 65 307 | "306" "M" 62 308 | "307" "M" 62 309 | "308" "M" 73 310 | "309" "M" 63 311 | "310" "M" 69 312 | "311" "M" 72 313 | "312" "M" 67 314 | "313" "M" 63 315 | "314" "M" 65 316 | "315" "M" 63 317 | "316" "M" 71 318 | "317" "M" 64 319 | "318" "M" 73 320 | "319" "M" 62 321 | "320" "M" 62 322 | "321" "M" 66 323 | "322" "M" 65 324 | "323" "M" 62 325 | "324" "M" 57 326 | "325" "M" 65 327 | "326" "M" 61 328 | "327" "M" 70 329 | "328" "M" 60 330 | "329" "M" 71 331 | "330" "M" 62 332 | "331" "M" 66 333 | "332" "M" 69 334 | "333" "M" 62 335 | "334" "M" 68 336 | "335" "M" 65 337 | "336" "M" 59 338 | "337" "M" 64 339 | "338" "M" 73 340 | "339" "M" 64 341 | "340" "M" 61 342 | "341" "M" 65 343 | "342" "M" 67 344 | "343" "M" 70 345 | "344" "M" 71 346 | "345" "M" 66 347 | "346" "M" 71 348 | "347" "M" 61 349 | "348" "M" 53 350 | "349" "M" 63 351 | "350" "M" 62 352 | "351" "M" 53 353 | "352" "M" 68 354 | "353" "M" 61 355 | "354" "M" 64 356 | "355" "M" 57 357 | "356" "M" 68 358 | "357" "M" 74 359 | "358" "M" 61 360 | "359" "M" 64 361 | "360" "M" 75 362 | "361" "M" 70 363 | "362" "M" 75 364 | "363" "M" 65 365 | "364" "M" 64 366 | "365" "M" 62 367 | "366" "M" 72 368 | "367" "M" 59 369 | "368" "M" 67 370 | "369" "M" 65 371 | "370" "M" 76 372 | "371" "M" 62 373 | "372" "M" 57 374 | "373" "M" 66 375 | "374" "M" 65 376 | "375" "M" 61 377 | "376" "M" 66 378 | "377" "M" 64 379 | "378" "M" 62 380 | "379" "M" 68 381 | "380" "M" 63 382 | "381" "M" 56 383 | "382" "M" 52 384 | "383" "M" 62 385 | "384" "M" 72 386 | "385" "M" 69 
387 | "386" "M" 71 388 | "387" "M" 70 389 | "388" "M" 67 390 | "389" "M" 57 391 | "390" "M" 66 392 | "391" "M" 73 393 | "392" "M" 48 394 | "393" "M" 61 395 | "394" "M" 71 396 | "395" "M" 68 397 | "396" "M" 69 398 | "397" "M" 67 399 | "398" "M" 68 400 | "399" "M" 65 401 | "400" "M" 60 402 | -------------------------------------------------------------------------------- /pre-processGff/bin/getlncRNAexon.py: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/python 3 | 4 | #print '''Usage: get ncRNA2 subtype exons from exon.gff 5 | # e.g. python getlncRNAexon.py lncRNA_gold_intergenic.gff lncRNA_gold_intergenic.exon.gff 6 | #''' 7 | import sys 8 | import re 9 | 10 | dict = {} 11 | f1 = open(sys.argv[1]) ## ncRNA2 subtypes 12 | for i in f1: 13 | l = i.strip().split('\t') 14 | trID = l[8].split('; ')[1].split(' ')[1] 15 | dict[trID] = 'ncRNAtr' 16 | f1.close() 17 | 18 | f2 = open('exon.gff') 19 | f3 = open(sys.argv[2],'w') ## ncRNA2 sbutype exon gff 20 | for j in f2: 21 | if re.search('lincRNA',j): 22 | Parent = j.strip().split('\t')[8].split('; ')[1].split(' ')[1] 23 | if Parent in dict: 24 | f3.write(j) 25 | 26 | f3.close() 27 | f2.close() 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /pre-processGff/bin/makeIntron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | print ''' Usage: get introns for coding genes or lncRNAs 4 | use sorted exon.gtf) 5 | python makeIntron.py coding_exon.gff coding_intron.gff 6 | ''' 7 | import sys 8 | import re 9 | 10 | trs1 = '' 11 | f1 = open(sys.argv[1]) # exon.gff 12 | f2 = open(sys.argv[2],'w') # intron.gff 13 | for i in f1: 14 | line = i.strip().split('\t') 15 | anno = line[8] 16 | trs2 = anno.strip().split(' ')[3] 17 | if trs2 != trs1: 18 | intron_start = str(int(line[4]) + 1) 19 | trs1 = trs2 20 | else: 21 | intron_end = str(int(line[3]) - 1) 22 | f2.write(line[0] + 
'\t' + line[1] + '\tintron\t' + intron_start + '\t' + intron_end + '\t.\t' + line[6] + '\t.\t' + line[8] + '\n') 23 | intron_start = str(int(line[4]) + 1) 24 | f2.close() 25 | f1.close() 26 | -------------------------------------------------------------------------------- /pre-processGff/bin/specify5-3UTR.R: -------------------------------------------------------------------------------- 1 | library(GenomicRanges) # From Bioconductor 2 | Args<-commandArgs(T) 3 | 4 | genes <- read.table(Args[1], sep = '\t', skip = 5, stringsAsFactors = FALSE) 5 | whichCodingTranscripts <- genes[, 3] == "transcript" & grepl("transcript_type protein_coding", genes[, 9], fixed = TRUE) 6 | proteinTranscripts <- genes[whichCodingTranscripts, ] 7 | strands <- proteinTranscripts[, 7] 8 | 9 | allFeaturesTranscripts <- gsub("transcript_id ", '', sapply(strsplit(genes[, 9], "; "), '[', 2)) 10 | proteinTranscriptsNames <- allFeaturesTranscripts[whichCodingTranscripts] 11 | whichCDS <- genes[, 3] == "CDS" & allFeaturesTranscripts %in% proteinTranscriptsNames 12 | transcriptsCDS <- genes[whichCDS, ] 13 | transcriptsCDS <- split(GRanges(transcriptsCDS[, 1], IRanges(transcriptsCDS[, 4], transcriptsCDS[, 5]), transcriptsCDS[, 7]), 14 | factor(allFeaturesTranscripts[whichCDS], levels = proteinTranscriptsNames)) 15 | #firstCDS <- mapply(function(CDS, strand) {strand_value=as.list(strand(CDS))[[1]]; if(strand_value == '+') {head(CDS,n=1)} else {tail(CDS,n=1)}}, transcriptsCDS, strand(transcriptsCDS)) 16 | firstCDS <- endoapply(transcriptsCDS, function(CDS) {strand_value=as.list(strand(CDS))[[1]]; if(strand_value == '+') {head(CDS,n=1)} else {tail(CDS,n=1)}}) 17 | 18 | lastCDS <- endoapply(transcriptsCDS, function(CDS) {strand_value=as.list(strand(CDS))[[1]]; if(strand_value == '+') {head(CDS,n=1)} else {tail(CDS,n=1)}}) 19 | 20 | whichUTR <- genes[, 3] == "UTR" & allFeaturesTranscripts %in% proteinTranscriptsNames 21 | transcriptsUTR <- genes[whichUTR, ] 22 | transcriptsUTR <- 
split(GRanges(transcriptsUTR[, 1], IRanges(transcriptsUTR[, 4], transcriptsUTR[, 5]), transcriptsUTR[, 7]), 23 | factor(allFeaturesTranscripts[whichUTR], levels = names(firstCDS))) 24 | 25 | transcriptsUTR5 <- mendoapply(function(UTR, CDS) 26 | { strand_value=as.list(strand(CDS))[[1]]; 27 | if(strand_value == '+') UTR[UTR < CDS[1]] else UTR[UTR > CDS[length(CDS)]] 28 | }, transcriptsUTR, firstCDS) 29 | 30 | transcriptsUTR3 <- mendoapply(function(UTR, CDS) 31 | { strand_value=as.list(strand(CDS))[[1]]; 32 | if(strand_value == '+') UTR[UTR > CDS[length(CDS)]] else UTR[UTR < CDS[1]] 33 | }, transcriptsUTR, firstCDS) 34 | 35 | Format <- function(UTR){ 36 | data.frame(seqnames=as.vector(seqnames(unlist(UTR))), 37 | starts=start(unlist(UTR)), 38 | ends=end(unlist(UTR)), 39 | names=names(unlist(UTR)), 40 | strands=as.vector(strand(unlist(UTR))))} 41 | 42 | coorsUTR5 <- Format(transcriptsUTR5) 43 | coorsUTR3 <- Format(transcriptsUTR3) 44 | coorsUTR5$type="5UTR" 45 | coorsUTR3$type="3UTR" 46 | #library(rtracklayer) 47 | #coorsUTR5=export(Reduce(c,transcriptsUTR5),format="gff3") 48 | UTR=rbind(coorsUTR5,coorsUTR3) 49 | write.table(UTR,file=Args[2],quote=F, row.names=F, col.names=F,sep="\t") 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /pre-processGff/bin/specify5-3UTR.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | dict = {} 5 | f1 = open(sys.argv[1]) 6 | for i in f1: 7 | l = i.strip().split('\t') 8 | utrid0 = '\t'.join(l[0:5]) 9 | dict[utrid0] = l[5] 10 | f1.close() 11 | 12 | f2 = open(sys.argv[2]) 13 | for j in f2: 14 | ll = j.strip().split('\t') 15 | trid = ll[8].strip().split('; ')[1].split(' ')[1].strip('\"') 16 | utrid = ll[0]+'\t'+ll[3]+'\t'+ll[4]+'\t'+trid+'\t'+ll[6] 17 | if utrid in dict: 18 | print ll[0]+'\t'+ll[1]+'\t'+dict[utrid]+'\t'+ll[3]+'\t'+ll[4]+'\t.\t'+ll[6]+'\t.\t'+ll[8] 19 | 20 | f2.close() 21 | 
-------------------------------------------------------------------------------- /pre-processGff/output/clean_gtf/Mt_rRNA.gtf: -------------------------------------------------------------------------------- 1 | chrM ENSEMBL gene 648 1601 . + . gene_id "ENSG00000211459.2"; transcript_id "ENSG00000211459.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR1"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR1"; level 3; 2 | chrM ENSEMBL transcript 648 1601 . + . gene_id "ENSG00000211459.2"; transcript_id "ENST00000389680.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR1"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR1-201"; level 3; tag "basic"; 3 | chrM ENSEMBL exon 648 1601 . + . gene_id "ENSG00000211459.2"; transcript_id "ENST00000389680.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR1"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR1-201"; exon_number 1; exon_id "ENSE00001544499.2"; level 3; tag "basic"; 4 | chrM ENSEMBL gene 1671 3229 . + . gene_id "ENSG00000210082.2"; transcript_id "ENSG00000210082.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR2"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR2"; level 3; 5 | chrM ENSEMBL transcript 1671 3229 . + . gene_id "ENSG00000210082.2"; transcript_id "ENST00000387347.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR2"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR2-201"; level 3; tag "basic"; 6 | chrM ENSEMBL exon 1671 3229 . + . 
gene_id "ENSG00000210082.2"; transcript_id "ENST00000387347.2"; gene_type "Mt_rRNA"; gene_status "KNOWN"; gene_name "MT-RNR2"; transcript_type "Mt_rRNA"; transcript_status "KNOWN"; transcript_name "MT-RNR2-201"; exon_number 1; exon_id "ENSE00001544497.2"; level 3; tag "basic"; 7 | -------------------------------------------------------------------------------- /pre-processGff/output/clean_gtf/Mt_tRNA.gtf: -------------------------------------------------------------------------------- 1 | chrM ENSEMBL gene 577 647 . + . gene_id "ENSG00000210049.1"; transcript_id "ENSG00000210049.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TF"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TF"; level 3; 2 | chrM ENSEMBL transcript 577 647 . + . gene_id "ENSG00000210049.1"; transcript_id "ENST00000387314.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TF"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TF-201"; level 3; tag "basic"; 3 | chrM ENSEMBL exon 577 647 . + . gene_id "ENSG00000210049.1"; transcript_id "ENST00000387314.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TF"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TF-201"; exon_number 1; exon_id "ENSE00001544501.1"; level 3; tag "basic"; 4 | chrM ENSEMBL gene 1602 1670 . + . gene_id "ENSG00000210077.1"; transcript_id "ENSG00000210077.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TV"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TV"; level 3; 5 | chrM ENSEMBL transcript 1602 1670 . + . gene_id "ENSG00000210077.1"; transcript_id "ENST00000387342.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TV"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TV-201"; level 3; tag "basic"; 6 | chrM ENSEMBL exon 1602 1670 . + . 
gene_id "ENSG00000210077.1"; transcript_id "ENST00000387342.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TV"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TV-201"; exon_number 1; exon_id "ENSE00001544498.1"; level 3; tag "basic"; 7 | chrM ENSEMBL gene 3230 3304 . + . gene_id "ENSG00000209082.1"; transcript_id "ENSG00000209082.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL1"; level 3; 8 | chrM ENSEMBL transcript 3230 3304 . + . gene_id "ENSG00000209082.1"; transcript_id "ENST00000386347.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL1-201"; level 3; tag "basic"; 9 | chrM ENSEMBL exon 3230 3304 . + . gene_id "ENSG00000209082.1"; transcript_id "ENST00000386347.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL1-201"; exon_number 1; exon_id "ENSE00002006242.1"; level 3; tag "basic"; 10 | chrM ENSEMBL gene 4263 4331 . + . gene_id "ENSG00000210100.1"; transcript_id "ENSG00000210100.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TI"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TI"; level 3; 11 | chrM ENSEMBL transcript 4263 4331 . + . gene_id "ENSG00000210100.1"; transcript_id "ENST00000387365.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TI"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TI-201"; level 3; tag "basic"; 12 | chrM ENSEMBL exon 4263 4331 . + . gene_id "ENSG00000210100.1"; transcript_id "ENST00000387365.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TI"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TI-201"; exon_number 1; exon_id "ENSE00001993597.1"; level 3; tag "basic"; 13 | chrM ENSEMBL gene 4329 4400 . - . 
gene_id "ENSG00000210107.1"; transcript_id "ENSG00000210107.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TQ"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TQ"; level 3; 14 | chrM ENSEMBL transcript 4329 4400 . - . gene_id "ENSG00000210107.1"; transcript_id "ENST00000387372.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TQ"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TQ-201"; level 3; tag "basic"; 15 | chrM ENSEMBL exon 4329 4400 . - . gene_id "ENSG00000210107.1"; transcript_id "ENST00000387372.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TQ"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TQ-201"; exon_number 1; exon_id "ENSE00001544494.1"; level 3; tag "basic"; 16 | chrM ENSEMBL gene 4402 4469 . + . gene_id "ENSG00000210112.1"; transcript_id "ENSG00000210112.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TM"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TM"; level 3; 17 | chrM ENSEMBL transcript 4402 4469 . + . gene_id "ENSG00000210112.1"; transcript_id "ENST00000387377.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TM"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TM-201"; level 3; tag "basic"; 18 | chrM ENSEMBL exon 4402 4469 . + . gene_id "ENSG00000210112.1"; transcript_id "ENST00000387377.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TM"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TM-201"; exon_number 1; exon_id "ENSE00001544493.1"; level 3; tag "basic"; 19 | chrM ENSEMBL gene 5512 5579 . + . gene_id "ENSG00000210117.1"; transcript_id "ENSG00000210117.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TW"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TW"; level 3; 20 | chrM ENSEMBL transcript 5512 5579 . + . 
gene_id "ENSG00000210117.1"; transcript_id "ENST00000387382.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TW"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TW-201"; level 3; tag "basic"; 21 | chrM ENSEMBL exon 5512 5579 . + . gene_id "ENSG00000210117.1"; transcript_id "ENST00000387382.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TW"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TW-201"; exon_number 1; exon_id "ENSE00001544492.1"; level 3; tag "basic"; 22 | chrM ENSEMBL gene 5587 5655 . - . gene_id "ENSG00000210127.1"; transcript_id "ENSG00000210127.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TA"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TA"; level 3; 23 | chrM ENSEMBL transcript 5587 5655 . - . gene_id "ENSG00000210127.1"; transcript_id "ENST00000387392.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TA"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TA-201"; level 3; tag "basic"; 24 | chrM ENSEMBL exon 5587 5655 . - . gene_id "ENSG00000210127.1"; transcript_id "ENST00000387392.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TA"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TA-201"; exon_number 1; exon_id "ENSE00001544491.1"; level 3; tag "basic"; 25 | chrM ENSEMBL gene 5657 5729 . - . gene_id "ENSG00000210135.1"; transcript_id "ENSG00000210135.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TN"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TN"; level 3; 26 | chrM ENSEMBL transcript 5657 5729 . - . gene_id "ENSG00000210135.1"; transcript_id "ENST00000387400.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TN"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TN-201"; level 3; tag "basic"; 27 | chrM ENSEMBL exon 5657 5729 . - . 
gene_id "ENSG00000210135.1"; transcript_id "ENST00000387400.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TN"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TN-201"; exon_number 1; exon_id "ENSE00001544490.1"; level 3; tag "basic"; 28 | chrM ENSEMBL gene 5761 5826 . - . gene_id "ENSG00000210140.1"; transcript_id "ENSG00000210140.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TC"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TC"; level 3; 29 | chrM ENSEMBL transcript 5761 5826 . - . gene_id "ENSG00000210140.1"; transcript_id "ENST00000387405.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TC"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TC-201"; level 3; tag "basic"; 30 | chrM ENSEMBL exon 5761 5826 . - . gene_id "ENSG00000210140.1"; transcript_id "ENST00000387405.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TC"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TC-201"; exon_number 1; exon_id "ENSE00001544489.1"; level 3; tag "basic"; 31 | chrM ENSEMBL gene 5826 5891 . - . gene_id "ENSG00000210144.1"; transcript_id "ENSG00000210144.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TY"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TY"; level 3; 32 | chrM ENSEMBL transcript 5826 5891 . - . gene_id "ENSG00000210144.1"; transcript_id "ENST00000387409.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TY"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TY-201"; level 3; tag "basic"; 33 | chrM ENSEMBL exon 5826 5891 . - . gene_id "ENSG00000210144.1"; transcript_id "ENST00000387409.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TY"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TY-201"; exon_number 1; exon_id "ENSE00001544488.1"; level 3; tag "basic"; 34 | chrM ENSEMBL gene 7446 7514 . - . 
gene_id "ENSG00000210151.2"; transcript_id "ENSG00000210151.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS1"; level 3; 35 | chrM ENSEMBL transcript 7446 7514 . - . gene_id "ENSG00000210151.2"; transcript_id "ENST00000387416.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS1-201"; level 3; tag "basic"; 36 | chrM ENSEMBL exon 7446 7514 . - . gene_id "ENSG00000210151.2"; transcript_id "ENST00000387416.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS1"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS1-201"; exon_number 1; exon_id "ENSE00001544487.2"; level 3; tag "basic"; 37 | chrM ENSEMBL gene 7518 7585 . + . gene_id "ENSG00000210154.1"; transcript_id "ENSG00000210154.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TD"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TD"; level 3; 38 | chrM ENSEMBL transcript 7518 7585 . + . gene_id "ENSG00000210154.1"; transcript_id "ENST00000387419.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TD"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TD-201"; level 3; tag "basic"; 39 | chrM ENSEMBL exon 7518 7585 . + . gene_id "ENSG00000210154.1"; transcript_id "ENST00000387419.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TD"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TD-201"; exon_number 1; exon_id "ENSE00001544486.1"; level 3; tag "basic"; 40 | chrM ENSEMBL gene 8295 8364 . + . gene_id "ENSG00000210156.1"; transcript_id "ENSG00000210156.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TK"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TK"; level 3; 41 | chrM ENSEMBL transcript 8295 8364 . + . 
gene_id "ENSG00000210156.1"; transcript_id "ENST00000387421.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TK"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TK-201"; level 3; tag "basic"; 42 | chrM ENSEMBL exon 8295 8364 . + . gene_id "ENSG00000210156.1"; transcript_id "ENST00000387421.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TK"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TK-201"; exon_number 1; exon_id "ENSE00001544484.1"; level 3; tag "basic"; 43 | chrM ENSEMBL gene 9991 10058 . + . gene_id "ENSG00000210164.1"; transcript_id "ENSG00000210164.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TG"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TG"; level 3; 44 | chrM ENSEMBL transcript 9991 10058 . + . gene_id "ENSG00000210164.1"; transcript_id "ENST00000387429.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TG"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TG-201"; level 3; tag "basic"; 45 | chrM ENSEMBL exon 9991 10058 . + . gene_id "ENSG00000210164.1"; transcript_id "ENST00000387429.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TG"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TG-201"; exon_number 1; exon_id "ENSE00001544483.1"; level 3; tag "basic"; 46 | chrM ENSEMBL gene 10405 10469 . + . gene_id "ENSG00000210174.1"; transcript_id "ENSG00000210174.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TR"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TR"; level 3; 47 | chrM ENSEMBL transcript 10405 10469 . + . gene_id "ENSG00000210174.1"; transcript_id "ENST00000387439.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TR"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TR-201"; level 3; tag "basic"; 48 | chrM ENSEMBL exon 10405 10469 . + . 
gene_id "ENSG00000210174.1"; transcript_id "ENST00000387439.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TR"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TR-201"; exon_number 1; exon_id "ENSE00001544482.1"; level 3; tag "basic"; 49 | chrM ENSEMBL gene 12138 12206 . + . gene_id "ENSG00000210176.1"; transcript_id "ENSG00000210176.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TH"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TH"; level 3; 50 | chrM ENSEMBL transcript 12138 12206 . + . gene_id "ENSG00000210176.1"; transcript_id "ENST00000387441.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TH"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TH-201"; level 3; tag "basic"; 51 | chrM ENSEMBL exon 12138 12206 . + . gene_id "ENSG00000210176.1"; transcript_id "ENST00000387441.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TH"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TH-201"; exon_number 1; exon_id "ENSE00001544480.1"; level 3; tag "basic"; 52 | chrM ENSEMBL gene 12207 12265 . + . gene_id "ENSG00000210184.1"; transcript_id "ENSG00000210184.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS2"; level 3; 53 | chrM ENSEMBL transcript 12207 12265 . + . gene_id "ENSG00000210184.1"; transcript_id "ENST00000387449.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS2-201"; level 3; tag "basic"; 54 | chrM ENSEMBL exon 12207 12265 . + . 
gene_id "ENSG00000210184.1"; transcript_id "ENST00000387449.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TS2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TS2-201"; exon_number 1; exon_id "ENSE00001544479.1"; level 3; tag "basic"; 55 | chrM ENSEMBL gene 12266 12336 . + . gene_id "ENSG00000210191.1"; transcript_id "ENSG00000210191.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL2"; level 3; 56 | chrM ENSEMBL transcript 12266 12336 . + . gene_id "ENSG00000210191.1"; transcript_id "ENST00000387456.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL2-201"; level 3; tag "basic"; 57 | chrM ENSEMBL exon 12266 12336 . + . gene_id "ENSG00000210191.1"; transcript_id "ENST00000387456.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TL2"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TL2-201"; exon_number 1; exon_id "ENSE00001544478.1"; level 3; tag "basic"; 58 | chrM ENSEMBL gene 14674 14742 . - . gene_id "ENSG00000210194.1"; transcript_id "ENSG00000210194.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TE"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TE"; level 3; 59 | chrM ENSEMBL transcript 14674 14742 . - . gene_id "ENSG00000210194.1"; transcript_id "ENST00000387459.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TE"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TE-201"; level 3; tag "basic"; 60 | chrM ENSEMBL exon 14674 14742 . - . 
gene_id "ENSG00000210194.1"; transcript_id "ENST00000387459.1"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TE"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TE-201"; exon_number 1; exon_id "ENSE00001544476.1"; level 3; tag "basic"; 61 | chrM ENSEMBL gene 15888 15953 . + . gene_id "ENSG00000210195.2"; transcript_id "ENSG00000210195.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TT"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TT"; level 3; 62 | chrM ENSEMBL transcript 15888 15953 . + . gene_id "ENSG00000210195.2"; transcript_id "ENST00000387460.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TT"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TT-201"; level 3; tag "basic"; 63 | chrM ENSEMBL exon 15888 15953 . + . gene_id "ENSG00000210195.2"; transcript_id "ENST00000387460.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TT"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TT-201"; exon_number 1; exon_id "ENSE00001544475.2"; level 3; tag "basic"; 64 | chrM ENSEMBL gene 15956 16023 . - . gene_id "ENSG00000210196.2"; transcript_id "ENSG00000210196.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TP"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TP"; level 3; 65 | chrM ENSEMBL transcript 15956 16023 . - . gene_id "ENSG00000210196.2"; transcript_id "ENST00000387461.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TP"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TP-201"; level 3; tag "basic"; 66 | chrM ENSEMBL exon 15956 16023 . - . 
gene_id "ENSG00000210196.2"; transcript_id "ENST00000387461.2"; gene_type "Mt_tRNA"; gene_status "KNOWN"; gene_name "MT-TP"; transcript_type "Mt_tRNA"; transcript_status "KNOWN"; transcript_name "MT-TP-201"; exon_number 1; exon_id "ENSE00001544473.2"; level 3; tag "basic"; 67 | -------------------------------------------------------------------------------- /pre-processGff/output/clean_gtf/non_coding.gtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lulab/shared_scripts/a49e08b6a09097a26baaa669bcf0ec2c1e71c741/pre-processGff/output/clean_gtf/non_coding.gtf -------------------------------------------------------------------------------- /pre-processGff/processGFF.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | 4 | # Print help message if no parameter given 5 | if [ "$#" == 0 ];then 6 | echo "Usage: ./processGFF.sh input_dir output_dir 7 | 1. separate specific biotypes 8 | 2. seperate 5' and 3' UTR 9 | 3. define intergenic region 10 | 4. clean gtf for cufflinks 11 | input: gencode.v19.annotation.gtf,gencode.v19.tRNAs.gtf,miRbase_hsa.gff3,gencode.v19.long_noncoding_RNAs.gtf et al 12 | output: output_dir/separate_gff output_dir/merge_gff output_dir/clean_gtf/ 13 | " 14 | exit; 15 | fi 16 | ## input_dir=/data1/users1/chaodi/ncRNA/human/gff/Gencode19/Input 17 | input_dir=$1 18 | ## specify your own output dir 19 | output_dir=$2 20 | gtf=$input_dir/gencode.v19.annotation.gtf 21 | [[ -d $output_dir/separate_gff ]] || mkdir $output_dir/separate_gff 22 | 23 | ## look at genomic element types 24 | sed '1,5d' $gtf | awk -F "\t" 'split($9,a,"; "){print $2"\t"$3"\t"a[6]}' | sort |uniq -c > $output_dir/feature+trtype 25 | sed '1,5d' $gtf | awk -F "\t" 'split($9,a,"; "){print $2"\t"$3"\t"a[3]}' | sort |uniq -c > $output_dir/feature+genetype 26 | 27 | ######## 1. 
separate whole gtf file to gffs for each biotype ########## 28 | ### notice that "gencode.v10.annotation_updated_ncrna_host.gtf" do not include: tRNA, TE, and ancestral repeats, miRNA should get from mirBase 29 | ## tRNA in "gencode.v19.tRNAs.gtf", TE generated from RepeatMasker.txt(UCSC), ancestral repeats from hg19 multiple alingnments(maf file), use John's script 30 | ## all CDS(including cCDS) 31 | awk '$3=="CDS"{print}' $gtf | grep "transcript_type \"protein_coding\"" > $output_dir/separate_gff/CDS.gff 32 | ## level1+2 CDS only in protein_coding as gold-standard 33 | grep "level 1\|level 2" $output_dir/separate_gff/CDS.gff > $output_dir/separate_gff/CDS_gold.gff 34 | 35 | ## all UTR 36 | awk '$3=="UTR"{print}' $gtf | grep "transcript_type \"protein_coding\"" > $output_dir/separate_gff/UTR.gff 37 | 38 | ####### ncRNA1 type: 7 types ###### 39 | ## rRNA(gene=transcript=exon) 40 | grep "transcript_type \"rRNA\"" $gtf | awk '$3=="transcript"{print}'|awk 'BEGIN{FS=OFS="\t"} $3="rRNA"{print}' > $output_dir/separate_gff/rRNA.gff 41 | ## snoRNA(gene=transcript=exon) 42 | grep "transcript_type \"snoRNA\"" $gtf | awk '$3=="transcript"{print}'|awk 'BEGIN{FS=OFS="\t"} $3="snoRNA"{print}' > $output_dir/separate_gff/snoRNA.gff 43 | ## snRNA(gene=transcript=exon) 44 | grep "transcript_type \"snRNA\"" $gtf | awk '$3=="transcript"{print}' |awk 'BEGIN{FS=OFS="\t"} $3="snRNA"{print}'> $output_dir/separate_gff/snRNA.gff 45 | ## miRNA from mirBase 46 | grep miRNA_primary_transcript $input_dir/miRbase_hsa.gff3 | sed 1d |awk 'BEGIN{FS=OFS="\t"} $2="miRBase"{print}'> $output_dir/separate_gff/miRNA.gff 47 | ## tRNA from tRNA genecode 48 | sed '1,5d' $input_dir/gencode.v19.tRNAs.gtf |awk 'BEGIN{FS=OFS="\t"} $3="tRNA"{print}'> $output_dir/separate_gff/tRNA.gff 49 | ## Y_RNA and 7SK from miscRNA(gene=transcript=exon) 50 | grep "transcript_type \"misc_RNA\"" $gtf | awk '$3=="transcript"{print}' | grep Y_RNA |awk 'BEGIN{FS=OFS="\t"} $3="Y_RNA"{print}'> $output_dir/separate_gff/Y_RNA.gff 51 
| grep "transcript_type \"misc_RNA\"" $gtf | awk '$3=="transcript"{print}' | grep 7SK |awk 'BEGIN{FS=OFS="\t"} $3="7SK_RNA"{print}'> $output_dir/separate_gff/7SK_RNA.gff 52 | ## combine 53 | cd $output_dir/separate_gff/ 54 | cat rRNA.gff snoRNA.gff snRNA.gff miRNA.gff tRNA.gff Y_RNA.gff 7SK_RNA.gff > canonical_ncRNA.gff 55 | cd ../../ 56 | 57 | ##### level1+level2 lncRNAs, exclude overlapped with ncRNA1 as gold-standard lncRNA##### 58 | ## all lncRNA(transcript) 59 | awk '$3=="transcript"{print}' $input_dir/gencode.v19.long_noncoding_RNAs.gtf > $output_dir/separate_gff/lncRNA.gff 60 | grep "transcript_type \"lincRNA\"" $input_dir/gencode.v19.long_noncoding_RNAs.gtf | awk '$3=="transcript"{print}' > $output_dir/separate_gff/lincRNA.gff 61 | intersectBed -a $output_dir/separate_gff/lncRNA.gff -b $output_dir/separate_gff/canonical_ncRNA.gff -v | grep "level 1\|level 2" > Output/separate_gff/lncRNA_gold.gff 62 | 63 | ### many kind of pseudogenes: CDS/UTR "polymorphic_pseudogene";"TR_J_pseudogene" ;"Mt_tRNA_pseudogene" 64 | grep pseudogene $gtf | awk '$3=="transcript"{print}' > $output_dir/separate_gff/pseudogene.gff 65 | ## TE from reapmaskter of UCSC, class:LTR,SINE,LINE,DNA as transposable element 66 | awk '$6=="LTR"||$6=="SINE"||$6=="LINE"||$6=="DNA"{print}' $input_dir/RepeatMasker.bed | awk '{print $1"\thg19_rmsk\tTE\t"$2"\t"$3"\t.\t"$4"\t.\trepName="$5"; repClass="$6"; repFamily="$7";"}' | awk '$1!~/_/{print}' > $output_dir/separate_gff/TE.gff 67 | ## genes and transcripts (all types) 68 | awk '$3=="gene"{print}' $gtf > $output_dir/separate_gff/gene.gff 69 | awk '$3=="exon"{print}' $gtf > $output_dir/separate_gff/exon.gff 70 | awk '$3=="transcript"{print}' $gtf > $output_dir/separate_gff/transcript.gff 71 | cd $output_dir/separate_gff/ 72 | # mRNA 73 | grep "transcript_type \"protein_coding\"" transcript.gff > mRNA.gff 74 | ## exon (coding) 75 | grep "transcript_type \"protein_coding\"" exon.gff > coding_exon.gff 76 | ## introns(coding) 77 | sort -k1,1 -k12,12 
-k4,4n coding_exon.gff >foo 78 | mv foo coding_exon.gff 79 | python ../../bin/makeIntron.py coding_exon.gff coding_intron.gff 80 | echo -e "separate gff done! \n" 81 | 82 | ##### merge for genic region and then get intergeic region 83 | cat gene.gff TE.gff | sort -k1,1 -k4,4n | mergeBed > merged_genic.bed 84 | 85 | ### get intergenic region 86 | 87 | bedtools slop -i merged_genic.bed -b 2000 -g $input_dir/hg19.genome | mergeBed > genic+2000_merge.bed 88 | ## download 89 | complementBed -i genic+2000_merge.bed -g $input_dir/hg19.genome > intergenic.bed 90 | awk '{print $0"\tintergenic"}' intergenic.bed > foo 91 | mv foo intergenic.bed 92 | cd ../../ 93 | 94 | echo -e "get intergenic reigon done! \n" 95 | 96 | 97 | ########### 3. seperate UTR to 5' and 3', output: sepUTR.bed ######## 98 | ## only transcripts type=="protein_coding", the UTR counts, not gene type="protein_coding"(some protein coding gene have "nonsense_mediated_decay" transcripts) 99 | mkdir $output_dir/separate_gff/sepUTR/ 100 | cd $output_dir/separate_gff/sepUTR 101 | Rscript ../../../bin/specify5-3UTR.R $gtf sepUTR.bed 102 | python ../../../bin/specify5-3UTR.py sepUTR.bed ../UTR.gff > sepUTR.gff 103 | awk '$3=="5UTR"{print}' sepUTR.gff > 5UTR.gff 104 | awk '$3=="3UTR"{print}' sepUTR.gff > 3UTR.gff 105 | cd ../../../ 106 | 107 | echo -e "separate UTR done! 
\n" 108 | 109 | ###### 4.clean gtf for use of cufflinks ############ 110 | mkdir -p $output_dir/clean_gtf 111 | for i in rRNA snRNA snoRNA misc_RNA miRNA Mt_rRNA Mt_tRNA lincRNA non_coding sense_intronic sense_overlapping antisense 3prime_overlapping_ncrna; do 112 | grep "gene_type \"$i\"" $gtf > $output_dir/clean_gtf/$i.gtf 113 | done 114 | grep "gene_type \"protein_coding\"" $gtf | grep -v Selenocysteine > $output_dir/clean_gtf/protein_coding.gtf 115 | grep "IG_[CDJV]\|TR_[CDJV]" $gtf > $output_dir/clean_gtf/IG_TR.gtf 116 | cp $output_dir/separate_gff/tRNA.gff $output_dir/clean_gtf/tRNA.gtf 117 | cp $output_dir/separate_gff/pseudogene.gff $output_dir/clean_gtf/pseudogene.gtf 118 | cd $output_dir/clean_gtf/ 119 | cat antisense.gtf non_coding.gtf sense_intronic.gtf sense_overlapping.gtf 3prime_overlapping_ncrna.gtf > otherType_ncRNA.gtf 120 | cat rRNA.gtf snRNA.gtf snoRNA.gtf misc_RNA.gtf miRNA.gtf Mt_rRNA.gtf Mt_tRNA.gtf lincRNA.gtf non_coding.gtf sense_intronic.gtf sense_overlapping.gtf antisense.gtf 3prime_overlapping_ncrna.gtf tRNA.gtf otherType_ncRNA.gtf > known_ncRNA.gtf 121 | cat protein_coding.gtf pseudogene.gtf known_ncRNA.gtf |awk '$3!~/^gene/ && $3!~/^transcript/{print}' > gencode19.lite.gtf 122 | cd ../../ 123 | 124 | echo "clean gtf in Output/clean_gtf done!" 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /pre-processGff/readme: -------------------------------------------------------------------------------- 1 | 2 | # This script is used for pre-process genome gtf files 3 | prepare the genome annotation file as input, for human the inputs are: 4 | gencode.v19.annotation.gtf,gencode.v19.tRNAs.gtf,miRbase_hsa.gff3,gencode.v19.long_noncoding_RNAs.gtf et al 5 | 6 | you can use the input in /data1/users1/chaodi/ncRNA/human/gff/Gencode19/Input for testing ! 7 | you will get these results: 8 | 1. separated specific biotypes 9 | 2. seperated 5' and 3' UTR 10 | 3. 
defined intergenic region 11 | 4. cleaned gtf for cufflinks 12 | The output files are written to: output_dir/separate_gff output_dir/merge_gff output_dir/clean_gtf/ 13 | --------------------------------------------------------------------------------