├── .gitignore ├── massurl ├── test.data ├── Makefile ├── urlparse.h ├── linkedlist.h ├── tree.h ├── README.md ├── linkedlist.c ├── urlparse.c ├── tree.c └── massurl.c /.gitignore: -------------------------------------------------------------------------------- 1 | /.ccls-cache/ 2 | -------------------------------------------------------------------------------- /massurl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arg3t/massurl/HEAD/massurl -------------------------------------------------------------------------------- /test.data: -------------------------------------------------------------------------------- 1 | http://test.com?param4=var4 2 | http://test.com?param3=var3 3 | http://abc.com?abcpar=123&asdasd=asdas 4 | http://abc.com?abcpar123=123 5 | http://test.com/path1?param3=var3 6 | http://test.com/path1?param1=var2&param2=var2 7 | http://bc.com 8 | http://test.com?param1=var2&param2=var2 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ## 2 | # massurl 3 | # 4 | # @file Makefile 5 | # @version 0.1 6 | 7 | VERSION = 0.1 8 | CC = gcc 9 | CFLAGS = -g -w 10 | SRC = linkedlist.c urlparse.c tree.c massurl.c 11 | 12 | all: massurl 13 | 14 | massurl: $(SRC) 15 | ${CC} $(SRC) -o massurl $(CFLAGS) 16 | 17 | 18 | # end 19 | -------------------------------------------------------------------------------- /urlparse.h: -------------------------------------------------------------------------------- 1 | /* 2 | * urlparse.h 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 
6 | */ 7 | 8 | #include "linkedlist.h" 9 | 10 | #ifndef urlparse_h 11 | #define urlparse_h 12 | 13 | typedef struct{ 14 | unsigned int https : 1; 15 | char *base; 16 | LinkedList *params; 17 | int nparams; 18 | } URL; 19 | 20 | URL *parseurl(char *urlstr); 21 | URL *urlalloc(void); 22 | 23 | #endif /* Symbol’s value as variable is void: \. */ 24 | -------------------------------------------------------------------------------- /linkedlist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * linkedlist.h 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 6 | */ 7 | 8 | #ifndef linkedlist_h 9 | #define linkedlist_h 10 | 11 | #include 12 | 13 | typedef struct { 14 | struct linkedlist *next; 15 | char *data; 16 | } LinkedList; 17 | 18 | LinkedList *linkedlistalloc(void); 19 | int linkedlistfind(LinkedList *p, char *str); 20 | LinkedList*linkedlistadd(LinkedList *p, char *data); 21 | void linkedlistprint(LinkedList *p, FILE *out, char *payload); 22 | #endif /* Symbol’s value as variable is void: \. */ 23 | -------------------------------------------------------------------------------- /tree.h: -------------------------------------------------------------------------------- 1 | /* 2 | * tree.h 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 
6 | */ 7 | 8 | #include "urlparse.h" 9 | #include 10 | 11 | #ifndef tree_h 12 | #define tree_h 13 | 14 | typedef struct tnode { 15 | char *path; 16 | unsigned int red : 1; 17 | LinkedList *params; 18 | int nparams; 19 | struct tnode *parent; 20 | struct tnode *left; 21 | struct tnode *right; 22 | } TreeNode; 23 | 24 | TreeNode *addtree(TreeNode *parent, TreeNode *node); 25 | void rotatetreeright(TreeNode *node); 26 | void rotatetreeleft(TreeNode *node); 27 | void balancetree(TreeNode *root, TreeNode *node); 28 | TreeNode *treealloc(void); 29 | void printtree(TreeNode *root, FILE *out, char *payload, int minparams); 30 | 31 | #endif /* Symbol’s value as variable is void: \. */ 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # massurl 2 | 3 | massurl is a simple tool that aims to parse the outputs of tools like gau, and 4 | extract the parameters for each URL, remove duplicates and do it all very 5 | quickly. Because web scraping tools' outputs can get very large very quickly, 6 | it is nice to have a tool that parses them and and outputs something clean and 7 | easy to read. 8 | 9 | ## How to use? 10 | 11 | Simply clone the git repository and run `make` which outputs the binary 12 | *massurl*. You can then simply pipe the output of any command that outputs urls 13 | into it or pass the filename where you want it to read the urls from. It 14 | expects each line to have only one url. It has several parameters: 15 | 16 | ``` sh 17 | usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file 18 | ``` 19 | 20 | You can specify an output file, which it will write instead of stdout, you can 21 | also give it a list of payloads which massurl will automatically enter as the 22 | values for each parameter. 
If you are testing for reflected values in 23 | parameters, you can put a pseudorandom value in each param using the flag -r. 24 | And finally, you can specify the minimum number of parameters a url must have 25 | to be output; this value is zero by default, but I recommend you use 1. 26 | 27 | ## How fast is it? 28 | 29 | The tool uses a binary tree to store the urls and keeps it balanced using the 30 | red-black self-balancing tree algorithm, which allows it to run at incredible 31 | speeds. 32 | 33 | ## Contributing 34 | 35 | This is a very simple project so you shouldn't have trouble reading the code 36 | and fixing the bugs you encounter. If you do so, feel free to send a PR. Or, if 37 | you can't seem to fix it yourself, don't be shy and open an issue! 38 | -------------------------------------------------------------------------------- /linkedlist.c: -------------------------------------------------------------------------------- 1 | /* 2 | * linkedlist.c 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 
6 | */ 7 | 8 | #include "linkedlist.h" 9 | #include 10 | #include 11 | #include 12 | 13 | #define RANDLEN 6 14 | 15 | 16 | LinkedList *linkedlistalloc(void){ 17 | return (LinkedList *) malloc(sizeof(LinkedList)); 18 | } 19 | 20 | int linkedlistfind(LinkedList *p, char *str) { 21 | int count = 0; 22 | while(p != NULL){ 23 | if(!strcmp(p->data, str)) 24 | return count; 25 | count++; 26 | p = p->next; 27 | } 28 | return -1; 29 | } 30 | 31 | LinkedList *linkedlistadd(LinkedList *p, char *data){ 32 | if(p == NULL){ 33 | p = linkedlistalloc(); 34 | p->next = NULL; 35 | p->data = data; 36 | }else 37 | p->next = linkedlistadd(p->next, data); 38 | return p; 39 | } 40 | 41 | 42 | char rstr[RANDLEN+1]; 43 | 44 | char *randstr(){ 45 | char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 46 | int n = RANDLEN; 47 | while((--n) > -1){ 48 | size_t index = (double) rand()/RAND_MAX * (sizeof charset - 1); 49 | rstr[n] = charset[index]; 50 | } 51 | return rstr; 52 | } 53 | 54 | void linkedlistprint(LinkedList *p, FILE *out, char* payload){ 55 | int random = 0; 56 | if(!payload){ 57 | random = 1; 58 | payload = randstr(); 59 | } 60 | if(p != NULL){ 61 | (p->data == NULL) ? fprintf(out, "NULL=NULL") : fprintf(out, "%s=%s", p->data, payload); 62 | (p->next == NULL) ? : fprintf(out, "%c",'&'); 63 | if(random) 64 | payload = NULL; 65 | linkedlistprint(p->next, out, payload); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /urlparse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * urlparse.c 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 
6 | */ 7 | 8 | #include "urlparse.h" 9 | #include "linkedlist.h" 10 | #include 11 | #include 12 | #include 13 | 14 | URL *parseurl(char *url) { 15 | URL *urlp = urlalloc(); 16 | urlp->params = NULL; 17 | urlp->nparams = 0; 18 | short stage = 0; /* var to keep track of where we are in url */ 19 | int counter = 0; 20 | 21 | while (*url != '\0' && *url != '\n') { 22 | switch (*url++) { 23 | case ':': 24 | counter++; 25 | if (stage == 0) { 26 | urlp->https = *(url - 2) == 's'; 27 | if (*(url + 1) == '\0' || *url == '\0' || *url == '\n') /* weird stuff would happen with strings like "http:" */ 28 | return NULL; 29 | url += 2; /* Skip the // after the :*/ 30 | stage = 1; 31 | counter+=3; 32 | } 33 | break; 34 | 35 | case '?': 36 | if (stage == 1) { 37 | urlp->base = 38 | (char *)malloc(counter); /* +1 for the '\0' in the end */ 39 | strncpy(urlp->base, url - counter, counter - 1); 40 | stage = 2; 41 | counter = 1; 42 | } else { 43 | return NULL; 44 | } 45 | break; 46 | 47 | case '=': 48 | if (stage == 2) { 49 | char *foo; 50 | foo = (char *)malloc(counter); 51 | strncpy(foo, url - counter, counter-1); 52 | counter = 1; 53 | if (urlp->params == NULL){ 54 | urlp->params = linkedlistalloc(); 55 | urlp->params->data = foo; 56 | }else 57 | urlp->params = linkedlistadd(urlp->params, foo); 58 | urlp->nparams++; 59 | while(*url != '&' && *url != '\0' && *url != '\n') 60 | url++; 61 | url++; 62 | } 63 | break; 64 | 65 | default: 66 | counter++; 67 | break; 68 | } 69 | } 70 | 71 | switch(stage){ 72 | case 0: 73 | return NULL; 74 | break; 75 | case 1: 76 | urlp->base = (char *)malloc(counter); /* +1 for the '\0' in the end */ 77 | strncpy(urlp->base, url - (counter-1), counter - 1); 78 | break; 79 | case 2: 80 | break; 81 | default: 82 | return NULL; 83 | } 84 | return urlp; 85 | } 86 | 87 | URL *urlalloc(void) { return (URL *)malloc(sizeof(URL)); } 88 | -------------------------------------------------------------------------------- /tree.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * tree.c 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 6 | */ 7 | 8 | #include "tree.h" 9 | #include "linkedlist.h" 10 | #include "urlparse.h" 11 | #include 12 | #include 13 | #include 14 | 15 | extern TreeNode *root; 16 | 17 | TreeNode *addtree(TreeNode *parent, TreeNode *p) { 18 | if (parent == NULL) 19 | return p; 20 | int strdiff = strcmp(parent->path, p->path); 21 | if (!strdiff) { 22 | while (p->params != NULL) { 23 | if (p->params == NULL || 24 | linkedlistfind(parent->params, p->params->data) == -1) { 25 | p->params = linkedlistadd(parent->params, p->params->data); 26 | } 27 | p->params = p->params->next; 28 | } 29 | } else if (strdiff < 0) { 30 | parent->left = addtree(parent->left, p); 31 | parent->left->parent = parent; 32 | } else { 33 | parent->right = addtree(parent->right, p); 34 | parent->right->parent = parent; 35 | } 36 | return parent; 37 | } 38 | 39 | void rotatetreeleft(TreeNode *p) { 40 | TreeNode *r = p->right; 41 | p->right = r->left; 42 | if (p->right) 43 | p->right->parent = p; 44 | r->parent = p->parent; 45 | if (p->parent == NULL) 46 | root = r; 47 | else if (p->parent->left == p) 48 | p->parent->left = r; 49 | else 50 | p->parent->right = r; 51 | r->left = p; 52 | p->parent = r; 53 | } 54 | 55 | void rotatetreeright(TreeNode *p) { 56 | TreeNode *l = p->left; 57 | p->left = l->right; 58 | if (p->left) 59 | p->left->parent = p; 60 | l->parent = p->parent; 61 | if (p->parent == NULL) 62 | root = l; 63 | else if (p->parent->left == p) 64 | p->parent->left = l; 65 | else 66 | p->parent->right = l; 67 | l->right = p; 68 | p->parent = l; 69 | } 70 | 71 | void balancetree(TreeNode *root, TreeNode *node) { 72 | TreeNode *p = NULL; 73 | TreeNode *gP = NULL; 74 | 75 | while (node->parent != NULL && node->parent->parent != NULL && node->red && node->parent->red ) { 76 | p = 
node->parent; 77 | gP = node->parent->parent; 78 | if (gP->left == p) { 79 | if (gP->right != NULL && gP->right->red) { 80 | gP->red = 1; 81 | gP->left->red = 0; 82 | gP->right->red = 0; 83 | node = gP; 84 | }else{ 85 | if(p->right == node){ 86 | rotatetreeleft(p); 87 | node = p; 88 | p = node->parent; 89 | }else{ 90 | rotatetreeright(gP); 91 | int c = p->red; 92 | p->red = gP->red; 93 | gP->red = c; 94 | node = p; 95 | } 96 | } 97 | } else { 98 | if(gP->left != NULL && gP->left->red){ 99 | gP->red = 1; 100 | gP->left->red = 0; 101 | gP->right->red = 0; 102 | node = gP; 103 | }else{ 104 | if(p->left == node){ 105 | rotatetreeright(p); 106 | node = p; 107 | p = node->parent; 108 | }else{ 109 | rotatetreeleft(gP); 110 | int c = p->red; 111 | p->red = gP->red; 112 | gP->red = c; 113 | node = p; 114 | } 115 | } 116 | } 117 | } 118 | root->red = 0; 119 | } 120 | 121 | TreeNode *treealloc(void) { return (TreeNode *)malloc(sizeof(TreeNode)); } 122 | 123 | void printtree(TreeNode *root, FILE *out, char *payload, int minparams) { 124 | if (root != NULL) { 125 | printtree(root->left, out, payload, minparams); 126 | if(root->nparams >= minparams){ 127 | fprintf(out, "%s", root->path); 128 | (!root->nparams) ? : fprintf(out, "%c",'?'); 129 | linkedlistprint(root->params, out, payload); 130 | fprintf(out, "%c", '\n'); 131 | } 132 | printtree(root->right, out, payload, minparams); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /massurl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * massurl.c 3 | * 4 | * Created by Yigit Colakoglu on 07/06/2021. 5 | * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved. 
6 | */ 7 | 8 | #include "strings.h" 9 | #include "tree.h" 10 | #include "urlparse.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | #define MAXURL 100000 16 | #define MAXPAYLOAD 10000 17 | 18 | static void usage(void) { 19 | fputs("\ 20 | usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n", stderr); 21 | exit(1); 22 | } 23 | 24 | TreeNode *root = NULL; 25 | 26 | int main(int argc, char *argv[]) { 27 | 28 | FILE *fin = stdin, *fout = stdout, *payloads = NULL; 29 | char *param, urlstr[MAXURL], payload[MAXPAYLOAD]; 30 | int minparamn, verbose = 0, npayloads = 1; 31 | int randpayloads = 0; 32 | time_t begin = time(NULL); 33 | unsigned long lines, errors = 0; 34 | 35 | while (--argc > 0) { 36 | param = *++argv; 37 | if (param[0] == '-') { 38 | param++; 39 | switch (*param) { 40 | case 'o': 41 | if ((fout = fopen(*++argv, "w")) == NULL) { 42 | fprintf(stderr, "Can't open output file for writing.\n"); 43 | return 1; 44 | } 45 | if (ferror(fout)) { 46 | fprintf(stderr, "Can't open output file for writing.\n"); 47 | return 1; 48 | } 49 | break; 50 | case 'n': 51 | minparamn = atoi(*++argv); 52 | argc--; 53 | break; 54 | case 'v': 55 | verbose = 1; 56 | break; 57 | case 'r': 58 | randpayloads = 1; 59 | break; 60 | case 'h': 61 | usage(); 62 | break; 63 | case 'p': 64 | if ((payloads = fopen(*++argv, "r")) == NULL) { 65 | fprintf(stderr, "Can't open payload file for reading.\n"); 66 | return 1; 67 | } 68 | if (ferror(fout)) { 69 | fprintf(stderr, "Can't open payload file for reading.\n"); 70 | return 1; 71 | } 72 | break; 73 | default: 74 | fprintf(stderr, "Parameter -%c does not exist!\n", *param); 75 | usage(); 76 | } 77 | } else { 78 | if ((fin = fopen(param, "r")) == NULL) { 79 | fprintf(stderr, "Can't open file %s\n", param); 80 | return 1; 81 | } 82 | } 83 | } 84 | 85 | URL *url; 86 | while (fgets(urlstr, MAXURL, fin) != NULL) { 87 | lines++; 88 | if ((url = parseurl(urlstr)) == NULL) { 89 | errors++; 90 | if (verbose) 91 | 
fprintf(stderr, "Malformed URL %s", urlstr); 92 | continue; 93 | } 94 | TreeNode *newnode = treealloc(); 95 | newnode->path = url->base; 96 | newnode->params = url->params; 97 | newnode->parent = NULL; 98 | newnode->left = newnode->right = NULL; 99 | newnode->nparams = url->nparams; 100 | newnode->red = 1; /* Always color new nodes red */ 101 | root = addtree(root, newnode); 102 | balancetree(root, newnode); 103 | } 104 | 105 | if ( randpayloads ) 106 | printtree(root, fout, NULL, minparamn); 107 | else if ( payloads == NULL ) 108 | printtree(root, fout, "%s", minparamn); 109 | if ( payloads ) { 110 | while (fgets(payload, MAXPAYLOAD, payloads) != NULL) { 111 | npayloads++; 112 | for(int i=0; i