├── .gitignore ├── test.txt ├── README.md ├── LICENSE └── uniqqs.c /.gitignore: -------------------------------------------------------------------------------- 1 | /uniqqs -------------------------------------------------------------------------------- /test.txt: -------------------------------------------------------------------------------- 1 | https://www.facebook.com/endpoint_a?p1=123&p2=123123 2 | https://www.facebook.com/endpoint_a?p4=123&p3=123123 3 | https://www.facebook.com/endpoint_b?p4=123&p3=123123 4 | https://www.facebook.com/endpoint_b?p5=123&p6=123123 5 | https://www.facebook.com/endpoint_a?p5=123&p6=123123 6 | https://www.facebook.com/endpoint_b?p1=123&p2=123123 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # uniqqs 2 | 3 | A `uniq` for URLs: it groups URLs by endpoint and merges the query-string parameter names seen for each endpoint into one result line. 4 | 5 | # Installation 6 | ``` 7 | git clone https://github.com/ammarfaizi2/uniqqs 8 | cd uniqqs 9 | gcc -O3 uniqqs.c -o uniqqs 10 | ./uniqqs < test.txt 11 | ``` 12 | 13 | # Example Case 14 | ### List 15 | ``` 16 | https://www.facebook.com/endpoint_a?p1=123&p2=123123 17 | https://www.facebook.com/endpoint_a?p4=123&p3=123123 18 | https://www.facebook.com/endpoint_b?p4=123&p3=123123 19 | https://www.facebook.com/endpoint_b?p5=123&p6=123123 20 | https://www.facebook.com/endpoint_a?p5=123&p6=123123 21 | https://www.facebook.com/endpoint_b?p1=123&p2=123123 22 | ``` 23 | ### Result 24 | ``` 25 | https://www.facebook.com/endpoint_a?p1=xxx&p2=xxx&p4=xxx&p3=xxx&p5=xxx&p6=xxx 26 | https://www.facebook.com/endpoint_b?p4=xxx&p3=xxx&p5=xxx&p6=xxx&p1=xxx&p2=xxx 27 | ``` 28 | 29 | # License 30 | MIT 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ammar Faizi 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /uniqqs.c: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * @author Ammar Faizi https://www.facebook.com/ammarfaizi2 4 | * @license MIT 5 | * 6 | * uniqqs: A uniq for URL integrated with query string result. 
/**
 * Program overview: every input line is treated as one URL. URLs are grouped
 * by endpoint (the text before the first '?'). For each endpoint the distinct
 * query-string parameter names are collected in first-seen order and printed
 * once, as "endpoint?name=xxx&name=xxx".
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of parameter-name slots allocated the first time a bucket needs any. */
#define FIRST_ALLOCATED_QS 300

/* Initial bucket capacity requested by main(); the array grows on demand. */
#define FIRST_ALLOCATED_BKT 50000

/* Granularity used when reading input lines of arbitrary length. */
#define LINE_CHUNK 4096

/* One distinct endpoint and the query-string parameter names seen for it. */
struct bucket {
    char *endpoint;       /* heap copy of the URL up to, not including, '?' */
    char **qs;            /* heap array of heap-allocated parameter names   */
    size_t endpoint_len;  /* strlen(endpoint)                               */
    size_t qs_count;      /* number of used entries in qs                   */
    size_t allocated_qs;  /* size of the qs array in BYTES; 0 while qs is NULL */
};

void parse_uniqqs(uint32_t bucket_alloc, FILE *h);
uint32_t get_bkt_index(
    struct bucket *bkt,
    char *fix_url,
    uint32_t bkt_count,
    bool *bkt_find
);
bool qs_is_new(char **qs, size_t qs_siz, char *qsn);

/* Print a diagnostic to stderr and terminate with a failure status. */
static void die(const char *msg)
{
    fprintf(stderr, "uniqqs: %s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * (Re)allocate an array of `count` elements of `elem_size` bytes.
 * Never returns NULL: an overflowing size or an allocation failure ends
 * the program, because parse_uniqqs() has no way to report errors.
 */
static void *xrealloc_array(void *ptr, size_t count, size_t elem_size)
{
    void *p;

    if (count == 0 || elem_size == 0 || count > SIZE_MAX / elem_size) {
        die("invalid allocation size");
    }
    p = realloc(ptr, count * elem_size);
    if (p == NULL) {
        die("out of memory");
    }
    return p;
}

/* Return a NUL-terminated heap copy of the first `len` bytes of `src`. */
static char *copy_string(const char *src, size_t len)
{
    char *dst = xrealloc_array(NULL, len + 1, 1);

    memcpy(dst, src, len);
    dst[len] = '\0';
    return dst;
}

/*
 * Read one complete line (of any length) from `h` into the growable buffer
 * *line / *cap. The trailing '\n', if present, is kept.
 * Returns false only when nothing more could be read.
 */
static bool read_line(FILE *h, char **line, size_t *cap)
{
    size_t len = 0;

    for (;;) {
        /* Keep LINE_CHUNK bytes free so every fgets() can make progress. */
        if (*cap - len < LINE_CHUNK) {
            size_t new_cap = *cap ? *cap : LINE_CHUNK;

            while (new_cap - len < LINE_CHUNK) {
                if (new_cap > SIZE_MAX / 2) {
                    die("input line too long");
                }
                new_cap *= 2;
            }
            *line = xrealloc_array(*line, new_cap, 1);
            *cap = new_cap;
        }

        if (fgets(*line + len, LINE_CHUNK, h) == NULL) {
            break;
        }
        len += strlen(*line + len);
        if (len > 0 && (*line)[len - 1] == '\n') {
            return true;
        }
    }

    /* EOF or error: a final line without '\n' still counts as a line. */
    return len > 0;
}

/*
 * Record `key` in bucket `b` unless it is already stored there.
 * The qs array is created lazily and doubled whenever it is full, so a
 * bucket first seen without a query string (qs == NULL) is handled too.
 */
static void add_qs_key(struct bucket *b, char *key)
{
    size_t slots = b->allocated_qs / sizeof *b->qs;

    /* NB: despite its name, qs_is_new() is true when the key is ALREADY stored. */
    if (qs_is_new(b->qs, b->qs_count, key)) {
        return;
    }

    if (b->qs_count >= slots) {
        size_t new_slots;

        if (slots == 0) {
            new_slots = FIRST_ALLOCATED_QS;
        } else {
            if (slots > SIZE_MAX / 2) {
                die("too many query string parameters");
            }
            new_slots = slots * 2;
        }
        b->qs = xrealloc_array(b->qs, new_slots, sizeof *b->qs);
        b->allocated_qs = new_slots * sizeof *b->qs;
    }

    b->qs[b->qs_count++] = copy_string(key, strlen(key));
}

/*
 * Walk the query string `qs` (the text after '?', modified in place) and
 * record the name of every "name=value" pair in bucket `b`.
 * Parsing ends at '\n' or '\0'. A parameter without '=' is not recorded,
 * but the parameters after it still are.
 */
static void collect_qs_keys(struct bucket *b, char *qs)
{
    char *p = qs;

    while (*p != '\0' && *p != '\n') {
        char *key = p;

        while (*p != '=' && *p != '&' && *p != '\n' && *p != '\0') {
            p++;
        }

        if (*p == '=') {
            *p++ = '\0';            /* terminate the name so it can be copied */
            add_qs_key(b, key);
            while (*p != '&' && *p != '\n' && *p != '\0') {
                p++;                /* skip the value */
            }
        }

        if (*p != '&') {
            break;
        }
        p++;                        /* step over '&' to the next parameter */
    }
}

/**
 * Program entry point: filters stdin to stdout.
 *
 * @param int argc
 * @param char **argv
 * @return int  EXIT_SUCCESS, or EXIT_FAILURE when arguments are given
 */
int main(int argc, char **argv)
{
    if (argc != 1) {
        fprintf(stderr, "Usage: %s < FILE\n",
                (argc > 0 && argv[0]) ? argv[0] : "uniqqs");
        return EXIT_FAILURE;
    }

    parse_uniqqs(FIRST_ALLOCATED_BKT, stdin);
    return EXIT_SUCCESS;
}

/**
 * Read URLs line by line from `h`, merge the query-string parameter names of
 * lines that share an endpoint, and print one line per endpoint to stdout in
 * first-seen order. Terminates the program if memory runs out.
 *
 * @param uint32_t bucket_alloc  initial endpoint capacity (grown as needed)
 * @param FILE *h                input stream
 * @return void
 */
void parse_uniqqs(uint32_t bucket_alloc, FILE *h)
{
    size_t bkt_cap = bucket_alloc ? bucket_alloc : 1;
    uint32_t bkt_count = 0;
    struct bucket *bkt = xrealloc_array(NULL, bkt_cap, sizeof *bkt);
    char *line = NULL;
    size_t line_cap = 0;

    while (read_line(h, &line, &line_cap)) {
        struct bucket *bkt_p;
        bool has_qs, bkt_find = false;
        uint32_t ci;
        char *sep = line;

        /* The endpoint is everything before the first '?' (or end of line). */
        while (*sep != '?' && *sep != '\n' && *sep != '\0') {
            sep++;
        }

        /* A URL has a query string if and only if it contains '?'. */
        has_qs = (*sep == '?');
        *sep = '\0';

        ci = get_bkt_index(bkt, line, bkt_count, &bkt_find);
        if (bkt_find) {
            bkt_p = &bkt[ci];
        } else {
            /* bkt_count is 32-bit because get_bkt_index() takes uint32_t. */
            if (bkt_count == UINT32_MAX) {
                die("too many distinct endpoints");
            }
            if (bkt_count == bkt_cap) {
                if (bkt_cap > SIZE_MAX / 2) {
                    die("too many distinct endpoints");
                }
                bkt_cap *= 2;
                bkt = xrealloc_array(bkt, bkt_cap, sizeof *bkt);
            }

            bkt_p = &bkt[bkt_count++];
            bkt_p->endpoint_len = (size_t)(sep - line);
            bkt_p->endpoint = copy_string(line, bkt_p->endpoint_len);
            bkt_p->qs = NULL;       /* allocated on first recorded name */
            bkt_p->qs_count = 0;
            bkt_p->allocated_qs = 0;
        }

        if (has_qs) {
            collect_qs_keys(bkt_p, sep + 1);
        }
    }

    if (ferror(h)) {
        fputs("uniqqs: read error, output may be incomplete\n", stderr);
    }
    free(line);

    for (uint32_t i = 0; i < bkt_count; ++i) {
        printf("%s", bkt[i].endpoint);
        for (size_t j = 0; j < bkt[i].qs_count; j++) {
            printf("%c%s=xxx", j ? '&' : '?', bkt[i].qs[j]);
            free(bkt[i].qs[j]);
        }
        free(bkt[i].qs);
        free(bkt[i].endpoint);
        printf("\n");
    }
    free(bkt);
}

/**
 * Linear search for the bucket whose endpoint equals `fix_url`.
 *
 * @param struct bucket *bkt   bucket array
 * @param char *fix_url        endpoint to look for
 * @param uint32_t bkt_count   number of valid entries in bkt
 * @param bool *bkt_find       set to true on a match, false otherwise
 * @return uint32_t            index of the match; 0 when nothing matched, so
 *                             the result is meaningful only if *bkt_find is true
 */
uint32_t get_bkt_index(
    struct bucket *bkt,
    char *fix_url,
    uint32_t bkt_count,
    bool *bkt_find
)
{
    *bkt_find = false;
    for (uint32_t i = 0; i < bkt_count; i++) {
        if (strcmp(bkt[i].endpoint, fix_url) == 0) {
            *bkt_find = true;
            return i;
        }
    }
    return 0;
}

/**
 * Membership test for a parameter name.
 *
 * NOTE: the name is misleading. The function returns true when `qsn` is
 * ALREADY present in `qs`, i.e. when it is NOT new. The return value is kept
 * as it is for compatibility with existing callers.
 *
 * @param char **qs       array of stored names (may be NULL when qs_siz is 0)
 * @param size_t qs_siz   number of entries in qs
 * @param char *qsn       name to look for
 * @return bool           true if qsn is already stored, false otherwise
 */
bool qs_is_new(char **qs, size_t qs_siz, char *qsn)
{
    for (size_t i = 0; i < qs_siz; i++) {
        if (strcmp(qs[i], qsn) == 0) {
            return true;
        }
    }
    return false;
}