├── CMakeLists.txt
├── LICENSE.txt
├── README.md
└── bs4kass.c


/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build configuration for bs4kass: one C99 source file linked against the
2 | # prebuilt FFmpeg and expat libraries under vendor/ (MSVC x64 layout).
3 | #
4 | # The min...max version range keeps CMake 3.1 usable while allowing CMake 4.x,
5 | # which rejects a bare minimum older than 3.5, to configure the project.
6 | cmake_minimum_required(VERSION 3.1...3.28)
7 | 
8 | # C only: without LANGUAGES, project() also requires a working C++ compiler.
9 | project(bs4kass LANGUAGES C)
10 | 
11 | # The repository ships a single source file, so it is listed explicitly
12 | # instead of being discovered with file(GLOB) at configure time.
13 | add_executable(bs4kass bs4kass.c)
14 | set_property(TARGET bs4kass PROPERTY C_STANDARD 99)
15 | 
16 | # XML_STATIC selects expat's static-linkage declarations;
17 | # _CRT_SECURE_NO_WARNINGS silences MSVC's CRT deprecation warnings.
18 | target_compile_definitions(bs4kass PRIVATE
19 |   XML_STATIC
20 |   _CRT_SECURE_NO_WARNINGS
21 | )
22 | target_include_directories(bs4kass PRIVATE
23 |   "${PROJECT_SOURCE_DIR}/vendor/include"
24 | )
25 | 
26 | # Static archives are listed dependents-first (avformat uses avcodec, both use
27 | # avutil) so that single-pass linkers can resolve every symbol.
28 | target_link_libraries(bs4kass PRIVATE
29 |   "${PROJECT_SOURCE_DIR}/vendor/lib-msvc-x64/libavformat.a"
30 |   "${PROJECT_SOURCE_DIR}/vendor/lib-msvc-x64/libavcodec.a"
31 |   "${PROJECT_SOURCE_DIR}/vendor/lib-msvc-x64/libavutil.a"
32 |   "${PROJECT_SOURCE_DIR}/vendor/lib-msvc-x64/libexpat.lib"
33 |   bcrypt.lib
34 | )
35 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2020 Xinyue Lu
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | bs4kass extracts ARIB STD-B69 subtitles from 4K/8K TV programs and converts them to an Advanced SubStation Alpha (ASS) file.
2 | 
3 | ## Compiling
4 | 
5 | Link with avcodec, avformat, avutil and expat.
6 | 
7 | If you compile FFmpeg yourself, use the following configure flags:
8 | 
9 |     ./configure --disable-pthreads --disable-bzlib --disable-iconv --disable-lzma --disable-xlib --disable-everything --disable-network --disable-programs --disable-doc --disable-avdevice --disable-swresample --disable-swscale --disable-postproc --disable-avfilter --enable-protocol=file --enable-demuxer=mpegts ...
10 | 
--------------------------------------------------------------------------------
/bs4kass.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | /* NOTE(review): the header names were stripped from this listing; they are restored from the symbols used below - confirm against upstream. */
5 | #include <libavformat/avformat.h>
6 | #include <libavutil/avutil.h>
7 | #include <expat.h>
8 | #ifndef WIN32
9 | #include <sys/time.h>
10 | #endif
11 | #define DIALOG_BUFFER_SIZE 1048576 /* bytes malloc'ed for dialog_buffer in main(); keep the two in sync */
12 | static int insert_new_line = 0; // set when a "p" element closes: the next span starts with an ASS line break
13 | static int insert_new_text = 0; // non-zero while inside a span; tt_text only copies text then
14 | static int insert_bracket = 0;  // the current span was opened with '<' and still needs its '>'
15 | #ifndef WIN32
16 | static struct timeval last_flush = {0}; // time of the last progress print
17 | #endif
18 | #define DIALOG_ROOM() ((size_t)DIALOG_BUFFER_SIZE - (size_t)(dialog_buffer_tail - dialog_buffer)) /* free bytes at the tail */
19 | static int missing_end = 0;             // the current div had no usable end attribute; its line is held back
20 | static char* dialog_buffer = NULL;      // start of the Dialogue line being assembled (allocated in main)
21 | static char* dialog_buffer_tail = NULL; // next write position; equals dialog_buffer when nothing is pending
22 | // expat start-tag handler: a div starts a Dialogue line (completing a pending one that lacked an end time); a span enables text capture.
23 | static void XMLCALL
24 | tt_start(void *userData, const char *name, const char **attr)
25 | {
26 |     FILE* fp = (FILE*)userData; // the ASS output file registered with XML_SetUserData
27 | 
28 |     if (strcmp(name, "div") == 0)
29 |     {
30 |         // Both timestamps default to zero and are overwritten from the begin/end attributes
31 |         char begin[] = "0:00:00.00";
32 |         char end[] = "0:00:00.00";
33 |         missing_end = 1;
34 |         for (int i = 0; attr[i]; i += 2)
35 |         {
36 |             if (strcmp(attr[i], "begin") == 0 && attr[i+1][0] != '\0') // an empty value would make +1 point past the NUL
37 |             {
38 |                 strncpy(begin, attr[i+1]+1, 10); // skips the first character; presumably a two-digit hour - TODO confirm
39 |                 if (dialog_buffer != dialog_buffer_tail)
40 |                 {
41 |                     strncpy(dialog_buffer+23, attr[i+1]+1, 10); // offset 23 is the End field of the pending line
42 |                     fputs(dialog_buffer, fp);
43 |                 }
44 |             }
45 |             if (strcmp(attr[i], "end") == 0 && attr[i+1][0] != '\0')
46 |             {
47 |                 strncpy(end, attr[i+1]+1, 10);
48 |                 missing_end = 0;
49 |             }
50 |         }
51 |         dialog_buffer_tail = dialog_buffer;
52 |         dialog_buffer_tail += sprintf(dialog_buffer_tail, "Dialogue: 0,%s,%s,Default,,0,0,0,,", begin, end);
53 | #ifndef WIN32
54 |         struct timeval now;
55 |         gettimeofday(&now, NULL);
56 |         if ((now.tv_sec - last_flush.tv_sec) * 1000000 + (now.tv_usec - last_flush.tv_usec) >= 250000) // at most one progress print per 250 ms
57 |         {
58 |             printf("%s \r", begin);
59 |             fflush(stdout);
60 |             last_flush = now;
61 |         }
62 | #else
63 |         printf("%s \r", begin);
64 | #endif
65 |     }
66 |     else if (strcmp(name, "span") == 0)
67 |     {
68 |         if (insert_new_line && DIALOG_ROOM() > 4) dialog_buffer_tail += sprintf(dialog_buffer_tail, "\\N"); // room check keeps 2 bytes for the closing newline + NUL
69 |         insert_new_line = 0;
70 |         insert_new_text = 1;
71 |         for (int i = 0; attr[i]; i += 2)
72 |         {
73 |             if (strcmp(attr[i], "style") == 0)
74 |             {
75 |                 if (strstr(attr[i+1], "smallSize") != NULL && DIALOG_ROOM() > 4) // smallSize spans are wrapped in '<' ... '>'
76 |                 {
77 |                     *(dialog_buffer_tail++) = '<';
78 |                     insert_bracket = 1;
79 |                 }
80 |                 break;
81 |             }
82 |         }
83 |     }
84 | }
85 | // expat end-tag handler: closing a div terminates the Dialogue line and writes it out unless its end time is still unknown.
86 | static void XMLCALL
87 | tt_end(void *userData, const char *name)
88 | {
89 |     FILE* fp = (FILE*)userData;
90 | 
91 |     if (strcmp(name, "div") == 0)
92 |     {
93 |         insert_new_line = 0;
94 |         if (DIALOG_ROOM() >= 2) *(dialog_buffer_tail++) = '\n';
95 |         if (DIALOG_ROOM() >= 1) *(dialog_buffer_tail++) = 0;
96 |         if (!missing_end)
97 |         {
98 |             fputs(dialog_buffer, fp);
99 |             dialog_buffer_tail = dialog_buffer;
100 |         }
101 |     }
102 |     else if (strcmp(name, "p") == 0)
103 |     {
104 |         insert_new_line = 1;
105 |     }
106 |     else if (strcmp(name, "span") == 0)
107 |     {
108 |         if (insert_bracket && DIALOG_ROOM() > 2)
109 |             *(dialog_buffer_tail++) = '>';
110 |         insert_new_text = 0;
111 |         insert_bracket = 0;
112 |     }
113 | }
114 | // expat character-data handler: appends text found inside a span to the Dialogue line being assembled.
115 | static void XMLCALL
116 | tt_text(void *userData, const char *s, int len)
117 | {
118 |     (void)userData; // unused: text is only buffered here, the tag handlers do the writing
119 |     if (insert_new_text && len >= 0 && (size_t)len + 2 < DIALOG_ROOM()) { // drop text that would overrun the buffer
120 |         memcpy(dialog_buffer_tail, s, (size_t)len);
121 |         dialog_buffer_tail += len;
122 |         *dialog_buffer_tail = 0;
123 |     }
124 | }
125 | // Writes the fixed ASS header (script info, the Default style and the Events format line) to fp.
126 | void init_ass(FILE* fp)
127 | {
128 |     fputs(
129 |         "[Script Info]\n"
130 |         "; Script generated by BS4KASS\n"
131 |         "ScriptType: v4.00+\n"
132 |         "PlayResX: 1920\n"
133 |         "PlayResY: 1080\n"
134 |         "\n"
135 |         "[V4+ Styles]\n"
136 |         "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
137 |         "Style: Default,MS UI Gothic,50,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,1,2,10,10,50,0\n"
138 |         "\n"
139 |         "[Events]\n"
140 |         "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
141 |         , fp);
142 | }
143 | 
144 | int main(int argc, char **argv)
145 | {
146 |     const char *filename;
147 |     FILE *ass_file = NULL;
148 | 
149 |     AVFormatContext *fmt_ctx =
NULL; 150 | int data_stream_idx = -1; 151 | AVPacket pkt; 152 | 153 | av_log_set_level(AV_LOG_QUIET); 154 | setvbuf(stdout, NULL, _IOLBF, 128); 155 | dialog_buffer = (char*)malloc(1048576); 156 | dialog_buffer_tail = dialog_buffer; 157 | 158 | if (argc != 2) { 159 | fprintf(stderr, "Usage: %s Source.m2ts\n", argv[0]); 160 | exit(0); 161 | } 162 | filename = argv[1]; 163 | size_t len = strlen(filename); 164 | char* ass_filename = (char*) malloc(len+14); 165 | strcpy(ass_filename, filename); 166 | if (strcmp(ass_filename+len-5, ".m2ts") == 0) 167 | ass_filename[len-5] = 0; 168 | strcat(ass_filename, ".kingyubi.ass"); 169 | 170 | AVDictionary *options = NULL; 171 | av_dict_set(&options, "analyzeduration", "60000000", 0); // 60 seconds 172 | av_dict_set(&options, "probesize", "104857600", 0); // 100 MiB 173 | 174 | if (avformat_open_input(&fmt_ctx, filename, NULL, &options) < 0) { 175 | fprintf(stderr, "Could not open source file %s\n", filename); 176 | exit(1); 177 | } 178 | if (avformat_find_stream_info(fmt_ctx, NULL) < 0) { 179 | fprintf(stderr, "Could not find stream information\n"); 180 | exit(1); 181 | } 182 | 183 | data_stream_idx = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_DATA, -1, -1, NULL, 0); 184 | if (data_stream_idx < 0) 185 | data_stream_idx = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_SUBTITLE, -1, -1, NULL, 0); 186 | if (data_stream_idx < 0) { 187 | fprintf(stderr, "Could not find data stream in input file\n"); 188 | exit(1); 189 | } 190 | 191 | ass_file = fopen(ass_filename, "wb"); 192 | if (!ass_file) { 193 | fprintf(stderr, "Could not open destination file %s\n", ass_filename); 194 | exit(1); 195 | } 196 | 197 | printf("[Source] %s\n", filename); 198 | printf("[Target] %s\n", ass_filename); 199 | printf("[Track#] %d\n", data_stream_idx); 200 | fflush(stdout); 201 | 202 | av_init_packet(&pkt); 203 | pkt.data = NULL; 204 | pkt.size = 0; 205 | 206 | init_ass(ass_file); 207 | XML_Parser p = NULL; 208 | int xml_offset = -1; 209 | while 
(av_read_frame(fmt_ctx, &pkt) >= 0) { 210 | if (data_stream_idx < 2) { 211 | // assuming we got the wrong track because 0 is usually video and 1 is usually audio 212 | int size = 1024; 213 | if (pkt.size < size) size = pkt.size; 214 | for (int i = 0; i < size - 2; i++) { 215 | // search for 'tts' 216 | if (pkt.data[i] != 't') continue; 217 | if (pkt.data[i+1] != 't') continue; 218 | if (pkt.data[i+2] != 's') continue; 219 | data_stream_idx = pkt.stream_index; 220 | printf("[Track#] %d\n", data_stream_idx); 221 | break; 222 | } 223 | if (data_stream_idx < 2) 224 | continue; 225 | } 226 | if (pkt.stream_index == data_stream_idx) 227 | { 228 | int last = strncmp(pkt.data + pkt.size - 5, "", 5) == 0; 229 | last = last || (strncmp(pkt.data + pkt.size - 7, "", 6) == 0); 230 | if (p) 231 | { 232 | XML_Parse(p, pkt.data + 1, pkt.size - 1, last); // TODO: test this scenario on ts file 233 | } 234 | else 235 | { 236 | p = XML_ParserCreate("UTF-8"); 237 | 238 | if (!p) { 239 | fprintf(stderr, "Couldn't allocate memory for XML parser\n"); 240 | exit(1); 241 | } 242 | XML_SetElementHandler(p, tt_start, tt_end); 243 | XML_SetCharacterDataHandler(p, tt_text); 244 | XML_SetUserData(p, (void*)ass_file); 245 | // detect xml offset 246 | if (xml_offset < 0) { 247 | int size = 32; 248 | if (pkt.size < size) size = pkt.size; 249 | for (int i = 0; i < size - 4; i++) { 250 | // search for '