├── .gitignore ├── LICENSE ├── README ├── TODO ├── aribgaiji.py ├── aribstr.py ├── aribtable.py ├── constant.py ├── epgdump.py ├── parser.py └── xmltv.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2011 Yasumasa Murakami. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | python epgdump.py 2 | 3 | USAGE: epgdump_py -c CHANNEL_ID -i INPUT_FILE -o OUTPUT_FILE 4 | -h, --help print help message 5 | -c, --channel-id specify channel identifier 6 | -f, --format format xml 7 | -i, --input specify ts file 8 | -o, --output specify xml file 9 | 10 | --formatオプションを使用するよりも、xmlstarletコマンドを使用し 11 | た方がよい。 12 | xmlstarlet format XML_FILE 13 | 14 | 番組検索用シェルスクリプト 15 | function select_time() { 16 | xmlstarlet sel --encode utf-8 -t -m '//programme' -v '@start' -n $@ | 17 | python -c ' 18 | import datetime 19 | import sys 20 | for line in sys.stdin: 21 | str = line.split() 22 | if str: 23 | print datetime.datetime.strptime(str[0], "%Y%m%d%H%M%S")' 24 | } 25 | function select_anime() { 26 | xmlstarlet sel --encode utf-8 -t -m "//programme" \ 27 | -m "category[contains(., 'アニメ')]" \ 28 | -v 'normalize-space(../title)' -o ' ' -v '../@start' -n $@ 29 | } 30 | function select_actor() { 31 | xmlstarlet sel --encode utf-8 -t -m "//programme" \ 32 | -m "desc[contains(., '星野真里')]" \ 33 | -v 'normalize-space(../title)' -o ' ' -v '../@start' -n $@ 34 | } 35 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/murakamiy/epgdump_py/1b6f701eede2310f61570cc75d1e0d76cc9ed69f/TODO -------------------------------------------------------------------------------- /aribgaiji.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | GAIJI_MAP_TITLE = { 5 | 0x7A50:"【HV】", 6 | 0x7A51:"【SD】", 7 | 0x7A52:"【P】", 8 | 0x7A53:"【W】", 9 | 0x7A54:"【MV】", 10 | 0x7A55:"【手】", 11 | 0x7A56:"【字】", 12 | 0x7A57:"【双】", 13 | 0x7A58:"【デ】", 14 | 0x7A59:"【S】", 15 | 0x7A5A:"【二】", 16 | 
0x7A5B:"【多】", 17 | 0x7A5C:"【解】", 18 | 0x7A5D:"【SS】", 19 | 0x7A5E:"【B】", 20 | 0x7A5F:"【N】", 21 | 0x7A62:"【天】", 22 | 0x7A63:"【交】", 23 | 0x7A64:"【映】", 24 | 0x7A65:"【無】", 25 | 0x7A66:"【料】", 26 | 0x7A67:"【年齢制限】", 27 | 0x7A68:"【前】", 28 | 0x7A69:"【後】", 29 | 0x7A6A:"【再】", 30 | 0x7A6B:"【新】", 31 | 0x7A6C:"【初】", 32 | 0x7A6D:"【終】", 33 | 0x7A6E:"【生】", 34 | 0x7A6F:"【販】", 35 | 0x7A70:"【声】", 36 | 0x7A71:"【吹】", 37 | 0x7A72:"【PPV】", 38 | } 39 | 40 | GAIJI_MAP_OTHER = { 41 | 0x7A60:"■", 42 | 0x7A61:"●", 43 | 0x7A73:"(秘)", 44 | 0x7A74:"ほか", 45 | 46 | 0x7C21:"→", 47 | 0x7C22:"←", 48 | 0x7C23:"↑", 49 | 0x7C24:"↓", 50 | 0x7C25:"●", 51 | 0x7C26:"○", 52 | 0x7C27:"年", 53 | 0x7C28:"月", 54 | 0x7C29:"日", 55 | 0x7C2A:"円", 56 | 0x7C2B:"㎡", 57 | 0x7C2C:"㎥", 58 | 0x7C2D:"㎝", 59 | 0x7C2E:"㎠", 60 | 0x7C2F:"㎤", 61 | 0x7C30:"0.", 62 | 0x7C31:"1.", 63 | 0x7C32:"2.", 64 | 0x7C33:"3.", 65 | 0x7C34:"4.", 66 | 0x7C35:"5.", 67 | 0x7C36:"6.", 68 | 0x7C37:"7.", 69 | 0x7C38:"8.", 70 | 0x7C39:"9.", 71 | 0x7C3A:"氏", 72 | 0x7C3B:"副", 73 | 0x7C3C:"元", 74 | 0x7C3D:"故", 75 | 0x7C3E:"前", 76 | 0x7C3F:"[新]", 77 | 0x7C40:"0,", 78 | 0x7C41:"1,", 79 | 0x7C42:"2,", 80 | 0x7C43:"3,", 81 | 0x7C44:"4,", 82 | 0x7C45:"5,", 83 | 0x7C46:"6,", 84 | 0x7C47:"7,", 85 | 0x7C48:"8,", 86 | 0x7C49:"9,", 87 | 0x7C4A:"(社)", 88 | 0x7C4B:"(財)", 89 | 0x7C4C:"(有)", 90 | 0x7C4D:"(株)", 91 | 0x7C4E:"(代)", 92 | 0x7C4F:"(問)", 93 | 0x7C50:"▶", 94 | 0x7C51:"◀", 95 | 0x7C52:"〖", 96 | 0x7C53:"〗", 97 | 0x7C54:"⟐", 98 | 0x7C55:"^2", 99 | 0x7C56:"^3", 100 | 0x7C57:"(CD)", 101 | 0x7C58:"(vn)", 102 | 0x7C59:"(ob)", 103 | 0x7C5A:"(cb)", 104 | 0x7C5B:"(ce", 105 | 0x7C5C:"mb)", 106 | 0x7C5D:"(hp)", 107 | 0x7C5E:"(br)", 108 | 0x7C5F:"(p)", 109 | 0x7C60:"(s)", 110 | 0x7C61:"(ms)", 111 | 0x7C62:"(t)", 112 | 0x7C63:"(bs)", 113 | 0x7C64:"(b)", 114 | 0x7C65:"(tb)", 115 | 0x7C66:"(tp)", 116 | 0x7C67:"(ds)", 117 | 0x7C68:"(ag)", 118 | 0x7C69:"(eg)", 119 | 0x7C6A:"(vo)", 120 | 0x7C6B:"(fl)", 121 | 0x7C6C:"(ke", 122 | 0x7C6D:"y)", 123 | 0x7C6E:"(sa", 124 | 0x7C6F:"x)", 
125 | 0x7C70:"(sy", 126 | 0x7C71:"n)", 127 | 0x7C72:"(or", 128 | 0x7C73:"g)", 129 | 0x7C74:"(pe", 130 | 0x7C75:"r)", 131 | 0x7C76:"(R)", 132 | 0x7C77:"(C)", 133 | 0x7C78:"(箏)", 134 | 0x7C79:"DJ", 135 | 0x7C7A:"[演]", 136 | 0x7C7B:"Fax", 137 | 138 | 0x7D21:"㈪", 139 | 0x7D22:"㈫", 140 | 0x7D23:"㈬", 141 | 0x7D24:"㈭", 142 | 0x7D25:"㈮", 143 | 0x7D26:"㈯", 144 | 0x7D27:"㈰", 145 | 0x7D28:"㈷", 146 | 0x7D29:"㍾", 147 | 0x7D2A:"㍽", 148 | 0x7D2B:"㍼", 149 | 0x7D2C:"㍻", 150 | 0x7D2D:"№", 151 | 0x7D2E:"℡", 152 | 0x7D2F:"〶", 153 | 0x7D30:"○", 154 | 0x7D31:"〔本〕", 155 | 0x7D32:"〔三〕", 156 | 0x7D33:"〔二〕", 157 | 0x7D34:"〔安〕", 158 | 0x7D35:"〔点〕", 159 | 0x7D36:"〔打〕", 160 | 0x7D37:"〔盗〕", 161 | 0x7D38:"〔勝〕", 162 | 0x7D39:"〔敗〕", 163 | 0x7D3A:"〔S〕", 164 | 0x7D3B:"[投]", 165 | 0x7D3C:"[捕]", 166 | 0x7D3D:"[一]", 167 | 0x7D3E:"[二]", 168 | 0x7D3F:"[三]", 169 | 0x7D40:"[遊]", 170 | 0x7D41:"[左]", 171 | 0x7D42:"[中]", 172 | 0x7D43:"[右]", 173 | 0x7D44:"[指]", 174 | 0x7D45:"[走]", 175 | 0x7D46:"[打]", 176 | 0x7D47:"㍑", 177 | 0x7D48:"㎏", 178 | 0x7D49:"㎐", 179 | 0x7D4A:"ha", 180 | 0x7D4B:"㎞", 181 | 0x7D4C:"㎢", 182 | 0x7D4D:"㍱", 183 | 0x7D4E:"・", 184 | 0x7D4F:"・", 185 | 0x7D50:"1/2", 186 | 0x7D51:"0/3", 187 | 0x7D52:"1/3", 188 | 0x7D53:"2/3", 189 | 0x7D54:"1/4", 190 | 0x7D55:"3/4", 191 | 0x7D56:"1/5", 192 | 0x7D57:"2/5", 193 | 0x7D58:"3/5", 194 | 0x7D59:"4/5", 195 | 0x7D5A:"1/6", 196 | 0x7D5B:"5/6", 197 | 0x7D5C:"1/7", 198 | 0x7D5D:"1/8", 199 | 0x7D5E:"1/9", 200 | 0x7D5F:"1/10", 201 | 0x7D60:"☀", 202 | 0x7D61:"☁", 203 | 0x7D62:"☂", 204 | 0x7D63:"☃", 205 | 0x7D64:"☖", 206 | 0x7D65:"☗", 207 | 0x7D66:"▽", 208 | 0x7D67:"▼", 209 | 0x7D68:"♦", 210 | 0x7D69:"♥", 211 | 0x7D6A:"♣", 212 | 0x7D6B:"♠", 213 | 0x7D6C:"⌺", 214 | 0x7D6D:"⦿", 215 | 0x7D6E:"‼", 216 | 0x7D6F:"⁉", 217 | 0x7D70:"(曇/晴)", 218 | 0x7D71:"☔", 219 | 0x7D72:"(雨)", 220 | 0x7D73:"(雪)", 221 | 0x7D74:"(大雪)", 222 | 0x7D75:"⚡", 223 | 0x7D76:"(雷雨)", 224 | 0x7D77:" ", 225 | 0x7D78:"・", 226 | 0x7D79:"・", 227 | 0x7D7A:"♬", 228 | 0x7D7B:"☎", 229 | 230 | 0x7E21:"Ⅰ", 231 
| 0x7E22:"Ⅱ", 232 | 0x7E23:"Ⅲ", 233 | 0x7E24:"Ⅳ", 234 | 0x7E25:"Ⅴ", 235 | 0x7E26:"Ⅵ", 236 | 0x7E27:"Ⅶ", 237 | 0x7E28:"Ⅷ", 238 | 0x7E29:"Ⅸ", 239 | 0x7E2A:"Ⅹ", 240 | 0x7E2B:"Ⅺ", 241 | 0x7E2C:"Ⅻ", 242 | 0x7E2D:"⑰", 243 | 0x7E2E:"⑱", 244 | 0x7E2F:"⑲", 245 | 0x7E30:"⑳", 246 | 0x7E31:"⑴", 247 | 0x7E32:"⑵", 248 | 0x7E33:"⑶", 249 | 0x7E34:"⑷", 250 | 0x7E35:"⑸", 251 | 0x7E36:"⑹", 252 | 0x7E37:"⑺", 253 | 0x7E38:"⑻", 254 | 0x7E39:"⑼", 255 | 0x7E3A:"⑽", 256 | 0x7E3B:"⑾", 257 | 0x7E3C:"⑿", 258 | 0x7E3D:"㉑", 259 | 0x7E3E:"㉒", 260 | 0x7E3F:"㉓", 261 | 0x7E40:"㉔", 262 | 0x7E41:"(A)", 263 | 0x7E42:"(B)", 264 | 0x7E43:"(C)", 265 | 0x7E44:"(D)", 266 | 0x7E45:"(E)", 267 | 0x7E46:"(F)", 268 | 0x7E47:"(G)", 269 | 0x7E48:"(H)", 270 | 0x7E49:"(I)", 271 | 0x7E4A:"(J)", 272 | 0x7E4B:"(K)", 273 | 0x7E4C:"(L)", 274 | 0x7E4D:"(M)", 275 | 0x7E4E:"(N)", 276 | 0x7E4F:"(O)", 277 | 0x7E50:"(P)", 278 | 0x7E51:"(Q)", 279 | 0x7E52:"(R)", 280 | 0x7E53:"(S)", 281 | 0x7E54:"(T)", 282 | 0x7E55:"(U)", 283 | 0x7E56:"(V)", 284 | 0x7E57:"(W)", 285 | 0x7E58:"(X)", 286 | 0x7E59:"(Y)", 287 | 0x7E5A:"(Z)", 288 | 0x7E5B:"㉕", 289 | 0x7E5C:"㉖", 290 | 0x7E5D:"㉗", 291 | 0x7E5E:"㉘", 292 | 0x7E5F:"㉙", 293 | 0x7E60:"㉚", 294 | 0x7E61:"①", 295 | 0x7E62:"②", 296 | 0x7E63:"③", 297 | 0x7E64:"④", 298 | 0x7E65:"⑤", 299 | 0x7E66:"⑥", 300 | 0x7E67:"⑦", 301 | 0x7E68:"⑧", 302 | 0x7E69:"⑨", 303 | 0x7E6A:"⑩", 304 | 0x7E6B:"⑪", 305 | 0x7E6C:"⑫", 306 | 0x7E6D:"⑬", 307 | 0x7E6E:"⑭", 308 | 0x7E6F:"⑮", 309 | 0x7E70:"⑯", 310 | 0x7E71:"❶", 311 | 0x7E72:"❷", 312 | 0x7E73:"❸", 313 | 0x7E74:"❹", 314 | 0x7E75:"❺", 315 | 0x7E76:"❻", 316 | 0x7E77:"❼", 317 | 0x7E78:"❽", 318 | 0x7E79:"❾", 319 | 0x7E7A:"❿", 320 | 0x7E7B:"⓫", 321 | 0x7E7C:"⓬", 322 | 0x7E7D:"㉛", 323 | 324 | 0x7521:"㐂", 325 | 0x7522:"亭", 326 | 0x7523:"份", 327 | 0x7524:"仿", 328 | 0x7525:"侚", 329 | 0x7526:"俉", 330 | 0x7527:"傜", 331 | 0x7528:"儞", 332 | 0x7529:"冼", 333 | 0x752A:"㔟", 334 | 0x752B:"匇", 335 | 0x752C:"卡", 336 | 0x752D:"卬", 337 | 0x752E:"詹", 338 | 0x752F:"吉", 339 | 0x7530:"呍", 
340 | 0x7531:"咖", 341 | 0x7532:"咜", 342 | 0x7533:"咩", 343 | 0x7534:"唎", 344 | 0x7535:"啊", 345 | 0x7536:"噲", 346 | 0x7537:"囤", 347 | 0x7538:"圳", 348 | 0x7539:"圴", 349 | 0x753A:"塚", 350 | 0x753B:"墀", 351 | 0x753C:"姤", 352 | 0x753D:"娣", 353 | 0x753E:"婕", 354 | 0x753F:"寬", 355 | 0x7540:"﨑", 356 | 0x7541:"㟢", 357 | 0x7542:"庬", 358 | 0x7543:"弴", 359 | 0x7544:"彅", 360 | 0x7545:"德", 361 | 0x7546:"怗", 362 | 0x7547:"恵", 363 | 0x7548:"愰", 364 | 0x7549:"昤", 365 | 0x754A:"曈", 366 | 0x754B:"曙", 367 | 0x754C:"曺", 368 | 0x754D:"曻", 369 | 0x754E:"桒", 370 | 0x754F:"・", 371 | 0x7550:"椑", 372 | 0x7551:"椻", 373 | 0x7552:"橅", 374 | 0x7553:"檑", 375 | 0x7554:"櫛", 376 | 0x7555:"・", 377 | 0x7556:"・", 378 | 0x7557:"・", 379 | 0x7558:"毱", 380 | 0x7559:"泠", 381 | 0x755A:"洮", 382 | 0x755B:"海", 383 | 0x755C:"涿", 384 | 0x755D:"淊", 385 | 0x755E:"淸", 386 | 0x755F:"渚", 387 | 0x7560:"潞", 388 | 0x7561:"濹", 389 | 0x7562:"灤", 390 | 0x7563:"・", 391 | 0x7564:"・", 392 | 0x7565:"煇", 393 | 0x7566:"燁", 394 | 0x7567:"爀", 395 | 0x7568:"玟", 396 | 0x7569:"・", 397 | 0x756A:"珉", 398 | 0x756B:"珖", 399 | 0x756C:"琛", 400 | 0x756D:"琡", 401 | 0x756E:"琢", 402 | 0x756F:"琦", 403 | 0x7570:"琪", 404 | 0x7571:"琬", 405 | 0x7572:"琹", 406 | 0x7573:"瑋", 407 | 0x7574:"㻚", 408 | 0x7575:"畵", 409 | 0x7576:"疁", 410 | 0x7577:"睲", 411 | 0x7578:"䂓", 412 | 0x7579:"磈", 413 | 0x757A:"磠", 414 | 0x757B:"祇", 415 | 0x757C:"禮", 416 | 0x757D:"・", 417 | 0x757E:"・", 418 | 419 | 0x7621:"・", 420 | 0x7622:"秚", 421 | 0x7623:"稞", 422 | 0x7624:"筿", 423 | 0x7625:"簱", 424 | 0x7626:"䉤", 425 | 0x7627:"綋", 426 | 0x7628:"羡", 427 | 0x7629:"脘", 428 | 0x762A:"脺", 429 | 0x762B:"・", 430 | 0x762C:"芮", 431 | 0x762D:"葛", 432 | 0x762E:"蓜", 433 | 0x762F:"蓬", 434 | 0x7630:"蕙", 435 | 0x7631:"藎", 436 | 0x7632:"蝕", 437 | 0x7633:"蟬", 438 | 0x7634:"蠋", 439 | 0x7635:"裵", 440 | 0x7636:"角", 441 | 0x7637:"諶", 442 | 0x7638:"跎", 443 | 0x7639:"辻", 444 | 0x763A:"迶", 445 | 0x763B:"郝", 446 | 0x763C:"鄧", 447 | 0x763D:"鄭", 448 | 0x763E:"醲", 449 | 0x763F:"鈳", 450 | 0x7640:"銈", 451 | 
class Code:
    """Symbolic names for the code sets a G0-G3 buffer can hold.

    Every attribute is a plain string equal to its own name.  The names are
    used as the first element of the ``(code, size)`` tuples stored in
    ``CODE_SET_G`` and ``CODE_SET_DRCS``.
    """
    KANJI = 'KANJI'
    ALPHANUMERIC = 'ALPHANUMERIC'
    HIRAGANA = 'HIRAGANA'
    KATAKANA = 'KATAKANA'
    MOSAIC_A = 'MOSAIC_A'
    MOSAIC_B = 'MOSAIC_B'
    MOSAIC_C = 'MOSAIC_C'
    MOSAIC_D = 'MOSAIC_D'
    PROP_ALPHANUMERIC = 'PROP_ALPHANUMERIC'
    PROP_HIRAGANA = 'PROP_HIRAGANA'
    PROP_KATAKANA = 'PROP_KATAKANA'
    JIS_X0201_KATAKANA = 'JIS_X0201_KATAKANA'
    JIS_KANJI_PLANE_1 = 'JIS_KANJI_PLANE_1'
    JIS_KANJI_PLANE_2 = 'JIS_KANJI_PLANE_2'
    ADDITIONAL_SYMBOLS = 'ADDITIONAL_SYMBOLS'
    # Used for every entry of CODE_SET_DRCS (DRCS-0..15 and MACRO).
    UNSUPPORTED = 'UNSUPPORTED'
class CodeSetController:
    """Track code-set designation and invocation while decoding a string.

    State kept per instance:

    * ``v_buffer``      -- maps each buffer (G0..G3) to a ``(code, size)``
      tuple taken from ``CODE_SET_G`` / ``CODE_SET_DRCS``.
    * ``graphic_left``  -- buffer used for bytes 0x21-0x7E.
    * ``graphic_right`` -- buffer used for bytes 0xA1-0xFE.
    * ``single_shift``  -- buffer that overrides ``graphic_left`` for the
      next left-area character only, or ``None``.
    * ``esc_seq_count`` -- number of escape-sequence bytes consumed so far;
      zero means "not inside an escape sequence".
    * ``esc_buffer_index`` / ``esc_drcs`` -- target buffer and table
      selection collected from the escape sequence being read.
    """

    def __init__(self):
        self.v_buffer = {
            Buffer.G0: CODE_SET_G[0x42],  # KANJI
            Buffer.G1: CODE_SET_G[0x4a],  # ALPHANUMERIC
            Buffer.G2: CODE_SET_G[0x30],  # HIRAGANA
            Buffer.G3: CODE_SET_G[0x31],  # KATAKANA
        }
        self.single_shift = None
        self.graphic_left = Buffer.G0   # KANJI
        self.graphic_right = Buffer.G2  # HIRAGANA
        self.esc_seq_count = 0
        self.esc_buffer_index = Buffer.G0
        self.esc_drcs = False

    def degignate(self, code):
        """Store the code set selected by ``code`` in the pending buffer.

        ``code`` is looked up in ``CODE_SET_DRCS`` when ``esc_drcs`` is set
        and in ``CODE_SET_G`` otherwise, and the escape-sequence counter is
        reset on success.

        Raises:
            DegignationError: ``code`` is not a key of the table that is
                actually consulted.  The check is made against that table
                (not the union of both), so a byte that is only valid for
                the other table no longer surfaces as a bare ``KeyError``.
        """
        table = CODE_SET_DRCS if self.esc_drcs else CODE_SET_G
        if code not in table:
            raise DegignationError(
                'esc_seq_count=%i esc_buffer_index=%s code=0x%02X' % (
                    self.esc_seq_count, self.esc_buffer_index, code))
        self.v_buffer[self.esc_buffer_index] = table[code]
        self.esc_seq_count = 0

    def invoke(self, buffer_index, area, locking_shift=True):
        """Invoke ``buffer_index`` into the left or right code area.

        For the left area a locking shift replaces ``graphic_left`` while a
        non-locking one only arms ``single_shift``.  For the right area
        ``locking_shift`` is ignored.  Any other ``area`` value changes no
        buffer.  The escape-sequence counter is always reset.
        """
        if CodeArea.LEFT == area:
            if locking_shift:
                self.graphic_left = buffer_index
            else:
                self.single_shift = buffer_index
        elif CodeArea.RIGHT == area:
            self.graphic_right = buffer_index
        self.esc_seq_count = 0

    def get_current_code(self, data):
        """Return the ``(code, size)`` tuple that applies to byte ``data``.

        A pending single shift is consumed by the first left-area byte.
        Returns ``None`` when ``data`` is outside 0x21-0x7E and 0xA1-0xFE.
        """
        if 0x21 <= data <= 0x7E:
            if self.single_shift:
                code = self.v_buffer[self.single_shift]
                self.single_shift = None
                return code
            return self.v_buffer[self.graphic_left]
        if 0xA1 <= data <= 0xFE:
            return self.v_buffer[self.graphic_right]
        return None

    def set_escape(self, buffer_index, drcs):
        """Record one intermediate byte of an escape sequence.

        ``buffer_index`` of ``None`` keeps the previously selected target
        buffer; ``drcs`` always overwrites ``esc_drcs``.  The escape-sequence
        counter is advanced by one.
        """
        if buffer_index is not None:
            self.esc_buffer_index = buffer_index
        self.esc_drcs = drcs
        self.esc_seq_count += 1
def convert(self):
    """Consume ``self.arib_array`` byte by byte and return ``self.jis_array``.

    While the controller reports an escape sequence in progress every byte
    goes to ``do_escape``.  Otherwise printable bytes go to ``do_convert``,
    space-like bytes and line breaks are emitted as ASCII 0x20 / 0x0A, and
    everything else is handed to ``do_control``.  Processing stops as soon
    as ``AribIndexError`` is raised, i.e. when the input runs out.
    """
    try:
        while True:
            byte = self.arib_array.pop0()
            if self.control.esc_seq_count:
                self.do_escape(byte)
                continue
            if 0x21 <= byte <= 0x7E or 0xA1 <= byte <= 0xFE:
                # GL/GR table
                self.do_convert(byte)
            elif byte in (0x20, 0xA0, 0x09):
                # space, space (arib), HT
                self.jis_array.append_str(ESC_SEQ_ASCII, 0x20)
            elif byte in (0x0D, 0x0A):
                # CR, LF
                self.jis_array.append_str(ESC_SEQ_ASCII, 0x0A)
            else:
                # control character
                self.do_control(byte)
    except AribIndexError:
        pass
    return self.jis_array
def do_control(self, data):
    """React to a control byte that is not whitespace.

    0x1B starts an escape sequence by setting the controller's
    ``esc_seq_count`` to 1.  0x0F / 0x0E are locking shifts of G0 / G1 into
    the left area, 0x19 / 0x1D are single shifts of G2 / G3.  Any other
    byte is ignored.
    """
    if data == 0x1B:
        self.control.esc_seq_count = 1
        return
    shifts = {
        0x0F: (Buffer.G0, True),    # LS0
        0x0E: (Buffer.G1, True),    # LS1
        0x19: (Buffer.G2, False),   # SS2
        0x1D: (Buffer.G3, False),   # SS3
    }
    target = shifts.get(data)
    if target is not None:
        buffer_index, locking = target
        self.control.invoke(buffer_index, CodeArea.LEFT, locking)
class Section:
    """Mutable accumulator for the bytes of one section.

    ``data`` starts as an unsigned-byte array holding five 0xFF bytes and
    ``idx`` defaults to 5, the length of that prefix.  The three length
    counters are plain integers; only ``length_prev`` can be seeded by the
    caller.
    NOTE(review): the 5-byte prefix presumably keeps section bytes at the
    same offsets (``packet[5]`` onwards) the table classes read -- confirm
    against the parser.
    """

    def __init__(self, idx=5, length_prev=0):
        self.idx = idx
        self.length_prev = length_prev
        self.length_current = 0
        self.length_total = 0
        self.data = array.array('B', [0xFF] * 5)
class TransportPacket:
    """A packet header together with the table parsed from its payload.

    When ``header.pid`` is listed in ``SDT_PID`` the payload is parsed as a
    ``ServiceDescriptionTable`` and stored in ``self.sdt``; otherwise it is
    parsed as an ``EventInfomationTable`` and stored in ``self.eit``.  Only
    one of the two attributes exists on a given instance.
    """

    def __init__(self, header, packet):
        self.header = header
        self.binary_data = packet
        if header.pid not in SDT_PID:
            self.eit = EventInfomationTable(packet)
            return
        self.sdt = ServiceDescriptionTable(packet)

    def __str__(self):
        """Return the four header fields, one ``name=value`` per line."""
        hdr = self.header
        layout = (
            'pid=%04X\n'
            'payload_unit_start_indicator=%i\n'
            'adaptation_field_control=%i\n'
            'pointer_field=%i\n'
        )
        values = (
            hdr.pid,
            hdr.payload_unit_start_indicator,
            hdr.adaptation_field_control,
            hdr.pointer_field,
        )
        return layout % values
class Service:
    """One service entry of a service description table.

    The constructor stores its arguments unchanged; the trailing comments
    give each field's width and mnemonic as annotated in the original code.
    ``descriptors`` starts out as an empty list.
    """

    def __init__(self, service_id, EIT_user_defined_flags, EIT_schedule_flag,
                 EIT_present_following_flag, running_status, free_CA_mode,
                 descriptors_loop_length):
        self.descriptors = []
        self.service_id = service_id                                  # 16 uimsbf
        self.EIT_user_defined_flags = EIT_user_defined_flags          # 3 bslbf
        self.EIT_schedule_flag = EIT_schedule_flag                    # 1 bslbf
        self.EIT_present_following_flag = EIT_present_following_flag  # 1 bslbf
        self.running_status = running_status                          # 3 uimsbf
        self.free_CA_mode = free_CA_mode                              # 1 bslbf
        self.descriptors_loop_length = descriptors_loop_length        # 12 uimsbf

    def __str__(self):
        """Return every scalar field as a ``name=value`` line."""
        layout = ''.join([
            ' service_id=%i\n',
            ' EIT_user_defined_flags=%i\n',
            ' EIT_schedule_flag=%i\n',
            ' EIT_present_following_flag=%i\n',
            ' running_status=%i\n',
            ' free_CA_mode=%i\n',
            ' descriptors_loop_length=%i\n',
        ])
        values = (
            self.service_id,
            self.EIT_user_defined_flags,
            self.EIT_schedule_flag,
            self.EIT_present_following_flag,
            self.running_status,
            self.free_CA_mode,
            self.descriptors_loop_length,
        )
        return layout % values
119 | def __str__(self): 120 | return ( 121 | ' descriptor_tag=0x%02X\n' 122 | ' descriptor_length=%i\n' 123 | ' service_type=0x%02X\n' 124 | ' service_provider_name_length=%i\n' 125 | ' service_provider_name=%s\n' 126 | ' service_name_length=%i\n' 127 | ' service_name=%s\n' 128 | ) % ( 129 | self.descriptor_tag, 130 | self.descriptor_length, 131 | self.service_type, 132 | self.service_provider_name_length, 133 | self.service_provider_name, 134 | self.service_name_length, 135 | self.service_name) 136 | 137 | class EventInfomationTable: 138 | def __init__(self, packet): 139 | self.table_id = packet[5] # 8 uimsbf 140 | self.section_length = (((packet[6] & 0x0F) << 8) + packet[7]) # 12 141 | self.service_id = ((packet[8] << 8) + packet[9]) # 16 uimsbf 142 | self.version_number = ((packet[10] >> 1) & 0x1F) # 5 uimsbf 143 | self.current_next_indicator = (packet[10] & 0x01) # 1 bslbf 144 | self.section_number = packet[11] # 8 uimsbf 145 | self.last_section_number = packet[12] # 8 uimsbf 146 | self.transport_stream_id = ((packet[13] << 8) + packet[14]) # 16 uimsbf 147 | self.original_network_id = ((packet[15] << 8) + packet[16]) # 16 uimsbf 148 | self.segment_last_section_number = packet[17] # 8 uimsbf 149 | self.last_table_id = packet[18] # 8 uimsbf 150 | self.events = [] 151 | crc32mpeg(packet[5:self.section_length + 8], self.table_id, self.section_length) 152 | def __str__(self): 153 | return ( 154 | ' table_id=%04X\n' 155 | ' section_length=%i\n' 156 | ' service_id=%i\n' 157 | ' version_number=%i\n' 158 | ' current_next_indicator=%i\n' 159 | ' section_number=%i\n' 160 | ' last_section_number=%i\n' 161 | ' transport_stream_id=%i\n' 162 | ' original_network_id=%i\n' 163 | ' segment_last_section_number=%i\n' 164 | ' last_table_id=%04X\n') % ( 165 | self.table_id, 166 | self.section_length, 167 | self.service_id, 168 | self.version_number, 169 | self.current_next_indicator, 170 | self.section_number, 171 | self.last_section_number, 172 | self.transport_stream_id, 173 | 
class Event:
    """One event entry read from an event information table.

    The constructor stores its arguments unchanged.  ``descriptors`` starts
    as an empty list and ``desc_short`` / ``desc_content`` / ``desc_extend``
    start as ``None``; this class never fills them in itself.
    NOTE(review): presumably the parser attaches the decoded descriptors
    afterwards -- confirm against parser.py.
    """

    def __init__(self, transport_stream_id, service_id, event_id, start_time, duration,
                 running_status, free_CA_mode, descriptors_loop_length):
        self.transport_stream_id = transport_stream_id
        self.service_id = service_id
        self.event_id = event_id
        self.start_time = start_time
        self.duration = duration
        self.running_status = running_status
        self.free_CA_mode = free_CA_mode
        self.descriptors_loop_length = descriptors_loop_length
        self.descriptors = []
        self.desc_short = None
        self.desc_content = None
        self.desc_extend = None

    def __str__(self):
        """Return the event's scalar fields, one ``name=value`` per line.

        ``event_id`` and ``running_status`` are shown in hexadecimal;
        ``start_time`` and ``duration`` are rendered with ``%s``.
        """
        # The template has seven fields, so service_id must be supplied as
        # the first value; leaving it out makes the % operator raise
        # "not enough arguments for format string".
        return (
            ' service_id=%i\n'
            ' event_id=%04X\n'
            ' start_time=%s\n'
            ' duration=%s\n'
            ' running_status=%02X\n'
            ' free_CA_mode=%i\n'
            ' descriptors_loop_length=%i\n') % (
                self.service_id,
                self.event_id,
                self.start_time,
                self.duration,
                self.running_status,
                self.free_CA_mode,
                self.descriptors_loop_length)
class ShortEventDescriptor:
    """Value holder for a decoded short event descriptor.

    All seven constructor arguments are stored unchanged under attributes
    of the same name; no validation or decoding happens here.
    """

    def __init__(self, descriptor_tag, descriptor_length,
                 ISO_639_language_code, event_name_length,
                 event_name, text_length, text):
        self.descriptor_tag, self.descriptor_length = descriptor_tag, descriptor_length
        self.ISO_639_language_code = ISO_639_language_code
        self.event_name_length, self.event_name = event_name_length, event_name
        self.text_length, self.text = text_length, text

    def __str__(self):
        """Return all fields as ``name=value`` lines (tag shown in hex)."""
        values = (
            self.descriptor_tag,
            self.descriptor_length,
            self.ISO_639_language_code,
            self.event_name_length,
            self.event_name,
            self.text_length,
            self.text,
        )
        layout = (
            ' descriptor_tag=0x%02X\n'
            ' descriptor_length=%i\n'
            ' ISO_639_language_code=%s\n'
            ' event_name_length=%i\n'
            ' event_name=%s\n'
            ' text_length=%i\n'
            ' text=%s\n'
        )
        return layout % values
self.descriptor_length, 290 | self.descriptor_number, 291 | self.last_descriptor_number, 292 | self.ISO_639_language_code, 293 | self.text_length, 294 | self.text) 295 | 296 | class Item: 297 | def __init__(self, item_description_length, item_description, 298 | item_length, item): 299 | self.item_description_length = item_description_length 300 | self.item_description = item_description 301 | self.item_length = item_length 302 | self.item = item 303 | def __str__(self): 304 | return ( 305 | ' item_description_length=%i\n' 306 | ' item_description=%s\n' 307 | ' item_length=%i\n' 308 | ' item=%s\n') % ( 309 | self.item_description_length, 310 | self.item_description, 311 | self.item_length, 312 | self.item) 313 | 314 | class CRC32MpegError(Exception): 315 | pass 316 | 317 | def crc32mpeg(data, table_id, section_length, crc=0xffffffff): 318 | for d in data: 319 | idx = (((crc >> 24) ^ d) & 0xff) 320 | crc = ((CRC_32_MPEG[idx] ^ (crc << 8)) & 0xffffffff) 321 | if (crc & 0xffffffff) != 0x0: 322 | raise CRC32MpegError('table_id=0x%X section_length=%i' % (table_id, section_length)) 323 | -------------------------------------------------------------------------------- /constant.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | READ_PACKETS_MAX = 700000 5 | 6 | TYPE_DEGITAL = '' 7 | TYPE_BS = 'BS_' 8 | TYPE_CS = 'CS_' 9 | 10 | EIT_PID = (0x12, 0x26, 0x27) 11 | SDT_PID = (0x11,) 12 | 13 | TAG_SED = 0x4D # Short event descriptor 14 | TAG_EED = 0x4E # Extended event descriptor 15 | TAG_CD = 0x54 # Content descriptor 16 | TAG_SD = 0x48 # Service descriptor 17 | 18 | CONTENT_TYPE = { 19 | 0x0:('ニュース/報道', 20 | { 21 | 0x0:'定時・総合', 22 | 0x1:'天気', 23 | 0x2:'特集・ドキュメント', 24 | 0x3:'政治・国会', 25 | 0x4:'経済・市況', 26 | 0x5:'海外・国際', 27 | 0x6:'解説', 28 | 0x7:'討論・会談', 29 | 0x8:'報道特番', 30 | 0x9:'ローカル・地域', 31 | 0xA:'交通', 32 | 0xF:'その他', 33 | }), 34 | 0x1:('スポーツ', 35 | { 36 | 0x0:'スポーツニュース', 37 | 0x1:'野球', 
38 | 0x2:'サッカー', 39 | 0x3:'ゴルフ', 40 | 0x4:'その他の球技', 41 | 0x5:'相撲・格闘技', 42 | 0x6:'オリンピック・国際大会', 43 | 0x7:'マラソン・陸上・水泳', 44 | 0x8:'モータースポーツ', 45 | 0x9:'マリン・ウィンタースポーツ', 46 | 0xA:'競馬・公営競技', 47 | 0xF:'その他', 48 | }), 49 | 0x2:('情報/ワイドショー', 50 | { 51 | 0x0:'芸能・ワイドショー', 52 | 0x1:'ファッション', 53 | 0x2:'暮らし・住まい', 54 | 0x3:'健康・医療', 55 | 0x4:'ショッピング・通販', 56 | 0x5:'グルメ・料理', 57 | 0x6:'イベント', 58 | 0x7:'番組紹介・お知らせ', 59 | 0xF:'その他', 60 | }), 61 | 0x3:('ドラマ', 62 | { 63 | 0x0:'国内ドラマ', 64 | 0x1:'海外ドラマ', 65 | 0x2:'時代劇', 66 | 0xF:'その他', 67 | }), 68 | 0x4:('音楽', 69 | { 70 | 0x0:'国内ロック・ポップス', 71 | 0x1:'海外ロック・ポップス', 72 | 0x2:'クラシック・オペラ', 73 | 0x3:'ジャズ・フュージョン', 74 | 0x4:'歌謡曲・演歌', 75 | 0x5:'ライブ・コンサート', 76 | 0x6:'ランキング・リクエスト', 77 | 0x7:'カラオケ・のど自慢', 78 | 0x8:'民謡・邦楽', 79 | 0x9:'童謡・キッズ', 80 | 0xA:'民族音楽・ワールドミュージック', 81 | 0xF:'その他', 82 | }), 83 | 0x5:('バラエティ', 84 | { 85 | 0x0:'クイズ', 86 | 0x1:'ゲーム', 87 | 0x2:'トークバラエティ', 88 | 0x3:'お笑い・コメディ', 89 | 0x4:'音楽バラエティ', 90 | 0x5:'旅バラエティ', 91 | 0x6:'料理バラエティ', 92 | 0xF:'その他', 93 | }), 94 | 0x6:('映画', 95 | { 96 | 0x0:'洋画', 97 | 0x1:'邦画', 98 | 0x2:'アニメ', 99 | 0xF:'その他', 100 | }), 101 | 0x7:('アニメ/特撮', 102 | { 103 | 0x0:'国内アニメ', 104 | 0x1:'海外アニメ', 105 | 0x2:'特撮', 106 | 0xF:'その他', 107 | }), 108 | 0x8:('ドキュメンタリー/教養', 109 | { 110 | 0x0:'社会・時事', 111 | 0x1:'歴史・紀行', 112 | 0x2:'自然・動物・環境', 113 | 0x3:'宇宙・科学・医学', 114 | 0x4:'カルチャー・伝統文化', 115 | 0x5:'文学・文芸', 116 | 0x6:'スポーツ', 117 | 0x7:'ドキュメンタリー全般', 118 | 0x8:'インタビュー・討論', 119 | 0xF:'その他', 120 | }), 121 | 0x9:('劇場/公演', 122 | { 123 | 0x0:'現代劇・新劇', 124 | 0x1:'ミュージカル', 125 | 0x2:'ダンス・バレエ', 126 | 0x3:'落語・演芸', 127 | 0x4:'歌舞伎・古典', 128 | 0xF:'その他', 129 | }), 130 | 0xA:('趣味/教育', 131 | { 132 | 0x0:'旅・釣り・アウトドア', 133 | 0x1:'園芸・ペット・手芸', 134 | 0x2:'音楽・美術・工芸', 135 | 0x3:'囲碁・将棋', 136 | 0x4:'麻雀・パチンコ', 137 | 0x5:'車・オートバイ', 138 | 0x6:'コンピュータ・TVゲーム', 139 | 0x7:'会話・語学', 140 | 0x8:'幼児・小学生', 141 | 0x9:'中学生・高校生', 142 | 0xA:'大学生・受験', 143 | 0xB:'生涯教育・資格', 144 | 0xC:'教育問題', 145 | 0xF:'その他', 146 | }), 147 | 0xB:('福祉', 148 | { 149 | 0x0:'高齢者', 150 | 0x1:'障害者', 
151 | 0x2:'社会福祉', 152 | 0x3:'ボランティア', 153 | 0x4:'手話', 154 | 0x5:'文字(字幕)', 155 | 0x6:'音声解説', 156 | 0xF:'その他', 157 | }), 158 | 0xE:('拡張', 159 | { 160 | 0x0:'BS/地上デジタル放送用番組付属情報', 161 | 0x1:'広帯域CS デジタル放送用拡張', 162 | 0x2:'衛星デジタル音声放送用拡張', 163 | 0x3:'サーバー型番組付属情報', 164 | 0x4:'IP 放送用番組付属情報', 165 | }), 166 | 0xF:('その他', 167 | { 168 | }), 169 | } 170 | 171 | 172 | CRC_32_MPEG = ( 173 | 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 174 | 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 175 | 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 176 | 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 177 | 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 178 | 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 179 | 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 180 | 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 181 | 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 182 | 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 183 | 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 184 | 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 185 | 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 186 | 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 187 | 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 188 | 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 189 | 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 190 | 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 191 | 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 192 | 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 193 | 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 194 | 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 195 | 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 196 | 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 197 | 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 198 | 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 199 | 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 200 | 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 201 | 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 202 | 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 203 | 
0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 204 | 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 205 | 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 206 | 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 207 | 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 208 | 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 209 | 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 210 | 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 211 | 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 212 | 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 213 | 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 214 | 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 215 | 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 216 | 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 217 | 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 218 | 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 219 | 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 220 | 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 221 | 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 222 | 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 223 | 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 224 | 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 225 | 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 226 | 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 227 | 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 228 | 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 229 | 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 230 | 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 231 | 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 232 | 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 233 | 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 234 | 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 235 | 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 236 | 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 237 | ) 238 | -------------------------------------------------------------------------------- /epgdump.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from parser import TransportStreamFile, parse_ts 4 | import xmltv 5 | import sys 6 | import getopt 7 | import time 8 | from constant import * 9 | 10 | def usage(): 11 | print >> sys.stderr, '''USAGE: epgdump_py -c CHANNEL_ID -i INPUT_FILE -o OUTPUT_FILE 12 | epgdump_py -b -i INPUT_FILE -o OUTPUT_FILE 13 | epgdump_py -s -i INPUT_FILE -o OUTPUT_FILE 14 | epgdump_py [-b|-s] -p TRANSPORT_STREAM_ID:SERVICE_ID:EVENT_ID -i INPUT_FILE 15 | -h, --help print help message 16 | -b, --bs input file is BS channel 17 | -s, --cs input file is CS channel 18 | -c, --channel-id specify channel identifier 19 | -d, --debug parse all ts packet 20 | -f, --format format xml 21 | -i, --input specify ts file 22 | -o, --output specify xml file 23 | -p, --print-time print start time, and end time of specifeid id 24 | -e, --event-id output transport_stream_id, servece_id and event_id 25 | ''' 26 | 27 | try: 28 | opts, args = getopt.getopt(sys.argv[1:], 'hbsc:dfi:o:p:e', ['help', 'bs', 'cs', 'channel-id=', 'debug', 'format', 'input=', 'output=', 'print-time=', 'event-id']) 29 | except IndexError, getopt.GetoptError: 30 | usage() 31 | sys.exit(1) 32 | 33 | channel_id = None 34 | input_file = None 35 | output_file = None 36 | pretty_print = False 37 | debug = False 38 | b_type = TYPE_DEGITAL 39 | transport_stream_id = None 40 | service_id = None 41 | event_id = None 42 | output_eid = False 43 | for o,a in opts: 44 | if o in ('-h', '--help'): 45 | usage() 46 | sys.exit(0) 47 | elif o in ('-b', '--bs'): 48 | b_type = TYPE_BS 49 | elif o in ('-s', '--cs'): 50 | b_type = TYPE_CS 51 | elif o in ('-c', '--channel-id'): 52 | channel_id = a 53 | elif o in ('-d', '--debug'): 54 | debug = True 55 | elif o in ('-f', '--format'): 56 | pretty_print = True 57 | elif o in ('-i', '--input'): 58 | input_file = a 59 | elif o in ('-o', '--output'): 60 | output_file = a 61 | elif o 
in ('-p', '--print-time'): 62 | arr = a.split(':') 63 | transport_stream_id = int(arr[0]) 64 | service_id = int(arr[1]) 65 | event_id = int(arr[2]) 66 | elif o in ('-e', '--event-id'): 67 | output_eid = True 68 | 69 | if service_id == None and ( 70 | (b_type == TYPE_DEGITAL and channel_id == None) or input_file == None or output_file == None): 71 | usage() 72 | sys.exit(1) 73 | elif input_file == None: 74 | usage() 75 | sys.exit(1) 76 | 77 | tsfile = TransportStreamFile(input_file, 'rb') 78 | (service, events) = parse_ts(b_type, tsfile, debug) 79 | tsfile.close() 80 | if service_id == None: 81 | xmltv.create_xml(b_type, channel_id, service, events, output_file, pretty_print, output_eid) 82 | else: 83 | start_time = None 84 | end_time = None 85 | for event in events: 86 | if (event.transport_stream_id == transport_stream_id and 87 | event.service_id == service_id and 88 | event.event_id == event_id): 89 | start_time = event.start_time 90 | end_time = event.start_time + event.duration 91 | break 92 | if start_time == None: 93 | print >> sys.stderr, "not found: transport_stream_id=%d service_id=%d event_id=%d" % (transport_stream_id, service_id, event_id) 94 | sys.exit(1) 95 | else: 96 | print int(time.mktime(start_time.timetuple())), int(time.mktime(end_time.timetuple())) 97 | -------------------------------------------------------------------------------- /parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | import datetime 5 | import copy 6 | import array 7 | import aribstr 8 | from constant import * 9 | from aribtable import * 10 | 11 | class TransportStreamFile(file): 12 | def next(self): 13 | try: 14 | sync = self.read(1) 15 | while ord(sync) != 0x47: 16 | sync = self.read(1) 17 | except TypeError: 18 | raise StopIteration 19 | data = self.read(187) 20 | packet = array.array('B', data) 21 | packet.insert(0, ord(sync)) 22 | if len(packet) != 188: 23 | raise 
StopIteration 24 | return packet 25 | 26 | class TransportPacketParser: 27 | def __init__(self, tsfile, pid, debug=False): 28 | self.tsfile = tsfile 29 | self.pid = pid 30 | self.section_map = {} 31 | self.queue = [] 32 | self.debug = debug 33 | self.count = 0 34 | def __iter__(self): 35 | return self 36 | def next(self): 37 | while True: 38 | try: 39 | return self.queue.pop(0) 40 | except IndexError: 41 | pass 42 | b_packet = self.tsfile.next() 43 | self.count += 1 44 | if not self.debug: 45 | if self.count >= READ_PACKETS_MAX: 46 | raise StopIteration 47 | header = self.parse_header(b_packet) 48 | if header.pid in self.pid and header.adaptation_field_control == 1: 49 | while True: 50 | (next_packet, section) = self.parse_section(header, self.section_map, b_packet) 51 | if next_packet: 52 | break 53 | if section: 54 | try: 55 | t_packet = TransportPacket(header, section.data) 56 | self.queue.append(t_packet) 57 | except CRC32MpegError, e: 58 | print >> sys.stderr, 'CRC32MpegError', e 59 | self.section_map.pop(header.pid) 60 | break 61 | 62 | def parse_header(self, b_packet): 63 | pid = ((b_packet[1] & 0x1F) << 8) + b_packet[2] 64 | payload_unit_start_indicator = ((b_packet[1] >> 6) & 0x01) 65 | adaptation_field_control = ((b_packet[3] >> 4) & 0x03) 66 | pointer_field = b_packet[4] 67 | return TransportPacketHeader(pid, payload_unit_start_indicator, adaptation_field_control, pointer_field) 68 | 69 | def parse_section(self, header, section_map, b_packet): 70 | sect = None 71 | next_packet = False 72 | sect = section_map.get(header.pid, Section()) 73 | 74 | if header.payload_unit_start_indicator == 1: 75 | if sect.length_total == 0: 76 | section_length = 180 77 | if header.pointer_field > 179: 78 | next_packet = True 79 | sect = None 80 | else: 81 | sect.idx += header.pointer_field 82 | section_length -= header.pointer_field 83 | sect.length_total = (((b_packet[sect.idx + 1] & 0x0F) << 8) + b_packet[sect.idx + 2]) # 12 uimsbf 84 | if sect.length_total < 15: 85 | 
next_packet = True 86 | sect = None 87 | elif sect.length_total <= section_length: 88 | sect.data.extend(b_packet[sect.idx:sect.idx + 3 + sect.length_total]) 89 | sect.idx += sect.length_total + 3 90 | sect.length_current += sect.length_total 91 | section_map[header.pid] = sect 92 | next_packet = False 93 | else: 94 | sect.data.extend(b_packet[sect.idx:]) 95 | sect.length_current += section_length 96 | sect.idx = 5 97 | section_map[header.pid] = sect 98 | next_packet = True 99 | sect = None 100 | else: 101 | remain = sect.length_total - sect.length_current 102 | section_length = 180 - sect.length_prev 103 | if remain == 0: 104 | next_packet = True 105 | section_map[header.pid] = Section() 106 | section_header = 0 107 | if sect.idx < 182: 108 | if sect.length_prev: 109 | prev = 3 110 | else: 111 | prev = 0 112 | sect = Section(sect.idx + prev, sect.idx - 5 + prev) 113 | section_header = (b_packet[sect.idx] << 16) + (b_packet[sect.idx + 1] << 8) + (b_packet[sect.idx + 2]) 114 | if section_header != 0xFFFFFF: 115 | sect.length_total = (((b_packet[sect.idx + 1] & 0x0F) << 8) + b_packet[sect.idx + 2]) # 12 uimsbf 116 | section_map[header.pid] = sect 117 | next_packet = False 118 | sect = None 119 | elif remain <= section_length: 120 | sect.data.extend(b_packet[sect.idx:sect.idx + 3 + remain]) 121 | sect.idx += remain 122 | sect.length_current += remain 123 | section_map[header.pid] = sect 124 | next_packet = False 125 | else: 126 | sect.data.extend(b_packet[sect.idx:]) 127 | sect.length_current += section_length 128 | sect.length_prev = 0 129 | sect.idx = 5 130 | section_map[header.pid] = sect 131 | next_packet = True 132 | sect = None 133 | else: 134 | # payload_unit_start_indicater set to 0b indicates that there is no pointer_field 135 | if sect.length_total != 0: 136 | sect.data.extend(b_packet[4:]) 137 | sect.length_current += 184 138 | if sect.length_current >= sect.length_total: 139 | section_map[header.pid] = Section() 140 | next_packet = False 141 | else: 142 | 
sect.length_prev = 0 143 | sect = None 144 | next_packet = True 145 | else: 146 | sect.length_prev = 0 147 | sect = None 148 | next_packet = True 149 | return (next_packet, sect) 150 | 151 | def mjd2datetime(payload): 152 | mjd = (payload[0] << 8) | payload[1] 153 | yy_ = int((mjd - 15078.2) / 365.25) 154 | mm_ = int((mjd - 14956.1 - int(yy_ * 365.25)) / 30.6001) 155 | k = 1 if 14 <= mm_ <= 15 else 0 156 | day = mjd - 14956 - int(yy_ * 365.25) - int(mm_ * 30.6001) 157 | year = 1900 + yy_ + k 158 | month = mm_ - 1 - k * 12 159 | hour = ((payload[2] & 0xF0) >> 4) * 10 + (payload[2] & 0x0F) 160 | minute = ((payload[3] & 0xF0) >> 4) * 10 + (payload[3] & 0x0F) 161 | second = ((payload[4] & 0xF0) >> 4) * 10 + (payload[4] & 0x0F) 162 | try: 163 | return datetime.datetime(year, month, day, hour, minute, second) 164 | except ValueError: 165 | return datetime.datetime(9999, 1, 1, 1, 1, 1) 166 | 167 | def bcd2time(payload): 168 | hour = ((payload[0] & 0xF0) >> 4) * 10 + (payload[0] & 0x0F) 169 | minute = ((payload[1] & 0xF0) >> 4) * 10 + (payload[1] & 0x0F) 170 | second = ((payload[2] & 0xF0) >> 4) * 10 + (payload[2] & 0x0F) 171 | return datetime.timedelta(hours=hour, minutes=minute, seconds=second) 172 | 173 | def parseShortEventDescriptor(idx, event, t_packet, b_packet): 174 | descriptor_tag = b_packet[idx] # 8 uimsbf 175 | descriptor_length = b_packet[idx + 1] # 8 uimsbf 176 | ISO_639_language_code = ( 177 | chr(b_packet[idx + 2]) + 178 | chr(b_packet[idx + 3]) + 179 | chr(b_packet[idx + 4])) # 24 bslbf 180 | event_name_length = b_packet[idx + 5] # 8 uimsbf 181 | arib = aribstr.AribString(b_packet[idx + 6:idx + 6 + event_name_length]) 182 | (event_name,symbol) = arib.convert_utf_split() 183 | idx = idx + 6 + event_name_length 184 | text_length = b_packet[idx] # 8 uimsbf 185 | arib = aribstr.AribString(b_packet[idx + 1:idx + 1 + text_length]) 186 | text = arib.convert_utf() 187 | text = symbol + "\n" + text 188 | desc = ShortEventDescriptor(descriptor_tag, 
descriptor_length, 189 | ISO_639_language_code, event_name_length, event_name, 190 | text_length, text) 191 | event.descriptors.append(desc) 192 | 193 | def parseExtendedEventDescriptor(idx, event, t_packet, b_packet): 194 | descriptor_tag = b_packet[idx] # 8 uimsbf 195 | descriptor_length = b_packet[idx + 1] # 8 uimsbf 196 | descriptor_number = (b_packet[idx + 2] >> 4) # 4 uimsbf 197 | last_descriptor_number = (b_packet[idx + 2] & 0x0F) # 4 uimsbf 198 | ISO_639_language_code = ( 199 | chr(b_packet[idx + 3]) + 200 | chr(b_packet[idx + 4]) + 201 | chr(b_packet[idx + 5])) # 24 bslbf 202 | length_of_items = b_packet[idx + 6] # 8 uimsbf 203 | idx = idx + 7 204 | length = idx + length_of_items 205 | item_list = [] 206 | while idx < length: 207 | item_description_length = b_packet[idx] # 8 uimsbf 208 | item_description = b_packet[idx + 1:idx + 1 + item_description_length] 209 | idx = idx + 1 + item_description_length 210 | item_length = b_packet[idx] # 8 uimsbf 211 | item = b_packet[idx + 1:idx + 1 + item_length] 212 | item_list.append(Item(item_description_length, item_description, 213 | item_length, item)) 214 | idx = idx + 1 + item_length 215 | text_length = b_packet[idx] # 8 uimsbf 216 | arib = aribstr.AribString(b_packet[idx + 1:idx + 1 + text_length]) 217 | text = arib.convert_utf() 218 | desc = ExtendedEventDescriptor(descriptor_tag, descriptor_length, 219 | descriptor_number, last_descriptor_number, ISO_639_language_code, 220 | length_of_items, item_list, text_length, text) 221 | event.descriptors.append(desc) 222 | 223 | def parseContentDescriptor(idx, event, t_packet, b_packet): 224 | descriptor_tag = b_packet[idx] # 8 uimsbf 225 | descriptor_length = b_packet[idx + 1] # 8 uimsbf 226 | idx += 2 227 | length = idx + descriptor_length 228 | content_list = [] 229 | while idx < length: 230 | content_nibble_level_1 = 'UNKNOWN' 231 | content_nibble_level_2 = 'UNKNOWN' 232 | try: 233 | c_map = CONTENT_TYPE[(b_packet[idx] >> 4)] # 4 uimsbf 234 | content_nibble_level_1 
= c_map[0] 235 | content_nibble_level_2 = c_map[1][(b_packet[idx] & 0x0F)] # 4 uimsbf 236 | except KeyError: 237 | pass 238 | user_nibble_1 = (b_packet[idx + 1] >> 4) # 4 uimsbf 239 | user_nibble_2 = (b_packet[idx + 1] & 0x0F) # 4 uimsbf 240 | content = ContentType(content_nibble_level_1, content_nibble_level_2, 241 | user_nibble_1, user_nibble_2) 242 | content_list.append(content) 243 | idx += 2 244 | desc = ContentDescriptor(descriptor_tag, descriptor_length, content_list) 245 | event.descriptors.append(desc) 246 | 247 | def parseServiceDescriptor(idx, service, t_packet, b_packet): 248 | descriptor_tag = b_packet[idx] # 8 uimsbf 249 | descriptor_length = b_packet[idx + 1] # 8 uimsbf 250 | service_type = b_packet[idx + 2] # 8 uimsbf 251 | service_provider_name_length = b_packet[idx + 3] # 8 uimsbf 252 | arib = aribstr.AribString(b_packet[idx + 4:idx + 4 + service_provider_name_length]) 253 | service_provider_name = arib.convert_utf() 254 | idx = idx + 4 + service_provider_name_length 255 | service_name_length = b_packet[idx] # 8 uimsbf 256 | arib = aribstr.AribString(b_packet[idx + 1:idx + 1 + service_name_length]) 257 | service_name = arib.convert_utf() 258 | sd = ServiceDescriptor(descriptor_tag, descriptor_length, service_type, 259 | service_provider_name_length, service_provider_name, 260 | service_name_length, service_name) 261 | service.descriptors.append(sd) 262 | 263 | def parseDescriptors(idx, table, t_packet, b_packet): 264 | iface = { 265 | TAG_SED:parseShortEventDescriptor, 266 | TAG_EED:parseExtendedEventDescriptor, 267 | TAG_CD :parseContentDescriptor, 268 | TAG_SD :parseServiceDescriptor} 269 | length = idx + table.descriptors_loop_length 270 | while idx < length: 271 | descriptor_tag = b_packet[idx] # 8 uimsbf 272 | descriptor_length = b_packet[idx + 1] # 8 uimsbf 273 | if descriptor_tag in iface.keys(): 274 | iface[descriptor_tag](idx, table, t_packet, b_packet) 275 | idx = idx + 2 + descriptor_length 276 | 277 | def parseEvents(t_packet, 
b_packet): 278 | idx = 19 279 | length = t_packet.eit.section_length - idx 280 | while idx < length: 281 | event_id = (b_packet[idx] << 8) + b_packet[idx + 1] # 16 uimsbf 282 | start_time = mjd2datetime(b_packet[idx + 2 :idx + 7]) # 40 bslbf 283 | duration = bcd2time(b_packet[idx + 7:idx + 10]) # 24 uimsbf 284 | running_status = (b_packet[idx + 10] >> 5) # 3 uimsbf 285 | free_CA_mode = ((b_packet[idx + 10] >> 4) & 0x01) # 1 bslbf 286 | descriptors_loop_length = ((b_packet[idx + 10] & 0x0F) << 8) + b_packet[idx + 11] # 12 uimsbf 287 | event = Event(t_packet.eit.transport_stream_id, t_packet.eit.service_id, event_id, 288 | start_time, duration, running_status, free_CA_mode, descriptors_loop_length) 289 | parseDescriptors(idx + 12, event, t_packet, b_packet) 290 | t_packet.eit.events.append(event) 291 | idx = idx + 12 + descriptors_loop_length 292 | 293 | def parseService(t_packet, b_packet): 294 | idx = 16 295 | length = t_packet.sdt.section_length - idx 296 | while idx < length: 297 | service_id = (b_packet[idx] << 8) + b_packet[idx + 1] # 16 uimsbf 298 | # reserved_future_use 3 bslbf 299 | EIT_user_defined_flags = ((b_packet[idx + 2] >> 2) & 0x07) # 3 bslbf 300 | EIT_schedule_flag = ((b_packet[idx + 2] >> 1) & 0x01) # 1 bslbf 301 | EIT_present_following_flag = (b_packet[idx + 2] & 0x01) # 1 bslbf 302 | running_status = ((b_packet[idx + 3] >> 5) & 0x03) # 3 uimsbf 303 | free_CA_mode = ((b_packet[idx + 3] >> 4) & 0x01) # 1 bslbf 304 | descriptors_loop_length = (((b_packet[idx + 3] & 0x0F) << 8) + b_packet[idx + 4]) # 12 uimsbf 305 | service = Service(service_id, EIT_user_defined_flags, EIT_schedule_flag, 306 | EIT_present_following_flag, running_status, free_CA_mode, 307 | descriptors_loop_length) 308 | parseDescriptors(idx + 5, service, t_packet, b_packet) 309 | t_packet.sdt.services.append(service) 310 | idx = idx + 5 + descriptors_loop_length 311 | 312 | def add_event(b_type, event_map, t_packet): 313 | for event in t_packet.eit.events: 314 | if b_type == 
TYPE_DEGITAL: 315 | m_id = event.event_id 316 | else: 317 | m_id = (event.transport_stream_id << 32) + (event.service_id << 16) + event.event_id 318 | master = event_map.get(m_id) 319 | if master == None: 320 | master = copy.copy(event) 321 | master.descriptors = None 322 | event_map[m_id] = master 323 | elif event.service_id < master.service_id: 324 | master.service_id = event.service_id 325 | for desc in event.descriptors: 326 | tag = desc.descriptor_tag 327 | if tag == TAG_SED: 328 | master.desc_short = desc 329 | elif tag == TAG_CD: 330 | master.desc_content = desc 331 | elif tag == TAG_EED: 332 | if master.desc_extend == None: 333 | master.desc_extend = desc.items 334 | else: 335 | master.desc_extend.extend(desc.items) 336 | 337 | def fix_events(events): 338 | event_list = [] 339 | for event in events: 340 | item_list = [] 341 | item_map = {} 342 | if event.desc_short == None: 343 | continue 344 | if event.desc_extend != None: 345 | for item in event.desc_extend: 346 | if item.item_description_length == 0: 347 | item_list[-1].item.extend(item.item) 348 | item_list[-1].item_length += item.item_length 349 | else: 350 | item_list.append(item) 351 | for item in item_list: 352 | arib = aribstr.AribString(item.item_description) 353 | item.item_description = arib.convert_utf() 354 | arib = aribstr.AribString(item.item) 355 | item.item = arib.convert_utf() 356 | for item in item_list: 357 | item_map[item.item_description] = item.item 358 | event.desc_extend = item_map 359 | event_list.append(event) 360 | return event_list 361 | 362 | def compare_event(x, y): 363 | return int((x.start_time - y.start_time).total_seconds()) 364 | 365 | def compare_service(x, y): 366 | service_id = x.service_id - y.service_id 367 | if service_id == 0: 368 | return int((x.start_time - y.start_time).total_seconds()) 369 | else: 370 | return service_id 371 | 372 | def parse_eit(b_type, service, tsfile, debug): 373 | # Event Information Table 374 | ids = service.keys() 375 | event_map = {} 
376 | parser = TransportPacketParser(tsfile, EIT_PID, debug) 377 | for t_packet in parser: 378 | if t_packet.eit.service_id in ids: 379 | parseEvents(t_packet, t_packet.binary_data) 380 | add_event(b_type, event_map, t_packet) 381 | print >> sys.stderr, "EIT: %i packets read" % (parser.count) 382 | event_list = event_map.values() 383 | event_list.sort(compare_event if b_type == TYPE_DEGITAL else compare_service) 384 | event_list = fix_events(event_list) 385 | return event_list 386 | 387 | def parse_sdt(b_type, tsfile, debug): 388 | # Service Description Table 389 | service_map = {} 390 | parser = TransportPacketParser(tsfile, SDT_PID, debug) 391 | for t_packet in parser: 392 | parseService(t_packet, t_packet.binary_data) 393 | for service in t_packet.sdt.services: 394 | if ( service.EIT_schedule_flag == 1 and 395 | service.EIT_present_following_flag == 1 and 396 | service.descriptors[0].service_type == 0x01): 397 | service_map[service.service_id] = service.descriptors[0].service_name 398 | if b_type == TYPE_DEGITAL: 399 | break 400 | print >> sys.stderr, "SDT: %i packets read" % (parser.count) 401 | return service_map 402 | 403 | def parse_ts(b_type, tsfile, debug): 404 | service = parse_sdt(b_type, tsfile, debug) 405 | tsfile.seek(0) 406 | events = parse_eit(b_type, service, tsfile, debug) 407 | return (service, events) 408 | -------------------------------------------------------------------------------- /xmltv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from xml.etree.ElementTree import ElementTree 4 | from xml.etree.ElementTree import Element 5 | import xml.etree.ElementTree 6 | import xml.dom.minidom 7 | from constant import * 8 | 9 | def get_text(text): 10 | return text.decode('utf-8') if text != None else "" 11 | 12 | def create_xml(b_type, channel_id, service, events, filename, pretty_print, output_eid): 13 | 14 | channel_el_list = create_channel(b_type, 
channel_id, service) 15 | programme_el_list = create_programme(channel_id, events, b_type, output_eid) 16 | attr = { 17 | 'generator-info-name':'epgdump_py', 18 | 'generator-info-url':'mailto:epgdump_py@gmail.com'} 19 | tv_el = Element('tv', attr) 20 | 21 | for el in channel_el_list: 22 | tv_el.append(el) 23 | for el in programme_el_list: 24 | tv_el.append(el) 25 | 26 | fd = open(filename, 'w') 27 | if pretty_print: 28 | xml_str = xml.etree.ElementTree.tostring(tv_el) 29 | xml_str = xml.dom.minidom.parseString(xml_str).toprettyxml(indent=' ', encoding='utf-8') 30 | fd.write(xml_str) 31 | else: 32 | xml.etree.ElementTree.ElementTree(tv_el).write(fd, 'utf-8', ' ') 33 | fd.close() 34 | 35 | def create_channel(b_type, channel_id, service): 36 | el_list = [] 37 | for (service_id, service_name) in service.items(): 38 | ch = b_type + str(service_id) if channel_id == None else channel_id 39 | attr = {'id':ch} 40 | channel_el = Element('channel', attr) 41 | attr = {'lang':'ja'} 42 | 43 | display_el = Element('display-name', attr) 44 | display_el.text = get_text(service_name) 45 | channel_el.append(display_el) 46 | 47 | display_el = Element('display-name', attr) 48 | display_el.text = ch 49 | channel_el.append(display_el) 50 | 51 | display_el = Element('display-name', attr) 52 | display_el.text = ch + ' ' + get_text(service_name) 53 | channel_el.append(display_el) 54 | 55 | el_list.append(channel_el) 56 | 57 | return el_list 58 | 59 | def create_programme(channel_id, events, b_type, output_eid): 60 | t_format = '%Y%m%d%H%M%S +0900' 61 | el_list = [] 62 | for event in events: 63 | 64 | ch = b_type + str(event.service_id) if channel_id == None else channel_id 65 | start = event.start_time.strftime(t_format) 66 | stop = (event.start_time + event.duration).strftime(t_format) 67 | attr = {'start':start, 'stop':stop, 'channel':ch} 68 | programme_el = Element('programme', attr) 69 | 70 | attr = {'lang':'ja'} 71 | 72 | title_el = Element('title', attr) 73 | title_el.text = 
get_text(event.desc_short.event_name) 74 | programme_el.append(title_el) 75 | 76 | eed_text = '' 77 | if event.desc_extend != None: 78 | for (k,v) in event.desc_extend.items(): 79 | eed_text += '\n' + get_text(k) + '\n' + get_text(v) + '\n' 80 | 81 | desc_el = Element('desc', attr) 82 | desc_el.text = get_text(event.desc_short.text) + '\n' + eed_text 83 | programme_el.append(desc_el) 84 | 85 | if event.desc_content != None: 86 | category_list = [] 87 | for ct in event.desc_content.content_type_array: 88 | category_text = get_text(ct.content_nibble_level_1) 89 | if category_text not in category_list and category_text != 'UNKNOWN': 90 | category_list.append(category_text) 91 | category_text = get_text(ct.content_nibble_level_2) 92 | if category_text not in category_list and category_text != 'UNKNOWN': 93 | category_list.append(category_text) 94 | for category_text in category_list: 95 | category_el_1 = Element('category', attr) 96 | category_el_1.text = category_text 97 | programme_el.append(category_el_1) 98 | if output_eid == True: 99 | el = Element('transport-stream-id') 100 | el.text = str(event.transport_stream_id) 101 | programme_el.append(el) 102 | el = Element('service-id') 103 | el.text = str(event.service_id) 104 | programme_el.append(el) 105 | el = Element('event-id') 106 | el.text = str(event.event_id) 107 | programme_el.append(el) 108 | 109 | el_list.append(programme_el) 110 | 111 | return el_list 112 | --------------------------------------------------------------------------------