├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── COPYING.txt ├── History.txt ├── README.md ├── README_old.md ├── cmake_uninstall.cmake.in ├── msvc ├── D2VSource.sln ├── D2VSource.vcxproj └── D2VSource.vcxproj.filters └── src ├── AVISynthAPI.cpp ├── AVISynthAPI.h ├── MPEG2Decoder.cpp ├── MPEG2Decoder.h ├── color_convert.cpp ├── color_convert.h ├── d2vsource.rc ├── getbit.cpp ├── gethdr.cpp ├── getpic.cpp ├── global.cpp ├── global.h ├── idct.h ├── idct_ap922_sse2.cpp ├── idct_llm_float_avx2.cpp ├── idct_llm_float_sse2.cpp ├── idct_ref_sse3.cpp ├── mc.cpp ├── mc.h ├── misc.cpp ├── misc.h ├── store.cpp ├── win_import_min.h ├── yv12pict.cpp └── yv12pict.h /.gitattributes: -------------------------------------------------------------------------------- 1 | #sources 2 | *.c text 3 | *.cc text 4 | *.cxx text 5 | *.cpp text 6 | *.c++ text 7 | *.hpp text 8 | *.h text 9 | *.h++ text 10 | *.hh text 11 | 12 | # Compiled Object files 13 | *.slo binary 14 | *.lo binary 15 | *.o binary 16 | *.obj binary 17 | 18 | # Precompiled Headers 19 | *.gch binary 20 | *.pch binary 21 | 22 | # Compiled Dynamic libraries 23 | *.so binary 24 | *.dylib binary 25 | *.dll binary 26 | 27 | # Compiled Static libraries 28 | *.lai binary 29 | *.la binary 30 | *.a binary 31 | *.lib binary 32 | 33 | # Executables 34 | *.exe binary 35 | *.out binary 36 | *.app binary 37 | ############################################################################### 38 | # Set default behavior to automatically normalize line endings. 39 | ############################################################################### 40 | * text=auto 41 | 42 | ############################################################################### 43 | # Set the merge driver for project and solution files 44 | # 45 | # Merging from the command prompt will add diff markers to the files if there 46 | # are conflicts (Merging from VS is not affected by the settings below, in VS 47 | # the diff markers are never inserted). 
Diff markers may cause the following 48 | # file extensions to fail to load in VS. An alternative would be to treat 49 | # these files as binary and thus will always conflict and require user 50 | # intervention with every merge. To do so, just comment the entries below and 51 | # uncomment the group further below 52 | ############################################################################### 53 | 54 | *.sln text eol=crlf 55 | *.csproj text eol=crlf 56 | *.vbproj text eol=crlf 57 | *.vcxproj text eol=crlf 58 | *.vcproj text eol=crlf 59 | *.dbproj text eol=crlf 60 | *.fsproj text eol=crlf 61 | *.lsproj text eol=crlf 62 | *.wixproj text eol=crlf 63 | *.modelproj text eol=crlf 64 | *.sqlproj text eol=crlf 65 | *.wwaproj text eol=crlf 66 | 67 | *.xproj text eol=crlf 68 | *.props text eol=crlf 69 | *.filters text eol=crlf 70 | *.vcxitems text eol=crlf 71 | 72 | 73 | #*.sln merge=binary 74 | #*.csproj merge=binary 75 | #*.vbproj merge=binary 76 | #*.vcxproj merge=binary 77 | #*.vcproj merge=binary 78 | #*.dbproj merge=binary 79 | #*.fsproj merge=binary 80 | #*.lsproj merge=binary 81 | #*.wixproj merge=binary 82 | #*.modelproj merge=binary 83 | #*.sqlproj merge=binary 84 | #*.wwaproj merge=binary 85 | 86 | #*.xproj merge=binary 87 | #*.props merge=binary 88 | #*.filters merge=binary 89 | #*.vcxitems merge=binary 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.exe 3 | *.dll 4 | *.lib 5 | *.log 6 | *.diff 7 | *.patch 8 | *.old 9 | *.bak 10 | *.orig 11 | *.rej 12 | *.aps 13 | *.ncb 14 | *.opensdf 15 | *.sdf 16 | *.VC.db 17 | *.VC.opendb 18 | *.suo 19 | *.user 20 | *.obj 21 | *.res 22 | *.exp 23 | *.ilk 24 | *.pdb 25 | build/*/*/ipch/* 26 | build/*/*/Debug/* 27 | build/*/*/Release/* 28 | bin/msvc*/* 29 | -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | ##### 1.3.0: 2 | Changed `_SARDen`, `_SARNum` to display MPEG-4 PAR. (videoh) 3 | Changed `_AspectRatio` type to array int. 4 | 5 | ##### 1.2.6: 6 | Restored previous behavior of frame property `_FieldBased`. 7 | Fixed frame properties `_DurationNum` and `_DurationDen`. 8 | Added frame properties `_SARDen`, `_SARNum`, `_FieldOrder`, `_FieldOperation`, `_TFF`, `_RFF`, `_Film`, `_ProgressiveFrame`, `_ChromaLocation`, `_AbsoluteTime`. (videoh) 9 | Added parameters `nocrop` and `rff`. 10 | 11 | ##### 1.2.5: 12 | Fixed frame property `_FieldBased`. 13 | 14 | ##### 1.2.4: 15 | Fixed regression for relative file paths. 16 | 17 | ##### 1.2.3: 18 | Fixed FFSAR_NUM, FFSAR_DEN, FFSAR. 19 | 20 | ##### 1.2.2: 21 | Fixed values of frame properties _Quants* when info=0. 22 | 23 | ##### 1.2.1: 24 | Added support for path with forward slash (Windows). 25 | 26 | ##### 1.2.0: 27 | Added variables (ffms2 like) - FFSAR_NUM, FFSAR_DEN, FFSAR. 28 | 29 | ##### 1.1.0: 30 | Set frame properties - _DurationNum, _DurationDen, _FieldBased, _AspectRatio, _GOPNumber, _GOPPosition, _GOPClosed, _EncodedFrameTop, _EncodedFrameBottom, _PictType, _Matrix, _QuantsAverage, _QuantsMin, _QuantsMax. 31 | 32 | ##### 1.0.0: 33 | Renamed the plugin and function to D2VSource. 
34 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(libd2vsource LANGUAGES CXX) 4 | 5 | find_package (Git) 6 | if (GIT_FOUND) 7 | execute_process (COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 8 | OUTPUT_VARIABLE ver 9 | OUTPUT_STRIP_TRAILING_WHITESPACE 10 | ) 11 | else () 12 | message (STATUS "GIT not found") 13 | endif () 14 | 15 | add_library(d2vsource SHARED 16 | src/AVISynthAPI.cpp 17 | src/color_convert.cpp 18 | src/getbit.cpp 19 | src/gethdr.cpp 20 | src/getpic.cpp 21 | src/global.cpp 22 | src/idct_ap922_sse2.cpp 23 | src/idct_llm_float_avx2.cpp 24 | src/idct_llm_float_sse2.cpp 25 | src/idct_ref_sse3.cpp 26 | src/mc.cpp 27 | src/misc.cpp 28 | src/MPEG2Decoder.cpp 29 | src/store.cpp 30 | src/yv12pict.cpp 31 | ) 32 | 33 | if (NOT CMAKE_BUILD_TYPE) 34 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) 35 | endif() 36 | 37 | message(STATUS "Build type - ${CMAKE_BUILD_TYPE}") 38 | 39 | set_source_files_properties(src/idct_ap922_sse2.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse2") 40 | set_source_files_properties(src/idct_llm_float_sse2.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse2") 41 | set_source_files_properties(src/idct_ref_sse3.cpp PROPERTIES COMPILE_OPTIONS "-mssse3") 42 | set_source_files_properties(src/idct_llm_float_avx2.cpp PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma") 43 | 44 | target_include_directories(d2vsource PRIVATE 45 | ${CMAKE_CURRENT_SOURCE_DIR}/src 46 | /usr/local/include/avisynth 47 | ) 48 | 49 | set_target_properties(d2vsource PROPERTIES OUTPUT_NAME "d2vsource.${ver}") 50 | 51 | target_compile_features(d2vsource PRIVATE cxx_std_17) 52 | 53 | include(GNUInstallDirs) 54 | 55 | INSTALL(TARGETS d2vsource 56 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth") 57 | 58 | # uninstall target 59 | if(NOT TARGET uninstall) 60 | 
configure_file( 61 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in" 62 | "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" 63 | IMMEDIATE @ONLY) 64 | 65 | add_custom_target(uninstall 66 | COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) 67 | endif() 68 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) year name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 
320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | <signature of Ty Coon>, 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License. 341 | -------------------------------------------------------------------------------- /History.txt: -------------------------------------------------------------------------------- 1 | 2005/01/11 2 | ---------- 3 | 4 | Fixed a little bug with the info option, and added faster mmx conv 5 | routines for 4:2:2 planar -> 4:2:2 packed and vice versa. 6 | 7 | by "tritical" 8 | 9 | 2005/01/05 10 | ---------- 11 | 12 | This is a 1.0.13b4 version of dgdecode but with the following changes: 13 | 14 | 1.) added info option to mpeg2source() to display info on frames 15 | - example usage: mpeg2source(info=true) 16 | 17 | 2.) iPP is a bool again. If it is not explicitly set to true or false then it 18 | defaults to auto which switches between field/frame based pp based on the 19 | progressive_frame flag 20 | 21 | 3.) 
4:2:2 input support 22 | - can now correctly decode 4:2:2 input sources and correctly output it 23 | to AviSynth as YUY2 24 | 25 | *NOTE: mpeg2source() now throws an error on 4:4:4 input, it could handle it 26 | but AviSynth 2.5 does not have internal YUV 4:4:4 support 27 | 28 | 3a.) 4:2:2 PLANAR post-processing support 29 | 30 | 4.) Upsampling to 4:2:2 from 4:2:0 based on progressive_frame flag. Uses new 31 | upConv parameter of mpeg2source(). 32 | 33 | - example mpeg2source(upConv=true) 34 | 35 | *NOTES: it will only work if input is 4:2:0, if input is anything else 36 | then the upConv parameter is ignored 37 | 38 | 5.) BlindPP now supports YUY2 colorspace 39 | 40 | 6.) other stuff: 41 | 42 | - fixed blindPP syntax bug (x and X) 43 | - fixed a small bug with showQ option 44 | - fixed small memory leak with FrameList/GOPList not being free'd 45 | - fixed a bug in the vertical chroma deblocking postprocessing 46 | QP pointer being passed was incorrect for 4:2:0 47 | - faster mmx 4:2:2 to packed YUY2 and YUY2 to planar 4:2:2 conversions 48 | - info output would not work correctly if temporal_reference was not zero 49 | based at the beginning of gops, it does now 50 | 51 | by "tritical" 52 | 53 | changes.doc (Sept 11, 2003) 54 | -------------------------- 55 | 56 | Modified the decoding and random access code so that it never has 57 | to skip B frames. This was the cause of the frame dropping problem. 58 | This version must be used with an appropriately modified DVD2AVI 59 | version, for example, DGIndex. If these two are used together, 60 | frames will never be dropped. Refer to the DVD2AVI forum at doom9.org 61 | for a discussion of this problem. Search for the thread "does dvd2avi 62 | chop off frames?". Also fixes several bugs in MPEG2 decoding. 63 | 64 | Note: This version is derived from Nic's MPEG2DEC3 1.10. 65 | 66 | by Donald A. 
Graft 67 | 68 | changes.doc (08 dec 2002) 69 | ------------------------- 70 | 71 | here's the code of MPEG2Dec3 72 | it's based on MPEG2Dec2, some parts of the code are intact, others heavily modified. 73 | you'll need both nasm & masm to compile it. 74 | it should compile flawlessly under M$ VC++ 6 SP4 (the compiler i use) 75 | you can use the profiling define (commented out in global.h) to test optimisations. 76 | BTW, if you don't have nasm, i added mcsse.obj to the sources. 77 | 78 | MarcFD 79 | 80 | changes.doc (05/10/2002) 81 | ----------------- 82 | 83 | Removed Dividee filters from the sources. 84 | 85 | Vlad59 (babas.lucas@laposte.net) 86 | 87 | 88 | changes.doc (03/30/2002) 89 | ----------------- 90 | 91 | In addition to pcdvdguy's changes below this version also is optimized for P4/SSE2 code. This will be used whenever the machine supports it and "iDCT_Algorithm=5" is specified in the .d2v file. That can be put there with an editor or by using the newer version of DVD2AVI that also supports this. See the save-oe project on Sourceforge. 92 | 93 | This version also has other minor optimizations and something of a fix for crashing on garbage data such as ATSC HDTV captures. 94 | 95 | If cropping has been specified in DVD2AVI then it will now work, without messing up the color. If resizing was specified it will still be ignored, so you'll have to do that in your .Avisynth script or elsewhere. 96 | 97 | Tom Barry 98 | 99 | 100 | 101 | changes.doc (03/29/2002, late at night...) 102 | ----------------- 103 | This document explains the changes made to the DVD2AVI/MPEG2DEC.DLL source-code, for the purpose of decoding MPEG-2 transport streams. 104 | 105 | Special thanks to Ben Cooley, for writing HDTVtoMPEG2, a great source of inspiration! 106 | 107 | MPEG-2 transport stream demuxing 108 | 109 | This feature allows MPEG2DEC.DLL to parse MPEG2 transport streams (*.trp, *.ts), and decode MPEG-1/2 video elementary streams. 
It has been successfully tested with several ATSC/DTV broadcasters in the Southern California area. The code to support this feature is still considered 'preliminary', and suffers from the following limitations: 110 | 111 | 1) When opened in DVD2AVI, the input-filename extension *.trp and *.ts are unconditionally treated as transport-streams. Otherwise, other input-filenames are briefly checked (first 2048 bytes) for an MPEG-2 transport sync-byte sequence. If this sequence is found, the entire stream is treated as an MPEG-2 transport stream. 112 | 113 | 2) There is currently *NO* GUI to select the video-ID and audio-ID. These variables are stored in the DVD2AVI.ini file, and the user (that's YOU) must manually edit the ini file to set the video-ID and audio-ID. 114 | 115 | 2) If you want to use avisynth with DVD2AVI, you will need the updated MPEG2DEC.DLL. The updated DLL has added support for transport-stream demuxing. If you want to use DVD2AVI as a VFAPI frameserver, you will need the updated DVD2AVI.VFP (for the same reason.) 116 | 117 | 3) Do NOT mix MPEG-2 program streams (*.vob, *.mpg) and MPEG-2 transport streams (*.trp, *.ts) in the same d2v project-file! 118 | 119 | 4) DVD2AVI is sensitive to bitstream errors. Your broadcast DTV recordings may contain errors, which will manifest as distortion, image breakup, and other visual artifacts. At worst, mpeg2dec.dll can crash. Unfortunately, there is no way to guard against this except to routinely check your DTV-receiver's quality-monitor and adjust accordingly. 120 | 121 | mpeg2dec.dll relies on the variable "SystemStream_Flag==2" to identify transport-streams. (In the d2v-project file, the following line indicates the video_stream_id and audio_stream_id, but only the video_stream_id is used.) 122 | 123 | 5) The ATSC DTV standard allows many different formats. To work with the highest-resolution mode (1920x1080 30i), a fast CPU and lots of memory is recommended! 
124 | 125 | Source code-changes 126 | ---------------------------- 127 | SystemStream_Flag == 2 ; // MPEG-2 transport stream (calls to Next_Packet() are redirected to Next_Transport_Packet() ) 128 | 129 | global.h - add declaration for function "Next_Transport_Packet()" 130 | int MPEG2_Transport_VideoPID; // VideoID for MPEG-2 transport streams 131 | int MPEG2_Transport_AudioPID;// AudioID for MPEG-2 transport streams 132 | 133 | getbit.c - add function Next_Transport_Packet(), this does the bulk of the work! 134 | 135 | gui.cpp - initializes the variables (MPEG2_Transport_VideoPID, MPEG2_Transport_AudioPID), first by reading from dvd2avi.ini, and then by re-reading those values from the D2V project file (if one is opened, and the SystemStream_Flag==2.) Also modify the 'open' dialog-box, to add an entry for file-filter ("*.trp, *.ts") 136 | 137 | mpeg2dec.c - scans the first 2048 bytes of the MPEG-bitstream (to check if it's an MPEG-2 transport-stream.) 138 | 139 | --- 140 | These source-files are changes to the mpeg2dec_dll.zip file from www.davetech.org/software2.htm 141 | 142 | liaor@iname.com 143 | http://members.tripod.com/~liaor 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## D2VSource 2 | 3 | This is a project (previously named as MPEG2DecPlus) to modify DGDecode.dll for AviSynth+. 4 | 5 | ### Requirements: 6 | 7 | - AviSynth 2.60 / AviSynth+ 3.4 or later 8 | 9 | - Microsoft VisualC++ Redistributable Package 2022 (can be downloaded from [here](https://github.com/abbodi1406/vcredist/releases)) 10 | 11 | ### Usage: 12 | 13 | ``` 14 | D2VSource(string "d2v", int "idct", bool "showQ", int "info", int "upConv", bool "i420", bool "iCC", bool "nocrop", int "rff") 15 | ``` 16 | 17 | ### Parameters: 18 | 19 | - d2v\ 20 | The path of the d2v file.
21 | 22 | - idct\ 23 | The iDCT algorithm to use.\ 24 | 0: Follow the d2v specification.\ 25 | 1,2,3,6,7: AP922 integer (same as SSE2/MMX).\ 26 | 4: SSE2/AVX2 LLM (single precision floating point, SSE2/AVX2 determination is automatic).\ 27 | 5: IEEE 1180 reference (double precision floating point).\ 28 | Default: -1. 29 | 30 | - showQ\ 31 | It displays macroblock quantizers.\ 32 | Default: False. 33 | 34 | - info\ 35 | It prints debug information.\ 36 | 0: Do not display.\ 37 | 1: Overlay on video frame.\ 38 | 2: Output with OutputDebugString(). (The output can be viewed with DebugView.exe).\ 39 | 3: Embed hints in 64 bytes of the frame upper left corner.\ 40 | Default: 0. 41 | 42 | - upConv\ 43 | The output format.\ 44 | 0: No conversion. YUV420 output is YV12, YUV422 output is YV16.\ 45 | 1: Output YV16.\ 46 | 2: Output YV24.\ 47 | Default: 0. 48 | 49 | - i420\ 50 | It determines what the output of YUV420 is.\ 51 | True: The output is i420.\ 52 | False: The output is YV12.\ 53 | Default: False. 54 | 55 | - iCC\ 56 | It determines how YUV420 is upsampled when upConv is 1 or 2.\ 57 | True: Force field-based upsampling.\ 58 | False: Force progressive upsampling.\ 59 | Default: Auto determination based on the frame flag. 60 | 61 | - nocrop\ 62 | Use direct-rendered buffer, which may need cropping.\ 63 | It could provide a speedup when you know you need to crop your image anyway, by avoiding extra memcpy calls.\ 64 | Default: False. 65 | 66 | - rff\ 67 | Changes Field_Operation without the need of editing d2v or rescanning with different Field Operation.\ 68 | 0: Honor Pulldown Flags.\ 69 | 1: Force Film.\ 70 | 2: Ignore Pulldown Flags.\ 71 | Default: -1 - read the value from d2v. 72 | 73 | ### Exported variables: 74 | 75 | FFSAR_NUM, FFSAR_DEN, FFSAR (these indicate Generic PAR). 76 | 77 | ### Frame properties 78 | 79 | _AbsoluteTime [float]\ 80 | The frame’s absolute timestamp in seconds.
81 | 82 | _AspectRatio [int]\ 83 | An array giving the display aspect ratio. 84 | 85 | _ChromaLocation [int]\ 86 | Chroma sample position in YUV formats: 87 | 0=left, 1=center, 2=topleft, 3=top, 4=bottomleft, 5=bottom. 88 | 89 | _DurationNum [int], _DurationDen [int]\ 90 | The frame’s duration in seconds as a rational number. 91 | 92 | _EncodedFrameTop [int], _EncodedFrameBottom [int]\ 93 | Frame number (before pulldown) used to generate this frame's 94 | top/bottom field. 95 | 96 | _FieldBased [int]\ 97 | Describes the composition of the frame:\ 98 | 0=frame based (progressive), 1=bottom field first, 2=top field first.\ 99 | Note that the GOP progressive flag is used to determine whether the frame is progressive. 100 | 101 | _FieldOperation [int]\ 102 | Describes the field operation option in effect:\ 103 | 0=honor pulldown, 1=force film, 2=ignore pulldown. 104 | 105 | _FieldOrder [int]\ 106 | Display field order of the frame:\ 107 | 0=bottom field first, 1=top field first. 108 | 109 | _Film [int]\ 110 | Set if the frame is part of a 3:2 soft pulldown section.\ 111 | Note that this uses the RFF history of several preceding\ 112 | frames, and so is valid only when doing linear access. 113 | 114 | _GOPClosed [int]\ 115 | Set if the current GOP is closed. 116 | 117 | _GOPNumber [int]\ 118 | The 0-based GOP number that contains the frame. Note that\ 119 | if this is set as the value x, then propShow displays it as\ 120 | [x, y] where y is the 0-based frame number of the first frame\ 121 | in the GOP. 122 | 123 | _GOPPosition [int]\ 124 | The GOP position field from the D2V file for the GOP containing\ 125 | the frame. 126 | 127 | _Matrix [int]\ 128 | The matrix number field from the D2V file for the GOP containing\ 129 | the frame. 130 | 131 | _PictType [data]\ 132 | A single character describing the frame type. It uses the common\ 133 | IPB characters but others may also be used for formats with\ 134 | additional frame types. 
135 | 136 | _ProgressiveFrame [int]\ 137 | Set if the progressive_frame flag is set for this frame. 138 | 139 | _QuantsAverage [int]\ 140 | The average quantizer value for the frame. 141 | 142 | _QuantsMax [int]\ 143 | The maximum quantizer value for the frame. 144 | 145 | _QuantsMin [int]\ 146 | The minimum quantizer value for the frame. 147 | 148 | _RFF [int]\ 149 | If _FieldOperation is 2 (ignore pulldown) then _RFF describes whether the stream specifies that a repeat field operation is to be performed on this frame. If _FieldOperation is 0 (honor pulldown) or 1 (force film) then _RFF describes whether the frame was composed with field repetition. 150 | 151 | _SARDen [int]\ 152 | The denominator of the "pixel size" (MPEG-4 PAR), also called the\ 153 | Sample Aspect Ratio (SAR). 154 | 155 | _SARNum [int]\ 156 | The numerator of the "pixel size" (MPEG-4 PAR), also called the\ 157 | Sample Aspect Ratio (SAR). 158 | 159 | _TFF [int]\ 160 | If _FieldOperation is 2 (ignore pulldown) and _RFF is set, then _TFF\ 161 | describes whether the stream specifies that the top field is to be repeated, otherwise the bottom field is to be repeated. If _FieldOperation is 0 (honor pulldown) or 1 (force film) then _TFF is inapplicable and is set to -1. 162 | 163 | ### Building: 164 | 165 | - Windows\ 166 | Use solution files. 167 | 168 | - Linux 169 | ``` 170 | Requirements: 171 | - Git 172 | - C++17 compiler 173 | - CMake >= 3.16 174 | ``` 175 | ``` 176 | git clone https://github.com/Asd-g/MPEG2DecPlus && \ 177 | cd MPEG2DecPlus && \ 178 | mkdir build && \ 179 | cd build && \ 180 | 181 | cmake ..
182 | make -j$(nproc) 183 | sudo make install 184 | ``` 185 | -------------------------------------------------------------------------------- /README_old.md: -------------------------------------------------------------------------------- 1 | # MPEG2DecPlus 2 | これはDGDecode.dllをAvisynth+用に改造するプロジェクトです。 3 | 4 | ###やりたいこと: 5 | - 改築を重ねた温泉旅館のようなコードをきれいにする。 6 | - VFAPI用コード、YUY2用コード等、現在では必要ないコードの排除。 7 | - アセンブラの排除による64bitへの対応、及びSSE2/AVX2でのintrinsicによる最適化。等 8 | 9 | ###必要なもの: 10 | - Windows Vista SP2 以降の Windows OS 11 | - SSE3が使えるCPU(Intel Pentium4(prescott) または AMD Athlon64x2 以降) 12 | - Avisynth+ r2172以降 またはAvisynth 2.60以降 13 | - Microsoft VisualC++ Redistributable Package 2019. 14 | 15 | ###使い方: 16 | ``` 17 | MPEG2Source(string "d2v", int "cpu", int "idct", bool "iPP", int "moderate_h", int "moderate_v", 18 | bool "showQ", bool "fastMC", string "cpu2", int "info", int "upConv", bool "i420", bool "iCC") 19 | ``` 20 | d2v: dv2ファイルのパス 21 | 22 | cpu: 現在使用不可。設定しても何も起こらない。iPP, moderate_h, moderate_v, fastMC, cpu2も同様。 23 | 24 | idct: 使用するiDCTアルゴリズム。 25 | 0: d2vの指定に従う。 26 | 1,2,3,6,7: AP922整数(SSE2MMXと同じもの)。 27 | 4: SSE2/AVX2 LLM(単精度浮動小数点、SSE2/AVX2の判定は自動)。 28 | 5: IEEE 1180 reference(倍精度浮動小数点)。 29 | 30 | showQ: マクロブロックの量子化器を表示する。 31 | 32 | info: デバッグ情報を出力する。 33 | 0: 表示しない。(デフォルト) 34 | 1: 動画フレームにオーバーレイで表示。 35 | 2: OutputDebugString()で出力。(内容はDebugView.exeで確認) 36 | 3: hintsをフレーム左上隅の64バイトに埋め込む。 37 | 38 | upConv: フレームを出力するフォーマット。 39 | 0: YUV420なソースはYV12で出力、YUV422なソースはYV16で出力。 40 | 1: YV16で出力。 41 | 2: YV24で出力。 42 | 43 | i420: trueであればYUV420をi420として出力する。現在ではどちらでもほぼ変わりはない。 44 | 45 | iCC: upConvにおけるYUV420の取扱いの設定。 46 | 未設定: フレームフラグに従ってinterlaced/progressiveを切り替える。 47 | true: 全フレームをinterlacedとして処理する。 48 | false: 全フレームをprogressiveとして処理する。 49 | 50 | 51 | ``` 52 | LumaYUV(clip c, int "lumoff", int "lumgain") 53 | ``` 54 | 入力クリップの輝度をlumoffとlumgainの値によって変更する。出力Y = (入力y * lumgain) + lumoff 55 | 56 | clip: Y8, YV12, YV16, YV411, YV24をサポート。 57 | 58 | lumoff: -255 ~ 255 (デフォルト0) 59 | 60 | lumgain: 
0.0 ~ 2.0 (デフォルト1.0) 61 | 62 | ###ソースコード 63 | https://github.com/chikuzen/MPEG2DecPlus/ 64 | 65 | 66 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") 3 | endif() 4 | 5 | file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 17 | endif() 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif() 21 | endforeach() 22 | -------------------------------------------------------------------------------- /msvc/D2VSource.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30204.135 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D2VSource", "D2VSource.vcxproj", "{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | 
{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x64.ActiveCfg = Debug|x64 17 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x64.Build.0 = Debug|x64 18 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x86.ActiveCfg = Debug|Win32 19 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x86.Build.0 = Debug|Win32 20 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x64.ActiveCfg = Release|x64 21 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x64.Build.0 = Release|x64 22 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x86.ActiveCfg = Release|Win32 23 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {80DFC486-AC15-406C-BBDA-6D722A495010} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /msvc/D2VSource.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 16.0 23 | {BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6} 24 | Win32Proj 25 | 10.0 26 | 27 | 28 | 29 | Application 30 | true 31 | v142 32 | 33 | 34 | DynamicLibrary 35 | false 36 | v142 37 | 38 | 39 | DynamicLibrary 40 | true 41 | v142 42 | 43 | 44 | DynamicLibrary 45 | false 46 | v142 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | true 68 | 69 | 70 | false 71 | ..\..\AviSynthPlus\avs_core\include;$(IncludePath) 72 | 73 | 74 | ..\..\AviSynthPlus\avs_core\include;$(IncludePath) 75 | false 76 | 77 | 78 | ..\..\AviSynthPlus\avs_core\include;$(IncludePath) 79 | 80 | 81 | 82 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 83 | MultiThreadedDebugDLL 84 | Level3 85 | ProgramDatabase 86 | Disabled 87 | 88 | 
89 | MachineX86 90 | true 91 | Windows 92 | 93 | 94 | 95 | 96 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 97 | MultiThreadedDLL 98 | Level3 99 | ProgramDatabase 100 | true 101 | AnySuitable 102 | true 103 | Speed 104 | true 105 | true 106 | Precise 107 | stdcpp17 108 | true 109 | 110 | 111 | MachineX86 112 | true 113 | Windows 114 | true 115 | true 116 | UseLinkTimeCodeGeneration 117 | 118 | 119 | 120 | 121 | true 122 | AnySuitable 123 | true 124 | Speed 125 | true 126 | true 127 | stdcpp17 128 | true 129 | Precise 130 | 131 | 132 | true 133 | 134 | 135 | true 136 | UseLinkTimeCodeGeneration 137 | 138 | 139 | 140 | 141 | stdcpp17 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | false 153 | 154 | 155 | 156 | AdvancedVectorExtensions2 157 | AdvancedVectorExtensions2 158 | AdvancedVectorExtensions2 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | false 168 | 169 | 170 | 171 | 172 | 173 | 174 | false 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | false 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /msvc/D2VSource.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | Source Files 50 | 51 | 52 | Source Files 53 | 54 | 55 | Source Files 56 | 57 | 58 | Source Files 59 | 60 | 61 | Source Files 62 | 
63 | 64 | 65 | 66 | Header Files 67 | 68 | 69 | Header Files 70 | 71 | 72 | Header Files 73 | 74 | 75 | Header Files 76 | 77 | 78 | Header Files 79 | 80 | 81 | Header Files 82 | 83 | 84 | Header Files 85 | 86 | 87 | Header Files 88 | 89 | 90 | Header Files 91 | 92 | 93 | 94 | 95 | Resource Files 96 | 97 | 98 | -------------------------------------------------------------------------------- /src/AVISynthAPI.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Avisynth 2.5 API for MPEG2Dec3 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * based of the intial MPEG2Dec Avisytnh API Copyright (C) Mathias Born - May 2001 7 | * 8 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 9 | * 10 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 11 | * it under the terms of the GNU General Public License as published by 12 | * the Free Software Foundation; either version 2, or (at your option) 13 | * any later version. 14 | * 15 | * MPEG2Dec3 is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with GNU Make; see the file COPYING. If not, write to 22 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23 | * 24 | */ 25 | 26 | #ifndef MPEG2DECPLUS_AVS_API_H 27 | #define MPEG2DECPLUS_AVS_API_H 28 | 29 | #include 30 | 31 | #include "avisynth.h" 32 | #include "MPEG2Decoder.h" 33 | 34 | 35 | class D2VSource : public IClip { 36 | VideoInfo vi; 37 | //int _PP_MODE; 38 | uint8_t* bufY, * bufU, * bufV; // for 4:2:2 input support 39 | CMPEG2Decoder* decoder; 40 | bool luminanceFlag; 41 | uint8_t luminanceTable[256]; 42 | bool has_at_least_v8; 43 | int history[5]; 44 | 45 | public: 46 | D2VSource(const char* d2v, int idct, bool showQ, int _info, int _upConv, bool _i420, int iCC, int _rff, IScriptEnvironment* env); 47 | ~D2VSource() {} 48 | PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); 49 | bool __stdcall GetParity(int n); 50 | void __stdcall GetAudio(void* buf, int64_t start, int64_t count, IScriptEnvironment* env) {}; 51 | const VideoInfo& __stdcall GetVideoInfo() { return vi; } 52 | int __stdcall SetCacheHints(int hints, int) { return hints == CACHE_GET_MTMODE ? MT_SERIALIZED : 0; }; 53 | static AVSValue __cdecl create(AVSValue args, void*, IScriptEnvironment* env); 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/MPEG2Decoder.h: -------------------------------------------------------------------------------- 1 | #ifndef MPEG2DECODER_H 2 | #define MPEG2DECODER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "yv12pict.h" 12 | #ifndef _WIN32 13 | #include "win_import_min.h" 14 | #else 15 | #include 16 | #endif 17 | 18 | 19 | /* code definition */ 20 | enum { 21 | PICTURE_START_CODE = 0x100, 22 | SLICE_START_CODE_MIN = 0x101, 23 | SLICE_START_CODE_MAX = 0x1AF, 24 | USER_DATA_START_CODE = 0x1B2, 25 | SEQUENCE_HEADER_CODE = 0x1B3, 26 | EXTENSION_START_CODE = 0x1B5, 27 | SEQUENCE_END_CODE = 0x1B7, 28 | GROUP_START_CODE = 0x1B8, 29 | 30 | SYSTEM_END_CODE = 0x1B9, 31 | PACK_START_CODE = 0x1BA, 32 | SYSTEM_START_CODE = 0x1BB, 33 | 
PRIVATE_STREAM_1 = 0x1BD, 34 | VIDEO_ELEMENTARY_STREAM = 0x1E0, 35 | }; 36 | 37 | /* extension start code IDs */ 38 | enum { 39 | SEQUENCE_EXTENSION_ID = 1, 40 | SEQUENCE_DISPLAY_EXTENSION_ID = 2, 41 | QUANT_MATRIX_EXTENSION_ID = 3, 42 | COPYRIGHT_EXTENSION_ID = 4, 43 | PICTURE_DISPLAY_EXTENSION_ID = 7, 44 | PICTURE_CODING_EXTENSION_ID = 8, 45 | }; 46 | 47 | enum { 48 | ZIG_ZAG = 0, 49 | MB_WEIGHT = 32, 50 | MB_CLASS4 = 64, 51 | }; 52 | 53 | enum { 54 | I_TYPE = 1, 55 | P_TYPE = 2, 56 | B_TYPE = 3, 57 | D_TYPE = 4, 58 | }; 59 | 60 | enum { 61 | TOP_FIELD = 1, 62 | BOTTOM_FIELD = 2, 63 | FRAME_PICTURE = 3, 64 | }; 65 | 66 | enum { 67 | MC_FIELD = 1, 68 | MC_FRAME = 2, 69 | MC_16X8 = 2, 70 | MC_DMV = 3, 71 | }; 72 | 73 | enum { 74 | MV_FIELD, 75 | MV_FRAME, 76 | }; 77 | 78 | enum { 79 | CHROMA420 = 1, 80 | CHROMA422 = 2, 81 | CHROMA444 = 3, 82 | }; 83 | 84 | 85 | 86 | 87 | enum { 88 | IDCT_AUTO = 0, 89 | IDCT_AP922_INT = 3, 90 | IDCT_LLM_FLOAT = 4, 91 | IDCT_REF = 5, 92 | }; 93 | 94 | enum { 95 | FO_NONE = 0, 96 | FO_FILM = 1, 97 | FO_RAW = 2, 98 | }; 99 | 100 | enum { 101 | IS_NOT_MPEG = 0, 102 | IS_MPEG1, 103 | IS_MPEG2, 104 | }; 105 | 106 | // Fault_Flag values 107 | #define OUT_OF_BITS 11 108 | 109 | 110 | struct GOPLIST { 111 | uint32_t number; 112 | int file; 113 | int64_t position; 114 | uint32_t I_count; 115 | int closed; 116 | int progressive; 117 | int matrix; 118 | GOPLIST(int _film, int _matrix, int _file, int64_t pos, int ic, uint32_t type) 119 | { 120 | number = _film; 121 | matrix = (_matrix < 0 || _matrix > 7) ? 3 : _matrix; // 3:reserved 122 | file = _file; 123 | position = pos; 124 | I_count = ic; 125 | closed = !!(type & 0x0400); 126 | progressive = !!(type & 0x0200); 127 | } 128 | }; 129 | 130 | struct FRAMELIST { 131 | uint32_t top; 132 | uint32_t bottom; 133 | uint8_t pf; 134 | uint8_t pct; 135 | uint8_t type; // Valid only for FO_RAW. Records the TFF/RFF flags. 
136 | }; 137 | 138 | 139 | constexpr size_t BUFFER_SIZE = 128 * 1024; // 128KiB 140 | 141 | 142 | class CMPEG2Decoder 143 | { 144 | //int moderate_h, moderate_v, pp_mode; 145 | 146 | // getbit.cpp 147 | void Initialize_Buffer(void); 148 | void Fill_Buffer(void); 149 | void Next_Transport_Packet(void); 150 | void Next_PVA_Packet(void); 151 | void Next_Packet(void); 152 | void Next_File(void); 153 | 154 | uint32_t Show_Bits(uint32_t N); 155 | uint32_t Get_Bits(uint32_t N); 156 | void Flush_Buffer(uint32_t N); 157 | void Fill_Next(void); 158 | uint32_t Get_Byte(void); 159 | uint32_t Get_Short(void); 160 | void Next_Start_Code(void); 161 | 162 | std::vector ReadBuffer; 163 | uint8_t* Rdbfr, * Rdptr, * Rdmax; 164 | uint32_t CurrentBfr, NextBfr, BitsLeft, Val, Read; 165 | uint8_t* buffer_invalid; 166 | 167 | // gethdr.cpp 168 | int Get_Hdr(void); 169 | void Sequence_Header(void); 170 | int slice_header(void); 171 | void group_of_pictures_header(void); 172 | void picture_header(void); 173 | void sequence_extension(void); 174 | void sequence_display_extension(void); 175 | void quant_matrix_extension(void); 176 | void picture_display_extension(void); 177 | void picture_coding_extension(void); 178 | void copyright_extension(void); 179 | int extra_bit_information(void); 180 | void extension_and_user_data(void); 181 | 182 | // getpic.cpp 183 | void Decode_Picture(YV12PICT& dst); 184 | void update_picture_buffers(void); 185 | void picture_data(void); 186 | void slice(int MBAmax, uint32_t code); 187 | void macroblock_modes(int& pmacroblock_type, int& pmotion_type, 188 | int& pmotion_vector_count, int& pmv_format, int& pdmv, int& pmvscale, int& pdct_type); 189 | void clear_block(int count); 190 | void add_block(int count, int bx, int by, int dct_type, int addflag); 191 | void motion_compensation(int MBA, int macroblock_type, int motion_type, 192 | int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2], int dct_type); 193 | void skipped_macroblock(int 
dc_dct_pred[3], int PMV[2][2][2], 194 | int& motion_type, int motion_vertical_field_select[2][2], int& macroblock_type); 195 | void decode_macroblock(int& macroblock_type, int& motion_type, int& dct_type, 196 | int PMV[2][2][2], int dc_dct_pred[3], int motion_vertical_field_select[2][2], int dmvector[2]); 197 | void decode_mpeg1_intra_block(int comp, int dc_dct_pred[]); 198 | void decode_mpeg1_non_intra_block(int comp); 199 | void Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[]); 200 | void Decode_MPEG2_Non_Intra_Block(int comp); 201 | 202 | int Get_macroblock_type(void); 203 | int Get_I_macroblock_type(void); 204 | int Get_P_macroblock_type(void); 205 | int Get_B_macroblock_type(void); 206 | int Get_D_macroblock_type(void); 207 | int Get_coded_block_pattern(void); 208 | int Get_macroblock_address_increment(void); 209 | int Get_Luma_DC_dct_diff(void); 210 | int Get_Chroma_DC_dct_diff(void); 211 | 212 | void form_predictions(int bx, int by, int macroblock_type, int motion_type, 213 | int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2]); 214 | 215 | void form_prediction(uint8_t* src[], int sfield, uint8_t* dst[], int dfield, 216 | int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag); 217 | 218 | // motion.cpp 219 | void motion_vectors(int PMV[2][2][2], int dmvector[2], int motion_vertical_field_select[2][2], 220 | int s, int motion_vector_count, int mv_format, 221 | int h_r_size, int v_r_size, int dmv, int mvscale); 222 | void Dual_Prime_Arithmetic(int DMV[][2], int* dmvector, int mvx, int mvy); 223 | 224 | void motion_vector(int* PMV, int* dmvector, int h_r_size, int v_r_size, 225 | int dmv, int mvscale, int full_pel_vector); 226 | void decode_motion_vector(int* pred, int r_size, int motion_code, 227 | int motion_residualesidual, int full_pel_vector); 228 | int Get_motion_code(void); 229 | int Get_dmvector(void); 230 | 231 | // store.cpp 232 | void assembleFrame(uint8_t* src[], int pf, YV12PICT& dst); 233 | 234 | 
// decoder operation control flags 235 | int Fault_Flag; 236 | int File_Flag; 237 | void(*idctFunction)(int16_t* block); 238 | void(*prefetchTables)(); 239 | int SystemStream_Flag; // 0 = none, 1=program, 2=Transport 3=PVA 240 | 241 | int TransportPacketSize; 242 | int MPEG2_Transport_AudioPID; // used only for transport streams 243 | int MPEG2_Transport_VideoPID; // used only for transport streams 244 | int MPEG2_Transport_PCRPID; // used only for transport streams 245 | 246 | int lfsr0, lfsr1; 247 | 248 | std::vector Infile; 249 | int closed_gop; 250 | 251 | int intra_quantizer_matrix[64]; 252 | int non_intra_quantizer_matrix[64]; 253 | int chroma_intra_quantizer_matrix[64]; 254 | int chroma_non_intra_quantizer_matrix[64]; 255 | 256 | int load_intra_quantizer_matrix; 257 | int load_non_intra_quantizer_matrix; 258 | int load_chroma_intra_quantizer_matrix; 259 | int load_chroma_non_intra_quantizer_matrix; 260 | 261 | int q_scale_type; 262 | int alternate_scan; 263 | int quantizer_scale; 264 | 265 | short* block[8], * p_block[8]; 266 | int pf_backward, pf_forward, pf_current; 267 | 268 | // global values 269 | uint8_t* backward_reference_frame[3], * forward_reference_frame[3]; 270 | uint8_t* auxframe[3], * current_frame[3]; 271 | //uint8_t *u422, *v422; 272 | YV12PICT* auxFrame1; 273 | YV12PICT* auxFrame2; 274 | YV12PICT* saved_active; 275 | YV12PICT* saved_store; 276 | 277 | enum { 278 | ELEMENTARY_STREAM = 0, 279 | MPEG1_PROGRAM_STREAM, 280 | MPEG2_PROGRAM_STREAM, 281 | }; 282 | 283 | int Coded_Picture_Width, Coded_Picture_Height, Chroma_Width, Chroma_Height; 284 | int block_count, Second_Field; 285 | 286 | /* ISO/IEC 13818-2 section 6.2.2.3: sequence_extension() */ 287 | int progressive_sequence; 288 | int chroma_format; 289 | int matrix_coefficients; 290 | 291 | /* ISO/IEC 13818-2 section 6.2.3: picture_header() */ 292 | int picture_coding_type; 293 | int temporal_reference; 294 | int full_pel_forward_vector; 295 | int forward_f_code; 296 | int 
full_pel_backward_vector; 297 | int backward_f_code; 298 | 299 | /* ISO/IEC 13818-2 section 6.2.3.1: picture_coding_extension() header */ 300 | int f_code[2][2]; 301 | int picture_structure; 302 | int frame_pred_frame_dct; 303 | int progressive_frame; 304 | int concealment_motion_vectors; 305 | int intra_dc_precision; 306 | int top_field_first; 307 | int repeat_first_field; 308 | int intra_vlc_format; 309 | 310 | void copy_all(YV12PICT& src, YV12PICT& dst); 311 | void copy_top(YV12PICT& src, YV12PICT& dst); 312 | void copy_bottom(YV12PICT& src, YV12PICT& dst); 313 | 314 | int* QP, * backwardQP, * auxQP; 315 | uint32_t prev_frame; 316 | 317 | std::vector DirectAccess; 318 | 319 | void create_file_lists(FILE* d2vf, const char* path, char* buf); 320 | void setIDCT(int idct); 321 | void create_gop_and_frame_lists(FILE* d2vf, char* buf); 322 | void set_clip_properties(); 323 | void allocate_buffers(); 324 | void search_bad_starting(); 325 | void destroy(); 326 | 327 | public: 328 | CMPEG2Decoder(FILE* file, const char* path, int _idct, int icc, int upconv, int info, bool showq, bool _i420, int _rff, int _cpu_flags); 329 | ~CMPEG2Decoder() { destroy(); } 330 | void Decode(uint32_t frame, YV12PICT& dst); 331 | 332 | std::vector Infilename; 333 | uint32_t BadStartingFrames; 334 | 335 | int Clip_Width, Clip_Height; 336 | int D2V_Width, D2V_Height; 337 | int Clip_Top, Clip_Bottom, Clip_Left, Clip_Right; 338 | char Aspect_Ratio[20]; 339 | 340 | std::vector GOPList; 341 | std::vector FrameList; 342 | 343 | int mpeg_type; 344 | int FO_Flag; 345 | int Field_Order; 346 | bool HaveRFFs; 347 | 348 | int VF_FrameRate; 349 | uint32_t VF_FrameRate_Num; 350 | uint32_t VF_FrameRate_Den; 351 | 352 | int horizontal_size, vertical_size, mb_width, mb_height, aspect_ratio_information; 353 | //int iPP; 354 | int iCC; 355 | bool showQ; 356 | int upConv; 357 | bool i420; 358 | 359 | // info option stuff 360 | int info; 361 | int minquant, maxquant, avgquant; 362 | bool has_prop = false; 363 | 
364 | // Luminance Code 365 | int lumGamma; 366 | int lumOffset; 367 | 368 | int getChromaFormat() { return chroma_format; } 369 | int getChromaWidth() { return Chroma_Width; } 370 | int getLumaWidth() { return Coded_Picture_Width; } 371 | int getLumaHeight() { return Coded_Picture_Height; } 372 | int cpu_flags; 373 | }; 374 | 375 | 376 | __forceinline uint32_t CMPEG2Decoder::Show_Bits(uint32_t N) 377 | { 378 | if (N <= BitsLeft) { 379 | return (CurrentBfr << (32 - BitsLeft)) >> (32 - N);; 380 | } 381 | else { 382 | N -= BitsLeft; 383 | int shift = 32 - BitsLeft; 384 | //return (((CurrentBfr << shift) >> shift) << N) + (NextBfr >> (32 - N));; 385 | return ((CurrentBfr << shift) >> (shift - N)) | (NextBfr >> (32 - N)); 386 | } 387 | } 388 | 389 | __forceinline uint32_t CMPEG2Decoder::Get_Bits(uint32_t N) 390 | { 391 | if (N < BitsLeft) { 392 | Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - N); 393 | BitsLeft -= N; 394 | return Val; 395 | } 396 | else { 397 | N -= BitsLeft; 398 | int shift = 32 - BitsLeft; 399 | Val = (CurrentBfr << shift) >> shift; 400 | if (N != 0) 401 | Val = (Val << N) | (NextBfr >> (32 - N)); 402 | CurrentBfr = NextBfr; 403 | BitsLeft = 32 - N; 404 | Fill_Next(); 405 | return Val; 406 | } 407 | } 408 | 409 | 410 | __forceinline void CMPEG2Decoder::Flush_Buffer(uint32_t N) 411 | { 412 | if (N < BitsLeft) { 413 | BitsLeft -= N; 414 | } 415 | else { 416 | CurrentBfr = NextBfr; 417 | BitsLeft += 32 - N; 418 | Fill_Next(); 419 | } 420 | } 421 | 422 | 423 | __forceinline void CMPEG2Decoder::Fill_Next() 424 | { 425 | if (SystemStream_Flag && Rdptr > Rdmax - 4) { 426 | if (Rdptr >= Rdmax) 427 | Next_Packet(); 428 | NextBfr = Get_Byte() << 24; 429 | 430 | if (Rdptr >= Rdmax) 431 | Next_Packet(); 432 | NextBfr |= Get_Byte() << 16; 433 | 434 | if (Rdptr >= Rdmax) 435 | Next_Packet(); 436 | NextBfr |= Get_Byte() << 8; 437 | 438 | if (Rdptr >= Rdmax) 439 | Next_Packet(); 440 | NextBfr |= Get_Byte(); 441 | } 442 | else if (Rdptr < Rdbfr + BUFFER_SIZE - 3) { 
443 | //NextBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3); 444 | NextBfr = _byteswap_ulong(*reinterpret_cast(Rdptr)); 445 | Rdptr += 4; 446 | } 447 | else { 448 | switch (Rdbfr + BUFFER_SIZE - Rdptr) { 449 | case 1: 450 | NextBfr = *Rdptr++ << 24; 451 | Fill_Buffer(); 452 | NextBfr |= (Rdptr[0] << 16) | (Rdptr[1] << 8) | Rdptr[2]; 453 | Rdptr += 3; 454 | break; 455 | case 2: 456 | NextBfr = (Rdptr[0] << 24) | (Rdptr[1] << 16); 457 | Rdptr += 2; 458 | Fill_Buffer(); 459 | NextBfr |= (Rdptr[0] << 8) | Rdptr[1]; 460 | Rdptr += 2; 461 | break; 462 | case 3: 463 | NextBfr = (Rdptr[0] << 24) | (Rdptr[1] << 16) | (Rdptr[2] << 8); 464 | Rdptr += 3; 465 | Fill_Buffer(); 466 | NextBfr |= *Rdptr++; 467 | break; 468 | default: 469 | Fill_Buffer(); 470 | NextBfr = _byteswap_ulong(*reinterpret_cast(Rdptr)); 471 | Rdptr += 4; 472 | } 473 | } 474 | } 475 | 476 | 477 | __forceinline void CMPEG2Decoder::Fill_Buffer() 478 | { 479 | Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE); 480 | 481 | if (Read < BUFFER_SIZE) 482 | Next_File(); 483 | 484 | Rdptr = Rdbfr; 485 | 486 | if (SystemStream_Flag) 487 | Rdmax -= BUFFER_SIZE; 488 | } 489 | 490 | 491 | __forceinline uint32_t CMPEG2Decoder::Get_Byte() 492 | { 493 | while (Rdptr >= (Rdbfr + BUFFER_SIZE)) { 494 | Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE); 495 | if (Read < BUFFER_SIZE) 496 | Next_File(); 497 | Rdptr -= BUFFER_SIZE; 498 | Rdmax -= BUFFER_SIZE; 499 | } 500 | 501 | return *Rdptr++; 502 | } 503 | 504 | __forceinline uint32_t CMPEG2Decoder::Get_Short() 505 | { 506 | uint32_t i = Get_Byte(); 507 | return (i << 8) + Get_Byte(); 508 | } 509 | 510 | 511 | __forceinline void CMPEG2Decoder::Next_Start_Code() 512 | { 513 | // This is contrary to the spec but is more resilient to some 514 | // stream corruption scenarios. 
515 | BitsLeft = (BitsLeft + 7) & ~7; 516 | 517 | do { 518 | uint32_t show = Show_Bits(24); 519 | if (Fault_Flag == OUT_OF_BITS) 520 | return; 521 | if (show == 0x000001) 522 | return; 523 | Flush_Buffer(8); 524 | } while (true); 525 | } 526 | 527 | #endif 528 | 529 | -------------------------------------------------------------------------------- /src/color_convert.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | MPEG2Dec's colorspace convertions Copyright (C) Chia-chen Kuo - April 2001 4 | 5 | */ 6 | 7 | // modified to be pitch != width friendly 8 | // tritical - May 16, 2005 9 | 10 | // lots of bug fixes and new isse 422->444 routine 11 | // tritical - August 18, 2005 12 | 13 | // rewite all code to sse2 intrinsic 14 | // OKA Motofumi - August 21, 2016 15 | 16 | 17 | #include 18 | #include 19 | #include "color_convert.h" 20 | #ifndef _WIN32 21 | #include "win_import_min.h" 22 | #endif 23 | 24 | 25 | #if 0 26 | // C implementation 27 | void conv420to422I_c(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, int width, int height) 28 | { 29 | const uint8_t* s0 = src; 30 | const uint8_t* s1 = src + src_pitch; 31 | uint8_t* d0 = dst; 32 | uint8_t* d1 = dst + dst_pitch; 33 | 34 | width /= 2; 35 | src_pitch *= 2; 36 | dst_pitch *= 2; 37 | 38 | std::memcpy(d0, s0, width); 39 | std::memcpy(d1, s1, width); 40 | 41 | d0 += dst_pitch; 42 | d1 += dst_pitch; 43 | 44 | for (int y = 2; y < height - 2; y += 4) { 45 | const uint8_t* s2 = s0 + src_pitch; 46 | const uint8_t* s3 = s1 + src_pitch; 47 | uint8_t* d2 = d0 + dst_pitch; 48 | uint8_t* d3 = d1 + dst_pitch; 49 | 50 | for (int x = 0; x < width; ++x) { 51 | d0[x] = (s0[x] * 5 + s2[x] * 3 + 4) / 8; 52 | d1[x] = (s1[x] * 7 + s3[x] * 1 + 4) / 8; 53 | d2[x] = (s0[x] * 1 + s2[x] * 7 + 4) / 8; 54 | d3[x] = (s1[x] * 3 + s3[x] * 5 + 4) / 8; 55 | } 56 | s0 = s2; 57 | s1 = s3; 58 | d0 = d2 + dst_pitch; 59 | d1 = d3 + dst_pitch; 60 | } 61 | 62 | std::memcpy(d0, s0, width); 
// (x + 7*y + 4) >> 3 on eight unsigned 16-bit lanes.
static __forceinline __m128i
avg_weight_1_7(const __m128i& x, const __m128i& y, const __m128i& four)
{
    const __m128i y7 = _mm_subs_epu16(_mm_slli_epi16(y, 3), y);   // 8y - y
    const __m128i sum = _mm_adds_epu16(_mm_adds_epu16(y7, x), four);
    return _mm_srli_epi16(sum, 3);
}

// (3*x + 5*y + 4) >> 3 on eight unsigned 16-bit lanes.
static __forceinline __m128i
avg_weight_3_5(const __m128i& x, const __m128i& y, const __m128i& four)
{
    const __m128i x3 = _mm_adds_epu16(_mm_slli_epi16(x, 1), x);   // 2x + x
    const __m128i y5 = _mm_adds_epu16(_mm_slli_epi16(y, 2), y);   // 4y + y
    const __m128i sum = _mm_adds_epu16(_mm_adds_epu16(x3, y5), four);
    return _mm_srli_epi16(sum, 3);
}


// Vertical 4:2:0 -> 4:2:2 chroma upsampling for interlaced material (SSE2).
//
// Source rows are treated as two interleaved fields (even rows / odd rows);
// each field is interpolated only from rows of the same field using the
// 5:3 / 1:7 / 7:1 / 3:5 weights below. The first and last row of each field
// are copied unchanged.
//
//   width      twice the number of bytes processed per row
//   height     number of destination rows
//   src_pitch  byte stride of src
//   dst_pitch  byte stride of dst
//
// Rows are processed in groups of 8 bytes, so up to 7 bytes beyond width / 2
// may be read and written per row.
void conv420to422I(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, int width, int height)
{
    const int row_bytes = width / 2;
    const int src_step = src_pitch * 2;   // next row of the same field
    const int dst_step = dst_pitch * 2;

    const uint8_t* top_even = src;
    const uint8_t* top_odd = src + src_pitch;
    uint8_t* out_even = dst;
    uint8_t* out_odd = dst + dst_pitch;

    // First row of each field is a straight copy.
    std::memcpy(out_even, top_even, row_bytes);
    std::memcpy(out_odd, top_odd, row_bytes);
    out_even += dst_step;
    out_odd += dst_step;

    const __m128i zero = _mm_setzero_si128();
    const __m128i four = _mm_set1_epi16(0x0004);

    // Load 8 bytes and widen them to 16-bit lanes.
    const auto load8 = [&zero](const uint8_t* p) {
        return _mm_unpacklo_epi8(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(p)), zero);
    };
    // Narrow eight 16-bit lanes with unsigned saturation and store 8 bytes.
    const auto store8 = [&zero](uint8_t* p, const __m128i& v) {
        _mm_storel_epi64(reinterpret_cast<__m128i*>(p), _mm_packus_epi16(v, zero));
    };

    for (int y = 2; y < height - 2; y += 4) {
        const uint8_t* bot_even = top_even + src_step;
        const uint8_t* bot_odd = top_odd + src_step;
        uint8_t* out_even2 = out_even + dst_step;
        uint8_t* out_odd2 = out_odd + dst_step;

        for (int x = 0; x < row_bytes; x += 8) {
            // Even field: 5:3 and 1:7 blends of the two surrounding rows.
            __m128i upper = load8(top_even + x);
            __m128i lower = load8(bot_even + x);
            store8(out_even + x, avg_weight_3_5(lower, upper, four));
            store8(out_even2 + x, avg_weight_1_7(upper, lower, four));

            // Odd field: 7:1 and 3:5 blends.
            upper = load8(top_odd + x);
            lower = load8(bot_odd + x);
            store8(out_odd + x, avg_weight_1_7(lower, upper, four));
            store8(out_odd2 + x, avg_weight_3_5(upper, lower, four));
        }

        top_even = bot_even;
        top_odd = bot_odd;
        out_even = out_even2 + dst_step;
        out_odd = out_odd2 + dst_step;
    }

    // Last row of each field is a straight copy.
    std::memcpy(out_even, top_even, row_bytes);
    std::memcpy(out_odd, top_odd, row_bytes);
}


// Vertical 4:2:0 -> 4:2:2 chroma upsampling for progressive material (SSE2).
//
// Every source row produces two destination rows. The first and last
// destination rows are copies of the first and last source rows; all others
// are a roughly 3:1 blend of the nearer and the farther source row, computed
// as avg(avg(far - 1, near), near) with saturating subtract and rounding
// byte averages.
//
//   width      twice the number of bytes processed per row
//   height     number of destination rows (height / 2 source rows are read)
//
// Source rows are read with unaligned loads; destination rows are written
// with aligned 16-byte stores, so dst and dst_pitch must keep every row
// 16-byte aligned. Rows are processed in groups of 16 bytes.
void conv420to422P(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int row_bytes = width / 2;
    const int src_rows = height / 2;
    const int dst_step = dst_pitch * 2;

    const __m128i one = _mm_set1_epi8(0x01);

    const auto load = [](const uint8_t* p) {
        return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    };
    const auto store = [](uint8_t* p, const __m128i& v) {
        _mm_store_si128(reinterpret_cast<__m128i*>(p), v);
    };
    // Blend weighted towards near_row.
    const auto blend = [&one](const __m128i& far_row, const __m128i& near_row) {
        const __m128i biased = _mm_subs_epu8(far_row, one);
        return _mm_avg_epu8(_mm_avg_epu8(biased, near_row), near_row);
    };

    const uint8_t* prev = src;
    const uint8_t* cur = src + src_pitch;
    uint8_t* out_a = dst;
    uint8_t* out_b = dst + dst_pitch;

    // Top pair: first row copied, second row leans on the first source row.
    for (int x = 0; x < row_bytes; x += 16) {
        const __m128i r0 = load(prev + x);
        const __m128i r1 = load(cur + x);
        const __m128i mixed = blend(r1, r0);
        store(out_a + x, r0);
        store(out_b + x, mixed);
    }
    out_a += dst_step;
    out_b += dst_step;

    // Interior pairs: both rows lean on `cur`, mixed with the row above and
    // the row below respectively.
    for (int y = 0; y < src_rows - 2; ++y) {
        const uint8_t* next = cur + src_pitch;

        for (int x = 0; x < row_bytes; x += 16) {
            const __m128i above = load(prev + x);
            const __m128i centre = load(cur + x);
            const __m128i below = load(next + x);
            const __m128i up_mix = blend(above, centre);
            const __m128i down_mix = blend(below, centre);
            store(out_a + x, up_mix);
            store(out_b + x, down_mix);
        }

        prev = cur;
        cur = next;
        out_a += dst_step;
        out_b += dst_step;
    }

    // Bottom pair: blended row followed by a copy of the last source row.
    for (int x = 0; x < row_bytes; x += 16) {
        const __m128i above = load(prev + x);
        const __m128i last = load(cur + x);
        const __m128i mixed = blend(above, last);
        store(out_a + x, mixed);
        store(out_b + x, last);
    }
}
// Horizontal 4:2:2 -> 4:4:4 chroma upsampling (SSE2).
//
// For every source sample s[i] two destination samples are produced:
// d[2i] = s[i] and d[2i+1] = rounded average of s[i] and s[i+1]. After each
// row, destination sample width - 1 is overwritten with a copy of sample
// width - 2.
//
//   width   destination row width in samples (width / 2 source samples
//           are consumed per row)
//   height  number of rows
//
// Uses aligned 16-byte loads from src + x and aligned stores to dst, so both
// row starts must be 16-byte aligned. The unaligned load from src + x + 1
// reads one byte beyond each 16-byte group, and rows are processed in groups
// of 16 source bytes.
void conv422to444(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int last = width - 1;
    const int half = width / 2;

    for (int row = 0; row < height; ++row, src += src_pitch, dst += dst_pitch) {
        for (int x = 0; x < half; x += 16) {
            const __m128i cur = _mm_load_si128(reinterpret_cast<const __m128i*>(src + x));
            const __m128i nxt = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + x + 1));
            const __m128i mid = _mm_avg_epu8(nxt, cur);

            // Interleave original and interpolated samples.
            const __m128i lo = _mm_unpacklo_epi8(cur, mid);
            const __m128i hi = _mm_unpackhi_epi8(cur, mid);

            __m128i* out = reinterpret_cast<__m128i*>(dst + static_cast<int64_t>(2) * x);
            _mm_store_si128(out, lo);
            _mm_store_si128(out + 1, hi);
        }
        // The last interpolated sample has no right neighbour in this row.
        dst[last] = dst[last - 1];
    }
}
| RGB_CRV = 0x0000331300003313; 317 | } 318 | else if (matrix == 4) // FCC 319 | { 320 | RGB_CBU = 0x000040D8000040D8; 321 | RGB_CGX = 0xF3E9E611F3E9E611; 322 | RGB_CRV = 0x0000330000003300; 323 | } 324 | else // ITU-R Rec.709 (1990) -- BT.709 325 | { 326 | RGB_CBU = 0x0000439A0000439A; 327 | RGB_CGX = 0xF92CEEF1F92CEEF1; 328 | RGB_CRV = 0x0000395F0000395F; 329 | } 330 | } 331 | else 332 | { 333 | RGB_Scale = 0x1000200010002000; 334 | RGB_Offset = 0x0000000000000000; 335 | if (matrix == 7) // SMPTE 240M (1987) 336 | { 337 | RGB_CBU = 0x00003A6F00003A6F; 338 | RGB_CGX = 0xF8C0F0BFF8C0F0BF; 339 | RGB_CRV = 0x0000326E0000326E; 340 | } 341 | else if (matrix == 6 || matrix == 5) // SMPTE 170M/ITU-R BT.470-2 -- BT.601 342 | { 343 | RGB_CBU = 0x000038B4000038B4; 344 | RGB_CGX = 0xF4FDE926F4FDE926; 345 | RGB_CRV = 0x00002CDD00002CDD; 346 | } 347 | else if (matrix == 4) // FCC 348 | { 349 | RGB_CBU = 0x000038F6000038F6; 350 | RGB_CGX = 0xF561E938F561E938; 351 | RGB_CRV = 0x00002CCD00002CCD; 352 | } 353 | else // ITU-R Rec.709 (1990) -- BT.709 354 | { 355 | RGB_CBU = 0x00003B6200003B62; 356 | RGB_CGX = 0xFA00F104FA00F104; 357 | RGB_CRV = 0x0000326600003266; 358 | } 359 | } 360 | 361 | __asm 362 | { 363 | mov eax, [py] // eax = py 364 | mov ebx, [pu] // ebx = pu 365 | mov ecx, [pv] // ecx = pv 366 | mov edx, [dst] // edx = dst 367 | mov edi, width // edi = width 368 | xor esi, esi 369 | pxor mm0, mm0 370 | 371 | convRGB24 : 372 | movd mm1, [eax + esi] 373 | movd mm3, [ebx + esi] 374 | punpcklbw mm1, mm0 375 | punpcklbw mm3, mm0 376 | movd mm5, [ecx + esi] 377 | punpcklbw mm5, mm0 378 | movq mm7, [mmmask_0128] 379 | psubw mm3, mm7 380 | psubw mm5, mm7 381 | 382 | psubw mm1, RGB_Offset 383 | movq mm2, mm1 384 | movq mm7, [mmmask_0001] 385 | punpcklwd mm1, mm7 386 | punpckhwd mm2, mm7 387 | movq mm7, RGB_Scale 388 | pmaddwd mm1, mm7 389 | pmaddwd mm2, mm7 390 | 391 | movq mm4, mm3 392 | punpcklwd mm3, mm0 393 | punpckhwd mm4, mm0 394 | movq mm7, RGB_CBU 395 | pmaddwd mm3, mm7 
396 | pmaddwd mm4, mm7 397 | paddd mm3, mm1 398 | paddd mm4, mm2 399 | psrad mm3, 13 400 | psrad mm4, 13 401 | packuswb mm3, mm0 402 | packuswb mm4, mm0 403 | 404 | movq mm6, mm5 405 | punpcklwd mm5, mm0 406 | punpckhwd mm6, mm0 407 | movq mm7, RGB_CRV 408 | pmaddwd mm5, mm7 409 | pmaddwd mm6, mm7 410 | paddd mm5, mm1 411 | paddd mm6, mm2 412 | psrad mm5, 13 413 | psrad mm6, 13 414 | packuswb mm5, mm0 415 | packuswb mm6, mm0 416 | 417 | punpcklbw mm3, mm5 418 | punpcklbw mm4, mm6 419 | movq mm5, mm3 420 | movq mm6, mm4 421 | psrlq mm5, 16 422 | psrlq mm6, 16 423 | por mm3, mm5 424 | por mm4, mm6 425 | 426 | movd mm5, [ebx + esi] 427 | movd mm6, [ecx + esi] 428 | punpcklbw mm5, mm0 429 | punpcklbw mm6, mm0 430 | movq mm7, [mmmask_0128] 431 | psubw mm5, mm7 432 | psubw mm6, mm7 433 | 434 | movq mm7, mm6 435 | punpcklwd mm6, mm5 436 | punpckhwd mm7, mm5 437 | movq mm5, RGB_CGX 438 | pmaddwd mm6, mm5 439 | pmaddwd mm7, mm5 440 | paddd mm6, mm1 441 | paddd mm7, mm2 442 | 443 | psrad mm6, 13 444 | psrad mm7, 13 445 | packuswb mm6, mm0 446 | packuswb mm7, mm0 447 | 448 | punpcklbw mm3, mm6 449 | punpcklbw mm4, mm7 450 | 451 | movq mm1, mm3 452 | movq mm5, mm4 453 | movq mm6, mm4 454 | 455 | psrlq mm1, 32 456 | psllq mm1, 24 457 | por mm1, mm3 458 | 459 | psrlq mm3, 40 460 | psllq mm6, 16 461 | por mm3, mm6 462 | movd[edx], mm1 463 | 464 | psrld mm4, 16 465 | psrlq mm5, 24 466 | por mm5, mm4 467 | movd[edx + 4], mm3 468 | 469 | add edx, 0x0c 470 | add esi, 0x04 471 | cmp esi, edi 472 | movd[edx - 4], mm5 473 | 474 | jl convRGB24 475 | 476 | add eax, src_pitchY 477 | add ebx, src_pitchUV 478 | add ecx, src_pitchUV 479 | add edx, dst_modulo 480 | xor esi, esi 481 | dec height 482 | jnz convRGB24 483 | 484 | emms 485 | } 486 | } 487 | 488 | 489 | void conv422PtoYUY2(const uint8_t* py, uint8_t* pu, uint8_t* pv, uint8_t* dst, 490 | int pitch1Y, int pitch1UV, int pitch2, int width, int height) 491 | { 492 | width /= 2; 493 | 494 | for (int y = 0; y < height; ++y) { 495 | for 
(int x = 0; x < width; x += 8) { 496 | __m128i u = _mm_loadl_epi64(reinterpret_cast(pu + x)); 497 | __m128i v = _mm_loadl_epi64(reinterpret_cast(pv + x)); 498 | __m128i uv = _mm_unpacklo_epi8(u, v); 499 | __m128i y = _mm_load_si128(reinterpret_cast(py + 2 * x)); 500 | __m128i yuyv0 = _mm_unpacklo_epi8(y, uv); 501 | __m128i yuyv1 = _mm_unpackhi_epi8(y, uv); 502 | _mm_stream_si128(reinterpret_cast<__m128i*>(dst + 4 * x), yuyv0); 503 | _mm_stream_si128(reinterpret_cast<__m128i*>(dst + 4 * x + 16), yuyv1); 504 | } 505 | py += pitch1Y; 506 | pu += pitch1UV; 507 | pv += pitch1UV; 508 | dst += pitch2; 509 | } 510 | } 511 | 512 | 513 | void convYUY2to422P(const uint8_t* src, uint8_t* py, uint8_t* pu, uint8_t* pv, 514 | int pitch1, int pitch2y, int pitch2uv, int width, int height) 515 | { 516 | width /= 2; 517 | 518 | for (int y = 0; y < height; ++y) { 519 | for (int x = 0; x < width; x += 8) { 520 | __m128i s0 = _mm_load_si128(reinterpret_cast(src + 4 * x)); 521 | __m128i s1 = _mm_load_si128(reinterpret_cast(src + 4 * x + 16)); 522 | 523 | __m128i s2 = _mm_unpacklo_epi8(s0, s1); 524 | __m128i s3 = _mm_unpackhi_epi8(s0, s1); 525 | 526 | s0 = _mm_unpacklo_epi8(s2, s3); 527 | s1 = _mm_unpackhi_epi8(s2, s3); 528 | 529 | s2 = _mm_unpacklo_epi8(s0, s1); 530 | s3 = _mm_unpackhi_epi8(s0, s1); 531 | 532 | s0 = _mm_unpacklo_epi8(s2, s3); 533 | s2 = _mm_srli_si128(s2, 8); 534 | s3 = _mm_srli_si128(s3, 8); 535 | _mm_store_si128(reinterpret_cast<__m128i*>(py + 2 * x), s0); 536 | _mm_storel_epi64(reinterpret_cast<__m128i*>(pu + x), s2); 537 | _mm_storel_epi64(reinterpret_cast<__m128i*>(pv + x), s3); 538 | } 539 | src += pitch1; 540 | py += pitch2y; 541 | pu += pitch2uv; 542 | pv += pitch2uv; 543 | } 544 | } 545 | #endif 546 | -------------------------------------------------------------------------------- /src/color_convert.h: -------------------------------------------------------------------------------- 1 | #ifndef MPEG2DECPLUS_COLOR_CONVERT_H 2 | #define MPEG2DECPLUS_COLOR_CONVERT_H 
3 | 4 | #include 5 | 6 | void conv420to422P(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, 7 | int width, int height); 8 | 9 | void conv420to422I(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, 10 | int width, int height); 11 | 12 | void conv422to444(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, 13 | int width, int height); 14 | 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/d2vsource.rc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | VS_VERSION_INFO VERSIONINFO 4 | FILEVERSION 1,3,0,0 5 | PRODUCTVERSION 1,3,0,0 6 | FILEFLAGSMASK VS_FFI_FILEFLAGSMASK 7 | FILEFLAGS 0x0L 8 | FILEOS VOS__WINDOWS32 9 | FILETYPE VFT_DLL 10 | FILESUBTYPE VFT2_UNKNOWN 11 | BEGIN 12 | BLOCK "StringFileInfo" 13 | BEGIN 14 | BLOCK "040904E4" 15 | BEGIN 16 | VALUE "Comments", "Modified DGDecode." 17 | VALUE "FileDescription", "D2VSource for AviSynth 2.6 / AviSynth+" 18 | VALUE "FileVersion", "1.3.0" 19 | VALUE "InternalName", "D2VSource" 20 | VALUE "OriginalFilename", "D2VSource.dll" 21 | VALUE "ProductName", "D2VSource" 22 | VALUE "ProductVersion", "1.3.0" 23 | END 24 | END 25 | BLOCK "VarFileInfo" 26 | BEGIN 27 | VALUE "Translation", 0x409, 1252 28 | END 29 | END 30 | -------------------------------------------------------------------------------- /src/getbit.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Chia-chen Kuo - April 2001 3 | * 4 | * This file is part of DVD2AVI, a free MPEG-2 decoder 5 | * 6 | * DVD2AVI is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 
10 | * 11 | * DVD2AVI is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with GNU Make; see the file COPYING. If not, write to 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 19 | * 20 | */ 21 | 22 | 23 | #include "MPEG2Decoder.h" 24 | 25 | void CMPEG2Decoder::Initialize_Buffer() 26 | { 27 | Rdptr = Rdbfr + BUFFER_SIZE; 28 | Rdmax = Rdptr; 29 | buffer_invalid = (uint8_t*)(UINTPTR_MAX); 30 | 31 | if (SystemStream_Flag) 32 | { 33 | if (Rdptr >= Rdmax) 34 | Next_Packet(); 35 | CurrentBfr = *Rdptr++ << 24; 36 | 37 | if (Rdptr >= Rdmax) 38 | Next_Packet(); 39 | CurrentBfr += *Rdptr++ << 16; 40 | 41 | if (Rdptr >= Rdmax) 42 | Next_Packet(); 43 | CurrentBfr += *Rdptr++ << 8; 44 | 45 | if (Rdptr >= Rdmax) 46 | Next_Packet(); 47 | CurrentBfr += *Rdptr++; 48 | 49 | Fill_Next(); 50 | } 51 | else 52 | { 53 | Fill_Buffer(); 54 | 55 | CurrentBfr = (*Rdptr << 24) + (*(Rdptr + 1) << 16) + (*(Rdptr + 2) << 8) + *(Rdptr + 3); 56 | Rdptr += 4; 57 | 58 | Fill_Next(); 59 | } 60 | 61 | BitsLeft = 32; 62 | } 63 | 64 | 65 | struct transport_packet { 66 | // 1 byte 67 | uint8_t sync_byte; // 8 bslbf 68 | 69 | // 2 bytes 70 | uint8_t transport_error_indicator;// 1 bslbf 71 | uint8_t payload_unit_start_indicator;// 1 bslbf 72 | uint8_t transport_priority; // 1 bslbf 73 | uint16_t pid; // 13 uimsbf 74 | 75 | // 1 byte 76 | uint8_t transport_scrambling_control;// 2 bslbf 77 | uint8_t adaptation_field_control;// 2 bslbf 78 | uint8_t continuity_counter;// 4 uimsbf 79 | 80 | // VVV (only valid if adaptation_field_control != 1) 81 | // 1 byte 82 | uint8_t adaptation_field_length; // 8 uimsbf 83 | 84 | // VVV (only valid if adaptation_field_length != 0) 85 | // 1 byte 86 | uint8_t 
discontinuity_indicator; // 1 bslbf 87 | uint8_t random_access_indicator; // 1 bslbf 88 | uint8_t elementary_stream_priority_indicator; // 1 bslbf 89 | uint8_t PCR_flag; // 1 bslbf 90 | uint8_t OPCR_flag; // 1 bslbf 91 | uint8_t splicing_point_flag; // 1 bslbf 92 | uint8_t transport_private_data_flag; // 1 bslbf 93 | uint8_t adaptation_field_extension_flag; // 1 bslbf 94 | 95 | /* 96 | if(adaptation_field_control=='10' || adaptation_field_control=='11'){ 97 | adaptation_field() 98 | } 99 | if(adaptation_field_control=='01' || adaptation_field_control=='11') { 100 | for (i=0;i TransportPacketSize) 140 | { 141 | if (Rdptr[-(TransportPacketSize + 1)] == 0x47) 142 | break; 143 | } 144 | else if (Rdbfr + Read - Rdptr > TransportPacketSize - static_cast(1)) 145 | { 146 | if (Rdptr[+(TransportPacketSize - 1)] == 0x47) 147 | break; 148 | } 149 | else 150 | { 151 | // We can't check so just accept this sync byte. 152 | break; 153 | } 154 | } 155 | --Packet_Length; // decrement the sync_byte; 156 | 157 | // 2) get pid, transport_error_indicator, payload_unit_start_indicator 158 | code = Get_Short(); 159 | Packet_Length = Packet_Length - 2; // decrement the two bytes we just got; 160 | tp.pid = code & 0x1FFF; // bits [12:0] 161 | tp.transport_error_indicator = (code >> 15) & 0x01; // bit#15 162 | tp.payload_unit_start_indicator = (code >> 14) & 0x01; // bit#14 163 | tp.transport_priority = (code >> 13) & 0x01; // bit#13 164 | 165 | // 3) get other fields 166 | code = Get_Byte(); 167 | --Packet_Length; // decrement the 1 byte we just got; 168 | tp.transport_scrambling_control = (code >> 6) & 0x03;// 2 bslbf 169 | tp.adaptation_field_control = (code >> 4) & 0x03;// 2 bslbf 170 | tp.continuity_counter = code & 0x0F;// 4 uimsbf 171 | 172 | 173 | // 4) check for early-exit conditions ... (possibly skip packet) 174 | // we don't care about the continuity counter 175 | // if ( tp.continuity_counter != previous_continuity_counter ) ... 
176 | if (tp.transport_error_indicator || 177 | (tp.adaptation_field_control == 0)) 178 | { 179 | // skip remaining bytes in current packet 180 | SKIP_TRANSPORT_PACKET_BYTES(Packet_Length) 181 | continue; // abort, and circle back to top of 'for() loop' 182 | } 183 | 184 | // 5) check 185 | if (tp.adaptation_field_control == 2 || tp.adaptation_field_control == 3) 186 | { 187 | // adaptation field is present 188 | tp.adaptation_field_length = Get_Byte(); // 8-bits 189 | --Packet_Length; // decrement the 1 byte we just got; 190 | 191 | if (tp.adaptation_field_length != 0) // end of field already? 192 | { 193 | // if we made it this far, we no longer need to decrement 194 | // Packet_Length. We took care of it up there! 195 | code = Get_Byte(); 196 | --Packet_Length; // decrement the 1 byte we just got; 197 | tp.discontinuity_indicator = (code >> 7) & 0x01; // 1 bslbf 198 | tp.random_access_indicator = (code >> 6) & 0x01; // 1 bslbf 199 | tp.elementary_stream_priority_indicator = (code >> 5) & 0x01; // 1 bslbf 200 | tp.PCR_flag = (code >> 4) & 0x01; // 1 bslbf 201 | tp.OPCR_flag = (code >> 3) & 0x01; // 1 bslbf 202 | tp.splicing_point_flag = (code >> 2) & 0x01; // 1 bslbf 203 | tp.transport_private_data_flag = (code >> 1) & 0x01; // 1 bslbf 204 | tp.adaptation_field_extension_flag = (code >> 0) & 0x01; // 1 bslbf 205 | 206 | // skip the remainder of the adaptation_field 207 | SKIP_TRANSPORT_PACKET_BYTES(tp.adaptation_field_length - 1) 208 | } // if ( tp.adaptation_field_length != 0 ) 209 | } // if ( tp.adaptation_field_control != 1 ) 210 | 211 | // we've processed the header, so now just the payload is left... 212 | 213 | // video 214 | if (tp.pid == MPEG2_Transport_VideoPID && Packet_Length > 0) 215 | { 216 | #if 0 217 | code = Get_Short(); 218 | code = (code & 0xffff) << 16 | Get_Short(); 219 | Packet_Length = Packet_Length - 4; // remove these two bytes 220 | 221 | // Packet start? 
222 | if (code < 0x000001E0 || code > 0x000001EF) 223 | if (!tp.payload_unit_start_indicator) 224 | { 225 | // No, move the buffer-pointer back. 226 | Rdptr -= 4; 227 | Packet_Length = Packet_Length + 4; // restore these four bytes 228 | } 229 | else 230 | #endif 231 | if (tp.payload_unit_start_indicator) 232 | { 233 | // YES, pull out PTS 234 | //Get_Short(); 235 | //Get_Short(); 236 | //Get_Short(); // MPEG2-PES total Packet_Length 237 | //Get_Byte(); // skip a byte 238 | Rdptr += 7; 239 | code = Get_Byte(); 240 | Packet_Header_Length = Get_Byte(); 241 | Packet_Length = Packet_Length - 9; // compensate the bytes we extracted 242 | 243 | // get PTS, and skip rest of PES-header 244 | if (code >= 0x80 && Packet_Header_Length > 4) // Extension_flag ? 245 | { 246 | // Skip PES_PTS 247 | //Get_Short(); 248 | //Get_Short(); 249 | Rdptr += 4; 250 | Get_Byte(); 251 | Packet_Length = Packet_Length - 5; 252 | SKIP_TRANSPORT_PACKET_BYTES(Packet_Header_Length - static_cast(5)) 253 | } 254 | else 255 | SKIP_TRANSPORT_PACKET_BYTES(Packet_Header_Length) 256 | } 257 | Rdmax = Rdptr + Packet_Length; 258 | if (TransportPacketSize == 204) 259 | Rdmax -= 16; 260 | return; 261 | } 262 | 263 | // fall through case 264 | // skip the remainder of the adaptation_field 265 | SKIP_TRANSPORT_PACKET_BYTES(Packet_Length) 266 | } // for 267 | } 268 | 269 | // PVA packet data structure. 270 | struct pva_packet { 271 | uint16_t sync_byte; 272 | uint8_t stream_id; 273 | uint8_t counter; 274 | uint8_t reserved; 275 | uint8_t flags; 276 | uint16_t length; 277 | }; 278 | 279 | // PVA transport stream parser. 280 | void CMPEG2Decoder::Next_PVA_Packet() 281 | { 282 | uint32_t Packet_Length; 283 | pva_packet pva; 284 | uint32_t PTS; 285 | 286 | for (;;) 287 | { 288 | // Search for a good sync. 289 | while (true) 290 | { 291 | // Sync word is 0x4156. 292 | if (Get_Byte() != 0x41) continue; 293 | if (Get_Byte() != 0x56) 294 | { 295 | // This byte might be a 0x41, so back up by one. 
296 | Rdptr--; 297 | continue; 298 | } 299 | // To protect against emulation of the sync word, 300 | // also check that the stream says audio or video. 301 | pva.stream_id = Get_Byte(); 302 | if (pva.stream_id != 0x01 && pva.stream_id != 0x02) 303 | { 304 | // This byte might be a 0x41, so back up by one. 305 | Rdptr--; 306 | continue; 307 | } 308 | break; 309 | } 310 | 311 | // Pick up the remaining packet header fields. 312 | pva.counter = Get_Byte(); 313 | pva.reserved = Get_Byte(); 314 | pva.flags = Get_Byte(); 315 | pva.length = Get_Byte() << 8; 316 | pva.length |= Get_Byte(); 317 | Packet_Length = pva.length; 318 | 319 | // Any payload? 320 | if (Packet_Length == 0 || pva.reserved != 0x55) 321 | continue; // No, try the next packet. 322 | 323 | // Check stream id for video. 324 | if (pva.stream_id == 1) 325 | { 326 | // This is a video packet. 327 | // Extract the PTS if it exists. 328 | if (pva.flags & 0x10) 329 | { 330 | // The spec is unclear about the significance of the prebytes field. 331 | // It appears to be safe to ignore it. 332 | PTS = (int)((Get_Byte() << 24) | (Get_Byte() << 16) | (Get_Byte() << 8) | Get_Byte()); 333 | Packet_Length -= 4; 334 | } 335 | 336 | // Deliver the video to the ES parsing layer. 337 | Rdmax = Rdptr + Packet_Length; 338 | return; 339 | } 340 | 341 | // Not an video packet or an audio packet to be demultiplexed. Keep looking. 342 | SKIP_TRANSPORT_PACKET_BYTES(Packet_Length); 343 | } 344 | } 345 | 346 | void CMPEG2Decoder::Next_Packet() 347 | { 348 | if (SystemStream_Flag == 2) // MPEG-2 transport packet? 349 | { 350 | Next_Transport_Packet(); 351 | return; 352 | } 353 | else if (SystemStream_Flag == 3) // PVA packet? 
354 | { 355 | Next_PVA_Packet(); 356 | return; 357 | } 358 | 359 | uint32_t code, Packet_Length, Packet_Header_Length; 360 | static int stream_type; 361 | while (true) { 362 | code = Get_Short(); 363 | code = (code << 16) + Get_Short(); 364 | 365 | // remove system layer byte stuffing 366 | while ((code & 0xffffff00) != 0x00000100) { 367 | if (Fault_Flag == OUT_OF_BITS) 368 | return; 369 | code = (code << 8) | Get_Byte(); 370 | } 371 | 372 | if (code == PACK_START_CODE) { 373 | if ((Get_Byte() & 0xf0) == 0x20) { 374 | Rdptr += 7; // MPEG1 program stream 375 | stream_type = MPEG1_PROGRAM_STREAM; 376 | } 377 | else { 378 | Rdptr += 8; // MPEG2 program stream 379 | stream_type = MPEG2_PROGRAM_STREAM; 380 | } 381 | } 382 | else if ((code & 0xfffffff0) == VIDEO_ELEMENTARY_STREAM) { 383 | Packet_Length = Get_Short(); 384 | Rdmax = Rdptr + Packet_Length; 385 | 386 | if (stream_type == MPEG1_PROGRAM_STREAM) { 387 | // MPEG1 program stream. 388 | Packet_Header_Length = 0; 389 | // Stuffing bytes. 390 | do { 391 | code = Get_Byte(); 392 | Packet_Header_Length += 1; 393 | } while (code == 0xff); 394 | if ((code & 0xc0) == 0x40) { 395 | // STD bytes. 396 | Get_Byte(); 397 | code = Get_Byte(); 398 | Packet_Header_Length += 2; 399 | } 400 | if ((code & 0xf0) == 0x20) { 401 | // PTS bytes. 402 | Get_Short(); 403 | Get_Short(); 404 | Packet_Header_Length += 4; 405 | } 406 | else if ((code & 0xf0) == 0x30) { 407 | // PTS/DTS bytes. 408 | Get_Short(); 409 | Get_Short(); 410 | Get_Short(); 411 | Get_Short(); 412 | Get_Byte(); 413 | Packet_Header_Length += 9; 414 | } 415 | return; 416 | } 417 | else { 418 | // MPEG2 program stream. 
419 | code = Get_Byte(); 420 | if ((code & 0xc0) == 0x80) 421 | { 422 | //code = Get_Byte(); 423 | ++Rdptr; 424 | Packet_Header_Length = Get_Byte(); 425 | 426 | Rdptr += Packet_Header_Length; 427 | return; 428 | } 429 | else 430 | Rdptr += Packet_Length - 1; 431 | } 432 | } 433 | else if (code >= SYSTEM_START_CODE) 434 | { 435 | code = Get_Short(); 436 | Rdptr += code; 437 | } 438 | } 439 | } 440 | 441 | 442 | void CMPEG2Decoder::Next_File() 443 | { 444 | if (File_Flag < static_cast(Infile.size() - 1)) { 445 | File_Flag++; 446 | 447 | } 448 | else { 449 | File_Flag = 0; 450 | } 451 | // Even if we ran out of files, we reread the first one, just so 452 | // the decoder at least processes valid data until it detects the 453 | // fault flag and exits. 454 | _lseeki64(Infile[File_Flag], 0, SEEK_SET); 455 | int bytes = _read(Infile[File_Flag], Rdbfr + Read, BUFFER_SIZE - Read); 456 | if (Read + static_cast(bytes) == BUFFER_SIZE) 457 | // The whole buffer has valid data. 458 | buffer_invalid = (uint8_t*)(UINTPTR_MAX); 459 | else 460 | // Point to the first invalid buffer location. 461 | buffer_invalid = Rdbfr + Read + bytes; 462 | } 463 | 464 | -------------------------------------------------------------------------------- /src/gethdr.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ 2 | 3 | /* 4 | * Disclaimer of Warranty 5 | * 6 | * These software programs are available to the user without any license fee or 7 | * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims 8 | * any and all warranties, whether express, implied, or statuary, including any 9 | * implied warranties or merchantability or of fitness for a particular 10 | * purpose. In no event shall the copyright-holder be liable for any 11 | * incidental, punitive, or consequential damages of any kind whatsoever 12 | * arising from the use of these programs. 
13 | * 14 | * This disclaimer of warranty extends to the user of these programs and user's 15 | * customers, employees, agents, transferees, successors, and assigns. 16 | * 17 | * The MPEG Software Simulation Group does not represent or warrant that the 18 | * programs furnished hereunder are free of infringement of any third-party 19 | * patents. 20 | * 21 | * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, 22 | * are subject to royalty fees to patent holders. Many of these patents are 23 | * general enough such that they are unavoidable regardless of implementation 24 | * design. 25 | * 26 | */ 27 | 28 | #include "global.h" 29 | #include "MPEG2Decoder.h" 30 | 31 | 32 | /* decode headers from one input stream */ 33 | int CMPEG2Decoder::Get_Hdr() 34 | { 35 | for (;;) 36 | { 37 | /* look for next_start_code */ 38 | Next_Start_Code(); 39 | if (Fault_Flag == OUT_OF_BITS) 40 | { 41 | // We've run dry on data from the stream. 42 | return 0; 43 | } 44 | 45 | switch (Get_Bits(32)) 46 | { 47 | case SEQUENCE_HEADER_CODE: 48 | Sequence_Header(); 49 | Second_Field = 0; 50 | break; 51 | 52 | case GROUP_START_CODE: 53 | group_of_pictures_header(); 54 | Second_Field = 0; 55 | break; 56 | 57 | case PICTURE_START_CODE: 58 | picture_header(); 59 | return 1; 60 | } 61 | } 62 | } 63 | 64 | 65 | /* decode group of pictures header */ 66 | /* ISO/IEC 13818-2 section 6.2.2.6 */ 67 | __forceinline void CMPEG2Decoder::group_of_pictures_header() 68 | { 69 | #if 0 70 | Get_Bits(1); //drop_flag 71 | Get_Bits(5); //gop_hour 72 | Get_Bits(6); //gop_minute 73 | Flush_Buffer(1); // marker bit 74 | Get_Bits(6); //gop_sec 75 | Get_Bits(6); //gop_frame 76 | Get_Bits(1); //closed_gop 77 | Get_Bits(1); //broken_link 78 | #else 79 | Flush_Buffer(27); 80 | #endif 81 | extension_and_user_data(); 82 | } 83 | 84 | 85 | /* decode picture header */ 86 | /* ISO/IEC 13818-2 section 6.2.3 */ 87 | inline void CMPEG2Decoder::picture_header() 88 | { 89 | temporal_reference = 
Get_Bits(10); 90 | picture_coding_type = Get_Bits(3); 91 | Flush_Buffer(16);//Get_Bits(16); //vbv_delay 92 | 93 | if (picture_coding_type == P_TYPE || picture_coding_type == B_TYPE) 94 | { 95 | full_pel_forward_vector = Get_Bits(1); 96 | forward_f_code = Get_Bits(3); 97 | } 98 | 99 | if (picture_coding_type == B_TYPE) 100 | { 101 | full_pel_backward_vector = Get_Bits(1); 102 | backward_f_code = Get_Bits(3); 103 | } 104 | 105 | // MPEG1 defaults. May be overriden by picture coding extension. 106 | intra_dc_precision = 0; 107 | picture_structure = FRAME_PICTURE; 108 | top_field_first = 1; 109 | frame_pred_frame_dct = 1; 110 | concealment_motion_vectors = 0; 111 | q_scale_type = 0; 112 | intra_vlc_format = 0; 113 | alternate_scan = 0; 114 | repeat_first_field = 0; 115 | progressive_frame = 1; 116 | 117 | pf_current = progressive_frame; 118 | 119 | extra_bit_information(); // extra information byte count 120 | extension_and_user_data(); 121 | } 122 | 123 | 124 | /* decode sequence header */ 125 | void CMPEG2Decoder::Sequence_Header() 126 | { 127 | int i; 128 | 129 | horizontal_size = Get_Bits(12); 130 | vertical_size = Get_Bits(12); 131 | aspect_ratio_information = Get_Bits(4); 132 | #if 0 133 | Get_Bits(4); //frame_rate_code 134 | Get_Bits(18); //bit_rate_value 135 | Flush_Buffer(1); // marker bit 136 | Get_Bits(10); //vbv_buffer_size 137 | Get_Bits(1); //constrained_parameters_flag 138 | #else 139 | Flush_Buffer(34); 140 | #endif 141 | 142 | if ((load_intra_quantizer_matrix = Get_Bits(1))) 143 | { 144 | for (i = 0; i < 64; i++) 145 | intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 146 | } 147 | else 148 | { 149 | for (i = 0; i < 64; i++) 150 | intra_quantizer_matrix[i] = default_intra_quantizer_matrix[i]; 151 | } 152 | 153 | if ((load_non_intra_quantizer_matrix = Get_Bits(1))) 154 | { 155 | for (i = 0; i < 64; i++) 156 | non_intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 157 | } 158 | else 159 | { 160 | for (i = 0; i < 64; i++) 161 | 
non_intra_quantizer_matrix[i] = 16; 162 | } 163 | 164 | /* copy luminance to chrominance matrices */ 165 | for (i = 0; i < 64; i++) 166 | { 167 | chroma_intra_quantizer_matrix[i] = intra_quantizer_matrix[i]; 168 | chroma_non_intra_quantizer_matrix[i] = non_intra_quantizer_matrix[i]; 169 | } 170 | 171 | // These are MPEG1 defaults. These will be overridden if we have MPEG2 172 | // when the sequence header extension is parsed. 173 | progressive_sequence = 1; 174 | chroma_format = CHROMA420; 175 | matrix_coefficients = 5; 176 | 177 | extension_and_user_data(); 178 | } 179 | 180 | /* decode slice header */ 181 | /* ISO/IEC 13818-2 section 6.2.4 */ 182 | int CMPEG2Decoder::slice_header() 183 | { 184 | int slice_vertical_position_extension = 0; 185 | if (mpeg_type == IS_MPEG2 && vertical_size > 2800) { 186 | slice_vertical_position_extension = Get_Bits(3); 187 | } 188 | 189 | int quantizer_scale_code = Get_Bits(5); 190 | if (mpeg_type == IS_MPEG2) 191 | quantizer_scale = q_scale_type ? Non_Linear_quantizer_scale[quantizer_scale_code] : quantizer_scale_code << 1; 192 | else 193 | quantizer_scale = quantizer_scale_code; 194 | 195 | while (Get_Bits(1)) Flush_Buffer(8); 196 | 197 | return slice_vertical_position_extension; 198 | } 199 | 200 | /* decode extension and user data */ 201 | /* ISO/IEC 13818-2 section 6.2.2.2 */ 202 | void CMPEG2Decoder::extension_and_user_data() 203 | { 204 | int code, ext_ID; 205 | 206 | Next_Start_Code(); 207 | 208 | while ((code = Show_Bits(32)) == EXTENSION_START_CODE || code == USER_DATA_START_CODE) 209 | { 210 | if (Fault_Flag == OUT_OF_BITS) return; 211 | 212 | if (code == EXTENSION_START_CODE) 213 | { 214 | Flush_Buffer(32); 215 | ext_ID = Get_Bits(4); 216 | 217 | switch (ext_ID) 218 | { 219 | case SEQUENCE_EXTENSION_ID: 220 | sequence_extension(); 221 | break; 222 | 223 | case SEQUENCE_DISPLAY_EXTENSION_ID: 224 | sequence_display_extension(); 225 | break; 226 | 227 | case QUANT_MATRIX_EXTENSION_ID: 228 | quant_matrix_extension(); 229 | 
break; 230 | 231 | case PICTURE_DISPLAY_EXTENSION_ID: 232 | picture_display_extension(); 233 | break; 234 | 235 | case PICTURE_CODING_EXTENSION_ID: 236 | picture_coding_extension(); 237 | break; 238 | 239 | case COPYRIGHT_EXTENSION_ID: 240 | copyright_extension(); 241 | break; 242 | } 243 | Next_Start_Code(); 244 | } 245 | else 246 | { 247 | Flush_Buffer(32); 248 | Next_Start_Code(); 249 | } 250 | } 251 | } 252 | 253 | /* decode sequence extension */ 254 | /* ISO/IEC 13818-2 section 6.2.2.3 */ 255 | __forceinline void CMPEG2Decoder::sequence_extension() 256 | { 257 | Flush_Buffer(8); //Get_Bits(8); //profile_and_level_indication 258 | progressive_sequence = Get_Bits(1); 259 | chroma_format = Get_Bits(2); 260 | int horizontal_size_extension = Get_Bits(2) << 12; 261 | int vertical_size_extension = Get_Bits(2) << 12; 262 | #if 0 263 | Get_Bits(12); //bit_rate_extension 264 | Flush_Buffer(1); // marker bit 265 | Get_Bits(8); //vbv_buffer_size_extension 266 | Get_Bits(1); //low_delay 267 | 268 | Get_Bits(2); //frame_rate_extension_n 269 | Get_Bits(5); //frame_rate_extension_d 270 | #else 271 | Flush_Buffer(29); 272 | #endif 273 | 274 | horizontal_size = horizontal_size_extension | (horizontal_size & 0x0fff); 275 | vertical_size = vertical_size_extension | (vertical_size & 0x0fff); 276 | } 277 | 278 | /* decode sequence display extension */ 279 | __forceinline void CMPEG2Decoder::sequence_display_extension() 280 | { 281 | Flush_Buffer(3);// Get_Bits(3); //video_format 282 | 283 | matrix_coefficients = 1; 284 | if (Get_Bits(1)) //color_description 285 | { 286 | //Get_Bits(8); //color_primaries 287 | //Get_Bits(8); //transfer_characteristics 288 | Flush_Buffer(16); 289 | matrix_coefficients = Get_Bits(8); 290 | } 291 | #if 0 292 | Get_Bits(14); //display_horizontal_size 293 | Flush_Buffer(1); // marker bit 294 | Get_Bits(14); //display_vertical_size 295 | #else 296 | Flush_Buffer(29); 297 | #endif 298 | } 299 | 300 | /* decode quant matrix entension */ 301 | /* ISO/IEC 
13818-2 section 6.2.3.2 */ 302 | void CMPEG2Decoder::quant_matrix_extension() 303 | { 304 | int i; 305 | 306 | if ((load_intra_quantizer_matrix = Get_Bits(1))) 307 | for (i = 0; i < 64; i++) 308 | chroma_intra_quantizer_matrix[scan[ZIG_ZAG][i]] 309 | = intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 310 | 311 | if ((load_non_intra_quantizer_matrix = Get_Bits(1))) 312 | for (i = 0; i < 64; i++) 313 | chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][i]] 314 | = non_intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 315 | 316 | if ((load_chroma_intra_quantizer_matrix = Get_Bits(1))) 317 | for (i = 0; i < 64; i++) 318 | chroma_intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 319 | 320 | if ((load_chroma_non_intra_quantizer_matrix = Get_Bits(1))) 321 | for (i = 0; i < 64; i++) 322 | chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][i]] = Get_Bits(8); 323 | } 324 | 325 | /* decode picture display extension */ 326 | /* ISO/IEC 13818-2 section 6.2.3.3. */ 327 | void CMPEG2Decoder::picture_display_extension() 328 | { 329 | int frame_center_horizontal_offset[3]; 330 | int frame_center_vertical_offset[3]; 331 | 332 | int i; 333 | int number_of_frame_center_offsets; 334 | 335 | /* based on ISO/IEC 13818-2 section 6.3.12 336 | (November 1994) Picture display extensions */ 337 | 338 | /* derive number_of_frame_center_offsets */ 339 | if (progressive_sequence) 340 | { 341 | if (repeat_first_field) 342 | { 343 | if (top_field_first) 344 | number_of_frame_center_offsets = 3; 345 | else 346 | number_of_frame_center_offsets = 2; 347 | } 348 | else 349 | number_of_frame_center_offsets = 1; 350 | } 351 | else 352 | { 353 | if (picture_structure != FRAME_PICTURE) 354 | number_of_frame_center_offsets = 1; 355 | else 356 | { 357 | if (repeat_first_field) 358 | number_of_frame_center_offsets = 3; 359 | else 360 | number_of_frame_center_offsets = 2; 361 | } 362 | } 363 | 364 | /* now parse */ 365 | for (i = 0; i < number_of_frame_center_offsets; i++) 366 | { 367 | 
frame_center_horizontal_offset[i] = Get_Bits(16); 368 | Flush_Buffer(1); // marker bit 369 | 370 | frame_center_vertical_offset[i] = Get_Bits(16); 371 | Flush_Buffer(1); // marker bit 372 | } 373 | } 374 | 375 | /* decode picture coding extension */ 376 | void CMPEG2Decoder::picture_coding_extension() 377 | { 378 | f_code[0][0] = Get_Bits(4); 379 | f_code[0][1] = Get_Bits(4); 380 | f_code[1][0] = Get_Bits(4); 381 | f_code[1][1] = Get_Bits(4); 382 | 383 | intra_dc_precision = Get_Bits(2); 384 | picture_structure = Get_Bits(2); 385 | top_field_first = Get_Bits(1); 386 | frame_pred_frame_dct = Get_Bits(1); 387 | concealment_motion_vectors = Get_Bits(1); 388 | q_scale_type = Get_Bits(1); 389 | intra_vlc_format = Get_Bits(1); 390 | alternate_scan = Get_Bits(1); 391 | repeat_first_field = Get_Bits(1); 392 | Get_Bits(1); //uint32_t chroma_420_type 393 | progressive_frame = Get_Bits(1); 394 | 395 | if (picture_structure != FRAME_PICTURE) 396 | { 397 | if (picture_structure == TOP_FIELD) 398 | top_field_first = 1; 399 | else 400 | top_field_first = 0; 401 | repeat_first_field = 0; 402 | progressive_frame = 0; 403 | } 404 | 405 | pf_current = progressive_frame; 406 | } 407 | 408 | /* decode extra bit information */ 409 | /* ISO/IEC 13818-2 section 6.2.3.4. */ 410 | __forceinline int CMPEG2Decoder::extra_bit_information() 411 | { 412 | int byte_count = 0; 413 | 414 | while (Get_Bits(1)) 415 | { 416 | if (Fault_Flag == OUT_OF_BITS) 417 | return byte_count; 418 | Flush_Buffer(8); 419 | ++byte_count; 420 | } 421 | 422 | return byte_count; 423 | } 424 | 425 | /* Copyright extension */ 426 | /* ISO/IEC 13818-2 section 6.2.3.6. 
*/ 427 | /* (header added in November, 1994 to the IS document) */ 428 | __forceinline void CMPEG2Decoder::copyright_extension() 429 | { 430 | #if 0 431 | Get_Bits(1); //copyright_flag 432 | Get_Bits(8); //copyright_identifier 433 | Get_Bits(1); //original_or_copy 434 | 435 | /* reserved */ 436 | Get_Bits(7); //reserved_data 437 | 438 | Flush_Buffer(1); // marker bit 439 | Get_Bits(20); //copyright_number_1 440 | Flush_Buffer(1); // marker bit 441 | Get_Bits(22); //copyright_number_2 442 | Flush_Buffer(1); // marker bit 443 | Get_Bits(22); //copyright_number_3 444 | #else 445 | Flush_Buffer(32); 446 | Flush_Buffer(32); 447 | Flush_Buffer(20); 448 | #endif 449 | } 450 | -------------------------------------------------------------------------------- /src/global.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Chia-chen Kuo - April 2001 3 | * 4 | * This file is part of DVD2AVI, a free MPEG-2 decoder 5 | * Ported to C++ by Mathias Born - May 2001 6 | * 7 | * DVD2AVI is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * DVD2AVI is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GNU Make; see the file COPYING. If not, write to 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
*
 */


// Defining GLOBAL before including global.h makes that header's XTN macro
// expand to nothing instead of "extern" and enables the table initializers,
// so this translation unit is the one that defines the tables.
#define GLOBAL
#include "global.h"

// NOTE(review): not referenced anywhere in the part of the sources visible
// here -- confirm whether it is still needed.
int testint;

--------------------------------------------------------------------------------
/src/global.h:
--------------------------------------------------------------------------------
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose. In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders. Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
25 | * 26 | */ 27 | 28 | #ifndef __GLOBAL_H 29 | #define __GLOBAL_H 30 | 31 | #include 32 | 33 | 34 | //#include "misc.h" 35 | 36 | 37 | #ifdef GLOBAL 38 | #define XTN 39 | #else 40 | #define XTN extern 41 | #endif 42 | 43 | enum { 44 | MACROBLOCK_INTRA = 1, 45 | MACROBLOCK_PATTERN = 2, 46 | MACROBLOCK_MOTION_BACKWARD = 4, 47 | MACROBLOCK_MOTION_FORWARD = 8, 48 | MACROBLOCK_QUANT = 16, 49 | }; 50 | 51 | 52 | /* default intra quantization matrix */ 53 | XTN uint8_t default_intra_quantizer_matrix[64] 54 | #ifdef GLOBAL 55 | = 56 | { 57 | 8, 16, 19, 22, 26, 27, 29, 34, 58 | 16, 16, 22, 24, 27, 29, 34, 37, 59 | 19, 22, 26, 27, 29, 34, 34, 38, 60 | 22, 22, 26, 27, 29, 34, 37, 40, 61 | 22, 26, 27, 29, 32, 35, 40, 48, 62 | 26, 27, 29, 32, 35, 40, 48, 58, 63 | 26, 27, 29, 34, 38, 46, 56, 69, 64 | 27, 29, 35, 38, 46, 56, 69, 83 65 | } 66 | #endif 67 | ; 68 | 69 | /* zig-zag and alternate scan patterns */ 70 | XTN uint8_t scan[2][64] 71 | #ifdef GLOBAL 72 | = 73 | { 74 | { /* Zig-Zag scan pattern */ 75 | 0, 1, 8, 16, 9, 2, 3, 10, 76 | 17, 24, 32, 25, 18, 11, 4, 5, 77 | 12, 19, 26, 33, 40, 48, 41, 34, 78 | 27, 20, 13, 6, 7, 14, 21, 28, 79 | 35, 42, 49, 56, 57, 50, 43, 36, 80 | 29, 22, 15, 23, 30, 37, 44, 51, 81 | 58, 59, 52, 45, 38, 31, 39, 46, 82 | 53, 60, 61, 54, 47, 55, 62, 63 83 | } 84 | , 85 | { /* Alternate scan pattern */ 86 | 0, 8, 16, 24, 1, 9, 2, 10, 87 | 17, 25, 32, 40, 48, 56, 57, 49, 88 | 41, 33, 26, 18, 3, 11, 4, 12, 89 | 19, 27, 34, 42, 50, 58, 35, 43, 90 | 51, 59, 20, 28, 5, 13, 6, 14, 91 | 21, 29, 36, 44, 52, 60, 37, 45, 92 | 53, 61, 22, 30, 7, 15, 23, 31, 93 | 38, 46, 54, 62, 39, 47, 55, 63 94 | } 95 | } 96 | #endif 97 | ; 98 | 99 | /* non-linear quantization coefficient table */ 100 | XTN uint8_t Non_Linear_quantizer_scale[32] 101 | #ifdef GLOBAL 102 | = 103 | { 104 | 0, 1, 2, 3, 4, 5, 6, 7, 105 | 8, 10, 12, 14, 16, 18, 20, 22, 106 | 24, 28, 32, 36, 40, 44, 48, 52, 107 | 56, 64, 72, 80, 88, 96, 104, 112 108 | } 109 | #endif 110 | ; 111 | 112 | #define 
ERROR_VALUE (-1) 113 | 114 | struct DCTtab { 115 | char run, level, len; 116 | }; 117 | 118 | struct VLCtab { 119 | char val, len; 120 | }; 121 | 122 | /* Table B-10, motion_code, codes 0001 ... 01xx */ 123 | XTN VLCtab MVtab0[8] 124 | #ifdef GLOBAL 125 | = 126 | { 127 | {ERROR_VALUE,0}, {3,3}, {2,2}, {2,2}, {1,1}, {1,1}, {1,1}, {1,1} 128 | } 129 | #endif 130 | ; 131 | 132 | /* Table B-10, motion_code, codes 0000011 ... 000011x */ 133 | XTN VLCtab MVtab1[8] 134 | #ifdef GLOBAL 135 | = 136 | { 137 | {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,6}, {6,6}, {5,6}, {4,5}, {4,5} 138 | } 139 | #endif 140 | ; 141 | 142 | /* Table B-10, motion_code, codes 0000001100 ... 000001011x */ 143 | XTN VLCtab MVtab2[12] 144 | #ifdef GLOBAL 145 | = 146 | { 147 | {16,9}, {15,9}, {14,9}, {13,9}, 148 | {12,9}, {11,9}, {10,8}, {10,8}, 149 | {9,8}, {9,8}, {8,8}, {8,8} 150 | } 151 | #endif 152 | ; 153 | 154 | /* Table B-9, coded_block_pattern, codes 01000 ... 111xx */ 155 | XTN VLCtab CBPtab0[32] 156 | #ifdef GLOBAL 157 | = 158 | { 159 | {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, 160 | {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, 161 | {62,5}, {2,5}, {61,5}, {1,5}, {56,5}, {52,5}, {44,5}, {28,5}, 162 | {40,5}, {20,5}, {48,5}, {12,5}, {32,4}, {32,4}, {16,4}, {16,4}, 163 | {8,4}, {8,4}, {4,4}, {4,4}, {60,3}, {60,3}, {60,3}, {60,3} 164 | } 165 | #endif 166 | ; 167 | 168 | /* Table B-9, coded_block_pattern, codes 00000100 ... 
001111xx */
// Entries in the tables below are {val, len} pairs (struct VLCtab).  Slots
// that hold no code carry {ERROR_VALUE,0}.
XTN VLCtab CBPtab1[64]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
    {58,8}, {54,8}, {46,8}, {30,8},
    {57,8}, {53,8}, {45,8}, {29,8}, {38,8}, {26,8}, {37,8}, {25,8},
    {43,8}, {23,8}, {51,8}, {15,8}, {42,8}, {22,8}, {50,8}, {14,8},
    {41,8}, {21,8}, {49,8}, {13,8}, {35,8}, {19,8}, {11,8}, {7,8},
    {34,7}, {34,7}, {18,7}, {18,7}, {10,7}, {10,7}, {6,7}, {6,7},
    {33,7}, {33,7}, {17,7}, {17,7}, {9,7}, {9,7}, {5,7}, {5,7},
    {63,6}, {63,6}, {63,6}, {63,6}, {3,6}, {3,6}, {3,6}, {3,6},
    {36,6}, {36,6}, {36,6}, {36,6}, {24,6}, {24,6}, {24,6}, {24,6}
}
#endif
;

/* Table B-9, coded_block_pattern, codes 000000001 ... 000000111 */
XTN VLCtab CBPtab2[8]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0}, {0,9}, {39,9}, {27,9}, {59,9}, {55,9}, {47,9}, {31,9}
}
#endif
;

/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
XTN VLCtab MBAtab1[16]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4},
    {4,4}, {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
}
#endif
;

/* Table B-1, macroblock_address_increment, codes 00000011000 ...
0000111xxxx */
// Entries are {val, len} pairs (struct VLCtab).  A pair is repeated once for
// every table index that it covers.
XTN VLCtab MBAtab2[104]
#ifdef GLOBAL
=
{
    {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
    {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
    {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
    {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8},
    {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8},
    {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8},
    {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8},
    {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8},
    {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8},
    {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
    {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
    {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
    {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
}
#endif
;

/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
XTN VLCtab DClumtab0[32]
#ifdef GLOBAL
=
{
    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {ERROR_VALUE, 0}
}
#endif
;

/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
XTN VLCtab DClumtab1[16]
#ifdef GLOBAL
=
{
    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
}
#endif
;

/* Table B-13, dct_dc_size_chrominance, codes 00xxx ...
11110 */
// Entries are {val, len} pairs (struct VLCtab); the last slot of this table
// holds no code and carries {ERROR_VALUE, 0}.
XTN VLCtab DCchromtab0[32]
#ifdef GLOBAL
=
{
    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {ERROR_VALUE, 0}
}
#endif
;

/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
XTN VLCtab DCchromtab1[32]
#ifdef GLOBAL
=
{
    {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
    {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
    {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
    {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
}
#endif
;

// Entries in the DCT tables below are {run, level, len} triples
// (struct DCTtab).

/* Table B-14, DCT coefficients table zero,
 * codes 0100 ... 1xxx (used for first (DC) coefficient)
 */
XTN DCTtab DCTtabfirst[12]
#ifdef GLOBAL
=
{
    {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
    {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
    {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
}
#endif
;

/* Table B-14, DCT coefficients table zero,
 * codes 0100 ... 1xxx (used for all other coefficients)
 */
XTN DCTtab DCTtabnext[12]
#ifdef GLOBAL
=
{
    {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
    {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}
}
#endif
;

/* Table B-14, DCT coefficients table zero,
 * codes 000001xx ...
00111xxx
 */
// Entries are {run, level, len} triples (struct DCTtab).  As marked on the
// rows themselves, run 65 denotes the escape code and run 64 end-of-block.
XTN DCTtab DCTtab0[60]
#ifdef GLOBAL
=
{
    {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
    {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
    {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
    {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
    {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
    {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
    {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
    {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
    {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
    {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
    {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
    {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
    {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
    {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
    {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
}
#endif
;

/* Table B-15, DCT coefficients table one,
 * codes 000001xx ... 11111111
 */
XTN DCTtab DCTtab0a[252]
#ifdef GLOBAL
=
{
    {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
    {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
    {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
    {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
    {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
    {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
    {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
    {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
    {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
    {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
    {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
    {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
    {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
    {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
    {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
    {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
    {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
    {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
    {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
    {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
    {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
    {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
    {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
    {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
    {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
    {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
    {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
    {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
    {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
    {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
    {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
}
#endif
;

/* Table B-14, DCT coefficients table zero,
 * codes 0000001000 ...
0000001111
 */
// Entries are {run, level, len} triples (struct DCTtab).
XTN DCTtab DCTtab1[8]
#ifdef GLOBAL
=
{
    {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
    {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
}
#endif
;

/* Table B-15, DCT coefficients table one,
 * codes 000000100x ... 000000111x
 */
XTN DCTtab DCTtab1a[8]
#ifdef GLOBAL
=
{
    {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
    {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
}
#endif
;

/* Table B-14/15, DCT coefficients table zero / one,
 * codes 000000010000 ... 000000011111
 */
XTN DCTtab DCTtab2[16]
#ifdef GLOBAL
=
{
    {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
    {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
    {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
    {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
}
#endif
;

/* Table B-14/15, DCT coefficients table zero / one,
 * codes 0000000010000 ... 0000000011111
 */
XTN DCTtab DCTtab3[16]
#ifdef GLOBAL
=
{
    {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
    {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
    {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
    {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
}
#endif
;

/* Table B-14/15, DCT coefficients table zero / one,
 * codes 00000000010000 ... 00000000011111
 */
XTN DCTtab DCTtab4[16]
#ifdef GLOBAL
=
{
    {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
    {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
    {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
    {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
}
#endif
;

/* Table B-14/15, DCT coefficients table zero / one,
 * codes 000000000010000 ...
000000000011111 480 | */ 481 | XTN DCTtab DCTtab5[16] 482 | #ifdef GLOBAL 483 | = 484 | { 485 | {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15}, 486 | {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15}, 487 | {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15}, 488 | {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} 489 | } 490 | #endif 491 | ; 492 | 493 | /* Table B-14/15, DCT coefficients table zero / one, 494 | * codes 0000000000010000 ... 0000000000011111 495 | */ 496 | XTN DCTtab DCTtab6[16] 497 | #ifdef GLOBAL 498 | = 499 | { 500 | {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16}, 501 | {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16}, 502 | {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16}, 503 | {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} 504 | } 505 | #endif 506 | ; 507 | // add extra table of table ptrs for performance - trbarry 5/2003 508 | XTN DCTtab* pDCTtabNonI[28] // ptr to non_intra tables 509 | #ifdef GLOBAL 510 | = 511 | { 512 | &DCTtab6[0] - 16, // bsf val = 4, code => 16 513 | &DCTtab5[0] - 16, // bsf val = 5, code => 32 514 | &DCTtab4[0] - 16, // bsf val = 6, code => 64 515 | &DCTtab3[0] - 16, // bsf val = 7, code => 128 516 | &DCTtab2[0] - 16, // bsf val = 8, code => 256 517 | &DCTtab1[0] - 8, // bsf val = 9, code => 512 518 | &DCTtab0[0] - 4, // bsf val = 10, code => 1024 519 | &DCTtab0[0] - 4, // bsf val = 11, code => 2048, same 520 | &DCTtab0[0] - 4, // bsf val = 12, code => 4096, same 521 | &DCTtab0[0] - 4, // bsf val = 13, code => 8192, same 522 | &DCTtab0[0] - 4, // bsf val = 14, code => 16384, same 523 | &DCTtab0[0] - 4, // bsf val = 15, how big can this get?? 524 | &DCTtab0[0] - 4, // bsf val = 16, same? 525 | &DCTtab0[0] - 4, // bsf val = 17, same? 526 | &DCTtab0[0] - 4, // bsf val = 18, same? 527 | &DCTtab0[0] - 4, // bsf val = 19, same? 528 | &DCTtab0[0] - 4, // bsf val = 20, same? 529 | &DCTtab0[0] - 4, // bsf val = 21, same? 530 | &DCTtab0[0] - 4, // bsf val = 22, same? 531 | &DCTtab0[0] - 4, // bsf val = 23, same? 532 | &DCTtab0[0] - 4, // bsf val = 24, same? 
533 | &DCTtab0[0] - 4, // bsf val = 25, same? 534 | &DCTtab0[0] - 4, // bsf val = 26, same? 535 | &DCTtab0[0] - 4, // bsf val = 27, same? 536 | &DCTtab0[0] - 4, // bsf val = 28, same? 537 | &DCTtab0[0] - 4, // bsf val = 29, same? 538 | &DCTtab0[0] - 4, // bsf val = 30, same? 539 | &DCTtab0[0] - 4 // bsf val = 31, same? 540 | } 541 | #endif 542 | ; 543 | // same as above but for when intra_vlc_format - trbarry 5/2003 544 | XTN DCTtab* pDCTtab_intra[28] // ptr to non_intra tables 545 | #ifdef GLOBAL 546 | = 547 | { 548 | &DCTtab6[0] - 16, // bsf val = 4, code => 16 549 | &DCTtab5[0] - 16, // bsf val = 5, code => 32 550 | &DCTtab4[0] - 16, // bsf val = 6, code => 64 551 | &DCTtab3[0] - 16, // bsf val = 7, code => 128 552 | &DCTtab2[0] - 16, // bsf val = 8, code => 256 553 | &DCTtab1a[0] - 8, // bsf val = 9, code => 512 554 | &DCTtab0a[0] - 4, // bsf val = 10, code => 1024 555 | &DCTtab0a[0] - 4, // bsf val = 11, code => 2048, same 556 | &DCTtab0a[0] - 4, // bsf val = 12, code => 4096, same 557 | &DCTtab0a[0] - 4, // bsf val = 13, code => 8192, same 558 | &DCTtab0a[0] - 4, // bsf val = 14 code => 16384, same 559 | &DCTtab0a[0] - 4, // bsf val = 15, code => how big can this get? 560 | &DCTtab0a[0] - 4, // bsf val = 16, same? 561 | &DCTtab0a[0] - 4, // bsf val = 17, same? 562 | &DCTtab0a[0] - 4, // bsf val = 18, same? 563 | &DCTtab0a[0] - 4, // bsf val = 19, same? 564 | &DCTtab0a[0] - 4, // bsf val = 20, same? 565 | &DCTtab0a[0] - 4, // bsf val = 21, same? 566 | &DCTtab0a[0] - 4, // bsf val = 22, same? 567 | &DCTtab0a[0] - 4, // bsf val = 23, same? 568 | &DCTtab0a[0] - 4, // bsf val = 24, same? 569 | &DCTtab0a[0] - 4, // bsf val = 25, same? 570 | &DCTtab0a[0] - 4, // bsf val = 26, same? 571 | &DCTtab0a[0] - 4, // bsf val = 27, same? 572 | &DCTtab0a[0] - 4, // bsf val = 28, same? 573 | &DCTtab0a[0] - 4, // bsf val = 29, same? 574 | &DCTtab0a[0] - 4, // bsf val = 30, same? 575 | &DCTtab0a[0] - 4 // bsf val = 31, same? 
576 | } 577 | #endif 578 | ; 579 | 580 | // add extra table of shift amounts for performance - trbarry 5/2003 581 | XTN int DCTShiftTab[28] // amounts to shift code 582 | #ifdef GLOBAL 583 | = 584 | { 585 | 0, // bsf val = 4, code => 16 586 | 1, // bsf val = 5, code => 32 587 | 2, // bsf val = 6, code => 64 588 | 3, // bsf val = 7, code => 128 589 | 4, // bsf val = 8, code => 256 590 | 6, // bsf val = 9, code => 512 591 | 8, // bsf val = 10, code => 1024 592 | 8, // bsf val = 11, code => 2048, same 593 | 8, // bsf val = 12, code => 4096, same 594 | 8, // bsf val = 13, code => 8192, same 595 | 8, // bsf val = 14, code => 16384, same 596 | 8, // bsf val = 15, how big can this get? 597 | 8, // bsf val = 16, same? 598 | 8, // bsf val = 17, same? 599 | 8, // bsf val = 18, same? 600 | 8, // bsf val = 19, same? 601 | 8, // bsf val = 20, same? 602 | 8, // bsf val = 21, same? 603 | 8, // bsf val = 22, same? 604 | 8, // bsf val = 23, same? 605 | 8, // bsf val = 24, same? 606 | 8, // bsf val = 25, same? 607 | 8, // bsf val = 26, same? 608 | 8, // bsf val = 27, same? 609 | 8, // bsf val = 28, same? 610 | 8, // bsf val = 29, same? 611 | 8, // bsf val = 30, same? 612 | 8 // bsf val = 31, same? 
613 | } 614 | #endif 615 | ; 616 | 617 | /* Table B-3, macroblock_type in P-pictures, codes 001..1xx */ 618 | XTN VLCtab PMBtab0[8] 619 | #ifdef GLOBAL 620 | = 621 | { 622 | {ERROR_VALUE,0}, 623 | {MACROBLOCK_MOTION_FORWARD,3}, 624 | {MACROBLOCK_PATTERN,2}, {MACROBLOCK_PATTERN,2}, 625 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1}, 626 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1}, 627 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1}, 628 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1} 629 | } 630 | #endif 631 | ; 632 | 633 | /* Table B-3, macroblock_type in P-pictures, codes 000001..00011x */ 634 | XTN VLCtab PMBtab1[8] 635 | #ifdef GLOBAL 636 | = 637 | { 638 | {ERROR_VALUE,0}, 639 | {MACROBLOCK_QUANT | MACROBLOCK_INTRA,6}, 640 | {MACROBLOCK_QUANT | MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT | MACROBLOCK_PATTERN,5}, 641 | {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,5}, 642 | {MACROBLOCK_INTRA,5}, {MACROBLOCK_INTRA,5} 643 | } 644 | #endif 645 | ; 646 | 647 | /* Table B-4, macroblock_type in B-pictures, codes 0010..11xx */ 648 | XTN VLCtab BMBtab0[16] 649 | #ifdef GLOBAL 650 | = 651 | { 652 | {ERROR_VALUE,0}, 653 | {ERROR_VALUE,0}, 654 | {MACROBLOCK_MOTION_FORWARD,4}, 655 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,4}, 656 | {MACROBLOCK_MOTION_BACKWARD,3}, 657 | {MACROBLOCK_MOTION_BACKWARD,3}, 658 | {MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,3}, 659 | {MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,3}, 660 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2}, 661 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2}, 662 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2}, 663 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2}, 664 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2}, 665 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2}, 666 | 
{MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2}, 667 | {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2} 668 | } 669 | #endif 670 | ; 671 | 672 | /* Table B-4, macroblock_type in B-pictures, codes 000001..00011x */ 673 | XTN VLCtab BMBtab1[8] 674 | #ifdef GLOBAL 675 | = 676 | { 677 | {ERROR_VALUE,0}, 678 | {MACROBLOCK_QUANT | MACROBLOCK_INTRA,6}, 679 | {MACROBLOCK_QUANT | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,6}, 680 | {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,6}, 681 | {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,5}, 682 | {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,5}, 683 | {MACROBLOCK_INTRA,5}, 684 | {MACROBLOCK_INTRA,5} 685 | } 686 | #endif 687 | ; 688 | 689 | #undef XTN 690 | 691 | #endif // __GLOBAL_H 692 | -------------------------------------------------------------------------------- /src/idct.h: -------------------------------------------------------------------------------- 1 | #ifndef MPEG2DECPLUS_IDCT_H 2 | #define MPEG2DECPLUS_IDCT_H 3 | 4 | #include 5 | #ifndef _WIN32 6 | #include "win_import_min.h" 7 | #endif 8 | 9 | void idct_ref_sse3(int16_t* block); 10 | 11 | void prefetch_ref(); 12 | 13 | void idct_ap922_sse2(int16_t* block); 14 | 15 | void prefetch_ap922(); 16 | 17 | void idct_llm_float_sse2(int16_t* block); 18 | 19 | void idct_llm_float_avx2(int16_t* block); 20 | 21 | void prefetch_llm_float_sse2(); 22 | 23 | void prefetch_llm_float_avx2(); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/idct_ap922_sse2.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | idct_ap922_sse2.cpp 3 | 4 | Originally provided by Intel at AP-922 5 | http://developer.intel.com/vtune/cbts/strmsimd/922down.htm 6 | (See more app notes at 
http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm) 7 | but in a limited edition. 8 | New macro implements a column part for precise iDCT 9 | The routine precision now satisfies IEEE standard 1180-1990. 10 | 11 | Copyright (c) 2000-2001 Peter Gubanov 12 | Rounding trick Copyright (c) 2000 Michel Lespinasse 13 | 14 | http://www.elecard.com/peter/idct.html 15 | http://www.linuxvideo.org/mpeg2dec/ 16 | 17 | SSE2 code by Dmitry Rozhdestvensky 18 | 19 | rewrite to intrinsic by OKA Motofumi 20 | 21 | ============================================================================ 22 | 23 | These examples contain code fragments for first stage iDCT 8x8 24 | (for rows) and first stage DCT 8x8 (for columns) 25 | 26 | ============================================================================ 27 | 28 | The first stage iDCT 8x8 - inverse DCTs of rows 29 | 30 | ----------------------------------------------------------------------------- 31 | The 8-point inverse DCT direct algorithm 32 | ----------------------------------------------------------------------------- 33 | 34 | static const short w[32] = { 35 | FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16), 36 | FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16), 37 | FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16), 38 | FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16), 39 | FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16), 40 | FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16), 41 | FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16), 42 | FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) }; 43 | 44 | #define DCT_8_INV_ROW(x, y) 45 | { 46 | int a0, a1, a2, a3, b0, b1, b2, b3; 47 | 48 | a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3]; 49 | a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7]; 50 | a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11]; 51 | a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15]; 52 | b0 = x[1] * w[16]
+ x[3] * w[17] + x[5] * w[18] + x[7] * w[19]; 53 | b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23]; 54 | b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27]; 55 | b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31]; 56 | 57 | y[0] = SHIFT_ROUND ( a0 + b0 ); 58 | y[1] = SHIFT_ROUND ( a1 + b1 ); 59 | y[2] = SHIFT_ROUND ( a2 + b2 ); 60 | y[3] = SHIFT_ROUND ( a3 + b3 ); 61 | y[4] = SHIFT_ROUND ( a3 - b3 ); 62 | y[5] = SHIFT_ROUND ( a2 - b2 ); 63 | y[6] = SHIFT_ROUND ( a1 - b1 ); 64 | y[7] = SHIFT_ROUND ( a0 - b0 ); 65 | } 66 | 67 | ----------------------------------------------------------------------------- 68 | 69 | In this implementation the outputs of the iDCT-1D are multiplied 70 | for rows 0,4 - by cos_4_16, 71 | for rows 1,7 - by cos_1_16, 72 | for rows 2,6 - by cos_2_16, 73 | for rows 3,5 - by cos_3_16 74 | and are shifted to the left for better accuracy 75 | 76 | For the constants used, 77 | FIX(float_const) = (short) (float_const * (1<<15) + 0.5) 78 | 79 | ============================================================================= 80 | */ 81 | 82 | 83 | #include 84 | #include 85 | 86 | #ifndef _WIN32 87 | #include "win_import_min.h" 88 | #endif 89 | 90 | 91 | alignas(64) static constexpr int16_t table04[] = { 92 | 16384, 21407, 16384, 8867, 16384, -8867, 16384, -21407, // w0, w1, w4, w5, w8, w9,w12,w13 93 | 16384, 8867, -16384, -21407, -16384, 21407, 16384, -8867, // w2, w3, w6, w7,w10,w11,w14,w15 94 | 22725, 19266, 19266, -4520, 12873, -22725, 4520, -12873, //w16,w17,w20,w21,w24,w25,w28,w29 95 | 12873, 4520, -22725, -12873, 4520, 19266, 19266, -22725, //w18,w19,w22,w23,w26,w27,w30,w31 96 | }; 97 | 98 | alignas(64) static constexpr int16_t table17[] = { 99 | 22725, 29692, 22725, 12299, 22725, -12299, 22725, -29692, // w0, w1, w4, w5, w8, w9,w12,w13 100 | 22725, 12299, -22725, -29692, -22725, 29692, 22725, -12299, // w2, w3, w6, w7,w10,w11,w14,w15 101 | 31521, 26722, 26722, -6270, 17855, -31521, 6270, -17855, 
//w16,w17,w20,w21,w24,w25,w28,w29 102 | 17855, 6270, -31521, -17855, 6270, 26722, 26722, -31521, //w18,w19,w22,w23,w26,w27,w30,w31 103 | }; 104 | 105 | alignas(64) static constexpr int16_t table26[] = { 106 | 21407, 27969, 21407, 11585, 21407, -11585, 21407, -27969, // w0, w1, w4, w5, w8, w9,w12,w13 107 | 21407, 11585, -21407, -27969, -21407, 27969, 21407, -11585, // w2, w3, w6, w7,w10,w11,w14,w15 108 | 29692, 25172, 25172, -5906, 16819, -29692, 5906, -16819, //w16,w17,w20,w21,w24,w25,w28,w29 109 | 16819, 5906, -29692, -16819, 5906, 25172, 25172, -29692, //w18,w19,w22,w23,w26,w27,w30,w31 110 | }; 111 | 112 | alignas(64) static constexpr int16_t table35[] = { 113 | 19266, 25172, 19266, 10426, 19266, -10426, 19266, -25172, // w0, w1, w4, w5, w8, w9,w12,w13 114 | 19266, 10426, -19266, -25172, -19266, 25172, 19266, -10426, // w2, w3, w6, w7,w10,w11,w14,w15 115 | 26722, 22654, 22654, -5315, 15137, -26722, 5315, -15137, //w16,w17,w20,w21,w24,w25,w28,w29 116 | 15137, 5315, -26722, -15137, 5315, 22654, 22654, -26722, //w18,w19,w22,w23,w26,w27,w30,w31 117 | }; 118 | 119 | alignas(64) static constexpr int32_t rounders[8][4] = { 120 | { 65536, 65536, 65536, 65536 }, 121 | { 3597, 3597, 3597, 3597 }, 122 | { 2260, 2260, 2260, 2260 }, 123 | { 1203, 1203, 1203, 1203 }, 124 | { 0, 0, 0, 0 }, 125 | { 120, 120, 120, 120 }, 126 | { 512, 512, 512, 512 }, 127 | { 512, 512, 512, 512 }, 128 | }; 129 | 130 | alignas(64) static constexpr int16_t tg[4][8] = { 131 | { 13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036 }, 132 | { 27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}, 133 | {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}, 134 | { 23170, 23170, 23170, 23170, 23170, 23170, 23170, 23170}, 135 | }; 136 | 137 | 138 | static __forceinline void 139 | idct_row_sse2(int16_t* block, const int16_t* table, const int32_t* rounder) noexcept 140 | { 141 | __m128i* blk = reinterpret_cast<__m128i*>(block); 142 | const __m128i* tbl = reinterpret_cast(table); 143 | const 
__m128i* rnd = reinterpret_cast(rounder); 144 | 145 | __m128i row = _mm_load_si128(blk); 146 | row = _mm_shufflehi_epi16(row, _MM_SHUFFLE(3, 1, 2, 0)); 147 | row = _mm_shufflelo_epi16(row, _MM_SHUFFLE(3, 1, 2, 0)); 148 | 149 | __m128i t0 = _mm_shuffle_epi32(row, _MM_SHUFFLE(0, 0, 0, 0)); 150 | t0 = _mm_madd_epi16(t0, _mm_load_si128(tbl)); 151 | 152 | __m128i t1 = _mm_shuffle_epi32(row, _MM_SHUFFLE(2, 2, 2, 2)); 153 | t1 = _mm_madd_epi16(t1, _mm_load_si128(++tbl)); 154 | 155 | t0 = _mm_add_epi32(_mm_add_epi32(t0, t1), _mm_load_si128(rnd)); 156 | 157 | __m128i t2 = _mm_shuffle_epi32(row, _MM_SHUFFLE(1, 1, 1, 1)); 158 | t2 = _mm_madd_epi16(t2, _mm_load_si128(++tbl)); 159 | 160 | __m128i t3 = _mm_shuffle_epi32(row, _MM_SHUFFLE(3, 3, 3, 3)); 161 | t3 = _mm_madd_epi16(t3, _mm_load_si128(++tbl)); 162 | 163 | t3 = _mm_add_epi32(t2, t3); 164 | 165 | t1 = _mm_add_epi32(t0, t3); 166 | t2 = _mm_sub_epi32(t0, t3); 167 | 168 | t0 = _mm_packs_epi32(_mm_srai_epi32(t1, 11), _mm_srai_epi32(t2, 11)); 169 | t0 = _mm_shufflehi_epi16(t0, _MM_SHUFFLE(0, 1, 2, 3)); 170 | 171 | _mm_store_si128(blk, t0); 172 | } 173 | 174 | 175 | static __forceinline void 176 | idct_colx8_sse2(int16_t* block) noexcept 177 | { 178 | const __m128i* tg1 = reinterpret_cast(tg[0]); 179 | const __m128i* tg2 = reinterpret_cast(tg[1]); 180 | const __m128i* tg3 = reinterpret_cast(tg[2]); 181 | const __m128i* ocos4 = reinterpret_cast(tg[3]); 182 | 183 | __m128i* blk = reinterpret_cast<__m128i*>(block); 184 | 185 | __m128i x0 = _mm_load_si128(blk + 0); 186 | __m128i x4 = _mm_load_si128(blk + 4); 187 | __m128i x2 = _mm_load_si128(blk + 2); 188 | __m128i x6 = _mm_load_si128(blk + 6); 189 | __m128i tgx = _mm_load_si128(tg2); 190 | 191 | __m128i u04 = _mm_adds_epi16(x0, x4); 192 | __m128i v04 = _mm_subs_epi16(x0, x4); 193 | 194 | __m128i t0 = _mm_mulhi_epi16(x2, tgx); 195 | __m128i t1 = _mm_mulhi_epi16(x6, tgx); 196 | __m128i v26 = _mm_subs_epi16(t0, x6); 197 | __m128i u26 = _mm_adds_epi16(t1, x2); 198 | 199 | __m128i a0 
= _mm_adds_epi16(u04, u26); 200 | __m128i a1 = _mm_adds_epi16(v04, v26); 201 | __m128i a2 = _mm_subs_epi16(v04, v26); 202 | __m128i a3 = _mm_subs_epi16(u04, u26); 203 | 204 | __m128i x1 = _mm_load_si128(blk + 1); 205 | __m128i x7 = _mm_load_si128(blk + 7); 206 | __m128i x3 = _mm_load_si128(blk + 3); 207 | __m128i x5 = _mm_load_si128(blk + 5); 208 | tgx = _mm_load_si128(tg1); 209 | 210 | t0 = _mm_mulhi_epi16(x1, tgx); 211 | t1 = _mm_mulhi_epi16(x7, tgx); 212 | __m128i u17 = _mm_adds_epi16(t1, x1); 213 | __m128i v17 = _mm_subs_epi16(t0, x7); 214 | 215 | tgx = _mm_load_si128(tg3); 216 | 217 | t0 = _mm_mulhi_epi16(x3, tgx); 218 | t1 = _mm_mulhi_epi16(x5, tgx); 219 | t0 = _mm_adds_epi16(t0, x3); 220 | t1 = _mm_adds_epi16(t1, x5); 221 | __m128i v35 = _mm_subs_epi16(t0, x5); 222 | __m128i u35 = _mm_adds_epi16(t1, x3); 223 | 224 | __m128i b0 = _mm_adds_epi16(u17, u35); 225 | __m128i b3 = _mm_subs_epi16(v17, v35); 226 | __m128i u12 = _mm_subs_epi16(u17, u35); 227 | __m128i v12 = _mm_adds_epi16(v17, v35); 228 | 229 | tgx = _mm_load_si128(ocos4); 230 | t0 = _mm_adds_epi16(u12, v12); 231 | t1 = _mm_subs_epi16(u12, v12); 232 | t0 = _mm_mulhi_epi16(t0, tgx); 233 | t1 = _mm_mulhi_epi16(t1, tgx); 234 | __m128i b1 = _mm_adds_epi16(t0, t0); 235 | __m128i b2 = _mm_adds_epi16(t1, t1); 236 | 237 | _mm_store_si128(blk + 0, _mm_srai_epi16(_mm_adds_epi16(a0, b0), 6)); 238 | _mm_store_si128(blk + 7, _mm_srai_epi16(_mm_subs_epi16(a0, b0), 6)); 239 | 240 | _mm_store_si128(blk + 3, _mm_srai_epi16(_mm_adds_epi16(a3, b3), 6)); 241 | _mm_store_si128(blk + 4, _mm_srai_epi16(_mm_subs_epi16(a3, b3), 6)); 242 | 243 | _mm_store_si128(blk + 1, _mm_srai_epi16(_mm_adds_epi16(a1, b1), 6)); 244 | _mm_store_si128(blk + 6, _mm_srai_epi16(_mm_subs_epi16(a1, b1), 6)); 245 | 246 | _mm_store_si128(blk + 2, _mm_srai_epi16(_mm_adds_epi16(a2, b2), 6)); 247 | _mm_store_si128(blk + 5, _mm_srai_epi16(_mm_subs_epi16(a2, b2), 6)); 248 | } 249 | 250 | 251 | void idct_ap922_sse2(int16_t* block) 252 | { 253 | 
idct_row_sse2(block + 0, table04, rounders[0]); 254 | idct_row_sse2(block + 8, table17, rounders[1]); 255 | idct_row_sse2(block + 16, table26, rounders[2]); 256 | idct_row_sse2(block + 24, table35, rounders[3]); 257 | idct_row_sse2(block + 32, table04, rounders[4]); 258 | idct_row_sse2(block + 40, table35, rounders[5]); 259 | idct_row_sse2(block + 48, table26, rounders[6]); 260 | idct_row_sse2(block + 56, table17, rounders[7]); 261 | 262 | idct_colx8_sse2(block); 263 | } 264 | 265 | 266 | void prefetch_ap922() 267 | { 268 | _mm_prefetch(reinterpret_cast(table04), _MM_HINT_NTA); 269 | _mm_prefetch(reinterpret_cast(table17), _MM_HINT_NTA); 270 | _mm_prefetch(reinterpret_cast(table26), _MM_HINT_NTA); 271 | _mm_prefetch(reinterpret_cast(table35), _MM_HINT_NTA); 272 | _mm_prefetch(reinterpret_cast(rounders[0]), _MM_HINT_NTA); 273 | _mm_prefetch(reinterpret_cast(tg[0]), _MM_HINT_NTA); 274 | } 275 | -------------------------------------------------------------------------------- /src/idct_llm_float_avx2.cpp: -------------------------------------------------------------------------------- 1 | #ifndef __AVX2__ 2 | #error arch:avx2 is not set. 
3 | #endif 4 | 5 | #include 6 | #include "idct.h" 7 | 8 | alignas(64) static const float llm_coefs[] = { 9 | 1.175876f, 1.175876f, 1.175876f, 1.175876f, 1.175876f, 1.175876f, 1.175876f, 1.175876f, 10 | -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, 11 | -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, 12 | -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, 13 | -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, 14 | 0.298631f, 0.298631f, 0.298631f, 0.298631f, 0.298631f, 0.298631f, 0.298631f, 0.298631f, 15 | 2.053120f, 2.053120f, 2.053120f, 2.053120f, 2.053120f, 2.053120f, 2.053120f, 2.053120f, 16 | 3.072711f, 3.072711f, 3.072711f, 3.072711f, 3.072711f, 3.072711f, 3.072711f, 3.072711f, 17 | 1.501321f, 1.501321f, 1.501321f, 1.501321f, 1.501321f, 1.501321f, 1.501321f, 1.501321f, 18 | 0.541196f, 0.541196f, 0.541196f, 0.541196f, 0.541196f, 0.541196f, 0.541196f, 0.541196f, 19 | -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, 20 | 0.765367f, 0.765367f, 0.765367f, 0.765367f, 0.765367f, 0.765367f, 0.765367f, 0.765367f, 21 | }; 22 | 23 | 24 | static __forceinline __m256 25 | load_and_convert_to_float_x8_avx2(const int16_t* srcp) noexcept 26 | { 27 | __m128i s = _mm_load_si128(reinterpret_cast(srcp)); 28 | return _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(s)); 29 | } 30 | 31 | 32 | static __forceinline void 33 | transpose_8x8_avx2(__m256& a, __m256& b, __m256& c, __m256& d, __m256& e, __m256& f, __m256& g, __m256& h) noexcept 34 | { 35 | __m256 ac0145 = _mm256_unpacklo_ps(a, c); // a0 c0 a1 c1 a4 c4 a5 c5 36 | __m256 ac2367 = _mm256_unpackhi_ps(a, c); // a2 c2 a3 c3 a6 c6 a7 c7 37 | __m256 bd0145 = _mm256_unpacklo_ps(b, d); // b0 d0 b1 d1 b4 d4 b5 d5 38 | __m256 bd2367 = _mm256_unpackhi_ps(b, d); // b2 d2 b3 d3 b6 d6 b7 d7 39 | __m256 eg0145 = 
_mm256_unpacklo_ps(e, g); // e0 g0 e1 g1 e4 g4 e5 g5 40 | __m256 eg2367 = _mm256_unpackhi_ps(e, g); // e2 g2 e3 g3 e6 g6 e7 g7 41 | __m256 fh0145 = _mm256_unpacklo_ps(f, h); // f0 h0 f1 h1 f4 h4 f5 h5 42 | __m256 fh2367 = _mm256_unpackhi_ps(f, h); // f2 h2 f3 h3 f6 h6 f7 h7 43 | 44 | __m256 abcd04 = _mm256_unpacklo_ps(ac0145, bd0145); // a0 b0 c0 d0 a4 b4 c4 d4 45 | __m256 abcd15 = _mm256_unpackhi_ps(ac0145, bd0145); // a1 b1 c1 d1 a5 b5 c5 d5 46 | __m256 abcd26 = _mm256_unpacklo_ps(ac2367, bd2367); // a2 b2 c2 d2 a6 b6 c6 d6 47 | __m256 abcd37 = _mm256_unpackhi_ps(ac2367, bd2367); // a3 b3 c3 d3 a7 b7 c7 d7 48 | __m256 efgh04 = _mm256_unpacklo_ps(eg0145, fh0145); // e0 f0 g0 h0 e4 f4 g4 h4 49 | __m256 efgh15 = _mm256_unpackhi_ps(eg0145, fh0145); // e1 f1 g1 h1 e5 f5 g5 h5 50 | __m256 efgh26 = _mm256_unpacklo_ps(eg2367, fh2367); // e2 f2 g2 h2 e6 f6 g6 h6 51 | __m256 efgh37 = _mm256_unpackhi_ps(eg2367, fh2367); // e3 f3 g3 h3 e7 f7 g7 h7 52 | 53 | a = _mm256_permute2f128_ps(abcd04, efgh04, (2 << 4) | 0); //a0 b0 c0 d0 e0 f0 g0 h0 54 | e = _mm256_permute2f128_ps(abcd04, efgh04, (3 << 4) | 1); //a4 b4 c4 d4 e4 f4 g4 h4 55 | b = _mm256_permute2f128_ps(abcd15, efgh15, (2 << 4) | 0); //a1 b1 c1 d1 e1 f1 g1 h1 56 | f = _mm256_permute2f128_ps(abcd15, efgh15, (3 << 4) | 1); //a5 b5 c5 d5 e5 f5 g5 h5 57 | c = _mm256_permute2f128_ps(abcd26, efgh26, (2 << 4) | 0); //a2 b2 c2 d2 e2 f2 g2 h2 58 | g = _mm256_permute2f128_ps(abcd26, efgh26, (3 << 4) | 1); //a6 b6 c6 d6 e6 f6 g6 h6 59 | d = _mm256_permute2f128_ps(abcd37, efgh37, (2 << 4) | 0); //a3 b3 c3 d3 e3 f3 g3 h3 60 | h = _mm256_permute2f128_ps(abcd37, efgh37, (3 << 4) | 1); //a7 b7 c7 d7 e7 f7 g7 h7 61 | } 62 | 63 | 64 | static __forceinline void 65 | idct_8x8_fma3(__m256& s0, __m256& s1, __m256& s2, __m256& s3, __m256& s4, __m256& s5, __m256& s6, __m256& s7) noexcept 66 | { 67 | __m256 z0 = _mm256_add_ps(s1, s7); 68 | __m256 z1 = _mm256_add_ps(s3, s5); 69 | __m256 z2 = _mm256_add_ps(s3, s7); 70 | __m256 z3 = 
_mm256_add_ps(s1, s5); 71 | __m256 z4 = _mm256_mul_ps(_mm256_add_ps(z0, z1), _mm256_load_ps(llm_coefs)); 72 | 73 | z2 = _mm256_fmadd_ps(z2, _mm256_load_ps(llm_coefs + 8), z4); 74 | z3 = _mm256_fmadd_ps(z3, _mm256_load_ps(llm_coefs + 16), z4); 75 | z0 = _mm256_mul_ps(z0, _mm256_load_ps(llm_coefs + 24)); 76 | z1 = _mm256_mul_ps(z1, _mm256_load_ps(llm_coefs + 32)); 77 | 78 | __m256 b3 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 40), s7, _mm256_add_ps(z0, z2)); 79 | __m256 b2 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 48), s5, _mm256_add_ps(z1, z3)); 80 | __m256 b1 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 56), s3, _mm256_add_ps(z1, z2)); 81 | __m256 b0 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 64), s1, _mm256_add_ps(z0, z3)); 82 | 83 | z4 = _mm256_mul_ps(_mm256_add_ps(s2, s6), _mm256_load_ps(llm_coefs + 72)); 84 | z0 = _mm256_add_ps(s0, s4); 85 | z1 = _mm256_sub_ps(s0, s4); 86 | 87 | z2 = _mm256_fmadd_ps(s6, _mm256_load_ps(llm_coefs + 80), z4); 88 | z3 = _mm256_fmadd_ps(s2, _mm256_load_ps(llm_coefs + 88), z4); 89 | 90 | __m256 a0 = _mm256_add_ps(z0, z3); 91 | __m256 a3 = _mm256_sub_ps(z0, z3); 92 | __m256 a1 = _mm256_add_ps(z1, z2); 93 | __m256 a2 = _mm256_sub_ps(z1, z2); 94 | 95 | s0 = _mm256_add_ps(a0, b0); 96 | s7 = _mm256_sub_ps(a0, b0); 97 | s1 = _mm256_add_ps(a1, b1); 98 | s6 = _mm256_sub_ps(a1, b1); 99 | s2 = _mm256_add_ps(a2, b2); 100 | s5 = _mm256_sub_ps(a2, b2); 101 | s3 = _mm256_add_ps(a3, b3); 102 | s4 = _mm256_sub_ps(a3, b3); 103 | } 104 | 105 | 106 | static __forceinline void 107 | float_to_dst_avx2(const __m256& s0, const __m256& s1, int16_t* dst) noexcept 108 | { 109 | static const __m256 one_eighth = _mm256_set1_ps(0.1250f); 110 | static const __m256i minimum = _mm256_set1_epi16(-256); 111 | static const __m256i maximum = _mm256_set1_epi16(255); 112 | 113 | __m256 t0 = _mm256_mul_ps(s0, one_eighth); 114 | __m256 t1 = _mm256_mul_ps(s1, one_eighth); 115 | __m256i d0 = _mm256_packs_epi32(_mm256_cvtps_epi32(t0), _mm256_cvtps_epi32(t1)); 116 | d0 
= _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(3, 1, 2, 0)); 117 | d0 = _mm256_max_epi16(_mm256_min_epi16(d0, maximum), minimum); 118 | _mm256_store_si256(reinterpret_cast<__m256i*>(dst), d0); 119 | } 120 | 121 | 122 | void idct_llm_float_avx2(int16_t* block) 123 | { 124 | __m256 s0 = load_and_convert_to_float_x8_avx2(block); 125 | __m256 s1 = load_and_convert_to_float_x8_avx2(block + 8); 126 | __m256 s2 = load_and_convert_to_float_x8_avx2(block + 16); 127 | __m256 s3 = load_and_convert_to_float_x8_avx2(block + 24); 128 | __m256 s4 = load_and_convert_to_float_x8_avx2(block + 32); 129 | __m256 s5 = load_and_convert_to_float_x8_avx2(block + 40); 130 | __m256 s6 = load_and_convert_to_float_x8_avx2(block + 48); 131 | __m256 s7 = load_and_convert_to_float_x8_avx2(block + 56); 132 | 133 | transpose_8x8_avx2(s0, s1, s2, s3, s4, s5, s6, s7); 134 | 135 | idct_8x8_fma3(s0, s1, s2, s3, s4, s5, s6, s7); 136 | 137 | transpose_8x8_avx2(s0, s1, s2, s3, s4, s5, s6, s7); 138 | 139 | idct_8x8_fma3(s0, s1, s2, s3, s4, s5, s6, s7); 140 | 141 | float_to_dst_avx2(s0, s1, block + 0); 142 | float_to_dst_avx2(s2, s3, block + 16); 143 | float_to_dst_avx2(s4, s5, block + 32); 144 | float_to_dst_avx2(s6, s7, block + 48); 145 | } 146 | 147 | 148 | void prefetch_llm_float_avx2() 149 | { 150 | _mm_prefetch(reinterpret_cast(llm_coefs), _MM_HINT_NTA); 151 | _mm_prefetch(reinterpret_cast(llm_coefs + 16), _MM_HINT_NTA); 152 | _mm_prefetch(reinterpret_cast(llm_coefs + 32), _MM_HINT_NTA); 153 | _mm_prefetch(reinterpret_cast(llm_coefs + 48), _MM_HINT_NTA); 154 | _mm_prefetch(reinterpret_cast(llm_coefs + 64), _MM_HINT_NTA); 155 | _mm_prefetch(reinterpret_cast(llm_coefs + 80), _MM_HINT_NTA); 156 | } 157 | -------------------------------------------------------------------------------- /src/idct_llm_float_sse2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "idct.h" 3 | 4 | 5 | alignas(64) static const float llm_coefs[] = { 6 | 1.175876f, 
1.175876f, 1.175876f, 1.175876f, 7 | -1.961571f, -1.961571f, -1.961571f, -1.961571f, 8 | -0.390181f, -0.390181f, -0.390181f, -0.390181f, 9 | -0.899976f, -0.899976f, -0.899976f, -0.899976f, 10 | -2.562915f, -2.562915f, -2.562915f, -2.562915f, 11 | 0.298631f, 0.298631f, 0.298631f, 0.298631f, 12 | 2.053120f, 2.053120f, 2.053120f, 2.053120f, 13 | 3.072711f, 3.072711f, 3.072711f, 3.072711f, 14 | 1.501321f, 1.501321f, 1.501321f, 1.501321f, 15 | 0.541196f, 0.541196f, 0.541196f, 0.541196f, 16 | -1.847759f, -1.847759f, -1.847759f, -1.847759f, 17 | 0.765367f, 0.765367f, 0.765367f, 0.765367f, 18 | }; 19 | 20 | 21 | static inline void short_to_float(const short* srcp, float* dstp) noexcept 22 | { 23 | const __m128i zero = _mm_setzero_si128(); 24 | 25 | for (int i = 0; i < 64; i += 8) { 26 | __m128i s = _mm_load_si128(reinterpret_cast(srcp + i)); 27 | __m128i mask = _mm_cmpgt_epi16(zero, s); 28 | __m128 d0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, mask)); 29 | __m128 d1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, mask)); 30 | _mm_store_ps(dstp + i, d0); 31 | _mm_store_ps(dstp + i + 4, d1); 32 | } 33 | } 34 | 35 | 36 | static inline void idct_8x4_with_transpose(const float* srcp, float* dstp) noexcept 37 | { 38 | __m128 s0 = _mm_load_ps(srcp); 39 | __m128 s1 = _mm_load_ps(srcp + 8); 40 | __m128 s2 = _mm_load_ps(srcp + 16); 41 | __m128 s3 = _mm_load_ps(srcp + 24); 42 | _MM_TRANSPOSE4_PS(s0, s1, s2, s3); 43 | __m128 s4 = _mm_load_ps(srcp + 4); 44 | __m128 s5 = _mm_load_ps(srcp + 12); 45 | __m128 s6 = _mm_load_ps(srcp + 20); 46 | __m128 s7 = _mm_load_ps(srcp + 28); 47 | _MM_TRANSPOSE4_PS(s4, s5, s6, s7); 48 | 49 | __m128 z0 = _mm_add_ps(s1, s7); 50 | __m128 z1 = _mm_add_ps(s3, s5); 51 | __m128 z2 = _mm_add_ps(s3, s7); 52 | __m128 z3 = _mm_add_ps(s1, s5); 53 | __m128 z4 = _mm_mul_ps(_mm_add_ps(z0, z1), _mm_load_ps(llm_coefs)); 54 | 55 | z2 = _mm_add_ps(_mm_mul_ps(z2, _mm_load_ps(llm_coefs + 4)), z4); 56 | z3 = _mm_add_ps(_mm_mul_ps(z3, _mm_load_ps(llm_coefs + 8)), z4); 57 | z0 = 
_mm_mul_ps(z0, _mm_load_ps(llm_coefs + 12)); 58 | z1 = _mm_mul_ps(z1, _mm_load_ps(llm_coefs + 16)); 59 | 60 | __m128 b3 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s7, _mm_load_ps(llm_coefs + 20)), z0), z2); 61 | __m128 b2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s5, _mm_load_ps(llm_coefs + 24)), z1), z3); 62 | __m128 b1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s3, _mm_load_ps(llm_coefs + 28)), z1), z2); 63 | __m128 b0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s1, _mm_load_ps(llm_coefs + 32)), z0), z3); 64 | 65 | z4 = _mm_mul_ps(_mm_add_ps(s2, s6), _mm_load_ps(llm_coefs + 36)); 66 | z0 = _mm_add_ps(s0, s4); 67 | z1 = _mm_sub_ps(s0, s4); 68 | 69 | z2 = _mm_add_ps(z4, _mm_mul_ps(s6, _mm_load_ps(llm_coefs + 40))); 70 | z3 = _mm_add_ps(z4, _mm_mul_ps(s2, _mm_load_ps(llm_coefs + 44))); 71 | 72 | s0 = _mm_add_ps(z0, z3); 73 | s3 = _mm_sub_ps(z0, z3); 74 | s1 = _mm_add_ps(z1, z2); 75 | s2 = _mm_sub_ps(z1, z2); 76 | 77 | _mm_store_ps(dstp, _mm_add_ps(s0, b0)); 78 | _mm_store_ps(dstp + 56, _mm_sub_ps(s0, b0)); 79 | _mm_store_ps(dstp + 8, _mm_add_ps(s1, b1)); 80 | _mm_store_ps(dstp + 48, _mm_sub_ps(s1, b1)); 81 | _mm_store_ps(dstp + 16, _mm_add_ps(s2, b2)); 82 | _mm_store_ps(dstp + 40, _mm_sub_ps(s2, b2)); 83 | _mm_store_ps(dstp + 24, _mm_add_ps(s3, b3)); 84 | _mm_store_ps(dstp + 32, _mm_sub_ps(s3, b3)); 85 | } 86 | 87 | 88 | static inline void float_to_dst_llm(const float* srcp, int16_t* dstp) noexcept 89 | { 90 | static const __m128 one_eighth = _mm_set1_ps(0.1250f); 91 | static const __m128i minimum = _mm_set1_epi16(-256); 92 | static const __m128i maximum = _mm_set1_epi16(255); 93 | 94 | for (int i = 0; i < 64; i += 8) { 95 | __m128 s0 = _mm_load_ps(srcp + i); 96 | __m128 s1 = _mm_load_ps(srcp + i + 4); 97 | s0 = _mm_mul_ps(s0, one_eighth); 98 | s1 = _mm_mul_ps(s1, one_eighth); 99 | __m128i d = _mm_packs_epi32(_mm_cvtps_epi32(s0), _mm_cvtps_epi32(s1)); 100 | d = _mm_min_epi16(_mm_max_epi16(d, minimum), maximum); 101 | _mm_store_si128(reinterpret_cast<__m128i*>(dstp + i), d); 102 | } 103 | } 104 | 
105 | 106 | void idct_llm_float_sse2(int16_t* block) 107 | { 108 | alignas(64) float blockf[64]; 109 | alignas(64) float tmp[64]; 110 | 111 | short_to_float(block, blockf); 112 | 113 | idct_8x4_with_transpose(blockf, tmp); 114 | idct_8x4_with_transpose(blockf + 32, tmp + 4); 115 | 116 | idct_8x4_with_transpose(tmp, blockf); 117 | idct_8x4_with_transpose(tmp + 32, blockf + 4); 118 | 119 | float_to_dst_llm(blockf, block); 120 | } 121 | 122 | 123 | void prefetch_llm_float_sse2() 124 | { 125 | _mm_prefetch(reinterpret_cast(llm_coefs), _MM_HINT_NTA); 126 | _mm_prefetch(reinterpret_cast(llm_coefs + 16), _MM_HINT_NTA); 127 | _mm_prefetch(reinterpret_cast(llm_coefs + 32), _MM_HINT_NTA); 128 | } 129 | 130 | -------------------------------------------------------------------------------- /src/idct_ref_sse3.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | idct_reference_sse3.cpp 3 | 4 | rewrite to double precision sse3 intrinsic code. 5 | OKA Motofumi - August 29, 2016 6 | 7 | */ 8 | 9 | 10 | #include 11 | #include "idct.h" 12 | 13 | /* Perform IEEE 1180 reference (64-bit floating point, separable 8x1 14 | * direct matrix multiply) Inverse Discrete Cosine Transform 15 | */ 16 | 17 | 18 | /* cosine transform matrix for 8x1 IDCT */ 19 | alignas(64) static const double ref_dct_matrix_t[] = { 20 | 3.5355339059327379e-001, 4.9039264020161522e-001, 21 | 4.6193976625564337e-001, 4.1573480615127262e-001, 22 | 3.5355339059327379e-001, 2.7778511650980114e-001, 23 | 1.9134171618254492e-001, 9.7545161008064166e-002, 24 | 3.5355339059327379e-001, 4.1573480615127262e-001, 25 | 1.9134171618254492e-001, -9.7545161008064096e-002, 26 | -3.5355339059327373e-001, -4.9039264020161522e-001, 27 | -4.6193976625564342e-001, -2.7778511650980109e-001, 28 | 3.5355339059327379e-001, 2.7778511650980114e-001, 29 | -1.9134171618254486e-001, -4.9039264020161522e-001, 30 | -3.5355339059327384e-001, 9.7545161008064152e-002, 31 | 4.6193976625564326e-001,
4.1573480615127273e-001, 32 | 3.5355339059327379e-001, 9.7545161008064166e-002, 33 | -4.6193976625564337e-001, -2.7778511650980109e-001, 34 | 3.5355339059327368e-001, 4.1573480615127273e-001, 35 | -1.9134171618254495e-001, -4.9039264020161533e-001, 36 | 3.5355339059327379e-001, -9.7545161008064096e-002, 37 | -4.6193976625564342e-001, 2.7778511650980092e-001, 38 | 3.5355339059327384e-001, -4.1573480615127256e-001, 39 | -1.9134171618254528e-001, 4.9039264020161522e-001, 40 | 3.5355339059327379e-001, -2.7778511650980098e-001, 41 | -1.9134171618254517e-001, 4.9039264020161522e-001, 42 | -3.5355339059327334e-001, -9.7545161008064013e-002, 43 | 4.6193976625564337e-001, -4.1573480615127251e-001, 44 | 3.5355339059327379e-001, -4.1573480615127267e-001, 45 | 1.9134171618254500e-001, 9.7545161008064388e-002, 46 | -3.5355339059327356e-001, 4.9039264020161533e-001, 47 | -4.6193976625564320e-001, 2.7778511650980076e-001, 48 | 3.5355339059327379e-001, -4.9039264020161522e-001, 49 | 4.6193976625564326e-001, -4.1573480615127256e-001, 50 | 3.5355339059327329e-001, -2.7778511650980076e-001, 51 | 1.9134171618254478e-001, -9.7545161008064291e-002, 52 | }; 53 | 54 | 55 | #if 0 56 | static inline void transpose_8x8_c(const double* srcp, double* dstp) noexcept 57 | { 58 | for (int y = 0; y < 8; ++y) { 59 | for (int x = 0; x < 8; ++x) { 60 | dstp[x] = srcp[8 * x + y]; 61 | } 62 | dstp += 8; 63 | } 64 | } 65 | 66 | 67 | static inline void idct_ref_8x8_c(const double* srcp, double* dstp) noexcept 68 | { 69 | for (int y = 0; y < 8; ++y) { 70 | for (int x = 0; x < 8; ++x) { 71 | double t = 0; 72 | for (int z = 0; z < 8; ++z) { 73 | t += ref_dct_matrix_t[8 * x + z] * srcp[8 * y + z]; 74 | } 75 | dstp[8 * y + x] = t; 76 | } 77 | } 78 | } 79 | 80 | #endif 81 | 82 | 83 | static inline void short_to_double_sse2(const short* srcp, double* dstp) noexcept 84 | { 85 | const __m128i zero = _mm_setzero_si128(); 86 | for (int i = 0; i < 64; i += 8) { 87 | __m128i s = _mm_load_si128(reinterpret_cast(srcp + 
i)); 88 | __m128i mask = _mm_cmpgt_epi16(zero, s); 89 | __m128i s0 = _mm_unpacklo_epi16(s, mask); 90 | __m128i s1 = _mm_unpackhi_epi16(s, mask); 91 | __m128d d0 = _mm_cvtepi32_pd(s0); 92 | __m128d d1 = _mm_cvtepi32_pd(_mm_srli_si128(s0, 8)); 93 | __m128d d2 = _mm_cvtepi32_pd(s1); 94 | __m128d d3 = _mm_cvtepi32_pd(_mm_srli_si128(s1, 8)); 95 | _mm_store_pd(dstp + i, d0); 96 | _mm_store_pd(dstp + i + 2, d1); 97 | _mm_store_pd(dstp + i + 4, d2); 98 | _mm_store_pd(dstp + i + 6, d3); 99 | } 100 | } 101 | 102 | 103 | static inline void transpose_8x8_sse2(const double* srcp, double* dstp) noexcept 104 | { 105 | for (int y = 0; y < 8; y += 2) { 106 | double* d = dstp + y; 107 | for (int x = 0; x < 8; x += 2) { 108 | __m128d s0 = _mm_load_pd(srcp + x); 109 | __m128d s1 = _mm_load_pd(srcp + x + 8); 110 | _mm_store_pd(d, _mm_unpacklo_pd(s0, s1)); 111 | _mm_store_pd(d + 8, _mm_unpackhi_pd(s0, s1)); 112 | d += 16; 113 | } 114 | srcp += 16; 115 | } 116 | } 117 | 118 | 119 | static inline void idct_ref_8x8_sse3(const double* srcp, double* dstp) noexcept 120 | { 121 | for (int i = 0; i < 8; ++i) { 122 | __m128d s0 = _mm_load_pd(srcp + 8 * static_cast(i)); 123 | __m128d s1 = _mm_load_pd(srcp + 8 * static_cast(i) + 2); 124 | __m128d s2 = _mm_load_pd(srcp + 8 * static_cast(i) + 4); 125 | __m128d s3 = _mm_load_pd(srcp + 8 * static_cast(i) + 6); 126 | 127 | for (int j = 0; j < 8; j += 2) { 128 | const double* mpos = ref_dct_matrix_t + 8 * static_cast(j); 129 | 130 | __m128d m0 = _mm_mul_pd(_mm_load_pd(mpos), s0); 131 | __m128d m1 = _mm_mul_pd(_mm_load_pd(mpos + 2), s1); 132 | __m128d m2 = _mm_mul_pd(_mm_load_pd(mpos + 4), s2); 133 | __m128d m3 = _mm_mul_pd(_mm_load_pd(mpos + 6), s3); 134 | __m128d d0 = _mm_add_pd(_mm_add_pd(m0, m1), _mm_add_pd(m2, m3)); 135 | 136 | m0 = _mm_mul_pd(_mm_load_pd(mpos + 8), s0); 137 | m1 = _mm_mul_pd(_mm_load_pd(mpos + 10), s1); 138 | m2 = _mm_mul_pd(_mm_load_pd(mpos + 12), s2); 139 | m3 = _mm_mul_pd(_mm_load_pd(mpos + 14), s3); 140 | __m128d d1 = 
_mm_add_pd(_mm_add_pd(m0, m1), _mm_add_pd(m2, m3)); 141 | 142 | _mm_store_pd(dstp + 8 * static_cast(i) + j, _mm_hadd_pd(d0, d1)); 143 | } 144 | } 145 | } 146 | 147 | 148 | static inline void double_to_dst_sse2(const double* srcp, int16_t* dst) noexcept 149 | { 150 | static const __m128i minimum = _mm_set1_epi16(-256); 151 | static const __m128i maximum = _mm_set1_epi16(255); 152 | 153 | for (int i = 0; i < 64; i += 8) { 154 | __m128d s0 = _mm_load_pd(srcp + i); 155 | __m128d s1 = _mm_load_pd(srcp + i + 2); 156 | __m128d s2 = _mm_load_pd(srcp + i + 4); 157 | __m128d s3 = _mm_load_pd(srcp + i + 6); 158 | __m128i d0 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(s0), _mm_cvtpd_epi32(s1)); 159 | __m128i d1 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(s2), _mm_cvtpd_epi32(s3)); 160 | d0 = _mm_min_epi16(_mm_max_epi16(_mm_packs_epi32(d0, d1), minimum), maximum); 161 | _mm_store_si128(reinterpret_cast<__m128i*>(dst + i), d0); 162 | } 163 | } 164 | 165 | 166 | void idct_ref_sse3(int16_t* block) 167 | { 168 | alignas(64) double blockf[64]; 169 | alignas(64) double tmp[64]; 170 | 171 | short_to_double_sse2(block, blockf); 172 | 173 | idct_ref_8x8_sse3(blockf, tmp); 174 | 175 | transpose_8x8_sse2(tmp, blockf); 176 | 177 | idct_ref_8x8_sse3(blockf, tmp); 178 | 179 | transpose_8x8_sse2(tmp, blockf); 180 | 181 | double_to_dst_sse2(blockf, block); 182 | } 183 | 184 | 185 | void prefetch_ref() 186 | { 187 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 0), _MM_HINT_NTA); 188 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 8), _MM_HINT_NTA); 189 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 16), _MM_HINT_NTA); 190 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 24), _MM_HINT_NTA); 191 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 32), _MM_HINT_NTA); 192 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 40), _MM_HINT_NTA); 193 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 48), _MM_HINT_NTA); 194 | _mm_prefetch(reinterpret_cast(ref_dct_matrix_t + 56), _MM_HINT_NTA); 195 | } 196 
| 197 | -------------------------------------------------------------------------------- /src/mc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Motion Compensation for MPEG2Dec3 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 7 | * 8 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2, or (at your option) 11 | * any later version. 12 | * 13 | * MPEG2Dec3 is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with GNU Make; see the file COPYING. If not, write to 20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21 | * 22 | */ 23 | 24 | 25 | // SSE2 intrinsic implementation 26 | // OKA Motofumi - August 23, 2016 27 | 28 | 29 | #include 30 | #include "mc.h" 31 | #ifndef _WIN32 32 | #include "win_import_min.h" 33 | #endif 34 | 35 | 36 | static __forceinline __m128i loadl(const uint8_t* p) 37 | { 38 | return _mm_loadl_epi64(reinterpret_cast(p)); 39 | } 40 | 41 | static __forceinline __m128i loadu(const uint8_t* p) 42 | { 43 | return _mm_loadu_si128(reinterpret_cast(p)); 44 | } 45 | 46 | static __forceinline __m128i avgu8(const __m128i& x, const __m128i& y) 47 | { 48 | return _mm_avg_epu8(x, y); 49 | } 50 | 51 | static __forceinline void storel(uint8_t* p, const __m128i& x) 52 | { 53 | _mm_storel_epi64(reinterpret_cast<__m128i*>(p), x); 54 | } 55 | 56 | static __forceinline void storeu(uint8_t* p, const __m128i& x) 57 | { 58 | _mm_storeu_si128(reinterpret_cast<__m128i*>(p), x); 59 | } 60 | 61 | 62 | static void MC_put_8_c(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 63 | { 64 | do { 65 | *reinterpret_cast(dest) = *reinterpret_cast(ref); 66 | dest += stride; ref += stride; 67 | } while (--height > 0); 68 | } 69 | 70 | 71 | static void MC_put_16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 72 | { 73 | do { 74 | storeu(dest, loadu(ref)); 75 | ref += stride; dest += stride; 76 | } while (--height > 0); 77 | } 78 | 79 | 80 | static void MC_avg_8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 81 | { 82 | do { 83 | storel(dest, avgu8(loadl(ref), loadl(dest))); 84 | ref += stride; dest += stride; 85 | } while (--height > 0); 86 | } 87 | 88 | 89 | static void MC_avg_16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 90 | { 91 | do { 92 | storeu(dest, avgu8(loadu(ref), loadu(dest))); 93 | ref += stride; dest += stride; 94 | } while (--height > 0); 95 | } 96 | 97 | 98 | static void MC_put_x8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 99 | { 100 | do { 101 | storel(dest, 
avgu8(loadl(ref), loadl(ref + 1))); 102 | ref += stride; dest += stride; 103 | } while (--height > 0); 104 | } 105 | 106 | 107 | static void MC_put_y8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 108 | { 109 | do { 110 | storel(dest, avgu8(loadl(ref), loadl(ref + offs))); 111 | ref += stride; dest += stride; 112 | } while (--height > 0); 113 | } 114 | 115 | 116 | static void MC_put_x16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 117 | { 118 | do { 119 | storeu(dest, avgu8(loadu(ref), loadu(ref + 1))); 120 | ref += stride; dest += stride; 121 | } while (--height > 0); 122 | } 123 | 124 | 125 | static void MC_put_y16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 126 | { 127 | do { 128 | storeu(dest, avgu8(loadu(ref), loadu(ref + offs))); 129 | ref += stride; dest += stride; 130 | } while (--height > 0); 131 | } 132 | 133 | 134 | static void MC_avg_x8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 135 | { 136 | do { 137 | storel(dest, avgu8(avgu8(loadl(ref), loadl(ref + 1)), loadl(dest))); 138 | ref += stride; dest += stride; 139 | } while (--height > 0); 140 | } 141 | 142 | 143 | static void MC_avg_y8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 144 | { 145 | do { 146 | storel(dest, avgu8(avgu8(loadl(ref), loadl(ref + offs)), loadl(dest))); 147 | ref += stride; dest += stride; 148 | } while (--height > 0); 149 | } 150 | 151 | 152 | static void MC_avg_x16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height) 153 | { 154 | do { 155 | storeu(dest, avgu8(avgu8(loadu(ref), loadu(ref + 1)), loadu(dest))); 156 | ref += stride; dest += stride; 157 | } while (--height > 0); 158 | } 159 | 160 | 161 | static void MC_avg_y16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 162 | { 163 | do { 164 | storeu(dest, avgu8(avgu8(loadu(ref), loadu(ref + offs)), loadu(dest))); 165 | ref += stride; dest += stride; 166 
| } while (--height > 0); 167 | } 168 | 169 | 170 | static __forceinline __m128i 171 | get_correcter(const __m128i& r0, const __m128i& r1, const __m128i& r2, const __m128i& r3, 172 | const __m128i& avg0, const __m128i& avg1, const __m128i& one) 173 | { 174 | __m128i t0 = _mm_or_si128(_mm_xor_si128(r0, r3), _mm_xor_si128(r1, r2)); 175 | t0 = _mm_and_si128(t0, _mm_xor_si128(avg0, avg1)); 176 | return _mm_and_si128(t0, one); 177 | } 178 | 179 | 180 | static void MC_put_xy8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 181 | { 182 | static const __m128i one = _mm_set1_epi8(1); 183 | const uint8_t* ro = ref + offs; 184 | 185 | do { 186 | __m128i r0 = loadl(ref); 187 | __m128i r1 = loadl(ref + 1); 188 | __m128i r2 = loadl(ro); 189 | __m128i r3 = loadl(ro + 1); 190 | 191 | __m128i avg0 = avgu8(r0, r3); 192 | __m128i avg1 = avgu8(r1, r2); 193 | 194 | __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one); 195 | 196 | storel(dest, _mm_subs_epu8(avgu8(avg0, avg1), t0)); 197 | 198 | ref += stride; 199 | ro += stride; 200 | dest += stride; 201 | } while (--height > 0); 202 | } 203 | 204 | 205 | static void MC_put_xy16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 206 | { 207 | static const __m128i one = _mm_set1_epi8(1); 208 | const uint8_t* ro = ref + offs; 209 | 210 | do { 211 | __m128i r0 = loadu(ref); 212 | __m128i r1 = loadu(ref + 1); 213 | __m128i r2 = loadu(ro); 214 | __m128i r3 = loadu(ro + 1); 215 | 216 | __m128i avg0 = avgu8(r0, r3); 217 | __m128i avg1 = avgu8(r1, r2); 218 | 219 | __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one); 220 | 221 | storeu(dest, _mm_subs_epu8(avgu8(avg0, avg1), t0)); 222 | 223 | ref += stride; 224 | ro += stride; 225 | dest += stride; 226 | } while (--height > 0); 227 | } 228 | 229 | 230 | static void MC_avg_xy8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 231 | { 232 | static const __m128i one = _mm_set1_epi8(1); 233 | const uint8_t* ro = 
ref + offs; 234 | 235 | do { 236 | __m128i r0 = loadl(ref); 237 | __m128i r1 = loadl(ref + 1); 238 | __m128i r2 = loadl(ro); 239 | __m128i r3 = loadl(ro + 1); 240 | 241 | __m128i avg0 = avgu8(r0, r3); 242 | __m128i avg1 = avgu8(r1, r2); 243 | 244 | __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one); 245 | 246 | storel(dest, avgu8(_mm_subs_epu8(avgu8(avg0, avg1), t0), loadl(dest))); 247 | 248 | ref += stride; 249 | ro += stride; 250 | dest += stride; 251 | } while (--height > 0); 252 | } 253 | 254 | 255 | static void MC_avg_xy16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height) 256 | { 257 | static const __m128i one = _mm_set1_epi8(1); 258 | const uint8_t* ro = ref + offs; 259 | 260 | do { 261 | __m128i r0 = loadu(ref); 262 | __m128i r1 = loadu(ref + 1); 263 | __m128i r2 = loadu(ro); 264 | __m128i r3 = loadu(ro + 1); 265 | 266 | __m128i avg0 = avgu8(r0, r3); 267 | __m128i avg1 = avgu8(r1, r2); 268 | 269 | __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one); 270 | 271 | storeu(dest, avgu8(_mm_subs_epu8(avgu8(avg0, avg1), t0), loadu(dest))); 272 | 273 | ref += stride; 274 | ro += stride; 275 | dest += stride; 276 | } while (--height > 0); 277 | } 278 | 279 | 280 | 281 | // This project requires SSE2. MMX/MMX_EXT/3DNOW! are obsolete. 282 | // fastMC was discontinued...who cares about that? 
283 | /* Motion-compensation dispatch table filled by Choose_Prediction(): ppppf_motion[op][size][hpel]. op: 0 = put (store the prediction), 1 = avg (average the prediction with dest). size: 0 = 8-byte rows, 1 = 16-byte rows. hpel: 0 = plain copy, 1 = y (ref averaged with ref + offs), 2 = x (ref averaged with ref + 1), 3 = xy (all four neighbours). */ 284 | MCFuncPtr ppppf_motion[2][2][4]; 285 | 286 | void Choose_Prediction(void) 287 | { 288 | ppppf_motion[0][0][0] = MC_put_8_c; 289 | ppppf_motion[0][0][1] = MC_put_y8_sse2; 290 | ppppf_motion[0][0][2] = MC_put_x8_sse2; 291 | ppppf_motion[0][0][3] = MC_put_xy8_sse2; 292 | 293 | ppppf_motion[0][1][0] = MC_put_16_sse2; 294 | ppppf_motion[0][1][1] = MC_put_y16_sse2; 295 | ppppf_motion[0][1][2] = MC_put_x16_sse2; 296 | ppppf_motion[0][1][3] = MC_put_xy16_sse2; 297 | 298 | ppppf_motion[1][0][0] = MC_avg_8_sse2; 299 | ppppf_motion[1][0][1] = MC_avg_y8_sse2; 300 | ppppf_motion[1][0][2] = MC_avg_x8_sse2; 301 | ppppf_motion[1][0][3] = MC_avg_xy8_sse2; 302 | 303 | ppppf_motion[1][1][0] = MC_avg_16_sse2; 304 | ppppf_motion[1][1][1] = MC_avg_y16_sse2; 305 | ppppf_motion[1][1][2] = MC_avg_x16_sse2; 306 | ppppf_motion[1][1][3] = MC_avg_xy16_sse2; 307 | } 308 | -------------------------------------------------------------------------------- /src/mc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Motion Compensation for MPEG2Dec3 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 7 | * 8 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2, or (at your option) 11 | * any later version. 12 | * 13 | * MPEG2Dec3 is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with GNU Make; see the file COPYING. If not, write to 20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21 | * 22 | */ 23 | 24 | #ifndef MPEG2DEC_MC_H 25 | #define MPEG2DEC_MC_H 26 | 27 | #include 28 | 29 | typedef void (MCFunc)(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height); 30 | typedef MCFunc* MCFuncPtr; 31 | 32 | // Form prediction (motion compensation) function pointer array (GetPic.c) - Vlad59 04-20-2002 33 | extern MCFuncPtr ppppf_motion[2][2][4]; 34 | void Choose_Prediction(void); 35 | 36 | #endif // MPEG2DEC_MC_H 37 | -------------------------------------------------------------------------------- /src/misc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Misc Stuff for MPEG2Dec3 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 7 | * 8 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2, or (at your option) 11 | * any later version. 12 | * 13 | * MPEG2Dec3 is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with GNU Make; see the file COPYING. If not, write to 20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 21 | * 22 | */ 23 | 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "misc.h" 30 | 31 | 32 | size_t __cdecl dprintf(char* fmt, ...) 
33 | { 34 | char printString[1024]; 35 | 36 | va_list argp; 37 | 38 | va_start(argp, fmt); 39 | vsprintf_s(printString, 1024, fmt, argp); 40 | va_end(argp); 41 | fprintf(stderr, "%s", printString); 42 | return strlen(printString); 43 | } 44 | 45 | 46 | void __stdcall 47 | fast_copy(const uint8_t* src, const int src_stride, uint8_t* dst, 48 | const int dst_stride, const int horizontal_size, int vertical_size) noexcept 49 | { 50 | if (vertical_size == 0) { 51 | return; 52 | } 53 | else if (horizontal_size == src_stride && src_stride == dst_stride) { 54 | memcpy(dst, src, static_cast(horizontal_size) * vertical_size); 55 | } 56 | else { 57 | do { 58 | memcpy(dst, src, horizontal_size); 59 | dst += dst_stride; 60 | src += src_stride; 61 | } while (--vertical_size != 0); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Misc Stuff (profiling) for MPEG2Dec3 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 7 | * 8 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2, or (at your option) 11 | * any later version. 12 | * 13 | * MPEG2Dec3 is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with GNU Make; see the file COPYING. If not, write to 20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
21 | * 22 | */ 23 | 24 | #ifndef MPEG2DECPLUS_MISC_H 25 | #define MPEG2DECPLUS_MISC_H 26 | 27 | #ifndef _WIN32 28 | #include 29 | #include "win_import_min.h" 30 | #endif 31 | 32 | void __stdcall 33 | fast_copy(const uint8_t* src, const int src_stride, uint8_t* dst, 34 | const int dst_stride, const int horizontal_size, 35 | const int vertical_size) noexcept; 36 | 37 | size_t __cdecl dprintf(char* fmt, ...); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/store.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * MPEG2Dec3 : YV12 & PostProcessing 3 | * 4 | * Copyright (C) 2002-2003 Marc Fauconneau 5 | * 6 | * based on the initial MPEG2Dec Copyright (C) Chia-chen Kuo - April 2001 7 | * 8 | * This file is part of MPEG2Dec3, a free MPEG-2 decoder 9 | * 10 | * MPEG2Dec3 is free software; you can redistribute it and/or modify 11 | * it under the terms of the GNU General Public License as published by 12 | * the Free Software Foundation; either version 2, or (at your option) 13 | * any later version. 14 | * 15 | * MPEG2Dec3 is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with GNU Make; see the file COPYING. If not, write to 22 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23 | * 24 | */ 25 | 26 | 27 | #include "color_convert.h" 28 | //#include "postprocess.h" 29 | #include "misc.h" 30 | #include "MPEG2Decoder.h" 31 | 32 | 33 | // Write numbers of up to 3 digits in a 16x16 zone. 
34 | static void write_quants(uint8_t* dst, int stride, int mb_width, int mb_height, 35 | const int* qp) 36 | { 37 | const uint8_t rien[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 38 | const uint8_t nums[10][8] = { 39 | { 1, 4, 4, 4, 4, 4, 1, 0 }, 40 | { 3, 3, 3, 3, 3, 3, 3, 0 }, 41 | { 1, 3, 3, 1, 2, 2, 1, 0 }, 42 | { 1, 3, 3, 1, 3, 3, 1, 0 }, 43 | { 4, 4, 4, 1, 3, 3, 3, 0 }, 44 | { 1, 2, 2, 1, 3, 3, 1, 0 }, 45 | { 1, 2, 2, 1, 4, 4, 1, 0 }, 46 | { 1, 3, 3, 3, 3, 3, 3, 0 }, 47 | { 1, 4, 4, 1, 4, 4, 1, 0 }, 48 | { 1, 4, 4, 1, 3, 3, 1, 0 }, 49 | }; 50 | 51 | auto write = [](const uint8_t* num, uint8_t* dst, const int stride) { 52 | for (int y = 0; y < 7; ++y) { 53 | if (num[y] == 1) { 54 | dst[1 + y * stride] = 0xFF; 55 | dst[2 + y * stride] = 0xFF; 56 | dst[3 + y * stride] = 0xFF; 57 | dst[4 + y * stride] = 0xFF; 58 | } 59 | if (num[y] == 2) { 60 | dst[1 + y * stride] = 0xFF; 61 | } 62 | if (num[y] == 3) { 63 | dst[4 + y * stride] = 0xFF; 64 | } 65 | if (num[y] == 4) { 66 | dst[1 + y * stride] = 0xFF; 67 | dst[4 + y * stride] = 0xFF; 68 | } 69 | } 70 | }; 71 | 72 | for (int y = 0; y < mb_height; ++y) { 73 | for (int x = 0; x < mb_width; ++x) { 74 | int number = qp[x + y * mb_width]; 75 | uint8_t* dstp = dst + static_cast(x) * 16 + static_cast(3) * stride; 76 | 77 | int c = (number / 100) % 10; 78 | const uint8_t* num = nums[c]; // x00 79 | if (c == 0) num = rien; 80 | write(num, dstp, stride); 81 | 82 | dstp += 5; 83 | int d = (number / 10) % 10; 84 | num = nums[d]; // 0x0 85 | if (c == 0 && d == 0) num = rien; 86 | write(num, dstp, stride); 87 | 88 | dstp += 5; 89 | num = nums[number % 10]; // 00x 90 | write(num, dstp, stride); 91 | } 92 | dst += static_cast(16) * stride; 93 | } 94 | } 95 | 96 | 97 | static void set_qparams(const int* qp, size_t mb_size, int& minquant, 98 | int& maxquant, int& avgquant) 99 | { 100 | int minq = qp[0], maxq = qp[0], sum = qp[0]; 101 | for (size_t i = 1; i < mb_size; ++i) { 102 | int q = qp[i]; 103 | if (q < minq) minq = q; 104 | if (q > maxq) 
maxq = q; 105 | sum += q; 106 | } 107 | minquant = minq; 108 | maxquant = maxq; 109 | avgquant = static_cast(static_cast(sum) / mb_size + 0.5f); 110 | } 111 | 112 | 113 | void CMPEG2Decoder::assembleFrame(uint8_t* src[], int pf, YV12PICT& dst) 114 | { 115 | dst.pf = pf; 116 | #if 0 117 | if (pp_mode != 0) 118 | { 119 | uint8_t* ppptr[3]; 120 | if (!(upConv > 0 && chroma_format == 1)) 121 | { 122 | ppptr[0] = dst->y; 123 | ppptr[1] = dst->u; 124 | ppptr[2] = dst->v; 125 | } 126 | else 127 | { 128 | ppptr[0] = dst->y; 129 | ppptr[1] = u422; 130 | ppptr[2] = v422; 131 | } 132 | bool iPPt; 133 | if (iPP == 1 || (iPP == -1 && pf == 0)) iPPt = true; 134 | else iPPt = false; 135 | postprocess(src, this->Coded_Picture_Width, this->Chroma_Width, 136 | ppptr, dst->ypitch, dst->uvpitch, this->Coded_Picture_Width, 137 | this->Coded_Picture_Height, this->QP, this->mb_width, pp_mode, moderate_h, moderate_v, 138 | chroma_format == 1 ? false : true, iPPt); 139 | if (upConv > 0 && chroma_format == 1) 140 | { 141 | if (iCC == 1 || (iCC == -1 && pf == 0)) 142 | { 143 | conv420to422I(ppptr[1], dst->u, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height); 144 | conv420to422I(ppptr[2], dst->v, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height); 145 | } 146 | else 147 | { 148 | conv420to422P(ppptr[1], dst->u, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height); 149 | conv420to422P(ppptr[2], dst->v, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height); 150 | } 151 | } 152 | } 153 | else 154 | #endif 155 | { 156 | fast_copy(src[0], Coded_Picture_Width, dst.y, dst.ypitch, Coded_Picture_Width, Coded_Picture_Height); 157 | if (upConv > 0 && chroma_format == 1) { 158 | if (iCC == 1 || (iCC == -1 && pf == 0)) { 159 | conv420to422I(src[1], dst.u, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height); 160 | conv420to422I(src[2], dst.v, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height); 
161 | } 162 | else { 163 | conv420to422P(src[1], dst.u, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height); 164 | conv420to422P(src[2], dst.v, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height); 165 | } 166 | } 167 | else { 168 | fast_copy(src[1], Chroma_Width, dst.u, dst.uvpitch, Chroma_Width, Chroma_Height); 169 | fast_copy(src[2], Chroma_Width, dst.v, dst.uvpitch, Chroma_Width, Chroma_Height); 170 | } 171 | } 172 | 173 | if (has_prop || info == 1 || info == 2 || showQ) { 174 | // Re-order quant data for display order. 175 | const int* qp = (picture_coding_type == B_TYPE) ? auxQP : backwardQP; 176 | if (has_prop || info == 1 || info == 2) { 177 | set_qparams(qp, static_cast(mb_width) * mb_height, minquant, maxquant, avgquant); 178 | } 179 | if (showQ) { 180 | write_quants(dst.y, dst.ypitch, mb_width, mb_height, qp); 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/win_import_min.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef WIN_IMPORT_MIN_H 3 | #define WIN_IMPORT_MIN_H 4 | 5 | /* support from recent _mingw.h */ 6 | 7 | #ifdef __cplusplus 8 | #define __forceinline inline __attribute__((__always_inline__)) 9 | #else 10 | #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) 11 | #endif /* __cplusplus */ 12 | 13 | #ifdef __GNUC__ 14 | #define _byteswap_ulong(x) __builtin_bswap32(x) 15 | #endif 16 | 17 | #define _read read 18 | #define _lseeki64 lseek 19 | #define _close close 20 | 21 | /* gnu libc offers the equivalent 'aligned_alloc' BUT requested 'size' 22 | has to be a multiple of 'alignment' - in case it isn't, I'll set 23 | a different size, rounding up the value */ 24 | #define _aligned_malloc(s,a) ( \ 25 | aligned_alloc(a,((s-1)/a+1)*a) \ 26 | ) 27 | 28 | #define _aligned_free(x) free(x) 29 | 30 | #define _atoi64(x) strtoll(x,NULL,10) 31 | #define sprintf_s(buf,...) 
snprintf((buf),sizeof(buf),__VA_ARGS__) 32 | #define strncpy_s(d,n,s,c) strncpy(d,s,c) 33 | #define vsprintf_s(d,n,t,v) vsnprintf(d,n,t,v) /* bounded by the buffer size n; plain vsprintf ignored n and could overrun d */ 34 | #define sscanf_s(buf,...) sscanf((buf),__VA_ARGS__) 35 | #define fscanf_s(f,t,...) fscanf(f,t,__VA_ARGS__) 36 | 37 | #endif // WIN_IMPORT_MIN_H 38 | 39 | -------------------------------------------------------------------------------- /src/yv12pict.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Chia-chen Kuo - April 2001 3 | * 4 | * This file is part of DVD2AVI, a free MPEG-2 decoder 5 | * 6 | * DVD2AVI is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * DVD2AVI is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with GNU Make; see the file COPYING. If not, write to 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 19 | * 20 | */ 21 | 22 | // replace with one that doesn't need fixed size table - trbarry 3-22-2002 23 | 24 | #include 25 | #include 26 | 27 | #include "yv12pict.h" 28 | #ifndef _WIN32 29 | #include "win_import_min.h" 30 | #endif 31 | 32 | //#define ptr_t unsigned int 33 | 34 | 35 | 36 | // memory allocation for MPEG2Dec3. 37 | // 38 | // Changed this to handle/track both width/pitch for when 39 | // width != pitch and it simply makes things easier to have all 40 | // information in this struct. It now uses 32y/16uv byte alignment 41 | // by default, which makes internal bugs easier to catch. This can 42 | // easily be changed if needed. 
43 | // 44 | // The definition of YV12PICT is in global.h 45 | // 46 | // tritical - May 16, 2005 47 | 48 | // Change to use constructor/destructor 49 | // chikuzen - Sep 6, 2016 50 | 51 | /* Owning constructor: allocates a 32-byte-aligned Y plane and 16-byte-aligned U/V planes. Chroma width is halved when chroma_format < 3 and chroma height when chroma_format < 2. Throws std::runtime_error if any allocation fails. */ 52 | YV12PICT::YV12PICT(int height, int width, int chroma_format) : 53 | allocated(true), 54 | ywidth(width), uvwidth(width), 55 | yheight(height), uvheight(height) 56 | { 57 | if (chroma_format < 3) { 58 | uvwidth /= 2; 59 | } 60 | if (chroma_format < 2) { 61 | uvheight /= 2; 62 | } 63 | 64 | uvpitch = (uvwidth + 15) & ~15; 65 | ypitch = (ywidth + 31) & ~31; 66 | 67 | y = reinterpret_cast(_aligned_malloc(static_cast(height) * ypitch, 32)); 68 | u = reinterpret_cast(_aligned_malloc(static_cast(uvheight) * uvpitch, 16)); 69 | v = reinterpret_cast(_aligned_malloc(static_cast(uvheight) * uvpitch, 16)); 70 | if (!y || !u || !v) { 71 | _aligned_free(y); 72 | _aligned_free(u); _aligned_free(v); /* release v as well: ~YV12PICT() does not run when the constructor throws */ 73 | throw std::runtime_error("failed to new YV12PICT"); 74 | } 75 | } 76 | 77 | /* Non-owning view of the Y/U/V planes of an existing frame; nothing is freed on destruction. */ 78 | YV12PICT::YV12PICT(PVideoFrame& frame) : 79 | allocated(false), 80 | y(frame->GetWritePtr(PLANAR_Y)), 81 | u(frame->GetWritePtr(PLANAR_U)), 82 | v(frame->GetWritePtr(PLANAR_V)), 83 | ypitch(frame->GetPitch(PLANAR_Y)), uvpitch(frame->GetPitch(PLANAR_U)), 84 | ywidth(frame->GetRowSize(PLANAR_Y)), uvwidth(frame->GetRowSize(PLANAR_U)), 85 | yheight(frame->GetHeight(PLANAR_Y)), uvheight(frame->GetHeight(PLANAR_U)) 86 | {} 87 | 88 | /* Non-owning view of caller-supplied planes; pitches are yw and cw rounded up to multiples of 32 and 16, and uvheight is set to h. */ 89 | YV12PICT::YV12PICT(uint8_t* py, uint8_t* pu, uint8_t* pv, int yw, int cw, int h) : 90 | allocated(false), 91 | y(py), u(pu), v(pv), 92 | ypitch((yw + 31) & ~31), uvpitch((cw + 15) & ~15), 93 | ywidth(yw), uvwidth(cw), yheight(h), uvheight(h) 94 | {} 95 | 96 | /* Frees the planes only if this object allocated them, then clears the pointers. */ 97 | YV12PICT::~YV12PICT() 98 | { 99 | if (allocated) { 100 | _aligned_free(y); 101 | _aligned_free(u); 102 | _aligned_free(v); 103 | } 104 | y = u = v = nullptr; 105 | } 106 | 107 | -------------------------------------------------------------------------------- /src/yv12pict.h: 
-------------------------------------------------------------------------------- 1 | #ifndef YV12PICT_H 2 | #define YV12PICT_H 3 | 4 | #include 5 | 6 | #include "avisynth.h" 7 | 8 | /* Planar Y/U/V picture descriptor: plane pointers plus pitch, width and height for luma and chroma. NOTE(review): the class has a freeing destructor but no user-declared copy constructor, so copying an owning instance would presumably free the planes twice - confirm no caller copies it. */ 9 | class YV12PICT { 10 | const bool allocated; /* true only when built by the allocating constructor; ~YV12PICT() frees y/u/v when set */ 11 | public: 12 | uint8_t* y, * u, * v; /* plane base pointers */ 13 | int ypitch, uvpitch; 14 | int ywidth, uvwidth; 15 | int yheight, uvheight; 16 | int pf; /* not initialised by any constructor; CMPEG2Decoder::assembleFrame assigns it */ 17 | 18 | YV12PICT(PVideoFrame& frame); /* non-owning: wraps the frame's planes */ 19 | YV12PICT(uint8_t* py, uint8_t* pu, uint8_t* pv, int yw, int cw, int h); /* non-owning: wraps caller-supplied planes */ 20 | YV12PICT(int height, int width, int chroma_format); /* owning: allocates aligned planes, throws std::runtime_error on failure */ 21 | ~YV12PICT(); 22 | }; 23 | 24 | #endif 25 | 26 | --------------------------------------------------------------------------------