├── .gitattributes
├── .gitignore
├── CHANGELOG.md
├── CMakeLists.txt
├── COPYING.txt
├── History.txt
├── README.md
├── README_old.md
├── cmake_uninstall.cmake.in
├── msvc
    ├── D2VSource.sln
    ├── D2VSource.vcxproj
    └── D2VSource.vcxproj.filters
└── src
    ├── AVISynthAPI.cpp
    ├── AVISynthAPI.h
    ├── MPEG2Decoder.cpp
    ├── MPEG2Decoder.h
    ├── color_convert.cpp
    ├── color_convert.h
    ├── d2vsource.rc
    ├── getbit.cpp
    ├── gethdr.cpp
    ├── getpic.cpp
    ├── global.cpp
    ├── global.h
    ├── idct.h
    ├── idct_ap922_sse2.cpp
    ├── idct_llm_float_avx2.cpp
    ├── idct_llm_float_sse2.cpp
    ├── idct_ref_sse3.cpp
    ├── mc.cpp
    ├── mc.h
    ├── misc.cpp
    ├── misc.h
    ├── store.cpp
    ├── win_import_min.h
    ├── yv12pict.cpp
    └── yv12pict.h


/.gitattributes:
--------------------------------------------------------------------------------
 1 | #sources
 2 | *.c text
 3 | *.cc text
 4 | *.cxx text
 5 | *.cpp text
 6 | *.c++ text
 7 | *.hpp text
 8 | *.h text
 9 | *.h++ text
10 | *.hh text
11 | 
12 | # Compiled Object files
13 | *.slo binary
14 | *.lo binary
15 | *.o binary
16 | *.obj binary
17 | 
18 | # Precompiled Headers
19 | *.gch binary
20 | *.pch binary
21 | 
22 | # Compiled Dynamic libraries
23 | *.so binary
24 | *.dylib binary
25 | *.dll binary
26 | 
27 | # Compiled Static libraries
28 | *.lai binary
29 | *.la binary
30 | *.a binary
31 | *.lib binary
32 | 
33 | # Executables
34 | *.exe binary
35 | *.out binary
36 | *.app binary
37 | ###############################################################################
38 | # Set default behavior to automatically normalize line endings.
39 | ###############################################################################
40 | * text=auto
41 | 
42 | ###############################################################################
43 | # Set the merge driver for project and solution files
44 | #
45 | # Merging from the command prompt will add diff markers to the files if there
46 | # are conflicts (Merging from VS is not affected by the settings below, in VS
47 | # the diff markers are never inserted). Diff markers may cause the following 
48 | # file extensions to fail to load in VS. An alternative would be to treat
49 | # these files as binary and thus will always conflict and require user
50 | # intervention with every merge. To do so, just comment the entries below and
51 | # uncomment the group further below
52 | ###############################################################################
53 | 
54 | *.sln        text eol=crlf
55 | *.csproj     text eol=crlf
56 | *.vbproj     text eol=crlf
57 | *.vcxproj    text eol=crlf
58 | *.vcproj     text eol=crlf
59 | *.dbproj     text eol=crlf
60 | *.fsproj     text eol=crlf
61 | *.lsproj     text eol=crlf
62 | *.wixproj    text eol=crlf
63 | *.modelproj  text eol=crlf
64 | *.sqlproj    text eol=crlf
65 | *.wwaproj    text eol=crlf
66 | 
67 | *.xproj      text eol=crlf
68 | *.props      text eol=crlf
69 | *.filters    text eol=crlf
70 | *.vcxitems   text eol=crlf
71 | 
72 | 
73 | #*.sln       merge=binary
74 | #*.csproj    merge=binary
75 | #*.vbproj    merge=binary
76 | #*.vcxproj   merge=binary
77 | #*.vcproj    merge=binary
78 | #*.dbproj    merge=binary
79 | #*.fsproj    merge=binary
80 | #*.lsproj    merge=binary
81 | #*.wixproj   merge=binary
82 | #*.modelproj merge=binary
83 | #*.sqlproj   merge=binary
84 | #*.wwaproj   merge=binary
85 | 
86 | #*.xproj     merge=binary
87 | #*.props     merge=binary
88 | #*.filters   merge=binary
89 | #*.vcxitems  merge=binary
90 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.exe
 3 | *.dll
 4 | *.lib
 5 | *.log
 6 | *.diff
 7 | *.patch
 8 | *.old
 9 | *.bak
10 | *.orig
11 | *.rej
12 | *.aps
13 | *.ncb
14 | *.opensdf
15 | *.sdf
16 | *.VC.db
17 | *.VC.opendb
18 | *.suo
19 | *.user
20 | *.obj
21 | *.res
22 | *.exp
23 | *.ilk
24 | *.pdb
25 | build/*/*/ipch/*
26 | build/*/*/Debug/*
27 | build/*/*/Release/*
28 | bin/msvc*/*
29 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ##### 1.3.0:
 2 |     Changed `_SARDen`, `_SARNum` to display MPEG-4 PAR. (videoh)
 3 |     Changed `_AspectRatio` type to array int.
 4 | 
 5 | ##### 1.2.6:
 6 |     Restored previous behavior of frame property `_FieldBased`.
 7 |     Fixed frame properties `_DurationNum` and `_DurationDen`.
 8 |     Added frame properties `_SARDen`, `_SARNum`, `_FieldOrder`, `_FieldOperation`, `_TFF`, `_RFF`, `_Film`, `_ProgressiveFrame`, `_ChromaLocation`, `_AbsoluteTime`. (videoh)
 9 |     Added parameters `nocrop` and `rff`.
10 | 
11 | ##### 1.2.5:
12 |     Fixed frame property `_FieldBased`.
13 | 
14 | ##### 1.2.4:
15 |     Fixed regression for relative file paths.
16 | 
17 | ##### 1.2.3:
18 |     Fixed FFSAR_NUM, FFSAR_DEN, FFSAR.
19 | 
20 | ##### 1.2.2:
21 |     Fixed values of frame properties _Quants* when info=0.
22 | 
23 | ##### 1.2.1:
24 |     Added support for path with forward slash (Windows).
25 | 
26 | ##### 1.2.0:
27 |     Added variables (ffms2 like) - FFSAR_NUM, FFSAR_DEN, FFSAR.
28 | 
29 | ##### 1.1.0:
30 |     Set frame properties - _DurationNum, _DurationDen, _FieldBased, _AspectRatio, _GOPNumber, _GOPPosition, _GOPClosed, _EncodedFrameTop, _EncodedFrameBottom, _PictType, _Matrix, _QuantsAverage, _QuantsMin, _QuantsMax.
31 | 
32 | ##### 1.0.0:
33 |     Renamed the plugin and function to D2VSource.
34 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16)
 2 | 
 3 | project(libd2vsource LANGUAGES CXX)
 4 | 
 5 | # Take the version suffix of the library file name from the most recent Git tag.
 6 | find_package (Git)
 7 | if (GIT_FOUND)
 8 |     # Query the source tree explicitly; without WORKING_DIRECTORY the command runs
 9 |     # in CMake's working directory, which for an out-of-tree build may lie outside
10 |     # the repository.
11 |     execute_process (COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0
12 |         WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
13 |         RESULT_VARIABLE git_describe_result
14 |         OUTPUT_VARIABLE ver
15 |         ERROR_QUIET
16 |         OUTPUT_STRIP_TRAILING_WHITESPACE
17 |     )
18 |     if (NOT git_describe_result EQUAL 0)
19 |         # No tag reachable, or the sources are not a Git checkout.
20 |         set (ver "")
21 |         message (STATUS "Could not determine the version from Git tags")
22 |     endif ()
23 | else ()
24 |     message (STATUS "GIT not found")
25 | endif ()
26 | 
27 | add_library(d2vsource SHARED
28 |     src/AVISynthAPI.cpp
29 |     src/color_convert.cpp
30 |     src/getbit.cpp
31 |     src/gethdr.cpp
32 |     src/getpic.cpp
33 |     src/global.cpp
34 |     src/idct_ap922_sse2.cpp
35 |     src/idct_llm_float_avx2.cpp
36 |     src/idct_llm_float_sse2.cpp
37 |     src/idct_ref_sse3.cpp
38 |     src/mc.cpp
39 |     src/misc.cpp
40 |     src/MPEG2Decoder.cpp
41 |     src/store.cpp
42 |     src/yv12pict.cpp
43 | )
44 | 
45 | # Default to a Release build when no build type was requested.
46 | if (NOT CMAKE_BUILD_TYPE)
47 |     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
48 | endif()
49 | 
50 | message(STATUS "Build type - ${CMAKE_BUILD_TYPE}")
51 | 
52 | # Instruction-set flags are applied per file, so only the dedicated IDCT
53 | # translation units are compiled with SSE2/SSSE3/AVX2+FMA enabled.
54 | set_source_files_properties(src/idct_ap922_sse2.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse2")
55 | set_source_files_properties(src/idct_llm_float_sse2.cpp PROPERTIES COMPILE_OPTIONS "-mfpmath=sse;-msse2")
56 | set_source_files_properties(src/idct_ref_sse3.cpp PROPERTIES COMPILE_OPTIONS "-mssse3")
57 | set_source_files_properties(src/idct_llm_float_avx2.cpp PROPERTIES COMPILE_OPTIONS "-mavx2;-mfma")
58 | 
59 | target_include_directories(d2vsource PRIVATE
60 |     ${CMAKE_CURRENT_SOURCE_DIR}/src
61 |     /usr/local/include/avisynth
62 | )
63 | 
64 | # Append the tag to the file name only when one is known; an empty `ver` would
65 | # otherwise yield a name with a dangling dot ("d2vsource.").
66 | if (NOT "${ver}" STREQUAL "")
67 |     set_target_properties(d2vsource PROPERTIES OUTPUT_NAME "d2vsource.${ver}")
68 | else ()
69 |     set_target_properties(d2vsource PROPERTIES OUTPUT_NAME "d2vsource")
70 | endif ()
71 | 
72 | target_compile_features(d2vsource PRIVATE cxx_std_17)
73 | 
74 | include(GNUInstallDirs)
75 | 
76 | INSTALL(TARGETS d2vsource
77 |         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/avisynth")
78 | 
79 | # uninstall target
80 | if(NOT TARGET uninstall)
81 |   configure_file(
82 |     "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
83 |     "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
84 |     IMMEDIATE @ONLY)
85 | 
86 |   add_custom_target(uninstall
87 |     COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
88 | endif()
89 | 


--------------------------------------------------------------------------------
/COPYING.txt:
--------------------------------------------------------------------------------
  1 |             GNU GENERAL PUBLIC LICENSE
  2 |                Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
  5 |      59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                 Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Library General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |             GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                 NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |              END OF TERMS AND CONDITIONS
281 | 
282 |         How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License
307 |     along with this program; if not, write to the Free Software
308 |     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
309 | 
310 | 
311 | Also add information on how to contact you by electronic and paper mail.
312 | 
313 | If the program is interactive, make it output a short notice like this
314 | when it starts in an interactive mode:
315 | 
316 |     Gnomovision version 69, Copyright (C) year  name of author
317 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
318 |     This is free software, and you are welcome to redistribute it
319 |     under certain conditions; type `show c' for details.
320 | 
321 | The hypothetical commands `show w' and `show c' should show the appropriate
322 | parts of the General Public License.  Of course, the commands you use may
323 | be called something other than `show w' and `show c'; they could even be
324 | mouse-clicks or menu items--whatever suits your program.
325 | 
326 | You should also get your employer (if you work as a programmer) or your
327 | school, if any, to sign a "copyright disclaimer" for the program, if
328 | necessary.  Here is a sample; alter the names:
329 | 
330 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
331 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
332 | 
333 |   <signature of Ty Coon>, 1 April 1989
334 |   Ty Coon, President of Vice
335 | 
336 | This General Public License does not permit incorporating your program into
337 | proprietary programs.  If your program is a subroutine library, you may
338 | consider it more useful to permit linking proprietary applications with the
339 | library.  If this is what you want to do, use the GNU Library General
340 | Public License instead of this License.
341 | 


--------------------------------------------------------------------------------
/History.txt:
--------------------------------------------------------------------------------
  1 | 2005/01/11
  2 | ----------
  3 | 
  4 | Fixed a little bug with the info option, and added faster mmx conv
  5 | routines for 4:2:2 planar -> 4:2:2 packed and vice versa.
  6 | 
  7 | by "tritical"
  8 | 
  9 | 2005/01/05
 10 | ----------
 11 | 
 12 | This is a 1.0.13b4 version of dgdecode but with the following changes:
 13 | 
 14 | 1.) added info option to mpeg2source() to display info on frames
 15 |       - example usage:  mpeg2source(info=true)
 16 | 
 17 | 2.) iPP is a bool again.  If it is not explicitly set to true or false then it
 18 |        defaults to auto which switches between field/frame based pp based on the
 19 |        progressive_frame flag
 20 | 
 21 | 3.) 4:2:2 input support
 22 |       - can now correctly decode 4:2:2 input sources and correctly output it
 23 |            to AviSynth as YUY2
 24 | 
 25 |       *NOTE:  mpeg2source() now throws an error on 4:4:4 input, it could handle it
 26 |                but AviSynth 2.5 does not have internal YUV 4:4:4 support
 27 | 
 28 |     3a.) 4:2:2 PLANAR post-processing support
 29 | 
 30 | 4.) Upsampling to 4:2:2 from 4:2:0 based on progressive_frame flag.  Uses new
 31 |        upConv parameter of mpeg2source().
 32 | 
 33 |       - example   mpeg2source(upConv=true)
 34 | 
 35 |       *NOTES:  it will only work if input is 4:2:0, if input is anything else
 36 |                 then the upConv parameter is ignored
 37 | 
 38 | 5.) BlindPP now supports YUY2 colorspace
 39 | 
 40 | 6.) other stuff:
 41 | 
 42 |       - fixed blindPP syntax bug  (x and X)
 43 |       - fixed a small bug with showQ option
 44 |       - fixed small memory leak with FrameList/GOPList not being free'd
 45 |       - fixed a bug in the vertical chroma deblocking postprocessing
 46 |               QP pointer being passed was incorrect for 4:2:0
 47 |       - faster mmx 4:2:2 to packed YUY2 and YUY2 to planar 4:2:2 conversions
 48 |       - info output would not work correctly if temporal_reference was not zero
 49 |         based at the beginning of gops, it does now
 50 | 
 51 | by "tritical"
 52 | 
 53 | changes.doc (Sept 11, 2003)
 54 | --------------------------
 55 | 
 56 | Modified the decoding and random access code so that it never has
 57 | to skip B frames. This was the cause of the frame dropping problem.
 58 | This version must be used with an appropriately modified DVD2AVI
 59 | version, for example, DGIndex. If these two are used together,
 60 | frames will never be dropped. Refer to the DVD2AVI forum at doom9.org
 61 | for a discussion of this problem. Search for the thread "does dvd2avi
 62 | chop off frames?". Also fixes several bugs in MPEG2 decoding.
 63 | 
 64 | Note: This version is derived from Nic's MPEG2DEC3 1.10.
 65 | 
 66 | by Donald A. Graft
 67 | 
 68 | changes.doc (08 dec 2002)
 69 | -------------------------
 70 | 
 71 | here's the code of MPEG2Dec3
 72 | it's based on MPEG2Dec2, some parts of the code are intact, others heavily modified.
 73 | you'll need both nasm & masm to compile it.
 74 | it should compile flawlessly under M$ VC++ 6 SP4 (the compiler i use)
 75 | you can use the profiling define (commented out in global.h) to test optimisations.
 76 | BTW, if you don't have nasm, i added mcsse.obj to the sources.
 77 | 
 78 | MarcFD
 79 | 
 80 | changes.doc (05/10/2002)
 81 | -----------------
 82 | 
 83 | Removed Dividee filters from the sources.
 84 | 
 85 | Vlad59 (babas.lucas@laposte.net)
 86 | 
 87 | 
 88 | changes.doc (03/30/2002)
 89 | -----------------
 90 | 
 91 | In addition to pcdvdguy's changes below this version also is optimized for P4/SSE2 code. This will be used whenever the machine supports it and "iDCT_Algorithm=5" is specified in the .d2v file. That can be put there with an editor or by using the newer version of DVD2AVI that also supports this. See the save-oe project on Sourceforge.
 92 | 
 93 | This version also has other minor optimizations and something of a fix for crashing on garbage data such as ATSC HDTV captures.
 94 | 
 95 | If cropping has been specified in DVD2AVI then it will now work, without messing up the color. If resizing was specified it will still be ignored, so you'll have to do that in your .Avisynth script or elsewhere.
 96 | 
 97 | Tom Barry <trbarry@trbarry.com>
 98 | 
 99 | 
100 | 
101 | changes.doc (03/29/2002, late at night...)
102 | -----------------
103 | This document explains the changes made to the DVD2AVI/MPEG2DEC.DLL source-code, for the purpose of decoding MPEG-2 transport streams.
104 | 
105 | Special thanks to Ben Cooley, for writing HDTVtoMPEG2, a great source of inspiration!
106 | 
107 | MPEG-2 transport stream demuxing
108 | 
109 | 	This feature allows MPEG2DEC.DLL to parse MPEG2 transport streams (*.trp, *.ts), and decode MPEG-1/2 video elementary streams.  It has been successfully tested with several ATSC/DTV broadcasters in the Southern California area.  The code to support this feature is still considered 'preliminary', and suffers from the following limitations:
110 | 
111 | 1) When opened in DVD2AVI, the input-filename extension *.trp and *.ts are unconditionally treated as transport-streams.  Otherwise, other input-filenames are briefly checked (first 2048 bytes) for an MPEG-2 transport sync-byte sequence.  If this sequence is found, the entire stream is treated as an MPEG-2 transport stream.
112 | 
113 | 2) There is currently *NO* GUI to select the video-ID and audio-ID.  These variables are stored in the DVD2AVI.ini file, and the user (that's YOU) must manually edit the ini file to set the video-ID and audio-ID.
114 | 
115 | 2) If you want to use avisynth with DVD2AVI, you will need the updated MPEG2DEC.DLL.  The updated DLL has added support for transport-stream demuxing.  If you want to use DVD2AVI as a VFAPI frameserver, you will need the updated DVD2AVI.VFP (for the same reason.)
116 | 
117 | 3) Do NOT mix MPEG-2 program streams (*.vob, *.mpg) and MPEG-2 transport streams (*.trp, *.ts) in the same d2v project-file!
118 | 
119 | 4) DVD2AVI is sensitive to bitstream errors.  Your broadcast DTV recordings may contain errors, which will manifest as distortion, image breakup, and other visual artifacts.  At worst, mpeg2dec.dll can crash.  Unfortunately, there is no way to guard against this except to routinely check your DTV-receiver's quality-monitor and adjust accordingly.
120 | 
121 | mpeg2dec.dll relies on the variable "SystemStream_Flag==2" to identify transport-streams.  (In the d2v-project file, the following line indicates the video_stream_id and audio_stream_id, but only the video_stream_id is used.)
122 | 
123 | 5) The ATSC DTV standard allows many different formats. To work with the highest-resolution mode (1920x1080 30i), a fast CPU and lots of memory is recommended!
124 | 
125 | Source code-changes
126 | ----------------------------
127 | SystemStream_Flag == 2 ; // MPEG-2 transport stream (calls to Next_Packet() are redirected to Next_Transport_Packet() )
128 | 
129 | global.h - add declaration for function "Next_Transport_Packet()"
130 |   int MPEG2_Transport_VideoPID;  // VideoID for MPEG-2 transport streams
131 |   int MPEG2_Transport_AudioPID;// AudioID for MPEG-2 transport streams
132 | 
133 | getbit.c - add function Next_Transport_Packet(), this does the bulk of the work!
134 | 
135 | gui.cpp - initializes the variables (MPEG2_Transport_VideoPID, MPEG2_Transport_AudioPID), first by reading from dvd2avi.ini, and then by re-reading those values from the D2V project file (if one is opened, and the SystemStream_Flag==2.)  Also modify the 'open' dialog-box, to add an entry for file-filter ("*.trp, *.ts")
136 | 
137 | mpeg2dec.c - scans the first 2048 bytes of the MPEG-bitstream (to check if it's an MPEG-2 transport-stream.)
138 | 
139 | ---
140 | These source-files are changes to the mpeg2dec_dll.zip file from www.davetech.org/software2.htm
141 | 
142 | liaor@iname.com
143 | http://members.tripod.com/~liaor
144 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## D2VSource
  2 | 
  3 | This is a project (previously named as MPEG2DecPlus) to modify DGDecode.dll for AviSynth+.
  4 | 
  5 | ### Requirements:
  6 | 
  7 | - AviSynth 2.60 / AviSynth+ 3.4 or later
  8 | 
  9 | - Microsoft VisualC++ Redistributable Package 2022 (can be downloaded from [here](https://github.com/abbodi1406/vcredist/releases))
 10 | 
 11 | ### Usage:
 12 | 
 13 |  ```
 14 |  D2VSource(string "d2v", int "idct", bool "showQ", int "info", int "upConv", bool "i420", bool "iCC", bool "nocrop", int "rff")
 15 |  ```
 16 | 
 17 | ### Parameters:
 18 | 
 19 | - d2v\
 20 |     The path of the d2v file.
 21 | 
 22 | - idct\
 23 |     The iDCT algorithm to use.\
 24 |     0: Follow the d2v specification.\
 25 |     1,2,3,6,7: AP922 integer (same as SSE2/MMX).\
 26 |     4: SSE2/AVX2 LLM (single precision floating point, SSE2/AVX2 determination is automatic).\
 27 |     5: IEEE 1180 reference (double precision floating point).\
 28 |     Default: -1.
 29 | 
 30 | - showQ\
 31 |     It displays macroblock quantizers.\
 32 |     Default: False.
 33 | 
 34 | - info\
 35 |     It prints debug information.\
 36 |     0: Do not display.\
 37 |     1: Overlay on video frame.\
 38 |     2: Output with OutputDebugString(). (The output can be viewed with DebugView.exe).\
 39 |     3: Embed hints in 64 bytes of the frame upper left corner.\
 40 |     Default: 0.
 41 | 
 42 | - upConv\
 43 |     The output format.\
 44 |     0: No conversion. YUV420 output is YV12, YUV422 output is YV16.\
 45 |     1: Output YV16.\
 46 |     2: Output YV24.\
 47 |     Default: 0.
 48 | 
 49 | - i420\
 50 |     It determines the output format for YUV420.\
 51 |     True: The output is i420.\
 52 |     False: The output is YV12.\
 53 |     Default: False.
 54 | 
 55 | - iCC\
 56 |     It determines how YUV420 is upsampled when upConv is 1 or 2.\
 57 |     True: Force field-based upsampling.\
 58 |     False: Force progressive upsampling.\
 59 |     Default: Auto determination based on the frame flag.
 60 | 
 61 | - nocrop\
 62 |     Use direct-rendered buffer, which may need cropping.\
 63 |     It could provide a speedup when you know you need to crop your image anyway, by avoiding extra memcpy calls.\
 64 |     Default: False.
 65 | 
 66 | - rff\
 67 |     Overrides Field_Operation without the need to edit the d2v file or re-index with a different Field Operation setting.\
 68 |     0: Honor pulldown flags.\
 69 |     1: Force film.\
 70 |     2: Ignore pulldown flags.\
 71 |     Default: -1 - read the value from d2v.
 72 | 
 73 | ### Exported variables:
 74 | 
 75 | FFSAR_NUM, FFSAR_DEN, FFSAR (these indicate Generic PAR).
 76 | 
 77 | ### Frame properties
 78 | 
 79 | _AbsoluteTime [float]\
 80 | The frame’s absolute timestamp in seconds.
 81 | 
 82 | _AspectRatio [int]\
 83 | An array giving the display aspect ratio.
 84 | 
 85 | _ChromaLocation [int]\
 86 | Chroma sample position in YUV formats:
 87 | 0=left, 1=center, 2=topleft, 3=top, 4=bottomleft, 5=bottom.
 88 | 
 89 | _DurationNum [int], _DurationDen [int]\
 90 | The frame’s duration in seconds as a rational number.
 91 | 
 92 | _EncodedFrameTop [int], _EncodedFrameBottom [int]\
 93 | Frame number (before pulldown) used to generate this frame's
 94 | top/bottom field.
 95 | 
 96 | _FieldBased [int]\
 97 | Describes the composition of the frame:\
 98 | 0=frame based (progressive), 1=bottom field first, 2=top field first.\
 99 | Note that the GOP progressive flag is used to determine whether the frame is progressive.
100 | 
101 | _FieldOperation [int]\
102 | Describes the field operation option in effect:\
103 | 0=honor pulldown, 1=force film, 2=ignore pulldown.
104 | 
105 | _FieldOrder [int]\
106 | Display field order of the frame:\
107 | 0=bottom field first, 1=top field first.
108 | 
109 | _Film [int]\
110 | Set if the frame is part of a 3:2 soft pulldown section.\
111 | Note that this uses the RFF history of several preceding\
112 | frames, and so is valid only when doing linear access.
113 | 
114 | _GOPClosed [int]\
115 | Set if the current GOP is closed.
116 | 
117 | _GOPNumber [int]\
118 | The 0-based GOP number that contains the frame. Note that\
119 | if this is set as the value x, then propShow displays it as\
120 | [x, y] where y is the 0-based frame number of the first frame\
121 | in the GOP.
122 | 
123 | _GOPPosition [int]\
124 | The GOP position field from the D2V file for the GOP containing\
125 | the frame.
126 | 
127 | _Matrix [int]\
128 | The matrix number field from the D2V file for the GOP containing\
129 | the frame.
130 | 
131 | _PictType [data]\
132 | A single character describing the frame type. It uses the common\
133 | IPB characters but others may also be used for formats with\
134 | additional frame types.
135 | 
136 | _ProgressiveFrame [int]\
137 | Set if the progressive_frame flag is set for this frame.
138 | 
139 | _QuantsAverage [int]\
140 | The average quantizer value for the frame.
141 | 
142 | _QuantsMax [int]\
143 | The maximum quantizer value for the frame.
144 | 
145 | _QuantsMin [int]\
146 | The minimum quantizer value for the frame.
147 | 
148 | _RFF [int]\
149 | If _FieldOperation is 2 (ignore pulldown) then _RFF describes whether the stream specifies that a repeat field operation is to be performed on this frame. If _FieldOperation is 0 (honor pulldown) or 1 (force film) then _RFF describes whether the frame was composed with field repetition.
150 | 
151 | _SARDen [int]\
152 | The denominator of the "pixel size" (MPEG-4 PAR), also called the\
153 | Sample Aspect Ratio (SAR).
154 | 
155 | _SARNum [int]\
156 | The numerator of the "pixel size" (MPEG-4 PAR), also called the\
157 | Sample Aspect Ratio (SAR).
158 | 
159 | _TFF [int]\
160 | If _FieldOperation is 2 (ignore pulldown) and _RFF is set, then _TFF\
161 | describes whether the stream specifies that the top field is to be repeated, otherwise the bottom field is to be repeated. If _FieldOperation is 0 (honor pulldown) or 1 (force film) then _TFF is inapplicable and is set to -1.
162 | 
163 | ### Building:
164 | 
165 | - Windows\
166 |     Use solution files.
167 | 
168 | - Linux
169 |     ```
170 |     Requirements:
171 |         - Git
172 |         - C++17 compiler
173 |         - CMake >= 3.16
174 |     ```
175 |     ```
176 |     git clone https://github.com/Asd-g/MPEG2DecPlus && \
177 |     cd MPEG2DecPlus && \
178 |     mkdir build && \
179 |     cd build && \
180 | 
181 |     cmake ..
182 |     make -j$(nproc)
183 |     sudo make install
184 |     ```
185 | 


--------------------------------------------------------------------------------
/README_old.md:
--------------------------------------------------------------------------------
 1 | # MPEG2DecPlus
 2 | これはDGDecode.dllをAvisynth+用に改造するプロジェクトです。
 3 | 
 4 | ###やりたいこと:
 5 |     - 改築を重ねた温泉旅館のようなコードをきれいにする。
 6 |     - VFAPI用コード、YUY2用コード等、現在では必要ないコードの排除。
 7 |     - アセンブラの排除による64bitへの対応、及びSSE2/AVX2でのintrinsicによる最適化。等
 8 | 
 9 | ###必要なもの:
10 |     - Windows Vista SP2 以降の Windows OS
11 |     - SSE3が使えるCPU(Intel Pentium4(prescott) または AMD Athlon64x2 以降)
12 |     - Avisynth+ r2172以降 またはAvisynth 2.60以降
13 |     - Microsoft VisualC++ Redistributable Package 2019.
14 | 
15 |  ###使い方:
16 |  ```
17 |  MPEG2Source(string "d2v", int "cpu", int "idct", bool "iPP", int "moderate_h", int "moderate_v",
18 |              bool "showQ", bool "fastMC", string "cpu2", int "info", int "upConv", bool "i420", bool "iCC")
19 |  ```
20 |     d2v: d2vファイルのパス
21 | 
22 |     cpu: 現在使用不可。設定しても何も起こらない。iPP, moderate_h, moderate_v, fastMC, cpu2も同様。
23 | 
24 |     idct: 使用するiDCTアルゴリズム。
25 |         0: d2vの指定に従う。
26 |         1,2,3,6,7: AP922整数(SSE2MMXと同じもの)。
27 |         4: SSE2/AVX2 LLM(単精度浮動小数点、SSE2/AVX2の判定は自動)。
28 |         5: IEEE 1180 reference(倍精度浮動小数点)。
29 | 
30 |     showQ: マクロブロックの量子化器を表示する。
31 | 
32 |     info: デバッグ情報を出力する。
33 |         0: 表示しない。(デフォルト)
34 |         1: 動画フレームにオーバーレイで表示。
35 |         2: OutputDebugString()で出力。(内容はDebugView.exeで確認)
36 |         3: hintsをフレーム左上隅の64バイトに埋め込む。
37 | 
38 |     upConv: フレームを出力するフォーマット。
39 |         0: YUV420なソースはYV12で出力、YUV422なソースはYV16で出力。
40 |         1: YV16で出力。
41 |         2: YV24で出力。
42 | 
43 |     i420: trueであればYUV420をi420として出力する。現在ではどちらでもほぼ変わりはない。
44 | 
45 |     iCC: upConvにおけるYUV420の取扱いの設定。
46 |         未設定: フレームフラグに従ってinterlaced/progressiveを切り替える。
47 |         true: 全フレームをinterlacedとして処理する。
48 |         false: 全フレームをprogressiveとして処理する。
49 | 
50 | 
51 | ```
52 | LumaYUV(clip c, int "lumoff", int "lumgain")
53 | ```
54 | 入力クリップの輝度をlumoffとlumgainの値によって変更する。出力Y = (入力y * lumgain) + lumoff
55 | 
56 |     clip: Y8, YV12, YV16, YV411, YV24をサポート。
57 | 
58 |     lumoff: -255 ～ 255 (デフォルト0)
59 | 
60 |     lumgain: 0.0 ～ 2.0 (デフォルト1.0)
61 | 
62 | ###ソースコード
63 | 	https://github.com/chikuzen/MPEG2DecPlus/
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/cmake_uninstall.cmake.in:
--------------------------------------------------------------------------------
 1 | # Uninstall script template. The @CMAKE_BINARY_DIR@ and @CMAKE_COMMAND@ placeholders
 2 | # must be substituted before this script runs (presumably via configure_file();
 3 | # confirm against CMakeLists.txt).
 4 | if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
 5 |   message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt")
 6 | endif()
 7 | 
 8 | # The manifest holds one path per line; convert it to a CMake list.
 9 | file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files)
10 | string(REGEX REPLACE "\n" ";" files "${files}")
11 | foreach(file ${files})
12 |   message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
13 |   if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
14 |     # execute_process() replaces the deprecated exec_program(), which newer CMake
15 |     # releases reject (policy CMP0153). Arguments are passed without a shell, so
16 |     # the path needs no manual quoting or escaping.
17 |     execute_process(
18 |       COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
19 |       OUTPUT_VARIABLE rm_out
20 |       RESULT_VARIABLE rm_retval
21 |       )
22 |     if(NOT "${rm_retval}" STREQUAL 0)
23 |       message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
24 |     endif()
25 |   else()
26 |     message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
27 |   endif()
28 | endforeach()
29 | 


--------------------------------------------------------------------------------
/msvc/D2VSource.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio Version 16
 4 | VisualStudioVersion = 16.0.30204.135
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D2VSource", "D2VSource.vcxproj", "{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|x64 = Debug|x64
11 | 		Debug|x86 = Debug|x86
12 | 		Release|x64 = Release|x64
13 | 		Release|x86 = Release|x86
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x64.ActiveCfg = Debug|x64
17 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x64.Build.0 = Debug|x64
18 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x86.ActiveCfg = Debug|Win32
19 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Debug|x86.Build.0 = Debug|Win32
20 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x64.ActiveCfg = Release|x64
21 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x64.Build.0 = Release|x64
22 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x86.ActiveCfg = Release|Win32
23 | 		{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}.Release|x86.Build.0 = Release|Win32
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | 	GlobalSection(ExtensibilityGlobals) = postSolution
29 | 		SolutionGuid = {80DFC486-AC15-406C-BBDA-6D722A495010}
30 | 	EndGlobalSection
31 | EndGlobal
32 | 


--------------------------------------------------------------------------------
/msvc/D2VSource.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Debug|x64">
 13 |       <Configuration>Debug</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <VCProjectVersion>16.0</VCProjectVersion>
 23 |     <ProjectGuid>{BE45DDEC-9C54-4E57-BE8C-3CDE734BEAA6}</ProjectGuid>
 24 |     <Keyword>Win32Proj</Keyword>
 25 |     <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
 26 |   </PropertyGroup>
 27 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 28 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 29 |     <ConfigurationType>DynamicLibrary</ConfigurationType> <!-- was Application; every other configuration of this project builds a DLL -->
 30 |     <UseDebugLibraries>true</UseDebugLibraries>
 31 |     <PlatformToolset>v142</PlatformToolset>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 34 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 35 |     <UseDebugLibraries>false</UseDebugLibraries>
 36 |     <PlatformToolset>v142</PlatformToolset>
 37 |   </PropertyGroup>
 38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 39 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 40 |     <UseDebugLibraries>true</UseDebugLibraries>
 41 |     <PlatformToolset>v142</PlatformToolset>
 42 |   </PropertyGroup>
 43 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 44 |     <ConfigurationType>DynamicLibrary</ConfigurationType>
 45 |     <UseDebugLibraries>false</UseDebugLibraries>
 46 |     <PlatformToolset>v142</PlatformToolset>
 47 |   </PropertyGroup>
 48 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 49 |   <ImportGroup Label="ExtensionSettings">
 50 |   </ImportGroup>
 51 |   <ImportGroup Label="Shared">
 52 |   </ImportGroup>
 53 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 54 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 57 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 58 |   </ImportGroup>
 59 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <PropertyGroup Label="UserMacros" />
 66 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 67 |     <LinkIncremental>true</LinkIncremental>
 68 |   </PropertyGroup>
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 70 |     <LinkIncremental>false</LinkIncremental>
 71 |     <IncludePath>..\..\AviSynthPlus\avs_core\include;$(IncludePath)</IncludePath>
 72 |   </PropertyGroup>
 73 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 74 |     <IncludePath>..\..\AviSynthPlus\avs_core\include;$(IncludePath)</IncludePath>
 75 |     <LinkIncremental>false</LinkIncremental>
 76 |   </PropertyGroup>
 77 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 78 |     <IncludePath>..\..\AviSynthPlus\avs_core\include;$(IncludePath)</IncludePath>
 79 |   </PropertyGroup>
 80 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 81 |     <ClCompile>
 82 |       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 83 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
 84 |       <WarningLevel>Level3</WarningLevel>
 85 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
 86 |       <Optimization>Disabled</Optimization>
 87 |     </ClCompile>
 88 |     <Link>
 89 |       <TargetMachine>MachineX86</TargetMachine>
 90 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 91 |       <SubSystem>Windows</SubSystem>
 92 |     </Link>
 93 |   </ItemDefinitionGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 95 |     <ClCompile>
 96 |       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 97 |       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
 98 |       <WarningLevel>Level3</WarningLevel>
 99 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
100 |       <MultiProcessorCompilation>true</MultiProcessorCompilation>
101 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
102 |       <IntrinsicFunctions>true</IntrinsicFunctions>
103 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
104 |       <BufferSecurityCheck>true</BufferSecurityCheck>
105 |       <FunctionLevelLinking>true</FunctionLevelLinking>
106 |       <FloatingPointModel>Precise</FloatingPointModel>
107 |       <LanguageStandard>stdcpp17</LanguageStandard>
108 |       <WholeProgramOptimization>true</WholeProgramOptimization>
109 |     </ClCompile>
110 |     <Link>
111 |       <TargetMachine>MachineX86</TargetMachine>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <SubSystem>Windows</SubSystem>
114 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
115 |       <OptimizeReferences>true</OptimizeReferences>
116 |       <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
117 |     </Link>
118 |   </ItemDefinitionGroup>
119 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
120 |     <ClCompile>
121 |       <MultiProcessorCompilation>true</MultiProcessorCompilation>
122 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
123 |       <IntrinsicFunctions>true</IntrinsicFunctions>
124 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
125 |       <BufferSecurityCheck>true</BufferSecurityCheck>
126 |       <FunctionLevelLinking>true</FunctionLevelLinking>
127 |       <LanguageStandard>stdcpp17</LanguageStandard>
128 |       <WholeProgramOptimization>true</WholeProgramOptimization>
129 |       <FloatingPointModel>Precise</FloatingPointModel>
130 |     </ClCompile>
131 |     <Link>
132 |       <OptimizeReferences>true</OptimizeReferences>
133 |     </Link>
134 |     <Link>
135 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
136 |       <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
137 |     </Link>
138 |   </ItemDefinitionGroup>
139 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
140 |     <ClCompile>
141 |       <LanguageStandard>stdcpp17</LanguageStandard>
142 |     </ClCompile>
143 |     <Link />
144 |   </ItemDefinitionGroup>
145 |   <ItemGroup>
146 |     <ClCompile Include="..\src\AVISynthAPI.cpp" />
147 |     <ClCompile Include="..\src\color_convert.cpp" />
148 |     <ClCompile Include="..\src\getbit.cpp" />
149 |     <ClCompile Include="..\src\gethdr.cpp" />
150 |     <ClCompile Include="..\src\getpic.cpp" />
151 |     <ClCompile Include="..\src\global.cpp">
152 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
153 |     </ClCompile>
154 |     <ClCompile Include="..\src\idct_ap922_sse2.cpp" />
155 |     <ClCompile Include="..\src\idct_llm_float_avx2.cpp">
156 |       <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
157 |       <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
158 |       <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
159 |     </ClCompile>
160 |     <ClCompile Include="..\src\idct_llm_float_sse2.cpp" />
161 |     <ClCompile Include="..\src\idct_ref_sse3.cpp" />
162 |     <ClCompile Include="..\src\mc.cpp" />
163 |     <ClCompile Include="..\src\misc.cpp" />
164 |     <ClCompile Include="..\src\MPEG2Decoder.cpp" />
165 |     <ClCompile Include="..\src\store.cpp" />
166 |     <ClCompile Include="..\src\yv12pict.cpp">
167 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
168 |     </ClCompile>
169 |   </ItemGroup>
170 |   <ItemGroup>
171 |     <ClInclude Include="..\src\AVISynthAPI.h" />
172 |     <ClInclude Include="..\src\color_convert.h" />
173 |     <ClInclude Include="..\src\global.h">
174 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
175 |     </ClInclude>
176 |     <ClInclude Include="..\src\idct.h" />
177 |     <ClInclude Include="..\src\mc.h" />
178 |     <ClInclude Include="..\src\misc.h" />
179 |     <ClInclude Include="..\src\MPEG2Decoder.h" />
180 |     <ClInclude Include="..\src\win_import_min.h" />
181 |     <ClInclude Include="..\src\yv12pict.h">
182 |       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
183 |     </ClInclude>
184 |   </ItemGroup>
185 |   <ItemGroup>
186 |     <ResourceCompile Include="..\src\d2vsource.rc" />
187 |   </ItemGroup>
188 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
189 |   <ImportGroup Label="ExtensionTargets">
190 |   </ImportGroup>
191 | </Project>


--------------------------------------------------------------------------------
/msvc/D2VSource.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="..\src\AVISynthAPI.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |     <ClCompile Include="..\src\color_convert.cpp">
22 |       <Filter>Source Files</Filter>
23 |     </ClCompile>
24 |     <ClCompile Include="..\src\getbit.cpp">
25 |       <Filter>Source Files</Filter>
26 |     </ClCompile>
27 |     <ClCompile Include="..\src\gethdr.cpp">
28 |       <Filter>Source Files</Filter>
29 |     </ClCompile>
30 |     <ClCompile Include="..\src\getpic.cpp">
31 |       <Filter>Source Files</Filter>
32 |     </ClCompile>
33 |     <ClCompile Include="..\src\global.cpp">
34 |       <Filter>Source Files</Filter>
35 |     </ClCompile>
36 |     <ClCompile Include="..\src\idct_ap922_sse2.cpp">
37 |       <Filter>Source Files</Filter>
38 |     </ClCompile>
39 |     <ClCompile Include="..\src\idct_llm_float_avx2.cpp">
40 |       <Filter>Source Files</Filter>
41 |     </ClCompile>
42 |     <ClCompile Include="..\src\idct_llm_float_sse2.cpp">
43 |       <Filter>Source Files</Filter>
44 |     </ClCompile>
45 |     <ClCompile Include="..\src\idct_ref_sse3.cpp">
46 |       <Filter>Source Files</Filter>
47 |     </ClCompile>
48 |     <ClCompile Include="..\src\mc.cpp">
49 |       <Filter>Source Files</Filter>
50 |     </ClCompile>
51 |     <ClCompile Include="..\src\misc.cpp">
52 |       <Filter>Source Files</Filter>
53 |     </ClCompile>
54 |     <ClCompile Include="..\src\MPEG2Decoder.cpp">
55 |       <Filter>Source Files</Filter>
56 |     </ClCompile>
57 |     <ClCompile Include="..\src\store.cpp">
58 |       <Filter>Source Files</Filter>
59 |     </ClCompile>
60 |     <ClCompile Include="..\src\yv12pict.cpp">
61 |       <Filter>Source Files</Filter>
62 |     </ClCompile>
63 |   </ItemGroup>
64 |   <ItemGroup>
65 |     <ClInclude Include="..\src\color_convert.h">
66 |       <Filter>Header Files</Filter>
67 |     </ClInclude>
68 |     <ClInclude Include="..\src\global.h">
69 |       <Filter>Header Files</Filter>
70 |     </ClInclude>
71 |     <ClInclude Include="..\src\mc.h">
72 |       <Filter>Header Files</Filter>
73 |     </ClInclude>
74 |     <ClInclude Include="..\src\misc.h">
75 |       <Filter>Header Files</Filter>
76 |     </ClInclude>
77 |     <ClInclude Include="..\src\MPEG2Decoder.h">
78 |       <Filter>Header Files</Filter>
79 |     </ClInclude>
80 |     <ClInclude Include="..\src\yv12pict.h">
81 |       <Filter>Header Files</Filter>
82 |     </ClInclude>
83 |     <ClInclude Include="..\src\idct.h">
84 |       <Filter>Header Files</Filter>
85 |     </ClInclude>
86 |     <ClInclude Include="..\src\AVISynthAPI.h">
87 |       <Filter>Header Files</Filter>
88 |     </ClInclude>
89 |     <ClInclude Include="..\src\win_import_min.h">
90 |       <Filter>Header Files</Filter>
91 |     </ClInclude>
92 |   </ItemGroup>
93 |   <ItemGroup>
94 |     <ResourceCompile Include="..\src\d2vsource.rc">
95 |       <Filter>Resource Files</Filter>
96 |     </ResourceCompile>
97 |   </ItemGroup>
98 | </Project>


--------------------------------------------------------------------------------
/src/AVISynthAPI.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Avisynth 2.5 API for MPEG2Dec3
 3 |  *
 4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
 5 |  *
 6 |  *  based on the initial MPEG2Dec Avisynth API Copyright (C) Mathias Born - May 2001
 7 |  *
 8 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
 9 |  *
10 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
11 |  *  it under the terms of the GNU General Public License as published by
12 |  *  the Free Software Foundation; either version 2, or (at your option)
13 |  *  any later version.
14 |  *
15 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
16 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 |  *  GNU General Public License for more details.
19 |  *
20 |  *  You should have received a copy of the GNU General Public License
21 |  *  along with GNU Make; see the file COPYING.  If not, write to
22 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 |  *
24 |  */
25 | 
26 | #ifndef MPEG2DECPLUS_AVS_API_H
27 | #define MPEG2DECPLUS_AVS_API_H
28 | 
29 | #include <cstdint>
30 | 
31 | #include "avisynth.h"
32 | #include "MPEG2Decoder.h"
33 | 
34 | 
35 | class D2VSource : public IClip {
         // AviSynth clip (IClip subclass) that holds a CMPEG2Decoder and the
         // VideoInfo describing the clip. The constructor, GetFrame(),
         // GetParity() and create() are only declared here; their definitions
         // are not part of this header.
36 |     VideoInfo vi;
37 |     //int _PP_MODE;
38 |     uint8_t* bufY, * bufU, * bufV; // for 4:2:2 input support
39 |     CMPEG2Decoder* decoder;
40 |     bool luminanceFlag;
41 |     uint8_t luminanceTable[256];
42 |     bool has_at_least_v8;
43 |     int history[5];
44 | 
45 | public:
46 |     D2VSource(const char* d2v, int idct, bool showQ, int _info, int _upConv, bool _i420, int iCC, int _rff, IScriptEnvironment* env);
         // The destructor body is empty: nothing is released here.
         // NOTE(review): decoder and bufY/bufU/bufV are raw pointers - confirm
         // they are freed elsewhere (not visible in this header).
47 |     ~D2VSource() {}
48 |     PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env);
49 |     bool __stdcall GetParity(int n);
         // Audio is not provided: GetAudio() leaves buf untouched.
50 |     void __stdcall GetAudio(void* buf, int64_t start, int64_t count, IScriptEnvironment* env) {};
51 |     const VideoInfo& __stdcall GetVideoInfo() { return vi; }
         // Answers only the CACHE_GET_MTMODE query (with MT_SERIALIZED);
         // every other hint returns 0.
52 |     int __stdcall SetCacheHints(int hints, int) { return hints == CACHE_GET_MTMODE ? MT_SERIALIZED : 0; };
53 |     static AVSValue __cdecl create(AVSValue args, void*, IScriptEnvironment* env);
54 | };
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/src/MPEG2Decoder.h:
--------------------------------------------------------------------------------
  1 | #ifndef MPEG2DECODER_H
  2 | #define MPEG2DECODER_H
  3 | 
  4 | #include <cmath>
  5 | #include <cstdint>
  6 | #include <cstdio>
  7 | #include <fcntl.h>
  8 | #include <string>
  9 | #include <vector>
 10 | 
 11 | #include "yv12pict.h"
 12 | #ifndef _WIN32
 13 | #include "win_import_min.h"
 14 | #else
 15 | #include <io.h>
 16 | #endif
 17 | 
 18 | 
 19 | /* code definition */
 20 | enum {
          // Start-code constants. Every value has the form 0x1NN, i.e. the
          // 0x000001 prefix that Next_Start_Code() searches for followed by a
          // one-byte code - presumably they are compared against a 32-bit
          // read that still contains the prefix (confirm in gethdr.cpp).
          // The first group is video-level codes.
 21 |     PICTURE_START_CODE = 0x100,
 22 |     SLICE_START_CODE_MIN = 0x101,
 23 |     SLICE_START_CODE_MAX = 0x1AF,
 24 |     USER_DATA_START_CODE = 0x1B2,
 25 |     SEQUENCE_HEADER_CODE = 0x1B3,
 26 |     EXTENSION_START_CODE = 0x1B5,
 27 |     SEQUENCE_END_CODE = 0x1B7,
 28 |     GROUP_START_CODE = 0x1B8,
 29 | 
          // Second group: system/pack/stream-level codes.
 30 |     SYSTEM_END_CODE = 0x1B9,
 31 |     PACK_START_CODE = 0x1BA,
 32 |     SYSTEM_START_CODE = 0x1BB,
 33 |     PRIVATE_STREAM_1 = 0x1BD,
 34 |     VIDEO_ELEMENTARY_STREAM = 0x1E0,
 35 | };
 36 | 
 37 | /* extension start code IDs */
 38 | enum {
          // Identifiers of the extension kinds; each name has a matching
          // CMPEG2Decoder parser method (sequence_extension(),
          // sequence_display_extension(), quant_matrix_extension(),
          // copyright_extension(), picture_display_extension(),
          // picture_coding_extension()). Values 5 and 6 are not defined here.
 39 |     SEQUENCE_EXTENSION_ID = 1,
 40 |     SEQUENCE_DISPLAY_EXTENSION_ID = 2,
 41 |     QUANT_MATRIX_EXTENSION_ID = 3,
 42 |     COPYRIGHT_EXTENSION_ID = 4,
 43 |     PICTURE_DISPLAY_EXTENSION_ID = 7,
 44 |     PICTURE_CODING_EXTENSION_ID = 8,
 45 | };
 46 | 
 47 | enum {
          // NOTE(review): none of these three constants is referenced in
          // this header; their meaning must be confirmed at the use sites.
 48 |     ZIG_ZAG = 0,
 49 |     MB_WEIGHT = 32,
 50 |     MB_CLASS4 = 64,
 51 | };
 52 | 
 53 | enum {
          // Picture coding types (I, P, B, D) - presumably the values held by
          // CMPEG2Decoder::picture_coding_type; each has a matching
          // Get_?_macroblock_type() method in the decoder.
 54 |     I_TYPE = 1,
 55 |     P_TYPE = 2,
 56 |     B_TYPE = 3,
 57 |     D_TYPE = 4,
 58 | };
 59 | 
 60 | enum {
          // Picture structure values - presumably what
          // CMPEG2Decoder::picture_structure holds. FRAME_PICTURE equals
          // TOP_FIELD | BOTTOM_FIELD.
 61 |     TOP_FIELD = 1,
 62 |     BOTTOM_FIELD = 2,
 63 |     FRAME_PICTURE = 3,
 64 | };
 65 | 
 66 | enum {
          // Motion-compensation type codes (see the motion_type parameters of
          // CMPEG2Decoder). MC_FRAME and MC_16X8 deliberately share the value
          // 2 - presumably the picture structure decides which one applies
          // (confirm in getpic.cpp).
 67 |     MC_FIELD = 1,
 68 |     MC_FRAME = 2,
 69 |     MC_16X8 = 2,
 70 |     MC_DMV = 3,
 71 | };
 72 | 
 73 | enum {
          // Motion-vector formats (implicit values 0 and 1) - presumably the
          // values carried by the mv_format / pmv_format parameters of
          // CMPEG2Decoder.
 74 |     MV_FIELD,
 75 |     MV_FRAME,
 76 | };
 77 | 
 78 | enum {
          // Chroma subsampling formats - presumably the values stored in
          // CMPEG2Decoder::chroma_format and returned by getChromaFormat().
 79 |     CHROMA420 = 1,
 80 |     CHROMA422 = 2,
 81 |     CHROMA444 = 3,
 82 | };
 83 | 
 84 | 
 85 | 
 86 | 
 87 | enum {
          // IDCT implementation selectors - presumably the values accepted by
          // CMPEG2Decoder::setIDCT(). Values 1 and 2 are not defined here.
 88 |     IDCT_AUTO = 0,
 89 |     IDCT_AP922_INT = 3,
 90 |     IDCT_LLM_FLOAT = 4,
 91 |     IDCT_REF = 5,
 92 | };
 93 | 
 94 | enum {
          // Field-operation modes - presumably the values of
          // CMPEG2Decoder::FO_Flag. FRAMELIST::type is documented as valid
          // only for FO_RAW.
 95 |     FO_NONE = 0,
 96 |     FO_FILM = 1,
 97 |     FO_RAW = 2,
 98 | };
 99 | 
100 | enum {
          // Stream type classification (0, 1, 2) - presumably the values of
          // CMPEG2Decoder::mpeg_type.
101 |     IS_NOT_MPEG = 0,
102 |     IS_MPEG1,
103 |     IS_MPEG2,
104 | };
105 | 
106 | // Fault_Flag values
107 | #define OUT_OF_BITS 11
108 | 
109 | 
// One entry of the decoder's GOP list (see CMPEG2Decoder::GOPList).
//
// The constructor copies its arguments into the fields and derives two
// flags from `type`:
//   closed      - 1 when bit 0x0400 of `type` is set, otherwise 0
//   progressive - 1 when bit 0x0200 of `type` is set, otherwise 0
// A `_matrix` value outside 0..7 is replaced by 3 ("reserved").
struct GOPLIST {
    uint32_t        number;
    int             file;
    int64_t         position;
    uint32_t        I_count;
    int             closed;
    int             progressive;
    int             matrix;

    GOPLIST(int _film, int _matrix, int _file, int64_t pos, int ic, uint32_t type)
        : number(_film)
        , file(_file)
        , position(pos)
        , I_count(ic)
        , closed((type & 0x0400) != 0 ? 1 : 0)
        , progressive((type & 0x0200) != 0 ? 1 : 0)
        , matrix((_matrix >= 0 && _matrix <= 7) ? _matrix : 3) // 3:reserved
    {
    }
};
129 | 
130 | struct FRAMELIST {
          // One entry of the decoder's frame list (see
          // CMPEG2Decoder::FrameList). Plain aggregate with no constructor.
          // NOTE(review): the meaning of top/bottom/pf/pct is not derivable
          // from this header - presumably top/bottom identify the source of
          // each field; confirm where the list is built.
131 |     uint32_t top;
132 |     uint32_t bottom;
133 |     uint8_t pf;
134 |     uint8_t pct;
135 |     uint8_t type; // Valid only for FO_RAW. Records the TFF/RFF flags.
136 | };
137 | 
138 | 
139 | constexpr size_t BUFFER_SIZE = 128 * 1024; // 128KiB
      // Byte count requested per _read() call in Fill_Buffer() and Get_Byte();
      // Fill_Next() and Get_Byte() treat Rdbfr + BUFFER_SIZE as the end of the
      // read buffer.
140 | 
141 | 
142 | class CMPEG2Decoder
143 | {
          // MPEG video decoder. It is constructed from an already opened
          // FILE* plus a path (see the public constructor) and decodes a
          // requested frame into a YV12PICT via Decode(). The private section
          // groups the declarations by the source file that implements them;
          // the bit-reader functions declared under "getbit.cpp" that are
          // performance critical are defined inline below this class.
144 |     //int moderate_h, moderate_v, pp_mode;
145 | 
146 |     // getbit.cpp
147 |     void Initialize_Buffer(void);
148 |     void Fill_Buffer(void);
149 |     void Next_Transport_Packet(void);
150 |     void Next_PVA_Packet(void);
151 |     void Next_Packet(void);
152 |     void Next_File(void);
153 | 
154 |     uint32_t Show_Bits(uint32_t N);
155 |     uint32_t Get_Bits(uint32_t N);
156 |     void Flush_Buffer(uint32_t N);
157 |     void Fill_Next(void);
158 |     uint32_t Get_Byte(void);
159 |     uint32_t Get_Short(void);
160 |     void Next_Start_Code(void);
161 | 
          // Bit-reader state used by the inline functions below:
          //   Rdbfr      - start of the read buffer (BUFFER_SIZE bytes are
          //                read into it); presumably backed by ReadBuffer
          //   Rdptr      - next byte to consume
          //   Rdmax      - limit checked in system-stream mode before
          //                Next_Packet() is called
          //   CurrentBfr - 32-bit word currently being consumed
          //   NextBfr    - following 32-bit word (look-ahead)
          //   BitsLeft   - number of unread bits remaining in CurrentBfr
          //   Val        - last value produced by Get_Bits()
          //   Read       - result of the most recent _read() call
162 |     std::vector<uint8_t> ReadBuffer;
163 |     uint8_t* Rdbfr, * Rdptr, * Rdmax;
164 |     uint32_t CurrentBfr, NextBfr, BitsLeft, Val, Read;
165 |     uint8_t* buffer_invalid;
166 | 
167 |     // gethdr.cpp
168 |     int Get_Hdr(void);
169 |     void Sequence_Header(void);
170 |     int slice_header(void);
171 |     void group_of_pictures_header(void);
172 |     void picture_header(void);
173 |     void sequence_extension(void);
174 |     void sequence_display_extension(void);
175 |     void quant_matrix_extension(void);
176 |     void picture_display_extension(void);
177 |     void picture_coding_extension(void);
178 |     void copyright_extension(void);
179 |     int  extra_bit_information(void);
180 |     void extension_and_user_data(void);
181 | 
182 |     // getpic.cpp
183 |     void Decode_Picture(YV12PICT& dst);
184 |     void update_picture_buffers(void);
185 |     void picture_data(void);
186 |     void slice(int MBAmax, uint32_t code);
187 |     void macroblock_modes(int& pmacroblock_type, int& pmotion_type,
188 |         int& pmotion_vector_count, int& pmv_format, int& pdmv, int& pmvscale, int& pdct_type);
189 |     void clear_block(int count);
190 |     void add_block(int count, int bx, int by, int dct_type, int addflag);
191 |     void motion_compensation(int MBA, int macroblock_type, int motion_type,
192 |         int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2], int dct_type);
193 |     void skipped_macroblock(int dc_dct_pred[3], int PMV[2][2][2],
194 |         int& motion_type, int motion_vertical_field_select[2][2], int& macroblock_type);
195 |     void decode_macroblock(int& macroblock_type, int& motion_type, int& dct_type,
196 |         int PMV[2][2][2], int dc_dct_pred[3], int motion_vertical_field_select[2][2], int dmvector[2]);
197 |     void decode_mpeg1_intra_block(int comp, int dc_dct_pred[]);
198 |     void decode_mpeg1_non_intra_block(int comp);
199 |     void Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[]);
200 |     void Decode_MPEG2_Non_Intra_Block(int comp);
201 | 
202 |     int Get_macroblock_type(void);
203 |     int Get_I_macroblock_type(void);
204 |     int Get_P_macroblock_type(void);
205 |     int Get_B_macroblock_type(void);
206 |     int Get_D_macroblock_type(void);
207 |     int Get_coded_block_pattern(void);
208 |     int Get_macroblock_address_increment(void);
209 |     int Get_Luma_DC_dct_diff(void);
210 |     int Get_Chroma_DC_dct_diff(void);
211 | 
212 |     void form_predictions(int bx, int by, int macroblock_type, int motion_type,
213 |         int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2]);
214 | 
215 |     void form_prediction(uint8_t* src[], int sfield, uint8_t* dst[], int dfield,
216 |         int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag);
217 | 
218 |     // motion.cpp
219 |     void motion_vectors(int PMV[2][2][2], int dmvector[2], int motion_vertical_field_select[2][2],
220 |         int s, int motion_vector_count, int mv_format,
221 |         int h_r_size, int v_r_size, int dmv, int mvscale);
222 |     void Dual_Prime_Arithmetic(int DMV[][2], int* dmvector, int mvx, int mvy);
223 | 
224 |     void motion_vector(int* PMV, int* dmvector, int h_r_size, int v_r_size,
225 |         int dmv, int mvscale, int full_pel_vector);
          // NOTE(review): "motion_residualesidual" below looks like a typo
          // for "motion_residual"; it is only a parameter name in a
          // declaration, so it has no effect on callers.
226 |     void decode_motion_vector(int* pred, int r_size, int motion_code,
227 |         int motion_residualesidual, int full_pel_vector);
228 |     int Get_motion_code(void);
229 |     int Get_dmvector(void);
230 | 
231 |     // store.cpp
232 |     void assembleFrame(uint8_t* src[], int pf, YV12PICT& dst);
233 | 
234 |     // decoder operation control flags
          // Fault_Flag is compared against OUT_OF_BITS in Next_Start_Code();
          // File_Flag indexes Infile (see Fill_Buffer() / Get_Byte()).
235 |     int Fault_Flag;
236 |     int File_Flag;
237 |     void(*idctFunction)(int16_t* block);
238 |     void(*prefetchTables)();
239 |     int SystemStream_Flag;    // 0 = none, 1=program, 2=Transport 3=PVA
240 | 
241 |     int TransportPacketSize;
242 |     int MPEG2_Transport_AudioPID;  // used only for transport streams
243 |     int MPEG2_Transport_VideoPID;  // used only for transport streams
244 |     int MPEG2_Transport_PCRPID;  // used only for transport streams
245 | 
246 |     int lfsr0, lfsr1;
247 | 
          // File descriptors handed to _read(); indexed by File_Flag.
248 |     std::vector<int> Infile;
249 |     int closed_gop;
250 | 
251 |     int intra_quantizer_matrix[64];
252 |     int non_intra_quantizer_matrix[64];
253 |     int chroma_intra_quantizer_matrix[64];
254 |     int chroma_non_intra_quantizer_matrix[64];
255 | 
256 |     int load_intra_quantizer_matrix;
257 |     int load_non_intra_quantizer_matrix;
258 |     int load_chroma_intra_quantizer_matrix;
259 |     int load_chroma_non_intra_quantizer_matrix;
260 | 
261 |     int q_scale_type;
262 |     int alternate_scan;
263 |     int quantizer_scale;
264 | 
265 |     short* block[8], * p_block[8];
266 |     int pf_backward, pf_forward, pf_current;
267 | 
268 |     // global values
269 |     uint8_t* backward_reference_frame[3], * forward_reference_frame[3];
270 |     uint8_t* auxframe[3], * current_frame[3];
271 |     //uint8_t *u422, *v422;
272 |     YV12PICT* auxFrame1;
273 |     YV12PICT* auxFrame2;
274 |     YV12PICT* saved_active;
275 |     YV12PICT* saved_store;
276 | 
277 |     enum {
278 |         ELEMENTARY_STREAM = 0,
279 |         MPEG1_PROGRAM_STREAM,
280 |         MPEG2_PROGRAM_STREAM,
281 |     };
282 | 
283 |     int Coded_Picture_Width, Coded_Picture_Height, Chroma_Width, Chroma_Height;
284 |     int block_count, Second_Field;
285 | 
286 |     /* ISO/IEC 13818-2 section 6.2.2.3:  sequence_extension() */
287 |     int progressive_sequence;
288 |     int chroma_format;
289 |     int matrix_coefficients;
290 | 
291 |     /* ISO/IEC 13818-2 section 6.2.3: picture_header() */
292 |     int picture_coding_type;
293 |     int temporal_reference;
294 |     int full_pel_forward_vector;
295 |     int forward_f_code;
296 |     int full_pel_backward_vector;
297 |     int backward_f_code;
298 | 
299 |     /* ISO/IEC 13818-2 section 6.2.3.1: picture_coding_extension() header */
300 |     int f_code[2][2];
301 |     int picture_structure;
302 |     int frame_pred_frame_dct;
303 |     int progressive_frame;
304 |     int concealment_motion_vectors;
305 |     int intra_dc_precision;
306 |     int top_field_first;
307 |     int repeat_first_field;
308 |     int intra_vlc_format;
309 | 
310 |     void copy_all(YV12PICT& src, YV12PICT& dst);
311 |     void copy_top(YV12PICT& src, YV12PICT& dst);
312 |     void copy_bottom(YV12PICT& src, YV12PICT& dst);
313 | 
314 |     int* QP, * backwardQP, * auxQP;
315 |     uint32_t  prev_frame;
316 | 
317 |     std::vector<char> DirectAccess;
318 | 
          // Construction / teardown helpers; destroy() is what the destructor
          // calls.
319 |     void create_file_lists(FILE* d2vf, const char* path, char* buf);
320 |     void setIDCT(int idct);
321 |     void create_gop_and_frame_lists(FILE* d2vf, char* buf);
322 |     void set_clip_properties();
323 |     void allocate_buffers();
324 |     void search_bad_starting();
325 |     void destroy();
326 | 
327 | public:
328 |     CMPEG2Decoder(FILE* file, const char* path, int _idct, int icc, int upconv, int info, bool showq, bool _i420, int _rff, int _cpu_flags);
329 |     ~CMPEG2Decoder() { destroy(); }
330 |     void Decode(uint32_t frame, YV12PICT& dst);
331 | 
          // Everything below is public data; presumably it is read directly
          // by the AviSynth wrapper (confirm in AVISynthAPI.cpp).
332 |     std::vector<std::string> Infilename;
333 |     uint32_t BadStartingFrames;
334 | 
335 |     int Clip_Width, Clip_Height;
336 |     int D2V_Width, D2V_Height;
337 |     int Clip_Top, Clip_Bottom, Clip_Left, Clip_Right;
338 |     char Aspect_Ratio[20];
339 | 
340 |     std::vector<GOPLIST> GOPList;
341 |     std::vector<FRAMELIST> FrameList;
342 | 
343 |     int mpeg_type;
344 |     int FO_Flag;
345 |     int Field_Order;
346 |     bool HaveRFFs;
347 | 
348 |     int       VF_FrameRate;
349 |     uint32_t  VF_FrameRate_Num;
350 |     uint32_t  VF_FrameRate_Den;
351 | 
352 |     int horizontal_size, vertical_size, mb_width, mb_height, aspect_ratio_information;
353 |     //int iPP;
354 |     int iCC;
355 |     bool showQ;
356 |     int upConv;
357 |     bool i420;
358 | 
359 |     // info option stuff
360 |     int info;
361 |     int minquant, maxquant, avgquant;
362 |     bool has_prop = false;
363 | 
364 |     // Luminance Code
365 |     int lumGamma;
366 |     int lumOffset;
367 | 
          // Read-only accessors for private geometry/format members.
368 |     int getChromaFormat() { return chroma_format; }
369 |     int getChromaWidth() { return Chroma_Width; }
370 |     int getLumaWidth() { return Coded_Picture_Width; }
371 |     int getLumaHeight() { return Coded_Picture_Height; }
372 |     int cpu_flags;
373 | };
374 | 
375 | 
376 | __forceinline uint32_t CMPEG2Decoder::Show_Bits(uint32_t N)
377 | {
378 |     if (N <= BitsLeft) {
379 |         return (CurrentBfr << (32 - BitsLeft)) >> (32 - N);;
380 |     }
381 |     else {
382 |         N -= BitsLeft;
383 |         int shift = 32 - BitsLeft;
384 |         //return (((CurrentBfr << shift) >> shift) << N) + (NextBfr >> (32 - N));;
385 |         return ((CurrentBfr << shift) >> (shift - N)) | (NextBfr >> (32 - N));
386 |     }
387 | }
388 | 
389 | __forceinline uint32_t CMPEG2Decoder::Get_Bits(uint32_t N)
390 | {
391 |     if (N < BitsLeft) {
392 |         Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - N);
393 |         BitsLeft -= N;
394 |         return Val;
395 |     }
396 |     else {
397 |         N -= BitsLeft;
398 |         int shift = 32 - BitsLeft;
399 |         Val = (CurrentBfr << shift) >> shift;
400 |         if (N != 0)
401 |             Val = (Val << N) | (NextBfr >> (32 - N));
402 |         CurrentBfr = NextBfr;
403 |         BitsLeft = 32 - N;
404 |         Fill_Next();
405 |         return Val;
406 |     }
407 | }
408 | 
409 | 
410 | __forceinline void CMPEG2Decoder::Flush_Buffer(uint32_t N)
411 | {
412 |     if (N < BitsLeft) {
413 |         BitsLeft -= N;
414 |     }
415 |     else {
416 |         CurrentBfr = NextBfr;
417 |         BitsLeft += 32 - N;
418 |         Fill_Next();
419 |     }
420 | }
421 | 
422 | 
423 | __forceinline void CMPEG2Decoder::Fill_Next()
424 | {
          // Loads the next four stream bytes into NextBfr, first byte in the
          // most significant position, and advances Rdptr past them.
          //
          // Three situations are handled:
          //  1. System-stream mode with fewer than four bytes before Rdmax:
          //     the bytes are fetched one at a time through Get_Byte(), and
          //     Next_Packet() is called each time Rdptr has reached Rdmax.
          //  2. At least four bytes remain in the read buffer: a single
          //     32-bit load followed by a byte swap.
          //  3. Fewer than four bytes remain in the read buffer: the word is
          //     assembled from the tail of the old buffer contents and the
          //     head of the new contents delivered by Fill_Buffer().
425 |     if (SystemStream_Flag && Rdptr > Rdmax - 4) {
426 |         if (Rdptr >= Rdmax)
427 |             Next_Packet();
428 |         NextBfr = Get_Byte() << 24;
429 | 
430 |         if (Rdptr >= Rdmax)
431 |             Next_Packet();
432 |         NextBfr |= Get_Byte() << 16;
433 | 
434 |         if (Rdptr >= Rdmax)
435 |             Next_Packet();
436 |         NextBfr |= Get_Byte() << 8;
437 | 
438 |         if (Rdptr >= Rdmax)
439 |             Next_Packet();
440 |         NextBfr |= Get_Byte();
441 |     }
442 |     else if (Rdptr < Rdbfr + BUFFER_SIZE - 3) {
443 |         //NextBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3);
              // Equivalent to the commented-out expression above only on a
              // little-endian host; the load goes through a uint32_t* that
              // is not guaranteed to be 4-byte aligned.
              // NOTE(review): confirm all supported targets tolerate this.
444 |         NextBfr = _byteswap_ulong(*reinterpret_cast<uint32_t*>(Rdptr));
445 |         Rdptr += 4;
446 |     }
447 |     else {
              // The switch value is the number of bytes left in the buffer.
448 |         switch (Rdbfr + BUFFER_SIZE - Rdptr) {
449 |             case 1:
                      // 1 old byte + 3 bytes from the refilled buffer.
450 |                 NextBfr = *Rdptr++ << 24;
451 |                 Fill_Buffer();
452 |                 NextBfr |= (Rdptr[0] << 16) | (Rdptr[1] << 8) | Rdptr[2];
453 |                 Rdptr += 3;
454 |                 break;
455 |             case 2:
                      // 2 old bytes + 2 bytes from the refilled buffer.
456 |                 NextBfr = (Rdptr[0] << 24) | (Rdptr[1] << 16);
457 |                 Rdptr += 2;
458 |                 Fill_Buffer();
459 |                 NextBfr |= (Rdptr[0] << 8) | Rdptr[1];
460 |                 Rdptr += 2;
461 |                 break;
462 |             case 3:
                      // 3 old bytes + 1 byte from the refilled buffer.
463 |                 NextBfr = (Rdptr[0] << 24) | (Rdptr[1] << 16) | (Rdptr[2] << 8);
464 |                 Rdptr += 3;
465 |                 Fill_Buffer();
466 |                 NextBfr |= *Rdptr++;
467 |                 break;
468 |             default:
                      // Any other remainder (normally 0: buffer exhausted):
                      // refill first, then read a whole word from its start.
469 |                 Fill_Buffer();
470 |                 NextBfr = _byteswap_ulong(*reinterpret_cast<uint32_t*>(Rdptr));
471 |                 Rdptr += 4;
472 |         }
473 |     }
474 | }
475 | 
476 | 
477 | __forceinline void CMPEG2Decoder::Fill_Buffer()
478 | {
          // Refills the read buffer from the current input file
          // (Infile[File_Flag]) and rewinds Rdptr to its start. A short read
          // hands control to Next_File(). In system-stream mode Rdmax is
          // shifted back by one buffer length so that it keeps its position
          // relative to the data.
          //
          // NOTE(review): _read() reports errors as -1, which becomes
          // 0xFFFFFFFF in the unsigned Read and therefore does NOT take the
          // "Read < BUFFER_SIZE" branch - confirm this is acceptable.
479 |     Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
480 | 
481 |     if (Read < BUFFER_SIZE)
482 |         Next_File();
483 | 
484 |     Rdptr = Rdbfr;
485 | 
486 |     if (SystemStream_Flag)
487 |         Rdmax -= BUFFER_SIZE;
488 | }
489 | 
490 | 
491 | __forceinline uint32_t CMPEG2Decoder::Get_Byte()
492 | {
          // Returns the byte at Rdptr and advances Rdptr by one. While Rdptr
          // lies at or beyond the end of the read buffer, the buffer is
          // refilled and both Rdptr and Rdmax are moved back by BUFFER_SIZE
          // (Rdptr is not reset to Rdbfr, so any overshoot past the buffer
          // end is preserved as an offset into the new data).
          //
          // NOTE(review): as in Fill_Buffer(), a _read() error (-1) is stored
          // in the unsigned Read and is not treated as a short read.
493 |     while (Rdptr >= (Rdbfr + BUFFER_SIZE)) {
494 |         Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
495 |         if (Read < BUFFER_SIZE)
496 |             Next_File();
497 |         Rdptr -= BUFFER_SIZE;
498 |         Rdmax -= BUFFER_SIZE;
499 |     }
500 | 
501 |     return *Rdptr++;
502 | }
503 | 
504 | __forceinline uint32_t CMPEG2Decoder::Get_Short()
505 | {
506 |     uint32_t i = Get_Byte();
507 |     return (i << 8) + Get_Byte();
508 | }
509 | 
510 | 
511 | __forceinline void CMPEG2Decoder::Next_Start_Code()
512 | {
513 |     // This is contrary to the spec but is more resilient to some
514 |     // stream corruption scenarios.
515 |     BitsLeft = (BitsLeft + 7) & ~7;
516 | 
517 |     do {
518 |         uint32_t show = Show_Bits(24);
519 |         if (Fault_Flag == OUT_OF_BITS)
520 |             return;
521 |         if (show == 0x000001)
522 |             return;
523 |         Flush_Buffer(8);
524 |     } while (true);
525 | }
526 | 
527 | #endif
528 | 
529 | 


--------------------------------------------------------------------------------
/src/color_convert.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | MPEG2Dec's colorspace conversions Copyright (C) Chia-chen Kuo - April 2001
  4 | 
  5 | */
  6 | 
  7 | // modified to be pitch != width friendly
  8 | // tritical - May 16, 2005
  9 | 
 10 | // lots of bug fixes and new isse 422->444 routine
 11 | // tritical - August 18, 2005
 12 | 
 13 | // rewrite all code using SSE2 intrinsics
 14 | // OKA Motofumi - August 21, 2016
 15 | 
 16 | 
 17 | #include <cstring>
 18 | #include <emmintrin.h>
 19 | #include "color_convert.h"
 20 | #ifndef _WIN32
 21 | #include "win_import_min.h"
 22 | #endif
 23 | 
 24 | 
 25 | #if 0
 26 | // C implementation
 27 | void conv420to422I_c(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, int width, int height)
 28 | {
 29 |     const uint8_t* s0 = src;
 30 |     const uint8_t* s1 = src + src_pitch;
 31 |     uint8_t* d0 = dst;
 32 |     uint8_t* d1 = dst + dst_pitch;
 33 | 
 34 |     width /= 2;
 35 |     src_pitch *= 2;
 36 |     dst_pitch *= 2;
 37 | 
 38 |     std::memcpy(d0, s0, width);
 39 |     std::memcpy(d1, s1, width);
 40 | 
 41 |     d0 += dst_pitch;
 42 |     d1 += dst_pitch;
 43 | 
 44 |     for (int y = 2; y < height - 2; y += 4) {
 45 |         const uint8_t* s2 = s0 + src_pitch;
 46 |         const uint8_t* s3 = s1 + src_pitch;
 47 |         uint8_t* d2 = d0 + dst_pitch;
 48 |         uint8_t* d3 = d1 + dst_pitch;
 49 | 
 50 |         for (int x = 0; x < width; ++x) {
 51 |             d0[x] = (s0[x] * 5 + s2[x] * 3 + 4) / 8;
 52 |             d1[x] = (s1[x] * 7 + s3[x] * 1 + 4) / 8;
 53 |             d2[x] = (s0[x] * 1 + s2[x] * 7 + 4) / 8;
 54 |             d3[x] = (s1[x] * 3 + s3[x] * 5 + 4) / 8;
 55 |         }
 56 |         s0 = s2;
 57 |         s1 = s3;
 58 |         d0 = d2 + dst_pitch;
 59 |         d1 = d3 + dst_pitch;
 60 |     }
 61 | 
 62 |     std::memcpy(d0, s0, width);
 63 |     std::memcpy(d1, s1, width);
 64 | }
 65 | #endif
 66 | 
 67 | 
 68 | static __forceinline __m128i
 69 | avg_weight_1_7(const __m128i& x, const __m128i& y, const __m128i& four)
 70 | {
 71 |     //(x + y * 7 + 4) / 8
 72 |     __m128i t0 = _mm_subs_epu16(_mm_slli_epi16(y, 3), y);
 73 |     t0 = _mm_adds_epu16(_mm_adds_epu16(t0, x), four);
 74 |     return _mm_srli_epi16(t0, 3);
 75 | }
 76 | 
 77 | static __forceinline __m128i
 78 | avg_weight_3_5(const __m128i& x, const __m128i& y, const __m128i& four)
 79 | {
 80 |     //(x * 3 + y * 5 + 4) / 8
 81 |     __m128i t0 = _mm_adds_epu16(_mm_slli_epi16(x, 1), x);
 82 |     __m128i t1 = _mm_adds_epu16(_mm_slli_epi16(y, 2), y);
 83 |     t0 = _mm_adds_epu16(_mm_adds_epu16(t0, t1), four);
 84 |     return _mm_srli_epi16(t0, 3);
 85 | }
 86 | 
 87 | 
 88 | void conv420to422I(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch, int width, int height)
 89 | {
 90 |     const uint8_t* src0 = src;
 91 |     const uint8_t* src1 = src + src_pitch;
 92 |     uint8_t* dst0 = dst;
 93 |     uint8_t* dst1 = dst + dst_pitch;
 94 | 
 95 |     width /= 2;
 96 |     src_pitch *= 2;
 97 |     dst_pitch *= 2;
 98 | 
 99 |     std::memcpy(dst0, src0, width);
100 |     std::memcpy(dst1, src1, width);
101 | 
102 |     dst0 += dst_pitch;
103 |     dst1 += dst_pitch;
104 | 
105 |     const __m128i zero = _mm_setzero_si128();
106 |     const __m128i four = _mm_set1_epi16(0x0004);
107 | 
108 |     for (int y = 2; y < height - 2; y += 4) {
109 |         const uint8_t* src2 = src0 + src_pitch;
110 |         const uint8_t* src3 = src1 + src_pitch;
111 |         uint8_t* dst2 = dst0 + dst_pitch;
112 |         uint8_t* dst3 = dst1 + dst_pitch;
113 | 
114 |         for (int x = 0; x < width; x += 8) {
115 |             __m128i s0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src0 + x));
116 |             __m128i s1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src2 + x));
117 |             s0 = _mm_unpacklo_epi8(s0, zero);
118 |             s1 = _mm_unpacklo_epi8(s1, zero);
119 |             __m128i d = _mm_packus_epi16(avg_weight_3_5(s1, s0, four), zero);
120 |             _mm_storel_epi64(reinterpret_cast<__m128i*>(dst0 + x), d);
121 |             d = _mm_packus_epi16(avg_weight_1_7(s0, s1, four), zero);
122 |             _mm_storel_epi64(reinterpret_cast<__m128i*>(dst2 + x), d);
123 | 
124 |             s0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src1 + x));
125 |             s1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src3 + x));
126 |             s0 = _mm_unpacklo_epi8(s0, zero);
127 |             s1 = _mm_unpacklo_epi8(s1, zero);
128 |             d = _mm_packus_epi16(avg_weight_1_7(s1, s0, four), zero);
129 |             _mm_storel_epi64(reinterpret_cast<__m128i*>(dst1 + x), d);
130 |             d = _mm_packus_epi16(avg_weight_3_5(s0, s1, four), zero);
131 |             _mm_storel_epi64(reinterpret_cast<__m128i*>(dst3 + x), d);
132 |         }
133 |         src0 = src2;
134 |         src1 = src3;
135 |         dst0 = dst2 + dst_pitch;
136 |         dst1 = dst3 + dst_pitch;
137 |     }
138 | 
139 |     std::memcpy(dst0, src0, width);
140 |     std::memcpy(dst1, src1, width);
141 | }
142 | 
143 | 
144 | #if 0
145 | // C implementation
// Scalar reference for conv420to422P(): vertical chroma upsampling that
// doubles the number of rows (height / 2 source rows -> height output rows).
//
//   src/dst             - chroma plane in / out
//   src_pitch/dst_pitch - distance in bytes between consecutive rows
//   width               - luma width; width / 2 bytes are processed per row
//   height              - number of output rows
//
// The first and last output rows are copies of the first and last source
// rows; every other output row is (3 * nearer + farther + 2) / 4 of the two
// neighbouring source rows.
void conv420to422P_c(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int row_bytes = width / 2;
    const int src_rows = height / 2;
    const int out_step = dst_pitch * 2;

    const uint8_t* prev = src;
    const uint8_t* cur = src + src_pitch;
    uint8_t* out_even = dst;
    uint8_t* out_odd = dst + dst_pitch;

    // Top edge.
    for (int x = 0; x < row_bytes; ++x) {
        out_even[x] = prev[x];
        out_odd[x] = static_cast<uint8_t>((prev[x] * 3 + cur[x] + 2) / 4);
    }
    out_even += out_step;
    out_odd += out_step;

    // Interior: `cur` is the centre row, `prev` / `next` its neighbours.
    for (int row = 0; row < src_rows - 2; ++row) {
        const uint8_t* next = cur + src_pitch;
        for (int x = 0; x < row_bytes; ++x) {
            out_even[x] = static_cast<uint8_t>((prev[x] + cur[x] * 3 + 2) / 4);
            out_odd[x] = static_cast<uint8_t>((next[x] + cur[x] * 3 + 2) / 4);
        }
        prev = cur;
        cur = next;
        out_even += out_step;
        out_odd += out_step;
    }

    // Bottom edge.
    for (int x = 0; x < row_bytes; ++x) {
        out_even[x] = static_cast<uint8_t>((prev[x] + cur[x] * 3 + 2) / 4);
        out_odd[x] = cur[x];
    }
}
183 | #endif
184 | 
185 | 
// SSE2 vertical chroma upsampling that doubles the number of rows
// (height / 2 source rows -> height output rows).
//
//   src/dst             - chroma plane in / out
//   src_pitch/dst_pitch - distance in bytes between consecutive rows
//   width               - luma width; width / 2 bytes are valid per row
//   height              - number of output rows
//
// The first and last output rows are copies of the first and last source
// rows. Every other row blends the two neighbouring source rows as
// avg(avg(farther - 1, nearer), nearer); the disabled scalar reference
// conv420to422P_c expresses this as (3 * nearer + farther + 2) / 4.
//
// Rows are processed 16 bytes at a time: source rows are read with unaligned
// loads, destination rows are written with ALIGNED stores, so dst and
// dst_pitch must be 16-byte aligned and each row must have room for a
// multiple of 16 bytes.
void conv420to422P(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int row_bytes = width / 2;
    const int src_rows = height / 2;
    const int out_step = dst_pitch * 2;

    const __m128i one = _mm_set1_epi8(0x01);

    const auto load = [](const uint8_t* p) {
        return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    };
    const auto store = [](uint8_t* p, const __m128i& v) {
        _mm_store_si128(reinterpret_cast<__m128i*>(p), v);
    };
    // The saturating "- 1" compensates for the round-up of the first average.
    const auto blend = [&one](const __m128i& farther, const __m128i& nearer) {
        const __m128i half = _mm_avg_epu8(_mm_subs_epu8(farther, one), nearer);
        return _mm_avg_epu8(half, nearer);
    };

    const uint8_t* prev = src;
    const uint8_t* cur = src + src_pitch;
    uint8_t* out_even = dst;
    uint8_t* out_odd = dst + dst_pitch;

    // Top edge.
    for (int x = 0; x < row_bytes; x += 16) {
        const __m128i first = load(prev + x);
        const __m128i second = load(cur + x);
        store(out_even + x, first);
        store(out_odd + x, blend(second, first));
    }
    out_even += out_step;
    out_odd += out_step;

    // Interior: `cur` is the centre row, `prev` / `next` its neighbours.
    for (int row = 0; row < src_rows - 2; ++row) {
        const uint8_t* next = cur + src_pitch;

        for (int x = 0; x < row_bytes; x += 16) {
            const __m128i above = load(prev + x);
            const __m128i centre = load(cur + x);
            const __m128i below = load(next + x);
            store(out_even + x, blend(above, centre));
            store(out_odd + x, blend(below, centre));
        }
        prev = cur;
        cur = next;
        out_even += out_step;
        out_odd += out_step;
    }

    // Bottom edge.
    for (int x = 0; x < row_bytes; x += 16) {
        const __m128i above = load(prev + x);
        const __m128i last = load(cur + x);
        store(out_even + x, blend(above, last));
        store(out_odd + x, last);
    }
}
247 | 
248 | #if 0
249 | // C implementation
// Scalar reference for conv422to444(): horizontal chroma upsampling that
// doubles the number of samples per row.
//
//   src/dst             - chroma plane in / out
//   src_pitch/dst_pitch - distance in bytes between consecutive rows
//   width               - output width; width / 2 source samples per row
//   height              - number of rows
//
// Even output samples copy the source sample; odd output samples are the
// rounded average of the two neighbouring source samples. The last two
// output samples both take the last source sample.
void conv422to444_c(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int src_width = width / 2;

    for (int row = 0; row < height; ++row, src += src_pitch, dst += dst_pitch) {
        for (int x = 0; x < src_width - 1; ++x) {
            dst[2 * x] = src[x];
            dst[2 * x + 1] = static_cast<uint8_t>((src[x] + src[x + 1] + 1) / 2);
        }
        const uint8_t last = src[src_width - 1];
        dst[2 * src_width - 1] = last;
        dst[2 * src_width - 2] = last;
    }
}
265 | #endif
266 | 
267 | 
// SSE2 horizontal chroma upsampling that doubles the number of samples
// per row.
//
//   src/dst             - chroma plane in / out
//   src_pitch/dst_pitch - distance in bytes between consecutive rows
//   width               - output width; width / 2 source samples per row
//   height              - number of rows
//
// Even output samples copy the source sample, odd output samples are the
// rounded average of the source sample and its right neighbour; the very
// last output sample is then overwritten with the one before it.
//
// Each step consumes 16 source bytes with an ALIGNED load plus an unaligned
// load starting one byte later, and writes 32 bytes with ALIGNED stores.
// src/dst and both pitches therefore have to be 16-byte aligned, and one
// byte past each 16-byte source group must be readable.
void conv422to444(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height)
{
    const int last = width - 1;
    const int src_width = width / 2;

    for (int row = 0; row < height; ++row, src += src_pitch, dst += dst_pitch) {
        for (int x = 0; x < src_width; x += 16) {
            const __m128i here = _mm_load_si128(reinterpret_cast<const __m128i*>(src + x));
            const __m128i right = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + x + 1));
            const __m128i between = _mm_avg_epu8(right, here);

            // Interleave originals (even positions) with averages (odd positions).
            uint8_t* const out = dst + static_cast<int64_t>(2) * x;
            _mm_store_si128(reinterpret_cast<__m128i*>(out), _mm_unpacklo_epi8(here, between));
            _mm_store_si128(reinterpret_cast<__m128i*>(out + 16), _mm_unpackhi_epi8(here, between));
        }
        // The right-most average used a sample outside the row: replace it.
        dst[last] = dst[last - 1];
    }
}
289 | 
290 | 
291 | #if 0
292 | const int64_t mmmask_0001 = 0x0001000100010001;
293 | const int64_t mmmask_0128 = 0x0080008000800080;
294 | 
// Converts planar 8-bit Y/U/V with one U and one V sample per luma sample to
// packed 24-bit pixels, using MMX inline assembly (MSVC __asm syntax with
// 32-bit registers).
//
// py, pu, pv     source planes; all three are indexed with the same x offset.
// dst            destination; three bytes are stored per pixel, in memory
//                order blue, green, red (see the unpack sequence below).
// src_pitchY     byte stride between luma rows.
// src_pitchUV    byte stride between chroma rows.
// dst_pitch      byte stride between destination rows.
// width, height  picture size in pixels. Four pixels are converted and twelve
//                bytes are stored per step.
//                NOTE(review): the loop therefore appears to assume that
//                width is a multiple of 4 - confirm with the callers.
// matrix         coefficient set: 7 = SMPTE 240M, 5 or 6 = BT.601, 4 = FCC,
//                any other value = BT.709.
// pc_scale       non-zero: subtract 16 from luma and scale it by
//                0x2543 / 0x2000 (about 1.164); zero: no offset, unity scale.
//
// All coefficients are fixed point with 13 fractional bits; every sum is
// shifted right by 13 before it is saturated to a byte.
void conv444toRGB24(const uint8_t* py, const uint8_t* pu, const uint8_t* pv,
    uint8_t* dst, int src_pitchY, int src_pitchUV, int dst_pitch, int width,
    int height, int matrix, int pc_scale)
{
    int64_t RGB_Offset, RGB_Scale, RGB_CBU, RGB_CRV, RGB_CGX;
    // Bytes left over at the end of each destination row after width pixels.
    int dst_modulo = dst_pitch - (3 * width);

    // Pick the coefficient set. Each 64-bit constant holds the same 32-bit
    // pattern twice so that one pmaddwd handles two pixels.
    if (pc_scale)
    {
        // Low word: luma gain; high word: 0x1000, i.e. half of 1 << 13.
        RGB_Scale = 0x1000254310002543;
        RGB_Offset = 0x0010001000100010;
        if (matrix == 7) // SMPTE 240M (1987)
        {
            RGB_CBU = 0x0000428500004285;
            RGB_CGX = 0xF7BFEEA3F7BFEEA3;
            RGB_CRV = 0x0000396900003969;
        }
        else if (matrix == 6 || matrix == 5) // SMPTE 170M/ITU-R BT.470-2 -- BT.601
        {
            RGB_CBU = 0x0000408D0000408D;
            RGB_CGX = 0xF377E5FCF377E5FC;
            RGB_CRV = 0x0000331300003313;
        }
        else if (matrix == 4) // FCC
        {
            RGB_CBU = 0x000040D8000040D8;
            RGB_CGX = 0xF3E9E611F3E9E611;
            RGB_CRV = 0x0000330000003300;
        }
        else // ITU-R Rec.709 (1990) -- BT.709
        {
            RGB_CBU = 0x0000439A0000439A;
            RGB_CGX = 0xF92CEEF1F92CEEF1;
            RGB_CRV = 0x0000395F0000395F;
        }
    }
    else
    {
        RGB_Scale = 0x1000200010002000;
        RGB_Offset = 0x0000000000000000;
        if (matrix == 7) // SMPTE 240M (1987)
        {
            RGB_CBU = 0x00003A6F00003A6F;
            RGB_CGX = 0xF8C0F0BFF8C0F0BF;
            RGB_CRV = 0x0000326E0000326E;
        }
        else if (matrix == 6 || matrix == 5) // SMPTE 170M/ITU-R BT.470-2 -- BT.601
        {
            RGB_CBU = 0x000038B4000038B4;
            RGB_CGX = 0xF4FDE926F4FDE926;
            RGB_CRV = 0x00002CDD00002CDD;
        }
        else if (matrix == 4) // FCC
        {
            RGB_CBU = 0x000038F6000038F6;
            RGB_CGX = 0xF561E938F561E938;
            RGB_CRV = 0x00002CCD00002CCD;
        }
        else // ITU-R Rec.709 (1990) -- BT.709
        {
            RGB_CBU = 0x00003B6200003B62;
            RGB_CGX = 0xFA00F104FA00F104;
            RGB_CRV = 0x0000326600003266;
        }
    }

    __asm
    {
        mov         eax, [py]  // eax = py
        mov         ebx, [pu]  // ebx = pu
        mov         ecx, [pv]  // ecx = pv
        mov         edx, [dst] // edx = dst
        mov         edi, width // edi = width
        xor esi, esi
        pxor        mm0, mm0

        // esi is the x offset within the current row; mm0 stays zero.
        // The same label serves as the target of both the pixel loop and the
        // row loop.
        convRGB24 :
        // Load four Y, U and V bytes, widen them to 16 bit and remove the
        // chroma bias of 128.
        movd        mm1, [eax + esi]
            movd        mm3, [ebx + esi]
            punpcklbw   mm1, mm0
            punpcklbw   mm3, mm0
            movd        mm5, [ecx + esi]
            punpcklbw   mm5, mm0
            movq        mm7, [mmmask_0128]
            psubw       mm3, mm7
            psubw       mm5, mm7

            // Luma term: pair each (Y - offset) with 1 and multiply-add with
            // RGB_Scale. mm1 = pixels 0-1, mm2 = pixels 2-3, as 32-bit sums.
            psubw       mm1, RGB_Offset
            movq        mm2, mm1
            movq        mm7, [mmmask_0001]
            punpcklwd   mm1, mm7
            punpckhwd   mm2, mm7
            movq        mm7, RGB_Scale
            pmaddwd     mm1, mm7
            pmaddwd     mm2, mm7

            // Blue = (U * RGB_CBU + luma term) >> 13, saturated to a byte.
            movq        mm4, mm3
            punpcklwd   mm3, mm0
            punpckhwd   mm4, mm0
            movq        mm7, RGB_CBU
            pmaddwd     mm3, mm7
            pmaddwd     mm4, mm7
            paddd       mm3, mm1
            paddd       mm4, mm2
            psrad       mm3, 13
            psrad       mm4, 13
            packuswb    mm3, mm0
            packuswb    mm4, mm0

            // Red = (V * RGB_CRV + luma term) >> 13, saturated to a byte.
            movq        mm6, mm5
            punpcklwd   mm5, mm0
            punpckhwd   mm6, mm0
            movq        mm7, RGB_CRV
            pmaddwd     mm5, mm7
            pmaddwd     mm6, mm7
            paddd       mm5, mm1
            paddd       mm6, mm2
            psrad       mm5, 13
            psrad       mm6, 13
            packuswb    mm5, mm0
            packuswb    mm6, mm0

            // Interleave blue and red bytes so green can be slotted between
            // them further down.
            punpcklbw   mm3, mm5
            punpcklbw   mm4, mm6
            movq        mm5, mm3
            movq        mm6, mm4
            psrlq       mm5, 16
            psrlq       mm6, 16
            por         mm3, mm5
            por         mm4, mm6

            // Reload U and V (the earlier copies were consumed) for green.
            movd        mm5, [ebx + esi]
            movd        mm6, [ecx + esi]
            punpcklbw   mm5, mm0
            punpcklbw   mm6, mm0
            movq        mm7, [mmmask_0128]
            psubw       mm5, mm7
            psubw       mm6, mm7

            // Green = ((V, U) pair multiply-added with RGB_CGX + luma term) >> 13.
            movq        mm7, mm6
            punpcklwd   mm6, mm5
            punpckhwd   mm7, mm5
            movq        mm5, RGB_CGX
            pmaddwd     mm6, mm5
            pmaddwd     mm7, mm5
            paddd       mm6, mm1
            paddd       mm7, mm2

            psrad       mm6, 13
            psrad       mm7, 13
            packuswb    mm6, mm0
            packuswb    mm7, mm0

            // mm3 / mm4 now hold B G R 0 for pixels 0-1 / 2-3.
            punpcklbw   mm3, mm6
            punpcklbw   mm4, mm7

            movq        mm1, mm3
            movq        mm5, mm4
            movq        mm6, mm4

            // Squeeze out the padding bytes: 4 x (B G R 0) -> 12 bytes,
            // written as three 32-bit stores.
            psrlq       mm1, 32
            psllq       mm1, 24
            por         mm1, mm3

            psrlq       mm3, 40
            psllq       mm6, 16
            por         mm3, mm6
            movd[edx], mm1

            psrld       mm4, 16
            psrlq       mm5, 24
            por         mm5, mm4
            movd[edx + 4], mm3

            // Advance by 12 output bytes and 4 source pixels. The third
            // store sits between cmp and jl; it leaves the flags untouched.
            add         edx, 0x0c
            add         esi, 0x04
            cmp         esi, edi
            movd[edx - 4], mm5

            jl          convRGB24

            // End of row: step every pointer to the next line and restart x.
            add         eax, src_pitchY
            add         ebx, src_pitchUV
            add         ecx, src_pitchUV
            add         edx, dst_modulo
            xor esi, esi
            dec         height
            jnz         convRGB24

            // Leave MMX state so that x87 code can run afterwards.
            emms
    }
}
487 | 
488 | 
// Packs planar 4:2:2 (separate Y, U and V planes) into interleaved YUY2
// (Y0 U0 Y1 V0 ...) with SSE2.
//
// py, pu, pv   source planes; pitch1Y / pitch1UV are their row strides in bytes.
// dst          destination buffer; pitch2 is its row stride in bytes.
// width        luma width in pixels; height is the number of rows.
//
// Every step consumes 16 luma and 8 + 8 chroma samples and emits 32 bytes, so
// a row is processed up to the next multiple of 16 pixels. The luma load is an
// aligned load and the output uses streaming stores, hence py and dst rows
// must be 16-byte aligned.
void conv422PtoYUY2(const uint8_t* py, uint8_t* pu, uint8_t* pv, uint8_t* dst,
    int pitch1Y, int pitch1UV, int pitch2, int width, int height)
{
    const int chroma_width = width / 2;

    for (int row = 0; row < height; ++row) {
        for (int cx = 0; cx < chroma_width; cx += 8) {
            const __m128i luma =
                _mm_load_si128(reinterpret_cast<const __m128i*>(py + 2 * cx));

            // U0 V0 U1 V1 ... U7 V7
            const __m128i chroma = _mm_unpacklo_epi8(
                _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pu + cx)),
                _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pv + cx)));

            __m128i* const out = reinterpret_cast<__m128i*>(dst + 4 * cx);
            _mm_stream_si128(out, _mm_unpacklo_epi8(luma, chroma));
            _mm_stream_si128(out + 1, _mm_unpackhi_epi8(luma, chroma));
        }

        py += pitch1Y;
        pu += pitch1UV;
        pv += pitch1UV;
        dst += pitch2;
    }
}
511 | 
512 | 
// Splits interleaved YUY2 (Y0 U0 Y1 V0 ...) into planar 4:2:2 with SSE2.
//
// src          packed source; pitch1 is its row stride in bytes.
// py, pu, pv   destination planes; pitch2y / pitch2uv are their row strides.
// width        luma width in pixels; height is the number of rows.
//
// Each step reads 32 packed bytes and writes 16 luma plus 8 + 8 chroma bytes,
// so a row is processed up to the next multiple of 16 pixels. The packed load
// and the luma store are aligned accesses: src and py rows must be 16-byte
// aligned.
void convYUY2to422P(const uint8_t* src, uint8_t* py, uint8_t* pu, uint8_t* pv,
    int pitch1, int pitch2y, int pitch2uv, int width, int height)
{
    // One round of a byte-wise perfect shuffle of the pair (a, b).
    const auto shuffle = [](__m128i& a, __m128i& b) {
        const __m128i low = _mm_unpacklo_epi8(a, b);
        const __m128i high = _mm_unpackhi_epi8(a, b);
        a = low;
        b = high;
    };

    const int chroma_width = width / 2;

    for (int row = 0; row < height; ++row) {
        for (int cx = 0; cx < chroma_width; cx += 8) {
            const __m128i* const in = reinterpret_cast<const __m128i*>(src + 4 * cx);
            __m128i a = _mm_load_si128(in);
            __m128i b = _mm_load_si128(in + 1);

            // Three rounds gather the bytes by their position modulo 4:
            //   a = even Y (8 bytes) followed by U0..U7
            //   b = odd  Y (8 bytes) followed by V0..V7
            shuffle(a, b);
            shuffle(a, b);
            shuffle(a, b);

            // Re-interleave even and odd luma, then peel off the chroma halves.
            _mm_store_si128(reinterpret_cast<__m128i*>(py + 2 * cx),
                            _mm_unpacklo_epi8(a, b));
            _mm_storel_epi64(reinterpret_cast<__m128i*>(pu + cx),
                             _mm_srli_si128(a, 8));
            _mm_storel_epi64(reinterpret_cast<__m128i*>(pv + cx),
                             _mm_srli_si128(b, 8));
        }

        src += pitch1;
        py += pitch2y;
        pu += pitch2uv;
        pv += pitch2uv;
    }
}
545 | #endif
546 | 


--------------------------------------------------------------------------------
/src/color_convert.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPEG2DECPLUS_COLOR_CONVERT_H
 2 | #define MPEG2DECPLUS_COLOR_CONVERT_H
 3 | 
 4 | #include <cstdint>
 5 | 
// Plane-to-plane conversion helpers. Each takes one source plane and one
// destination plane with independent row pitches, plus a width and height.
// NOTE(review): the per-function notes below are inferred from the names only;
// the implementations are not visible from this header - confirm there,
// including whether width/height refer to the source or destination plane.

// Presumably 4:2:0 -> 4:2:2 chroma upsampling for progressive material.
void conv420to422P(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height);

// Presumably 4:2:0 -> 4:2:2 chroma upsampling for interlaced material.
void conv420to422I(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height);

// Presumably 4:2:2 -> 4:4:4 chroma upsampling.
void conv422to444(const uint8_t* src, uint8_t* dst, int src_pitch, int dst_pitch,
    int width, int height);
14 | 
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/src/d2vsource.rc:
--------------------------------------------------------------------------------
 1 | #include <winver.h>
 2 | 
 3 | VS_VERSION_INFO VERSIONINFO
 4 | FILEVERSION             1,3,0,0
 5 | PRODUCTVERSION        	1,3,0,0
 6 | FILEFLAGSMASK           VS_FFI_FILEFLAGSMASK
 7 | FILEFLAGS               0x0L
 8 | FILEOS                  VOS__WINDOWS32
 9 | FILETYPE                VFT_DLL
10 | FILESUBTYPE             VFT2_UNKNOWN
11 | BEGIN
12 |     BLOCK "StringFileInfo"
13 |     BEGIN
14 |         BLOCK "040904E4"
15 |         BEGIN
16 |         VALUE "Comments",         "Modified DGDecode."
17 |         VALUE "FileDescription",  "D2VSource for AviSynth 2.6 / AviSynth+"
18 |         VALUE "FileVersion",      "1.3.0"
19 |         VALUE "InternalName",     "D2VSource"
20 |         VALUE "OriginalFilename", "D2VSource.dll"
21 |         VALUE "ProductName",      "D2VSource"
22 |         VALUE "ProductVersion",   "1.3.0"
23 |         END
24 |     END
25 |     BLOCK "VarFileInfo"
26 |     BEGIN
27 |         VALUE "Translation", 0x409, 1252
28 |     END
29 | END
30 | 


--------------------------------------------------------------------------------
/src/getbit.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (C) Chia-chen Kuo - April 2001
  3 |  *
  4 |  *  This file is part of DVD2AVI, a free MPEG-2 decoder
  5 |  *
  6 |  *  DVD2AVI is free software; you can redistribute it and/or modify
  7 |  *  it under the terms of the GNU General Public License as published by
  8 |  *  the Free Software Foundation; either version 2, or (at your option)
  9 |  *  any later version.
 10 |  *
 11 |  *  DVD2AVI is distributed in the hope that it will be useful,
 12 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14 |  *  GNU General Public License for more details.
 15 |  *
 16 |  *  You should have received a copy of the GNU General Public License
 17 |  *  along with GNU Make; see the file COPYING.  If not, write to
 18 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | #include "MPEG2Decoder.h"
 24 | 
 25 | void CMPEG2Decoder::Initialize_Buffer()
 26 | {
 27 |     Rdptr = Rdbfr + BUFFER_SIZE;
 28 |     Rdmax = Rdptr;
 29 |     buffer_invalid = (uint8_t*)(UINTPTR_MAX);
 30 | 
 31 |     if (SystemStream_Flag)
 32 |     {
 33 |         if (Rdptr >= Rdmax)
 34 |             Next_Packet();
 35 |         CurrentBfr = *Rdptr++ << 24;
 36 | 
 37 |         if (Rdptr >= Rdmax)
 38 |             Next_Packet();
 39 |         CurrentBfr += *Rdptr++ << 16;
 40 | 
 41 |         if (Rdptr >= Rdmax)
 42 |             Next_Packet();
 43 |         CurrentBfr += *Rdptr++ << 8;
 44 | 
 45 |         if (Rdptr >= Rdmax)
 46 |             Next_Packet();
 47 |         CurrentBfr += *Rdptr++;
 48 | 
 49 |         Fill_Next();
 50 |     }
 51 |     else
 52 |     {
 53 |         Fill_Buffer();
 54 | 
 55 |         CurrentBfr = (*Rdptr << 24) + (*(Rdptr + 1) << 16) + (*(Rdptr + 2) << 8) + *(Rdptr + 3);
 56 |         Rdptr += 4;
 57 | 
 58 |         Fill_Next();
 59 |     }
 60 | 
 61 |     BitsLeft = 32;
 62 | }
 63 | 
 64 | 
// Decoded fields of an MPEG-2 transport packet header. This is not an overlay
// of the on-wire layout: every field, including single-bit flags, gets its own
// member, and Next_Transport_Packet() fills them with shifts and masks.
// The trailing column of each comment gives the field width in bits and its
// mnemonic from the transport packet syntax (bslbf / uimsbf).
struct  transport_packet {
    // Header byte 0
    uint8_t sync_byte; //         8   bslbf

    // Header bytes 1-2
    uint8_t transport_error_indicator;//      1   bslbf
    uint8_t payload_unit_start_indicator;//       1   bslbf
    uint8_t transport_priority; //        1   bslbf
    uint16_t pid; //  13  uimsbf

    // Header byte 3
    uint8_t transport_scrambling_control;//       2   bslbf
    uint8_t adaptation_field_control;//       2   bslbf
    uint8_t continuity_counter;//     4   uimsbf

    // Only read from the stream when adaptation_field_control is 2 or 3.
    // 1 byte
    uint8_t adaptation_field_length; // 8 uimsbf

    // Only read from the stream when adaptation_field_length != 0.
    // 1 byte holding eight one-bit flags
    uint8_t discontinuity_indicator; //   1   bslbf
    uint8_t random_access_indicator; //   1   bslbf
    uint8_t elementary_stream_priority_indicator; //  1   bslbf
    uint8_t PCR_flag; //  1   bslbf
    uint8_t OPCR_flag; // 1   bslbf
    uint8_t splicing_point_flag; //   1   bslbf
    uint8_t transport_private_data_flag; //   1   bslbf
    uint8_t adaptation_field_extension_flag; //   1   bslbf

    /*
    Remainder of the packet syntax, not represented by members:

    if(adaptation_field_control=='10'  || adaptation_field_control=='11'){
        adaptation_field()
    }
    if(adaptation_field_control=='01' || adaptation_field_control=='11') {
        for (i=0;i<N;i++){
            data_byte       8   bslbf
        }
    }
    */

};
107 | 
// Advances Rdptr by bytes_to_skip and reduces Packet_Length by the same amount.
// The macro expands to a braced block that refers to a variable named
// Packet_Length in the calling scope, so a trailing semicolon is optional.
// The argument is evaluated twice. Passing Packet_Length itself still works
// because Rdptr is advanced before Packet_Length is reduced to zero.
#define SKIP_TRANSPORT_PACKET_BYTES( bytes_to_skip ) \
{   Rdptr += (bytes_to_skip); Packet_Length -= (bytes_to_skip); }
110 | 
// Advances through the transport stream until a packet carrying the video PID
// (MPEG2_Transport_VideoPID) with a non-empty payload is found. On return
// Rdptr points at the first payload byte to decode and Rdmax at the end of
// that payload. Packets with the error indicator set, without payload
// (adaptation_field_control == 0) or with another PID are skipped.
// TransportPacketSize selects the framing: 192-byte packets carry four extra
// bytes in front of the sync byte, 204-byte packets carry sixteen extra bytes
// after the payload.
void CMPEG2Decoder::Next_Transport_Packet()
{
    int Packet_Length;  // bytes remaining in MPEG-2 transport packet
    int Packet_Header_Length; // PES_header_data_length of a video PES header
    uint32_t code; // scratch for the header bytes currently being decoded
    transport_packet tp = { 0 };

    for (;;)
    {
        // 0) initialize some temp variables
        Packet_Length = TransportPacketSize; // total length of an MPEG-2 transport packet

        if (TransportPacketSize == 192)
        {
            // Skip the four bytes that precede the sync byte in this framing.
            //Get_Byte();
            //Get_Byte();
            //Get_Byte();
            //Get_Byte();
            Rdptr += 4;
            Packet_Length -= 4;
        }

        // 1) Search for a sync byte. Gives some protection against emulation:
        //    a 0x47 is only accepted if another 0x47 sits exactly one packet
        //    earlier or later in the read buffer, when that can be checked.
        for (;;)
        {
            if ((tp.sync_byte = Get_Byte()) != 0x47)
                continue;

            // Rdptr is now one past the candidate sync byte.
            if (Rdptr - Rdbfr > TransportPacketSize)
            {
                // The previous packet's sync position is inside the buffer.
                if (Rdptr[-(TransportPacketSize + 1)] == 0x47)
                    break;
            }
            else if (Rdbfr + Read - Rdptr > TransportPacketSize - static_cast<int64_t>(1))
            {
                // The next packet's sync position is inside the valid data.
                if (Rdptr[+(TransportPacketSize - 1)] == 0x47)
                    break;
            }
            else
            {
                // We can't check so just accept this sync byte.
                break;
            }
        }
        --Packet_Length; // decrement the sync_byte;

        // 2) get pid, transport_error_indicator, payload_unit_start_indicator
        code = Get_Short();
        Packet_Length = Packet_Length - 2; // decrement the two bytes we just got;
        tp.pid = code & 0x1FFF; // bits [12:0]
        tp.transport_error_indicator = (code >> 15) & 0x01;  // bit#15
        tp.payload_unit_start_indicator = (code >> 14) & 0x01; // bit#14
        tp.transport_priority = (code >> 13) & 0x01; // bit#13

        // 3) get other fields
        code = Get_Byte();
        --Packet_Length; // decrement the 1 byte we just got;
        tp.transport_scrambling_control = (code >> 6) & 0x03;//     2   bslbf
        tp.adaptation_field_control = (code >> 4) & 0x03;//        2   bslbf
        tp.continuity_counter = code & 0x0F;//      4   uimsbf


        // 4) check for early-exit conditions ... (possibly skip packet)
        // we don't care about the continuity counter
        // if ( tp.continuity_counter != previous_continuity_counter ) ...
        if (tp.transport_error_indicator ||
            (tp.adaptation_field_control == 0))
        {
            // skip remaining bytes in current packet
            SKIP_TRANSPORT_PACKET_BYTES(Packet_Length)
                continue; // abort, and circle back to top of 'for() loop'
        }

        // 5) consume the adaptation field, if the header announces one
        if (tp.adaptation_field_control == 2 || tp.adaptation_field_control == 3)
        {
            // adaptation field is present
            tp.adaptation_field_length = Get_Byte(); // 8-bits
            --Packet_Length; // decrement the 1 byte we just got;

            if (tp.adaptation_field_length != 0) // end of field already?
            {
                // First byte of the adaptation field body: eight one-bit
                // flags. It counts towards adaptation_field_length.
                code = Get_Byte();
                --Packet_Length; // decrement the 1 byte we just got;
                tp.discontinuity_indicator = (code >> 7) & 0x01; // 1   bslbf
                tp.random_access_indicator = (code >> 6) & 0x01; // 1   bslbf
                tp.elementary_stream_priority_indicator = (code >> 5) & 0x01; //    1   bslbf
                tp.PCR_flag = (code >> 4) & 0x01; //    1   bslbf
                tp.OPCR_flag = (code >> 3) & 0x01; //   1   bslbf
                tp.splicing_point_flag = (code >> 2) & 0x01; // 1   bslbf
                tp.transport_private_data_flag = (code >> 1) & 0x01; // 1   bslbf
                tp.adaptation_field_extension_flag = (code >> 0) & 0x01; // 1   bslbf

                // skip the remainder of the adaptation_field (the flag byte
                // has already been consumed, hence the -1)
                SKIP_TRANSPORT_PACKET_BYTES(tp.adaptation_field_length - 1)
            } // if ( tp.adaptation_field_length != 0 )
        } // if an adaptation field is present

        // we've processed the header, so now just the payload is left...

        // video
        if (tp.pid == MPEG2_Transport_VideoPID && Packet_Length > 0)
        {
#if 0
            code = Get_Short();
            code = (code & 0xffff) << 16 | Get_Short();
            Packet_Length = Packet_Length - 4; // remove these two bytes

            // Packet start?
            if (code < 0x000001E0 || code > 0x000001EF)
                if (!tp.payload_unit_start_indicator)
                {
                    // No, move the buffer-pointer back.
                    Rdptr -= 4;
                    Packet_Length = Packet_Length + 4; // restore these four bytes
                }
                else
#endif
                    if (tp.payload_unit_start_indicator)
                    {
                        // The payload begins with a PES header. Step over its
                        // first seven bytes, then read the second flags byte
                        // and the header data length (nine bytes in total).
                        //Get_Short();
                        //Get_Short();
                        //Get_Short(); // MPEG2-PES total Packet_Length
                        //Get_Byte(); // skip a byte
                        Rdptr += 7;
                        code = Get_Byte();
                        Packet_Header_Length = Get_Byte();
                        Packet_Length = Packet_Length - 9; // compensate the bytes we extracted

                        // Skip the PTS (if flagged) and the rest of the PES header.
                        if (code >= 0x80 && Packet_Header_Length > 4) // top flag bit set: PTS present
                        {
                            // Skip PES_PTS (five bytes; the value is not used)
                            //Get_Short();
                            //Get_Short();
                            Rdptr += 4;
                            Get_Byte();
                            Packet_Length = Packet_Length - 5;
                            SKIP_TRANSPORT_PACKET_BYTES(Packet_Header_Length - static_cast<int64_t>(5))
                        }
                        else
                            SKIP_TRANSPORT_PACKET_BYTES(Packet_Header_Length)
                    }
            // Everything that is left of this packet is video payload.
            Rdmax = Rdptr + Packet_Length;
            // 204-byte framing: the last 16 bytes are not payload
            // (presumably error-correction bytes - confirm).
            if (TransportPacketSize == 204)
                Rdmax -= 16;
            return;
        }

        // fall through case
        // Not a usable video packet: skip whatever is left of it.
        SKIP_TRANSPORT_PACKET_BYTES(Packet_Length)
    } // for
}
268 | 
// PVA packet header fields as decoded by Next_PVA_Packet().
struct pva_packet {
    uint16_t sync_byte;  // sync word (0x4156); the parser compares the bytes directly and never stores it here
    uint8_t stream_id;   // 0x01 is treated as video; 0x02 is also accepted when validating the sync
    uint8_t counter;     // read from the stream but not evaluated by the parser
    uint8_t reserved;    // must be 0x55, otherwise the packet is rejected
    uint8_t flags;       // bit 0x10: the payload starts with a 4-byte PTS
    uint16_t length;     // payload length in bytes, stored big-endian in the stream
};
278 | 
279 | // PVA transport stream parser.
280 | void CMPEG2Decoder::Next_PVA_Packet()
281 | {
282 |     uint32_t Packet_Length;
283 |     pva_packet pva;
284 |     uint32_t PTS;
285 | 
286 |     for (;;)
287 |     {
288 |         // Search for a good sync.
289 |         while (true)
290 |         {
291 |             // Sync word is 0x4156.
292 |             if (Get_Byte() != 0x41) continue;
293 |             if (Get_Byte() != 0x56)
294 |             {
295 |                 // This byte might be a 0x41, so back up by one.
296 |                 Rdptr--;
297 |                 continue;
298 |             }
299 |             // To protect against emulation of the sync word,
300 |             // also check that the stream says audio or video.
301 |             pva.stream_id = Get_Byte();
302 |             if (pva.stream_id != 0x01 && pva.stream_id != 0x02)
303 |             {
304 |                 // This byte might be a 0x41, so back up by one.
305 |                 Rdptr--;
306 |                 continue;
307 |             }
308 |             break;
309 |         }
310 | 
311 |         // Pick up the remaining packet header fields.
312 |         pva.counter = Get_Byte();
313 |         pva.reserved = Get_Byte();
314 |         pva.flags = Get_Byte();
315 |         pva.length = Get_Byte() << 8;
316 |         pva.length |= Get_Byte();
317 |         Packet_Length = pva.length;
318 | 
319 |         // Any payload?
320 |         if (Packet_Length == 0 || pva.reserved != 0x55)
321 |             continue;  // No, try the next packet.
322 | 
323 |         // Check stream id for video.
324 |         if (pva.stream_id == 1)
325 |         {
326 |             // This is a video packet.
327 |             // Extract the PTS if it exists.
328 |             if (pva.flags & 0x10)
329 |             {
330 |                 // The spec is unclear about the significance of the prebytes field.
331 |                 // It appears to be safe to ignore it.
332 |                 PTS = (int)((Get_Byte() << 24) | (Get_Byte() << 16) | (Get_Byte() << 8) | Get_Byte());
333 |                 Packet_Length -= 4;
334 |             }
335 | 
336 |             // Deliver the video to the ES parsing layer.
337 |             Rdmax = Rdptr + Packet_Length;
338 |             return;
339 |         }
340 | 
341 |         // Not an video packet or an audio packet to be demultiplexed. Keep looking.
342 |         SKIP_TRANSPORT_PACKET_BYTES(Packet_Length);
343 |     }
344 | }
345 | 
346 | void CMPEG2Decoder::Next_Packet()
347 | {
348 |     if (SystemStream_Flag == 2)  // MPEG-2 transport packet?
349 |     {
350 |         Next_Transport_Packet();
351 |         return;
352 |     }
353 |     else if (SystemStream_Flag == 3)  // PVA packet?
354 |     {
355 |         Next_PVA_Packet();
356 |         return;
357 |     }
358 | 
359 |     uint32_t code, Packet_Length, Packet_Header_Length;
360 |     static int stream_type;
361 |     while (true) {
362 |         code = Get_Short();
363 |         code = (code << 16) + Get_Short();
364 | 
365 |         // remove system layer byte stuffing
366 |         while ((code & 0xffffff00) != 0x00000100) {
367 |             if (Fault_Flag == OUT_OF_BITS)
368 |                 return;
369 |             code = (code << 8) | Get_Byte();
370 |         }
371 | 
372 |         if (code == PACK_START_CODE) {
373 |             if ((Get_Byte() & 0xf0) == 0x20) {
374 |                 Rdptr += 7; // MPEG1 program stream
375 |                 stream_type = MPEG1_PROGRAM_STREAM;
376 |             }
377 |             else {
378 |                 Rdptr += 8; // MPEG2 program stream
379 |                 stream_type = MPEG2_PROGRAM_STREAM;
380 |             }
381 |         }
382 |         else if ((code & 0xfffffff0) == VIDEO_ELEMENTARY_STREAM) {
383 |             Packet_Length = Get_Short();
384 |             Rdmax = Rdptr + Packet_Length;
385 | 
386 |             if (stream_type == MPEG1_PROGRAM_STREAM) {
387 |                 // MPEG1 program stream.
388 |                 Packet_Header_Length = 0;
389 |                 // Stuffing bytes.
390 |                 do {
391 |                     code = Get_Byte();
392 |                     Packet_Header_Length += 1;
393 |                 } while (code == 0xff);
394 |                 if ((code & 0xc0) == 0x40) {
395 |                     // STD bytes.
396 |                     Get_Byte();
397 |                     code = Get_Byte();
398 |                     Packet_Header_Length += 2;
399 |                 }
400 |                 if ((code & 0xf0) == 0x20) {
401 |                     // PTS bytes.
402 |                     Get_Short();
403 |                     Get_Short();
404 |                     Packet_Header_Length += 4;
405 |                 }
406 |                 else if ((code & 0xf0) == 0x30) {
407 |                     // PTS/DTS bytes.
408 |                     Get_Short();
409 |                     Get_Short();
410 |                     Get_Short();
411 |                     Get_Short();
412 |                     Get_Byte();
413 |                     Packet_Header_Length += 9;
414 |                 }
415 |                 return;
416 |             }
417 |             else {
418 |                 // MPEG2 program stream.
419 |                 code = Get_Byte();
420 |                 if ((code & 0xc0) == 0x80)
421 |                 {
422 |                     //code = Get_Byte();
423 |                     ++Rdptr;
424 |                     Packet_Header_Length = Get_Byte();
425 | 
426 |                     Rdptr += Packet_Header_Length;
427 |                     return;
428 |                 }
429 |                 else
430 |                     Rdptr += Packet_Length - 1;
431 |             }
432 |         }
433 |         else if (code >= SYSTEM_START_CODE)
434 |         {
435 |             code = Get_Short();
436 |             Rdptr += code;
437 |         }
438 |     }
439 | }
440 | 
441 | 
442 | void CMPEG2Decoder::Next_File()
443 | {
444 |     if (File_Flag < static_cast<int>(Infile.size() - 1)) {
445 |         File_Flag++;
446 | 
447 |     }
448 |     else {
449 |         File_Flag = 0;
450 |     }
451 |     // Even if we ran out of files, we reread the first one, just so
452 |     // the decoder at least processes valid data until it detects the
453 |     // fault flag and exits.
454 |     _lseeki64(Infile[File_Flag], 0, SEEK_SET);
455 |     int bytes = _read(Infile[File_Flag], Rdbfr + Read, BUFFER_SIZE - Read);
456 |     if (Read + static_cast<int64_t>(bytes) == BUFFER_SIZE)
457 |         // The whole buffer has valid data.
458 |         buffer_invalid = (uint8_t*)(UINTPTR_MAX);
459 |     else
460 |         // Point to the first invalid buffer location.
461 |         buffer_invalid = Rdbfr + Read + bytes;
462 | }
463 | 
464 | 


--------------------------------------------------------------------------------
/src/gethdr.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
  2 | 
  3 | /*
  4 |  * Disclaimer of Warranty
  5 |  *
  6 |  * These software programs are available to the user without any license fee or
  7 |  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  8 |  * any and all warranties, whether express, implied, or statuary, including any
  9 |  * implied warranties or merchantability or of fitness for a particular
 10 |  * purpose.  In no event shall the copyright-holder be liable for any
 11 |  * incidental, punitive, or consequential damages of any kind whatsoever
 12 |  * arising from the use of these programs.
 13 |  *
 14 |  * This disclaimer of warranty extends to the user of these programs and user's
 15 |  * customers, employees, agents, transferees, successors, and assigns.
 16 |  *
 17 |  * The MPEG Software Simulation Group does not represent or warrant that the
 18 |  * programs furnished hereunder are free of infringement of any third-party
 19 |  * patents.
 20 |  *
 21 |  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 22 |  * are subject to royalty fees to patent holders.  Many of these patents are
 23 |  * general enough such that they are unavoidable regardless of implementation
 24 |  * design.
 25 |  *
 26 |  */
 27 | 
 28 | #include "global.h"
 29 | #include "MPEG2Decoder.h"
 30 | 
 31 | 
 32 |  /* decode headers from one input stream */
 33 | int CMPEG2Decoder::Get_Hdr()
 34 | {
 35 |     for (;;)
 36 |     {
 37 |         /* look for next_start_code */
 38 |         Next_Start_Code();
 39 |         if (Fault_Flag == OUT_OF_BITS)
 40 |         {
 41 |             // We've run dry on data from the stream.
 42 |             return 0;
 43 |         }
 44 | 
 45 |         switch (Get_Bits(32))
 46 |         {
 47 |             case SEQUENCE_HEADER_CODE:
 48 |                 Sequence_Header();
 49 |                 Second_Field = 0;
 50 |                 break;
 51 | 
 52 |             case GROUP_START_CODE:
 53 |                 group_of_pictures_header();
 54 |                 Second_Field = 0;
 55 |                 break;
 56 | 
 57 |             case PICTURE_START_CODE:
 58 |                 picture_header();
 59 |                 return 1;
 60 |         }
 61 |     }
 62 | }
 63 | 
 64 | 
 65 | /* decode group of pictures header */
 66 | /* ISO/IEC 13818-2 section 6.2.2.6 */
 67 | __forceinline void CMPEG2Decoder::group_of_pictures_header()
 68 | {
 69 | #if 0
 70 |     Get_Bits(1); //drop_flag
 71 |     Get_Bits(5); //gop_hour
 72 |     Get_Bits(6); //gop_minute
 73 |     Flush_Buffer(1);    // marker bit
 74 |     Get_Bits(6); //gop_sec
 75 |     Get_Bits(6); //gop_frame
 76 |     Get_Bits(1); //closed_gop
 77 |     Get_Bits(1); //broken_link
 78 | #else
 79 |     Flush_Buffer(27);
 80 | #endif
 81 |     extension_and_user_data();
 82 | }
 83 | 
 84 | 
 85 | /* decode picture header */
 86 | /* ISO/IEC 13818-2 section 6.2.3 */
 87 | inline void CMPEG2Decoder::picture_header()
 88 | {
 89 |     temporal_reference = Get_Bits(10);
 90 |     picture_coding_type = Get_Bits(3);
 91 |     Flush_Buffer(16);   // vbv_delay, not kept
 92 | 
 93 |     // Forward vector info is coded for P and B pictures, backward for B only.
 94 |     const bool is_b_picture = (picture_coding_type == B_TYPE);
 95 |     if (is_b_picture || picture_coding_type == P_TYPE)
 96 |     {
 97 |         full_pel_forward_vector = Get_Bits(1);
 98 |         forward_f_code = Get_Bits(3);
 99 |     }
100 |     if (is_b_picture)
101 |     {
102 |         full_pel_backward_vector = Get_Bits(1);
103 |         backward_f_code = Get_Bits(3);
104 |     }
105 | 
106 |     // MPEG1 defaults. May be overridden by the picture coding extension.
107 |     progressive_frame = 1;
108 |     repeat_first_field = 0;
109 |     alternate_scan = 0;
110 |     intra_vlc_format = 0;
111 |     q_scale_type = 0;
112 |     concealment_motion_vectors = 0;
113 |     frame_pred_frame_dct = 1;
114 |     top_field_first = 1;
115 |     picture_structure = FRAME_PICTURE;
116 |     intra_dc_precision = 0;
117 |     pf_current = progressive_frame;
118 | 
119 |     extra_bit_information();    // skips the extra information bytes; the count is not needed
120 |     extension_and_user_data();
121 | }
122 | 
123 | 
124 | /* decode sequence header */
125 | /* Reads picture size, aspect ratio and the quantizer matrices, resets the */
126 | /* MPEG1 defaults and then parses any extension / user data that follows. */
127 | void CMPEG2Decoder::Sequence_Header()
128 | {
129 |     horizontal_size = Get_Bits(12);
130 |     vertical_size = Get_Bits(12);
131 |     aspect_ratio_information = Get_Bits(4);
132 | 
133 |     // Skipped, unused fields (34 bits):
134 |     //   frame_rate_code 4, bit_rate_value 18, marker bit 1,
135 |     //   vbv_buffer_size 10, constrained_parameters_flag 1
136 |     Flush_Buffer(34);
137 | 
138 |     // Intra matrix: either transmitted (stored through the zig-zag table) or the default.
139 |     load_intra_quantizer_matrix = Get_Bits(1);
140 |     if (load_intra_quantizer_matrix)
141 |     {
142 |         for (int k = 0; k < 64; ++k)
143 |             intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
144 |     }
145 |     else
146 |     {
147 |         for (int k = 0; k < 64; ++k)
148 |             intra_quantizer_matrix[k] = default_intra_quantizer_matrix[k];
149 |     }
150 | 
151 |     // Non-intra matrix: either transmitted (stored through the zig-zag table) or flat 16.
152 |     load_non_intra_quantizer_matrix = Get_Bits(1);
153 |     if (load_non_intra_quantizer_matrix)
154 |     {
155 |         for (int k = 0; k < 64; ++k)
156 |             non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
157 |     }
158 |     else
159 |     {
160 |         for (int k = 0; k < 64; ++k)
161 |             non_intra_quantizer_matrix[k] = 16;
162 |     }
163 | 
164 |     /* the chrominance matrices start out as copies of the luminance ones */
165 |     for (int k = 0; k < 64; ++k)
166 |     {
167 |         chroma_intra_quantizer_matrix[k] = intra_quantizer_matrix[k];
168 |         chroma_non_intra_quantizer_matrix[k] = non_intra_quantizer_matrix[k];
169 |     }
170 | 
171 |     // MPEG1 defaults. For MPEG2 they are replaced when the sequence
172 |     // extension / sequence display extension are parsed below.
173 |     matrix_coefficients = 5;
174 |     chroma_format = CHROMA420;
175 |     progressive_sequence = 1;
176 | 
177 |     extension_and_user_data();
178 | }
179 | 
180 | /* decode slice header */
181 | /* ISO/IEC 13818-2 section 6.2.4 */
182 | int CMPEG2Decoder::slice_header()
183 | {
184 |     // Returns slice_vertical_position_extension: 3 bits, coded only for MPEG2 with vertical_size > 2800, otherwise 0.
185 |     int slice_vertical_position_extension = 0;
186 |     if (mpeg_type == IS_MPEG2 && vertical_size > 2800)
187 |         slice_vertical_position_extension = Get_Bits(3);
188 |     const int quantizer_scale_code = Get_Bits(5);
189 |     if (mpeg_type == IS_MPEG2)
190 |         quantizer_scale = q_scale_type ? Non_Linear_quantizer_scale[quantizer_scale_code] : quantizer_scale_code << 1;
191 |     else
192 |         quantizer_scale = quantizer_scale_code;
193 | 
194 |     // Skip extra slice information (one flag bit per following byte). Stop as soon as
195 |     // Fault_Flag reports OUT_OF_BITS, mirroring the guard in extra_bit_information().
196 |     while (Get_Bits(1) && Fault_Flag != OUT_OF_BITS) Flush_Buffer(8);
197 |     return slice_vertical_position_extension;
198 | }
199 | 
200 | /* decode extension and user data */
201 | /* ISO/IEC 13818-2 section 6.2.2.2 */
202 | void CMPEG2Decoder::extension_and_user_data()
203 | {
204 |     Next_Start_Code();
205 | 
206 |     // Keep going for as long as the next start code introduces either an
207 |     // extension or user data; anything else is left in the bitstream.
208 |     for (;;)
209 |     {
210 |         const int start_code = Show_Bits(32);
211 |         const bool is_extension = (start_code == EXTENSION_START_CODE);
212 | 
213 |         if (!is_extension && start_code != USER_DATA_START_CODE)
214 |             break;
215 |         if (Fault_Flag == OUT_OF_BITS)
216 |             return;
217 | 
218 |         Flush_Buffer(32);   // drop the start code itself
219 | 
220 |         if (is_extension)
221 |         {
222 |             // The first four bits identify the extension; IDs not listed below are ignored.
223 |             const int ext_ID = Get_Bits(4);
224 |             switch (ext_ID)
225 |             {
226 |                 case SEQUENCE_EXTENSION_ID:
227 |                     sequence_extension();
228 |                     break;
229 |                 case SEQUENCE_DISPLAY_EXTENSION_ID:
230 |                     sequence_display_extension();
231 |                     break;
232 |                 case QUANT_MATRIX_EXTENSION_ID:
233 |                     quant_matrix_extension();
234 |                     break;
235 |                 case PICTURE_DISPLAY_EXTENSION_ID:
236 |                     picture_display_extension();
237 |                     break;
238 |                 case PICTURE_CODING_EXTENSION_ID:
239 |                     picture_coding_extension();
240 |                     break;
241 |                 case COPYRIGHT_EXTENSION_ID:
242 |                     copyright_extension();
243 |                     break;
244 |             }
245 |         }
246 | 
247 |         // User data and any unparsed extension payload are passed over by
248 |         // moving on to the next start code.
249 |         Next_Start_Code();
250 |     }
251 | }
252 | 
253 | /* decode sequence extension */
254 | /* ISO/IEC 13818-2 section 6.2.2.3 */
255 | /* Updates progressive_sequence, chroma_format and the picture size. */
256 | __forceinline void CMPEG2Decoder::sequence_extension()
257 | {
258 |     Flush_Buffer(8);    // profile_and_level_indication, not kept
259 | 
260 |     progressive_sequence = Get_Bits(1);
261 |     chroma_format = Get_Bits(2);
262 | 
263 |     // Two extra high-order bits for each of the 12-bit size values
264 |     // read by Sequence_Header().
265 |     const int width_high_bits = Get_Bits(2) << 12;
266 |     const int height_high_bits = Get_Bits(2) << 12;
267 | 
268 |     // Skipped, unused fields (29 bits):
269 |     //   bit_rate_extension 12, marker bit 1, vbv_buffer_size_extension 8,
270 |     //   low_delay 1, frame_rate_extension_n 2, frame_rate_extension_d 5
271 |     Flush_Buffer(29);
272 | 
273 |     // Combine the extension bits with the low 12 bits already stored.
274 |     horizontal_size = (horizontal_size & 0x0fff) | width_high_bits;
275 |     vertical_size = (vertical_size & 0x0fff) | height_high_bits;
276 | }
277 | 
278 | /* decode sequence display extension */
279 | __forceinline void CMPEG2Decoder::sequence_display_extension()
280 | {
281 |     Flush_Buffer(3);    // video_format, not kept
282 | 
283 |     const bool has_color_description = (Get_Bits(1) != 0);
284 |     if (has_color_description)
285 |     {
286 |         // color_primaries (8) and transfer_characteristics (8) are skipped;
287 |         // only matrix_coefficients is kept.
288 |         Flush_Buffer(16);
289 |         matrix_coefficients = Get_Bits(8);
290 |     }
291 |     else
292 |     {
293 |         matrix_coefficients = 1;    // value used when no color description is coded
294 |     }
295 | 
296 |     // Skipped: display_horizontal_size 14, marker bit 1, display_vertical_size 14.
297 |     Flush_Buffer(29);
298 | }
299 | 
300 | /* decode quant matrix extension */
301 | /* ISO/IEC 13818-2 section 6.2.3.2 */
302 | void CMPEG2Decoder::quant_matrix_extension()
303 | {
304 |     // Values are stored through the zig-zag table; a new luminance matrix is also copied to chrominance.
305 |     if ((load_intra_quantizer_matrix = Get_Bits(1)))
306 |         for (int k = 0; k < 64; ++k) {
307 |             const int pos = scan[ZIG_ZAG][k];
308 |             intra_quantizer_matrix[pos] = Get_Bits(8);
309 |             chroma_intra_quantizer_matrix[pos] = intra_quantizer_matrix[pos];
310 |         }
311 |     if ((load_non_intra_quantizer_matrix = Get_Bits(1)))
312 |         for (int k = 0; k < 64; ++k) {
313 |             const int pos = scan[ZIG_ZAG][k];
314 |             non_intra_quantizer_matrix[pos] = Get_Bits(8);
315 |             chroma_non_intra_quantizer_matrix[pos] = non_intra_quantizer_matrix[pos];
316 |         }
317 |     if ((load_chroma_intra_quantizer_matrix = Get_Bits(1)))
318 |         for (int k = 0; k < 64; ++k)
319 |             chroma_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
320 |     if ((load_chroma_non_intra_quantizer_matrix = Get_Bits(1)))
321 |         for (int k = 0; k < 64; ++k)
322 |             chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
323 | }
324 | 
325 | /* decode picture display extension */
326 | /* ISO/IEC 13818-2 section 6.2.3.3. */
327 | /* The frame center offsets are read and discarded: this function keeps */
328 | /* none of them, it only consumes their bits. */
329 | void CMPEG2Decoder::picture_display_extension()
330 | {
331 |     int number_of_frame_center_offsets;
332 | 
333 |     /* based on ISO/IEC 13818-2 section 6.3.12
334 |        (November 1994) Picture display extensions */
335 | 
336 |     /* derive number_of_frame_center_offsets */
337 |     if (progressive_sequence)
338 |     {
339 |         if (repeat_first_field)
340 |         {
341 |             if (top_field_first)
342 |                 number_of_frame_center_offsets = 3;
343 |             else
344 |                 number_of_frame_center_offsets = 2;
345 |         }
346 |         else
347 |             number_of_frame_center_offsets = 1;
348 |     }
349 |     else
350 |     {
351 |         if (picture_structure != FRAME_PICTURE)
352 |             number_of_frame_center_offsets = 1;
353 |         else
354 |         {
355 |             if (repeat_first_field)
356 |                 number_of_frame_center_offsets = 3;
357 |             else
358 |                 number_of_frame_center_offsets = 2;
359 |         }
360 |     }
361 | 
362 |     /* now parse: every offset is a 16-bit value followed by a marker bit. */
363 |     /* The values are not stored anywhere, so Get_Bits() is called purely */
364 |     /* to consume the bits. */
365 |     for (int i = 0; i < number_of_frame_center_offsets; i++)
366 |     {
367 |         Get_Bits(16);       // frame_center_horizontal_offset
368 |         Flush_Buffer(1);    // marker bit
369 | 
370 |         Get_Bits(16);       // frame_center_vertical_offset
371 |         Flush_Buffer(1);    // marker bit
372 |     }
373 | }
374 | 
375 | /* decode picture coding extension */
376 | void CMPEG2Decoder::picture_coding_extension()
377 | {
378 |     // f_code[s][t], read in the order [0][0], [0][1], [1][0], [1][1].
379 |     for (int s = 0; s < 2; ++s)
380 |         for (int t = 0; t < 2; ++t)
381 |             f_code[s][t] = Get_Bits(4);
382 | 
383 |     // Remaining fields, in bitstream order.
384 |     intra_dc_precision = Get_Bits(2);
385 |     picture_structure = Get_Bits(2);
386 |     top_field_first = Get_Bits(1);
387 |     frame_pred_frame_dct = Get_Bits(1);
388 |     concealment_motion_vectors = Get_Bits(1);
389 |     q_scale_type = Get_Bits(1);
390 |     intra_vlc_format = Get_Bits(1);
391 |     alternate_scan = Get_Bits(1);
392 |     repeat_first_field = Get_Bits(1);
393 |     Get_Bits(1);    // chroma_420_type, not kept
394 |     progressive_frame = Get_Bits(1);
395 | 
396 |     // Field pictures: top_field_first is derived from which field this is,
397 |     // and repeat_first_field / progressive_frame are cleared.
398 |     if (picture_structure != FRAME_PICTURE)
399 |     {
400 |         top_field_first = (picture_structure == TOP_FIELD) ? 1 : 0;
401 |         repeat_first_field = 0;
402 |         progressive_frame = 0;
403 |     }
404 | 
405 |     pf_current = progressive_frame;
406 | }
407 | 
408 | /* decode extra bit information */
409 | /* ISO/IEC 13818-2 section 6.2.3.4. */
410 | __forceinline int CMPEG2Decoder::extra_bit_information()
411 | {
412 |     // Each set flag bit announces one more information byte. Every byte is
413 |     // skipped and counted; an OUT_OF_BITS fault ends the scan early.
414 |     int skipped_bytes = 0;
415 | 
416 |     while (Get_Bits(1) && Fault_Flag != OUT_OF_BITS)
417 |     {
418 |         Flush_Buffer(8);
419 |         ++skipped_bytes;
420 |     }
421 | 
422 |     return skipped_bytes;
423 | }
424 | 
425 | /* Copyright extension */
426 | /* ISO/IEC 13818-2 section 6.2.3.6. */
427 | /* (header added in November, 1994 to the IS document) */
428 | __forceinline void CMPEG2Decoder::copyright_extension()
429 | {
430 |     // Nothing is kept from this extension; its 84 bits are skipped:
431 |     //   copyright_flag        1
432 |     //   copyright_identifier  8
433 |     //   original_or_copy      1
434 |     //   reserved_data         7
435 |     //   marker bit            1
436 |     //   copyright_number_1   20
437 |     //   marker bit            1
438 |     //   copyright_number_2   22
439 |     //   marker bit            1
440 |     //   copyright_number_3   22
441 | 
442 |     const int total_bits = 84;
443 |     const int max_chunk = 32;
444 | 
445 |     // The skip is issued in pieces of at most 32 bits each:
446 |     // 32 + 32 + 20.
447 |     for (int left = total_bits; left > 0; left -= max_chunk)
448 |         Flush_Buffer(left < max_chunk ? left : max_chunk);
449 | }
450 | 


--------------------------------------------------------------------------------
/src/global.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (C) Chia-chen Kuo - April 2001
 3 |  *
 4 |  *  This file is part of DVD2AVI, a free MPEG-2 decoder
 5 |  *  Ported to C++ by Mathias Born - May 2001
 6 |  *
 7 |  *  DVD2AVI is free software; you can redistribute it and/or modify
 8 |  *  it under the terms of the GNU General Public License as published by
 9 |  *  the Free Software Foundation; either version 2, or (at your option)
10 |  *  any later version.
11 |  *
12 |  *  DVD2AVI is distributed in the hope that it will be useful,
13 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 |  *  GNU General Public License for more details.
16 |  *
17 |  *  You should have received a copy of the GNU General Public License
18 |  *  along with GNU Make; see the file COPYING.  If not, write to
19 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 |  *
21 |  */
22 | // Defining GLOBAL before including global.h makes XTN expand to nothing there,
23 | // so this translation unit provides the definitions and initializers of its tables.
24 | #define GLOBAL
25 | #include "global.h"
26 | 
27 | int testint;    // NOTE(review): no use of testint is visible here - confirm it is still needed
28 | 


--------------------------------------------------------------------------------
/src/global.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
  2 | 
  3 | /*
  4 |  * Disclaimer of Warranty
  5 |  *
  6 |  * These software programs are available to the user with any license fee or
  7 |  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  8 |  * any and all warranties, whether express, implied, or statuary, including any
  9 |  * implied warranties or merchantability or of fitness for a particular
 10 |  * purpose.  In no event shall the copyright-holder be liable for any
 11 |  * incidental, punitive, or consequential damages of any kind whatsoever
 12 |  * arising from the use of these programs.
 13 |  *
 14 |  * This disclaimer of warranty extends to the user of these programs and user's
 15 |  * customers, employees, agents, transferees, successors, and assigns.
 16 |  *
 17 |  * The MPEG Software Simulation Group does not represent or warrant that the
 18 |  * programs furnished hereunder are free of infringement of any third-party
 19 |  * patents.
 20 |  *
 21 |  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 22 |  * are subject to royalty fees to patent holders.  Many of these patents are
 23 |  * general enough such that they are unavoidable regardless of implementation
 24 |  * design.
 25 |  *
 26 |  */
 27 | 
 28 | #ifndef __GLOBAL_H
 29 | #define __GLOBAL_H
 30 | 
 31 | #include <cstdint>
 32 | 
 33 | 
 34 |  //#include "misc.h"
 35 | // XTN prefixes every table below: it is empty in a translation unit that defines GLOBAL
 36 | // (the tables are defined and initialized there) and "extern" everywhere else.
 37 | #ifdef GLOBAL
 38 | #define XTN
 39 | #else
 40 | #define XTN extern
 41 | #endif
 42 | // Each constant occupies its own bit (values 1, 2, 4, 8, 16).
 43 | enum {
 44 |     MACROBLOCK_INTRA           = 1 << 0,
 45 |     MACROBLOCK_PATTERN         = 1 << 1,
 46 |     MACROBLOCK_MOTION_BACKWARD = 1 << 2,
 47 |     MACROBLOCK_MOTION_FORWARD  = 1 << 3,
 48 |     MACROBLOCK_QUANT           = 1 << 4,
 49 | };
 50 | 
 51 | 
 52 | /* default intra quantization matrix; Sequence_Header() copies it element by element when the stream carries no intra matrix */
 53 | XTN uint8_t default_intra_quantizer_matrix[64]
 54 | #ifdef GLOBAL
 55 | =
 56 | {
 57 |     8, 16, 19, 22, 26, 27, 29, 34,
 58 |     16, 16, 22, 24, 27, 29, 34, 37,
 59 |     19, 22, 26, 27, 29, 34, 34, 38,
 60 |     22, 22, 26, 27, 29, 34, 37, 40,
 61 |     22, 26, 27, 29, 32, 35, 40, 48,
 62 |     26, 27, 29, 32, 35, 40, 48, 58,
 63 |     26, 27, 29, 34, 38, 46, 56, 69,
 64 |     27, 29, 35, 38, 46, 56, 69, 83
 65 | }
 66 | #endif
 67 | ;
 68 | 
 69 | /* zig-zag and alternate scan patterns; scan[pattern][n] is the matrix index that receives the n-th transmitted value (row ZIG_ZAG is used when quantizer matrices are loaded) */
 70 | XTN uint8_t scan[2][64]
 71 | #ifdef GLOBAL
 72 | =
 73 | {
 74 |     { /* Zig-Zag scan pattern  */
 75 |         0,  1,  8, 16,  9,  2,  3, 10,
 76 |        17, 24, 32, 25, 18, 11,  4,  5,
 77 |        12, 19, 26, 33, 40, 48, 41, 34,
 78 |        27, 20, 13,  6,  7, 14, 21, 28,
 79 |        35, 42, 49, 56, 57, 50, 43, 36,
 80 |        29, 22, 15, 23, 30, 37, 44, 51,
 81 |        58, 59, 52, 45, 38, 31, 39, 46,
 82 |        53, 60, 61, 54, 47, 55, 62, 63
 83 |     }
 84 |     ,
 85 |     { /* Alternate scan pattern */
 86 |         0,  8, 16, 24,  1,  9,  2, 10,
 87 |        17, 25, 32, 40, 48, 56, 57, 49,
 88 |        41, 33, 26, 18,  3, 11, 4,  12,
 89 |        19, 27, 34, 42, 50, 58, 35, 43,
 90 |        51, 59, 20, 28,  5, 13,  6, 14,
 91 |        21, 29, 36, 44, 52, 60, 37, 45,
 92 |        53, 61, 22, 30,  7, 15, 23, 31,
 93 |        38, 46, 54, 62, 39, 47, 55, 63
 94 |     }
 95 | }
 96 | #endif
 97 | ;
 98 | 
 99 | /* non-linear quantization coefficient table; slice_header() indexes it with the 5-bit quantizer_scale_code when q_scale_type is set */
100 | XTN uint8_t Non_Linear_quantizer_scale[32]
101 | #ifdef GLOBAL
102 | =
103 | {
104 |     0, 1, 2, 3, 4, 5, 6, 7,
105 |     8, 10, 12, 14, 16, 18, 20, 22,
106 |     24, 28, 32, 36, 40, 44, 48, 52,
107 |     56, 64, 72, 80, 88, 96, 104, 112
108 | }
109 | #endif
110 | ;
111 | /* Sentinel stored in VLCtab::val by the tables below (always paired with len 0). */
112 | #define ERROR_VALUE (-1)
113 | /* DCT coefficient table entry. The tables below use run 64 for EOB and run 65 for Escape. */
114 | struct DCTtab {
115 |     int8_t run, level, len;     // int8_t: the signedness of plain char is implementation-defined
116 | };
117 | /* Entry of the remaining VLC tables; val has to be a signed type so that it can hold ERROR_VALUE. */
118 | struct VLCtab {
119 |     int8_t val, len;
120 | };
121 | 
122 | /* Table B-10, motion_code, codes 0001 ... 01xx; entries are {val, len}, {ERROR_VALUE,0} presumably flags an invalid code */
123 | XTN VLCtab MVtab0[8]
124 | #ifdef GLOBAL
125 | =
126 | {
127 |     {ERROR_VALUE,0}, {3,3}, {2,2}, {2,2}, {1,1}, {1,1}, {1,1}, {1,1}
128 | }
129 | #endif
130 | ;
131 | 
132 | /* Table B-10, motion_code, codes 0000011 ... 000011x; entries are {val, len}, {ERROR_VALUE,0} presumably flags an invalid code */
133 | XTN VLCtab MVtab1[8]
134 | #ifdef GLOBAL
135 | =
136 | {
137 |     {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,6}, {6,6}, {5,6}, {4,5}, {4,5}
138 | }
139 | #endif
140 | ;
141 | 
142 | /* Table B-10, motion_code, codes 0000001100 ... 000001011x; entries are {val, len} */
143 | XTN VLCtab MVtab2[12]
144 | #ifdef GLOBAL
145 | =
146 | {
147 |     {16,9}, {15,9}, {14,9}, {13,9},
148 |     {12,9}, {11,9}, {10,8}, {10,8},
149 |     {9,8},  {9,8},  {8,8},  {8,8}
150 | }
151 | #endif
152 | ;
153 | 
154 | /* Table B-9, coded_block_pattern, codes 01000 ... 111xx; entries are {val, len}, the first 8 slots hold {ERROR_VALUE,0} */
155 | XTN VLCtab CBPtab0[32]
156 | #ifdef GLOBAL
157 | =
158 | {
159 |     {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
160 |     {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
161 |     {62,5}, {2,5},  {61,5}, {1,5},  {56,5}, {52,5}, {44,5}, {28,5},
162 |     {40,5}, {20,5}, {48,5}, {12,5}, {32,4}, {32,4}, {16,4}, {16,4},
163 |     {8,4},  {8,4},  {4,4},  {4,4},  {60,3}, {60,3}, {60,3}, {60,3}
164 | }
165 | #endif
166 | ;
167 | 
168 | /* Table B-9, coded_block_pattern, codes 00000100 ... 001111xx; entries are {val, len}, the first 4 slots hold {ERROR_VALUE,0} */
169 | XTN VLCtab CBPtab1[64]
170 | #ifdef GLOBAL
171 | =
172 | {
173 |     {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
174 |     {58,8}, {54,8}, {46,8}, {30,8},
175 |     {57,8}, {53,8}, {45,8}, {29,8}, {38,8}, {26,8}, {37,8}, {25,8},
176 |     {43,8}, {23,8}, {51,8}, {15,8}, {42,8}, {22,8}, {50,8}, {14,8},
177 |     {41,8}, {21,8}, {49,8}, {13,8}, {35,8}, {19,8}, {11,8}, {7,8},
178 |     {34,7}, {34,7}, {18,7}, {18,7}, {10,7}, {10,7}, {6,7},  {6,7},
179 |     {33,7}, {33,7}, {17,7}, {17,7}, {9,7},  {9,7},  {5,7},  {5,7},
180 |     {63,6}, {63,6}, {63,6}, {63,6}, {3,6},  {3,6},  {3,6},  {3,6},
181 |     {36,6}, {36,6}, {36,6}, {36,6}, {24,6}, {24,6}, {24,6}, {24,6}
182 | }
183 | #endif
184 | ;
185 | 
186 | /* Table B-9, coded_block_pattern, codes 000000001 ... 000000111; entries are {val, len}, slot 0 holds {ERROR_VALUE,0} */
187 | XTN VLCtab CBPtab2[8]
188 | #ifdef GLOBAL
189 | =
190 | {
191 |     {ERROR_VALUE,0}, {0,9}, {39,9}, {27,9}, {59,9}, {55,9}, {47,9}, {31,9}
192 | }
193 | #endif
194 | ;
195 | 
196 | /* Table B-1, macroblock_address_increment, codes 00010 ... 011xx; entries are {val, len}, the first 2 slots hold {ERROR_VALUE,0} */
197 | XTN VLCtab MBAtab1[16]
198 | #ifdef GLOBAL
199 | =
200 | {
201 |     {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4},
202 |     {4,4}, {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
203 | }
204 | #endif
205 | ;
206 | 
207 | /* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx; entries are {val, len} */
208 | XTN VLCtab MBAtab2[104]
209 | #ifdef GLOBAL
210 | =
211 | {
212 |     {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
213 |     {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
214 |     {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
215 |     {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},
216 |     {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},
217 |     {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},
218 |     {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},
219 |     {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},
220 |     {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},
221 |     {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
222 |     {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
223 |     {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},
224 |     {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7}
225 | }
226 | #endif
227 | ;
228 | 
229 | /* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110; entries are {val, len}, the last slot holds {ERROR_VALUE, 0} */
230 | XTN VLCtab DClumtab0[32]
231 | #ifdef GLOBAL
232 | =
233 | {
234 |     {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
235 |     {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
236 |     {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
237 |     {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {ERROR_VALUE, 0}
238 | }
239 | #endif
240 | ;
241 | 
242 | /* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111; entries are {val, len} */
243 | XTN VLCtab DClumtab1[16]
244 | #ifdef GLOBAL
245 | =
246 | {
247 |     {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
248 |     {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
249 | }
250 | #endif
251 | ;
252 | 
253 | /* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110; entries are {val, len}, the last slot holds {ERROR_VALUE, 0} */
254 | XTN VLCtab DCchromtab0[32]
255 | #ifdef GLOBAL
256 | =
257 | {
258 |     {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
259 |     {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
260 |     {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
261 |     {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {ERROR_VALUE, 0}
262 | }
263 | #endif
264 | ;
265 | 
266 | /* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111; entries are {val, len} */
267 | XTN VLCtab DCchromtab1[32]
268 | #ifdef GLOBAL
269 | =
270 | {
271 |     {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
272 |     {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
273 |     {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
274 |     {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
275 | }
276 | #endif
277 | ;
278 | 
279 | /* Table B-14, DCT coefficients table zero,
280 |  * codes 0100 ... 1xxx (used for first (DC) coefficient)
281 |  * Entries are {run, level, len}. */
282 | XTN DCTtab DCTtabfirst[12]
283 | #ifdef GLOBAL
284 | =
285 | {
286 |     {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
287 |     {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
288 |     {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
289 | }
290 | #endif
291 | ;
292 | 
293 | /* Table B-14, DCT coefficients table zero,
294 |  * codes 0100 ... 1xxx (used for all other coefficients)
295 |  * Entries are {run, level, len}; run 64 marks EOB. */
296 | XTN DCTtab DCTtabnext[12]
297 | #ifdef GLOBAL
298 | =
299 | {
300 |     {0,2,4},  {2,1,4},  {1,1,3},  {1,1,3},
301 |     {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
302 |     {0,1,2},  {0,1,2},  {0,1,2},  {0,1,2}
303 | }
304 | #endif
305 | ;
306 | 
307 | /* Table B-14, DCT coefficients table zero,
308 |  * codes 000001xx ... 00111xxx
309 |  * Entries are {run, level, len}; run 65 marks Escape. */
310 | XTN DCTtab DCTtab0[60]
311 | #ifdef GLOBAL
312 | =
313 | {
314 |     {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
315 |     {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
316 |     {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
317 |     {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
318 |     {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
319 |     {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
320 |     {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
321 |     {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
322 |     {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
323 |     {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
324 |     {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
325 |     {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
326 |     {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
327 |     {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
328 |     {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
329 | }
330 | #endif
331 | ;
332 | 
333 | /* Table B-15, DCT coefficients table one,
334 |  * codes 000001xx ... 11111111
335 |  * Entries are {run, level, len}; run 65 marks Escape, run 64 marks EOB. */
336 | XTN DCTtab DCTtab0a[252]
337 | #ifdef GLOBAL
338 | =
339 | {
340 |     {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
341 |     {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
342 |     {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
343 |     {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
344 |     {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
345 |     {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
346 |     {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
347 |     {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
348 |     {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
349 |     {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
350 |     {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
351 |     {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
352 |     {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
353 |     {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
354 |     {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
355 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
356 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
357 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
358 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
359 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
360 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
361 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
362 |     {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
363 |     {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
364 |     {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
365 |     {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
366 |     {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
367 |     {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
368 |     {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
369 |     {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
370 |     {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
371 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
372 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
373 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
374 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
375 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
376 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
377 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
378 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
379 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
380 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
381 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
382 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
383 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
384 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
385 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
386 |     {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
387 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
388 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
389 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
390 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
391 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
392 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
393 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
394 |     {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
395 |     {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
396 |     {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
397 |     {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
398 |     {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
399 |     {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
400 |     {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
401 |     {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
402 |     {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
403 | }
404 | #endif
405 | ;
406 | 
407 | /* Table B-14, DCT coefficients table zero,
408 |  * codes 0000001000 ... 0000001111
409 |  * Entries are {run, level, len}. */
410 | XTN DCTtab DCTtab1[8]
411 | #ifdef GLOBAL
412 | =
413 | {
414 |     {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
415 |     {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
416 | }
417 | #endif
418 | ;
419 | 
420 | /* Table B-15, DCT coefficients table one,
421 |  * codes 000000100x ... 000000111x
422 |  * Entries are {run, level, len}. */
423 | XTN DCTtab DCTtab1a[8]
424 | #ifdef GLOBAL
425 | =
426 | {
427 |     {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
428 |     {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
429 | }
430 | #endif
431 | ;
432 | 
433 | /* Table B-14/15, DCT coefficients table zero / one,
434 |  * codes 000000010000 ... 000000011111
435 |  * Entries are {run, level, len}. */
436 | XTN DCTtab DCTtab2[16]
437 | #ifdef GLOBAL
438 | =
439 | {
440 |     {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
441 |     {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
442 |     {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
443 |     {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
444 | }
445 | #endif
446 | ;
447 | 
448 | /* Table B-14/15, DCT coefficients table zero / one,
449 |  * codes 0000000010000 ... 0000000011111
450 |  * Entries are {run, level, len}. */
451 | XTN DCTtab DCTtab3[16]
452 | #ifdef GLOBAL
453 | =
454 | {
455 |     {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
456 |     {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
457 |     {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
458 |     {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
459 | }
460 | #endif
461 | ;
462 | 
463 | /* Table B-14/15, DCT coefficients table zero / one,
464 |  * codes 00000000010000 ... 00000000011111
465 |  * Entries are {run, level, len}. */
466 | XTN DCTtab DCTtab4[16]
467 | #ifdef GLOBAL
468 | =
469 | {
470 |     {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
471 |     {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
472 |     {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
473 |     {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
474 | }
475 | #endif
476 | ;
477 | 
478 | /* Table B-14/15, DCT coefficients table zero / one,
479 |  * codes 000000000010000 ... 000000000011111
480 |  * Entries are {run, level, len}. */
481 | XTN DCTtab DCTtab5[16]
482 | #ifdef GLOBAL
483 | =
484 | {
485 |     {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
486 |     {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
487 |     {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
488 |     {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15}
489 | }
490 | #endif
491 | ;
492 | 
493 | /* Table B-14/15, DCT coefficients table zero / one,
494 |  * codes 0000000000010000 ... 0000000000011111
495 |  * Entries are {run, level, len}. */
496 | XTN DCTtab DCTtab6[16]
497 | #ifdef GLOBAL
498 | =
499 | {
500 |     {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
501 |     {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
502 |     {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
503 |     {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16}
504 | }
505 | #endif
506 | ;
507 | // add extra table of table ptrs for performance - trbarry 5/2003
508 | XTN DCTtab* pDCTtabNonI[28]         // ptr to non_intra tables; every pointer is offset backwards, presumably so it can be indexed with the shifted code (cf. DCTShiftTab) - confirm against the users
509 | #ifdef GLOBAL
510 | =
511 | {
512 |     &DCTtab6[0] - 16,   // bsf val = 4,   code => 16
513 |     &DCTtab5[0] - 16,   // bsf val = 5,   code => 32
514 |     &DCTtab4[0] - 16,   // bsf val = 6,   code => 64
515 |     &DCTtab3[0] - 16,   // bsf val = 7,   code => 128
516 |     &DCTtab2[0] - 16,   // bsf val = 8,   code => 256
517 |     &DCTtab1[0] - 8,    // bsf val = 9,   code => 512
518 |     &DCTtab0[0] - 4,    // bsf val = 10,  code => 1024
519 |     &DCTtab0[0] - 4,    // bsf val = 11,  code => 2048, same
520 |     &DCTtab0[0] - 4,    // bsf val = 12,  code => 4096, same
521 |     &DCTtab0[0] - 4,    // bsf val = 13,  code => 8192, same
522 |     &DCTtab0[0] - 4,    // bsf val = 14,  code => 16384, same
523 |     &DCTtab0[0] - 4,    // bsf val = 15,  how big can this get??
524 |     &DCTtab0[0] - 4,    // bsf val = 16,  same?
525 |     &DCTtab0[0] - 4,    // bsf val = 17,  same?
526 |     &DCTtab0[0] - 4,    // bsf val = 18,  same?
527 |     &DCTtab0[0] - 4,    // bsf val = 19,  same?
528 |     &DCTtab0[0] - 4,    // bsf val = 20,  same?
529 |     &DCTtab0[0] - 4,    // bsf val = 21,  same?
530 |     &DCTtab0[0] - 4,    // bsf val = 22,  same?
531 |     &DCTtab0[0] - 4,    // bsf val = 23,  same?
532 |     &DCTtab0[0] - 4,    // bsf val = 24,  same?
533 |     &DCTtab0[0] - 4,    // bsf val = 25,  same?
534 |     &DCTtab0[0] - 4,    // bsf val = 26,  same?
535 |     &DCTtab0[0] - 4,    // bsf val = 27,  same?
536 |     &DCTtab0[0] - 4,    // bsf val = 28,  same?
537 |     &DCTtab0[0] - 4,    // bsf val = 29,  same?
538 |     &DCTtab0[0] - 4,    // bsf val = 30,  same?
539 |     &DCTtab0[0] - 4     // bsf val = 31,  same?
540 | }
541 | #endif
542 | ;
543 | // same as above but for when intra_vlc_format - trbarry 5/2003
544 | XTN DCTtab* pDCTtab_intra[28]       // ptr to the intra_vlc_format tables: DCTtab1a / DCTtab0a (Table B-15) take the place of DCTtab1 / DCTtab0
545 | #ifdef GLOBAL
546 | =
547 | {
548 |     &DCTtab6[0] - 16,   // bsf val = 4,   code => 16
549 |     &DCTtab5[0] - 16,   // bsf val = 5,   code => 32
550 |     &DCTtab4[0] - 16,   // bsf val = 6,   code => 64
551 |     &DCTtab3[0] - 16,   // bsf val = 7,   code => 128
552 |     &DCTtab2[0] - 16,   // bsf val = 8,   code => 256
553 |     &DCTtab1a[0] - 8,   // bsf val = 9,   code => 512
554 |     &DCTtab0a[0] - 4,   // bsf val = 10,  code => 1024
555 |     &DCTtab0a[0] - 4,   // bsf val = 11,  code => 2048, same
556 |     &DCTtab0a[0] - 4,   // bsf val = 12,  code => 4096, same
557 |     &DCTtab0a[0] - 4,   // bsf val = 13,  code => 8192, same
558 |     &DCTtab0a[0] - 4,   // bsf val = 14   code => 16384, same
559 |     &DCTtab0a[0] - 4,   // bsf val = 15,  code => how big can this get?
560 |     &DCTtab0a[0] - 4,   // bsf val = 16,  same?
561 |     &DCTtab0a[0] - 4,   // bsf val = 17,  same?
562 |     &DCTtab0a[0] - 4,   // bsf val = 18,  same?
563 |     &DCTtab0a[0] - 4,   // bsf val = 19,  same?
564 |     &DCTtab0a[0] - 4,   // bsf val = 20,  same?
565 |     &DCTtab0a[0] - 4,   // bsf val = 21,  same?
566 |     &DCTtab0a[0] - 4,   // bsf val = 22,  same?
567 |     &DCTtab0a[0] - 4,   // bsf val = 23,  same?
568 |     &DCTtab0a[0] - 4,   // bsf val = 24,  same?
569 |     &DCTtab0a[0] - 4,   // bsf val = 25,  same?
570 |     &DCTtab0a[0] - 4,   // bsf val = 26,  same?
571 |     &DCTtab0a[0] - 4,   // bsf val = 27,  same?
572 |     &DCTtab0a[0] - 4,   // bsf val = 28,  same?
573 |     &DCTtab0a[0] - 4,   // bsf val = 29,  same?
574 |     &DCTtab0a[0] - 4,   // bsf val = 30,  same?
575 |     &DCTtab0a[0] - 4    // bsf val = 31,  same?
576 | }
577 | #endif
578 | ;
579 | 
// Extra table of shift amounts, added for performance - trbarry 5/2003
// It is laid out like the pDCTtab_* pointer tables: 28 entries, one per bsf
// (bit scan) value 4..31, so entry i belongs to bsf value i + 4.
// The shift grows by one per bsf step up to bsf 8, jumps by two at bsf 9 and
// stays at 8 for every bsf value of 10 or more.
XTN int DCTShiftTab[28] // amounts to shift code
#ifdef GLOBAL
=
{
    0,                  // bsf val = 4,   code => 16
    1,                  // bsf val = 5,   code => 32
    2,                  // bsf val = 6,   code => 64
    3,                  // bsf val = 7,   code => 128
    4,                  // bsf val = 8,   code => 256
    6,                  // bsf val = 9,   code => 512
    8,                  // bsf val = 10,  code => 1024
    8,                  // bsf val = 11,  code => 2048, same
    8,                  // bsf val = 12,  code => 4096, same
    8,                  // bsf val = 13,  code => 8192, same
    8,                  // bsf val = 14,  code => 16384, same
    8,                  // bsf val = 15,  how big can this get?
    8,                  // bsf val = 16,  same?
    8,                  // bsf val = 17,  same?
    8,                  // bsf val = 18,  same?
    8,                  // bsf val = 19,  same?
    8,                  // bsf val = 20,  same?
    8,                  // bsf val = 21,  same?
    8,                  // bsf val = 22,  same?
    8,                  // bsf val = 23,  same?
    8,                  // bsf val = 24,  same?
    8,                  // bsf val = 25,  same?
    8,                  // bsf val = 26,  same?
    8,                  // bsf val = 27,  same?
    8,                  // bsf val = 28,  same?
    8,                  // bsf val = 29,  same?
    8,                  // bsf val = 30,  same?
    8                   // bsf val = 31,  same?
}
#endif
;
616 | 
/* Table B-3, macroblock_type in P-pictures, codes 001..1xx */
/* Indexed by a 3-bit value: entry 1 is code 001, entries 2-3 are 01x and
 * entries 4-7 are 1xx.  Each entry pairs the macroblock_type flags with a
 * second number that matches the bit length of that code (3, 2, 1).
 * Entry 0 (000) has no code in this range and holds ERROR_VALUE with
 * length 0. */
XTN VLCtab PMBtab0[8]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0},
    {MACROBLOCK_MOTION_FORWARD,3},
    {MACROBLOCK_PATTERN,2}, {MACROBLOCK_PATTERN,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,1}
}
#endif
;
632 | 
/* Table B-3, macroblock_type in P-pictures, codes 000001..00011x */
/* Indexed by a 3-bit value taken from the low bits of the 6-bit code:
 * entry 1 is 000001 (length 6); entries 2-3, 4-5 and 6-7 are the 5-bit codes
 * 00001x, 00010x and 00011x, each listed twice for the ignored last bit.
 * Entry 0 holds ERROR_VALUE with length 0. */
XTN VLCtab PMBtab1[8]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0},
    {MACROBLOCK_QUANT | MACROBLOCK_INTRA,6},
    {MACROBLOCK_QUANT | MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT | MACROBLOCK_PATTERN,5},
    {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,5},
    {MACROBLOCK_INTRA,5}, {MACROBLOCK_INTRA,5}
}
#endif
;
646 | 
/* Table B-4, macroblock_type in B-pictures, codes 0010..11xx */
/* Indexed by a 4-bit value.  Entries 2 and 3 are the 4-bit codes 0010 and
 * 0011, entries 4-7 are the 3-bit codes 010x / 011x and entries 8-15 are the
 * 2-bit codes 10xx / 11xx; shorter codes are repeated for every value of
 * their ignored trailing bits.  The second number of each entry matches the
 * bit length of the code.  Entries 0 and 1 have no code in this range and
 * hold ERROR_VALUE with length 0. */
XTN VLCtab BMBtab0[16]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0},
    {ERROR_VALUE,0},
    {MACROBLOCK_MOTION_FORWARD,4},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,4},
    {MACROBLOCK_MOTION_BACKWARD,3},
    {MACROBLOCK_MOTION_BACKWARD,3},
    {MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,3},
    {MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,3},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2},
    {MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,2}
}
#endif
;
671 | 
/* Table B-4, macroblock_type in B-pictures, codes 000001..00011x */
/* Indexed by a 3-bit value taken from the low bits of the 6-bit code:
 * entries 1-3 are the 6-bit codes 000001, 000010 and 000011; entries 4-5 and
 * 6-7 are the 5-bit codes 00010x and 00011x, each listed twice for the
 * ignored last bit.  The second number of each entry matches the bit length
 * of the code.  Entry 0 holds ERROR_VALUE with length 0. */
XTN VLCtab BMBtab1[8]
#ifdef GLOBAL
=
{
    {ERROR_VALUE,0},
    {MACROBLOCK_QUANT | MACROBLOCK_INTRA,6},
    {MACROBLOCK_QUANT | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,6},
    {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_PATTERN,6},
    {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,5},
    {MACROBLOCK_QUANT | MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD | MACROBLOCK_PATTERN,5},
    {MACROBLOCK_INTRA,5},
    {MACROBLOCK_INTRA,5}
}
#endif
;
688 | 
689 | #undef XTN
690 | 
691 | #endif  // __GLOBAL_H
692 | 


--------------------------------------------------------------------------------
/src/idct.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPEG2DECPLUS_IDCT_H
 2 | #define MPEG2DECPLUS_IDCT_H
 3 | 
 4 | #include <cstdint>
 5 | #ifndef _WIN32
 6 | #include "win_import_min.h"
 7 | #endif
 8 | 
// Reference inverse DCT of one 8x8 block of coefficients.  Its source file
// describes it as the IEEE 1180 reference transform (64-bit floating point,
// separable 8x1 direct matrix multiply) written with SSE3 intrinsics.
// NOTE(review): alignment requirements of `block` are not visible from this
// header - confirm in idct_ref_sse3.cpp.
void idct_ref_sse3(int16_t* block);

// NOTE(review): presumably prefetches the constant table(s) used by
// idct_ref_sse3, like the other prefetch_* helpers - confirm in
// idct_ref_sse3.cpp.
void prefetch_ref();

// In-place fixed-point inverse DCT (Intel AP-922 algorithm, SSE2) of the 64
// coefficients at `block`: eight row passes followed by one column pass.
// `block` is accessed with aligned 128-bit loads/stores, so it must be
// 16-byte aligned.
void idct_ap922_sse2(int16_t* block);

// Issues non-temporal prefetches for the constant tables used by
// idct_ap922_sse2.
void prefetch_ap922();

// In-place single-precision LLM inverse DCT (SSE2) of the 64 coefficients at
// `block`.  The result is scaled by 1/8, rounded to integer and clamped to
// [-256, 255].  `block` is accessed with aligned 128-bit loads/stores, so it
// must be 16-byte aligned.
void idct_llm_float_sse2(int16_t* block);

// AVX2/FMA variant of idct_llm_float_sse2 with the same scaling and clamping.
// The result is written back with aligned 256-bit stores, so `block` must be
// 32-byte aligned.  Its source file refuses to compile unless __AVX2__ is
// defined.
void idct_llm_float_avx2(int16_t* block);

// Issues non-temporal prefetches for the coefficient table used by
// idct_llm_float_sse2.
void prefetch_llm_float_sse2();

// Issues non-temporal prefetches for the coefficient table used by
// idct_llm_float_avx2.
void prefetch_llm_float_avx2();
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/idct_ap922_sse2.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | idct_ap922_sse2.cpp
  3 | 
  4 | Originally provided by Intel at AP-922
  5 | http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
  6 | (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
  7 | but in a limited edition.
  8 | New macro implements a column part for precise iDCT
  9 | The routine precision now satisfies IEEE standard 1180-1990.
 10 | 
 11 | Copyright (c) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
 12 | Rounding trick Copyright (c) 2000 Michel Lespinasse <walken@zoy.org>
 13 | 
 14 | http://www.elecard.com/peter/idct.html
 15 | http://www.linuxvideo.org/mpeg2dec/
 16 | 
 17 | SSE2 code by Dmitry Rozhdestvensky
 18 | 
rewritten to intrinsics by OKA Motofumi
 20 | 
 21 | ============================================================================
 22 | 
 23 | These examples contain code fragments for first stage iDCT 8x8
 24 | (for rows) and first stage DCT 8x8 (for columns)
 25 | 
 26 | ============================================================================
 27 | 
 28 |  The first stage iDCT 8x8 - inverse DCTs of rows
 29 | 
 30 | -----------------------------------------------------------------------------
 31 |  The 8-point inverse DCT direct algorithm
 32 | -----------------------------------------------------------------------------
 33 | 
 34 |  static const short w[32] = {
 35 |    FIX(cos_4_16),  FIX(cos_2_16),  FIX(cos_4_16),  FIX(cos_6_16),
 36 |    FIX(cos_4_16),  FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
 37 |    FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16),  FIX(cos_2_16),
 38 |    FIX(cos_4_16), -FIX(cos_2_16),  FIX(cos_4_16), -FIX(cos_6_16),
 39 |    FIX(cos_1_16),  FIX(cos_3_16),  FIX(cos_5_16),  FIX(cos_7_16),
 40 |    FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
 41 |    FIX(cos_5_16), -FIX(cos_1_16),  FIX(cos_7_16),  FIX(cos_3_16),
 42 |    FIX(cos_7_16), -FIX(cos_5_16),  FIX(cos_3_16), -FIX(cos_1_16) };
 43 | 
 44 |  #define DCT_8_INV_ROW(x, y)
 45 |  {
 46 |    int a0, a1, a2, a3, b0, b1, b2, b3;
 47 | 
 48 |    a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3];
 49 |    a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7];
 50 |    a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
 51 |    a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
 52 |    b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
 53 |    b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
 54 |    b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
 55 |    b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
 56 | 
 57 |    y[0] = SHIFT_ROUND ( a0 + b0 );
 58 |    y[1] = SHIFT_ROUND ( a1 + b1 );
 59 |    y[2] = SHIFT_ROUND ( a2 + b2 );
 60 |    y[3] = SHIFT_ROUND ( a3 + b3 );
 61 |    y[4] = SHIFT_ROUND ( a3 - b3 );
 62 |    y[5] = SHIFT_ROUND ( a2 - b2 );
 63 |    y[6] = SHIFT_ROUND ( a1 - b1 );
 64 |    y[7] = SHIFT_ROUND ( a0 - b0 );
 65 |  }
 66 | 
 67 | -----------------------------------------------------------------------------
 68 | 
 69 |  In this implementation the outputs of the iDCT-1D are multiplied
 70 |    for rows 0,4 - by cos_4_16,
 71 |    for rows 1,7 - by cos_1_16,
 72 |    for rows 2,6 - by cos_2_16,
 73 |    for rows 3,5 - by cos_3_16
 74 |  and are shifted to the left for better accuracy
 75 | 
 76 |  For the constants used,
 77 |    FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
 78 | 
 79 | =============================================================================
 80 | */
 81 | 
 82 | 
 83 | #include <cstdint>
 84 | #include <emmintrin.h>
 85 | 
 86 | #ifndef _WIN32
 87 | #include "win_import_min.h"
 88 | #endif
 89 | 
 90 | 
// Row-pass coefficient tables for idct_row_sse2.  The suffix names the two
// rows a table is used for (table04 -> rows 0 and 4, table17 -> rows 1 and 7,
// and so on); as explained in the header comment, the weights of each pair of
// rows are pre-multiplied by a different cosine.
//
// Every table holds the 32 weights w0..w31 of the row formula from the header
// comment, interleaved in the order given by the trailing comments so that
// each group of eight values is one 128-bit operand for _mm_madd_epi16:
//   line 0 pairs with (x0, x2), line 1 with (x4, x6)   -> even part a0..a3
//   line 2 pairs with (x1, x3), line 3 with (x5, x7)   -> odd part  b0..b3
alignas(64) static constexpr int16_t table04[] = {
    16384, 21407,  16384,   8867,  16384,  -8867, 16384, -21407, // w0, w1, w4, w5, w8, w9,w12,w13
    16384,  8867, -16384, -21407, -16384,  21407, 16384,  -8867, // w2, w3, w6, w7,w10,w11,w14,w15
    22725, 19266,  19266,  -4520,  12873, -22725,  4520, -12873, //w16,w17,w20,w21,w24,w25,w28,w29
    12873,  4520, -22725, -12873,   4520,  19266, 19266, -22725, //w18,w19,w22,w23,w26,w27,w30,w31
};

// Weights for rows 1 and 7 (same layout as table04).
alignas(64) static constexpr int16_t table17[] = {
    22725, 29692,  22725,  12299,  22725, -12299, 22725, -29692, // w0, w1, w4, w5, w8, w9,w12,w13
    22725, 12299, -22725, -29692, -22725,  29692, 22725, -12299, // w2, w3, w6, w7,w10,w11,w14,w15
    31521, 26722,  26722,  -6270,  17855, -31521,  6270, -17855, //w16,w17,w20,w21,w24,w25,w28,w29
    17855,  6270, -31521, -17855,   6270,  26722, 26722, -31521, //w18,w19,w22,w23,w26,w27,w30,w31
};

// Weights for rows 2 and 6 (same layout as table04).
alignas(64) static constexpr int16_t table26[] = {
    21407, 27969,  21407,  11585,  21407, -11585, 21407, -27969, // w0, w1, w4, w5, w8, w9,w12,w13
    21407, 11585, -21407, -27969, -21407,  27969, 21407, -11585, // w2, w3, w6, w7,w10,w11,w14,w15
    29692, 25172,  25172,  -5906,  16819, -29692,  5906, -16819, //w16,w17,w20,w21,w24,w25,w28,w29
    16819,  5906, -29692, -16819,   5906,  25172, 25172, -29692, //w18,w19,w22,w23,w26,w27,w30,w31
};

// Weights for rows 3 and 5 (same layout as table04).
alignas(64) static constexpr int16_t table35[] = {
    19266, 25172,  19266,  10426,  19266, -10426, 19266, -25172, // w0, w1, w4, w5, w8, w9,w12,w13
    19266, 10426, -19266, -25172, -19266,  25172, 19266, -10426, // w2, w3, w6, w7,w10,w11,w14,w15
    26722, 22654,  22654,  -5315,  15137, -26722,  5315, -15137, //w16,w17,w20,w21,w24,w25,w28,w29
    15137,  5315, -26722, -15137,   5315,  22654, 22654, -26722, //w18,w19,w22,w23,w26,w27,w30,w31
};
118 | 
// Per-row constants for the row pass: idct_ap922_sse2 hands rounders[r] to
// idct_row_sse2 for row r, which adds it to the even-part sums before the
// arithmetic shift right by 11.  Each value is repeated four times so one
// row of this table is exactly one 128-bit vector of 32-bit lanes.
// NOTE(review): the non-uniform values presumably implement the "rounding
// trick" credited in the file header - confirm against the original source.
alignas(64) static constexpr int32_t rounders[8][4] = {
    { 65536, 65536, 65536, 65536 },
    { 3597, 3597, 3597, 3597 },
    { 2260, 2260, 2260, 2260 },
    { 1203, 1203, 1203, 1203 },
    { 0, 0, 0, 0 },
    { 120, 120, 120, 120 },
    { 512, 512, 512, 512 },
    { 512, 512, 512, 512 },
};
129 | 
// Column-pass multipliers for _mm_mulhi_epi16 (which keeps the high 16 bits
// of the 32-bit product), each broadcast to all eight lanes:
//   tg[0]  13036 ~ tan(1*pi/16) * 2^16
//   tg[1]  27146 ~ tan(2*pi/16) * 2^16
//   tg[2] -21746 ~ (tan(3*pi/16) - 1) * 2^16; the column pass adds the
//                  operand back after multiplying to get x * tan(3*pi/16)
//   tg[3]  23170 ~ cos(4*pi/16) * 2^15; the column pass doubles the product
alignas(64) static constexpr int16_t tg[4][8] = {
    { 13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036 },
    { 27146,  27146,  27146,  27146,  27146,  27146,  27146,  27146},
    {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746},
    { 23170,  23170,  23170,  23170,  23170,  23170,  23170,  23170},
};
136 | 
137 | 
// One-dimensional inverse DCT of a single row of eight 16-bit coefficients,
// performed in place.
//   block   - the row; read and written with aligned 128-bit accesses
//   table   - 32 interleaved weights (four 128-bit lines), 16-byte aligned
//   rounder - four 32-bit constants added to the even part before the shift,
//             16-byte aligned
static __forceinline void
idct_row_sse2(int16_t* block, const int16_t* table, const int32_t* rounder) noexcept
{
    __m128i* const io = reinterpret_cast<__m128i*>(block);
    const __m128i* const weights = reinterpret_cast<const __m128i*>(table);
    const __m128i bias = _mm_load_si128(reinterpret_cast<const __m128i*>(rounder));

    // Reorder x0..x7 into x0 x2 x1 x3 | x4 x6 x5 x7 so every 32-bit lane
    // holds one pair of inputs that is multiplied together by madd.
    __m128i in = _mm_load_si128(io);
    in = _mm_shufflelo_epi16(in, _MM_SHUFFLE(3, 1, 2, 0));
    in = _mm_shufflehi_epi16(in, _MM_SHUFFLE(3, 1, 2, 0));

    const __m128i x02 = _mm_shuffle_epi32(in, _MM_SHUFFLE(0, 0, 0, 0));
    const __m128i x13 = _mm_shuffle_epi32(in, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128i x46 = _mm_shuffle_epi32(in, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128i x57 = _mm_shuffle_epi32(in, _MM_SHUFFLE(3, 3, 3, 3));

    // Even part a0..a3 (inputs 0, 2, 4, 6) with the rounding constant folded in.
    const __m128i even_lo = _mm_madd_epi16(x02, _mm_load_si128(weights + 0));
    const __m128i even_hi = _mm_madd_epi16(x46, _mm_load_si128(weights + 1));
    const __m128i even = _mm_add_epi32(_mm_add_epi32(even_lo, even_hi), bias);

    // Odd part b0..b3 (inputs 1, 3, 5, 7).
    const __m128i odd_lo = _mm_madd_epi16(x13, _mm_load_si128(weights + 2));
    const __m128i odd_hi = _mm_madd_epi16(x57, _mm_load_si128(weights + 3));
    const __m128i odd = _mm_add_epi32(odd_lo, odd_hi);

    // y0..y3 = (a + b) >> 11 and y7..y4 = (a - b) >> 11.
    const __m128i front = _mm_srai_epi32(_mm_add_epi32(even, odd), 11);
    const __m128i back = _mm_srai_epi32(_mm_sub_epi32(even, odd), 11);

    // Pack to 16 bits with saturation, then reverse the upper half so the
    // outputs end up in natural order y0..y7.
    const __m128i packed = _mm_packs_epi32(front, back);
    _mm_store_si128(io, _mm_shufflehi_epi16(packed, _MM_SHUFFLE(0, 1, 2, 3)));
}
173 | 
174 | 
175 | static __forceinline void
176 | idct_colx8_sse2(int16_t* block) noexcept
177 | {
178 |     const __m128i* tg1 = reinterpret_cast<const __m128i*>(tg[0]);
179 |     const __m128i* tg2 = reinterpret_cast<const __m128i*>(tg[1]);
180 |     const __m128i* tg3 = reinterpret_cast<const __m128i*>(tg[2]);
181 |     const __m128i* ocos4 = reinterpret_cast<const __m128i*>(tg[3]);
182 | 
183 |     __m128i* blk = reinterpret_cast<__m128i*>(block);
184 | 
185 |     __m128i x0 = _mm_load_si128(blk + 0);
186 |     __m128i x4 = _mm_load_si128(blk + 4);
187 |     __m128i x2 = _mm_load_si128(blk + 2);
188 |     __m128i x6 = _mm_load_si128(blk + 6);
189 |     __m128i tgx = _mm_load_si128(tg2);
190 | 
191 |     __m128i u04 = _mm_adds_epi16(x0, x4);
192 |     __m128i v04 = _mm_subs_epi16(x0, x4);
193 | 
194 |     __m128i t0 = _mm_mulhi_epi16(x2, tgx);
195 |     __m128i t1 = _mm_mulhi_epi16(x6, tgx);
196 |     __m128i v26 = _mm_subs_epi16(t0, x6);
197 |     __m128i u26 = _mm_adds_epi16(t1, x2);
198 | 
199 |     __m128i a0 = _mm_adds_epi16(u04, u26);
200 |     __m128i a1 = _mm_adds_epi16(v04, v26);
201 |     __m128i a2 = _mm_subs_epi16(v04, v26);
202 |     __m128i a3 = _mm_subs_epi16(u04, u26);
203 | 
204 |     __m128i x1 = _mm_load_si128(blk + 1);
205 |     __m128i x7 = _mm_load_si128(blk + 7);
206 |     __m128i x3 = _mm_load_si128(blk + 3);
207 |     __m128i x5 = _mm_load_si128(blk + 5);
208 |     tgx = _mm_load_si128(tg1);
209 | 
210 |     t0 = _mm_mulhi_epi16(x1, tgx);
211 |     t1 = _mm_mulhi_epi16(x7, tgx);
212 |     __m128i u17 = _mm_adds_epi16(t1, x1);
213 |     __m128i v17 = _mm_subs_epi16(t0, x7);
214 | 
215 |     tgx = _mm_load_si128(tg3);
216 | 
217 |     t0 = _mm_mulhi_epi16(x3, tgx);
218 |     t1 = _mm_mulhi_epi16(x5, tgx);
219 |     t0 = _mm_adds_epi16(t0, x3);
220 |     t1 = _mm_adds_epi16(t1, x5);
221 |     __m128i v35 = _mm_subs_epi16(t0, x5);
222 |     __m128i u35 = _mm_adds_epi16(t1, x3);
223 | 
224 |     __m128i b0 = _mm_adds_epi16(u17, u35);
225 |     __m128i b3 = _mm_subs_epi16(v17, v35);
226 |     __m128i u12 = _mm_subs_epi16(u17, u35);
227 |     __m128i v12 = _mm_adds_epi16(v17, v35);
228 | 
229 |     tgx = _mm_load_si128(ocos4);
230 |     t0 = _mm_adds_epi16(u12, v12);
231 |     t1 = _mm_subs_epi16(u12, v12);
232 |     t0 = _mm_mulhi_epi16(t0, tgx);
233 |     t1 = _mm_mulhi_epi16(t1, tgx);
234 |     __m128i b1 = _mm_adds_epi16(t0, t0);
235 |     __m128i b2 = _mm_adds_epi16(t1, t1);
236 | 
237 |     _mm_store_si128(blk + 0, _mm_srai_epi16(_mm_adds_epi16(a0, b0), 6));
238 |     _mm_store_si128(blk + 7, _mm_srai_epi16(_mm_subs_epi16(a0, b0), 6));
239 | 
240 |     _mm_store_si128(blk + 3, _mm_srai_epi16(_mm_adds_epi16(a3, b3), 6));
241 |     _mm_store_si128(blk + 4, _mm_srai_epi16(_mm_subs_epi16(a3, b3), 6));
242 | 
243 |     _mm_store_si128(blk + 1, _mm_srai_epi16(_mm_adds_epi16(a1, b1), 6));
244 |     _mm_store_si128(blk + 6, _mm_srai_epi16(_mm_subs_epi16(a1, b1), 6));
245 | 
246 |     _mm_store_si128(blk + 2, _mm_srai_epi16(_mm_adds_epi16(a2, b2), 6));
247 |     _mm_store_si128(blk + 5, _mm_srai_epi16(_mm_subs_epi16(a2, b2), 6));
248 | }
249 | 
250 | 
251 | void idct_ap922_sse2(int16_t* block)
252 | {
253 |     idct_row_sse2(block + 0, table04, rounders[0]);
254 |     idct_row_sse2(block + 8, table17, rounders[1]);
255 |     idct_row_sse2(block + 16, table26, rounders[2]);
256 |     idct_row_sse2(block + 24, table35, rounders[3]);
257 |     idct_row_sse2(block + 32, table04, rounders[4]);
258 |     idct_row_sse2(block + 40, table35, rounders[5]);
259 |     idct_row_sse2(block + 48, table26, rounders[6]);
260 |     idct_row_sse2(block + 56, table17, rounders[7]);
261 | 
262 |     idct_colx8_sse2(block);
263 | }
264 | 
265 | 
266 | void prefetch_ap922()
267 | {
268 |     _mm_prefetch(reinterpret_cast<const char*>(table04), _MM_HINT_NTA);
269 |     _mm_prefetch(reinterpret_cast<const char*>(table17), _MM_HINT_NTA);
270 |     _mm_prefetch(reinterpret_cast<const char*>(table26), _MM_HINT_NTA);
271 |     _mm_prefetch(reinterpret_cast<const char*>(table35), _MM_HINT_NTA);
272 |     _mm_prefetch(reinterpret_cast<const char*>(rounders[0]), _MM_HINT_NTA);
273 |     _mm_prefetch(reinterpret_cast<const char*>(tg[0]), _MM_HINT_NTA);
274 | }
275 | 


--------------------------------------------------------------------------------
/src/idct_llm_float_avx2.cpp:
--------------------------------------------------------------------------------
  1 | #ifndef __AVX2__
  2 | #error arch:avx2 is not set.
  3 | #endif
  4 | 
  5 | #include <immintrin.h>
  6 | #include "idct.h"
  7 | 
// Multipliers for the single-precision LLM inverse DCT in idct_8x8_fma3.
// Each line is one constant repeated eight times, so that
// _mm256_load_ps(llm_coefs + 8 * k) yields constant k broadcast to all lanes.
// The comments below name where idct_8x8_fma3 uses each constant.
alignas(64) static const float llm_coefs[] = {
     1.175876f,  1.175876f,  1.175876f,  1.175876f,  1.175876f,  1.175876f,  1.175876f,  1.175876f, // + 0: common odd term
    -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, -1.961571f, // + 8: scales s3 + s7
    -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, -0.390181f, // +16: scales s1 + s5
    -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, -0.899976f, // +24: scales s1 + s7
    -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, -2.562915f, // +32: scales s3 + s5
     0.298631f,  0.298631f,  0.298631f,  0.298631f,  0.298631f,  0.298631f,  0.298631f,  0.298631f, // +40: scales s7
     2.053120f,  2.053120f,  2.053120f,  2.053120f,  2.053120f,  2.053120f,  2.053120f,  2.053120f, // +48: scales s5
     3.072711f,  3.072711f,  3.072711f,  3.072711f,  3.072711f,  3.072711f,  3.072711f,  3.072711f, // +56: scales s3
     1.501321f,  1.501321f,  1.501321f,  1.501321f,  1.501321f,  1.501321f,  1.501321f,  1.501321f, // +64: scales s1
     0.541196f,  0.541196f,  0.541196f,  0.541196f,  0.541196f,  0.541196f,  0.541196f,  0.541196f, // +72: common even term (s2 + s6)
    -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, -1.847759f, // +80: scales s6
     0.765367f,  0.765367f,  0.765367f,  0.765367f,  0.765367f,  0.765367f,  0.765367f,  0.765367f, // +88: scales s2
};
 22 | 
 23 | 
 24 | static __forceinline __m256
 25 | load_and_convert_to_float_x8_avx2(const int16_t* srcp) noexcept
 26 | {
 27 |     __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp));
 28 |     return _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(s));
 29 | }
 30 | 
 31 | 
 32 | static __forceinline void
 33 | transpose_8x8_avx2(__m256& a, __m256& b, __m256& c, __m256& d, __m256& e, __m256& f, __m256& g, __m256& h) noexcept
 34 | {
 35 |     __m256 ac0145 = _mm256_unpacklo_ps(a, c); // a0 c0 a1 c1 a4 c4 a5 c5
 36 |     __m256 ac2367 = _mm256_unpackhi_ps(a, c); // a2 c2 a3 c3 a6 c6 a7 c7
 37 |     __m256 bd0145 = _mm256_unpacklo_ps(b, d); // b0 d0 b1 d1 b4 d4 b5 d5
 38 |     __m256 bd2367 = _mm256_unpackhi_ps(b, d); // b2 d2 b3 d3 b6 d6 b7 d7
 39 |     __m256 eg0145 = _mm256_unpacklo_ps(e, g); // e0 g0 e1 g1 e4 g4 e5 g5
 40 |     __m256 eg2367 = _mm256_unpackhi_ps(e, g); // e2 g2 e3 g3 e6 g6 e7 g7
 41 |     __m256 fh0145 = _mm256_unpacklo_ps(f, h); // f0 h0 f1 h1 f4 h4 f5 h5
 42 |     __m256 fh2367 = _mm256_unpackhi_ps(f, h); // f2 h2 f3 h3 f6 h6 f7 h7
 43 | 
 44 |     __m256 abcd04 = _mm256_unpacklo_ps(ac0145, bd0145); // a0 b0 c0 d0 a4 b4 c4 d4
 45 |     __m256 abcd15 = _mm256_unpackhi_ps(ac0145, bd0145); // a1 b1 c1 d1 a5 b5 c5 d5
 46 |     __m256 abcd26 = _mm256_unpacklo_ps(ac2367, bd2367); // a2 b2 c2 d2 a6 b6 c6 d6
 47 |     __m256 abcd37 = _mm256_unpackhi_ps(ac2367, bd2367); // a3 b3 c3 d3 a7 b7 c7 d7
 48 |     __m256 efgh04 = _mm256_unpacklo_ps(eg0145, fh0145); // e0 f0 g0 h0 e4 f4 g4 h4
 49 |     __m256 efgh15 = _mm256_unpackhi_ps(eg0145, fh0145); // e1 f1 g1 h1 e5 f5 g5 h5
 50 |     __m256 efgh26 = _mm256_unpacklo_ps(eg2367, fh2367); // e2 f2 g2 h2 e6 f6 g6 h6
 51 |     __m256 efgh37 = _mm256_unpackhi_ps(eg2367, fh2367); // e3 f3 g3 h3 e7 f7 g7 h7
 52 | 
 53 |     a = _mm256_permute2f128_ps(abcd04, efgh04, (2 << 4) | 0); //a0 b0 c0 d0 e0 f0 g0 h0
 54 |     e = _mm256_permute2f128_ps(abcd04, efgh04, (3 << 4) | 1); //a4 b4 c4 d4 e4 f4 g4 h4
 55 |     b = _mm256_permute2f128_ps(abcd15, efgh15, (2 << 4) | 0); //a1 b1 c1 d1 e1 f1 g1 h1
 56 |     f = _mm256_permute2f128_ps(abcd15, efgh15, (3 << 4) | 1); //a5 b5 c5 d5 e5 f5 g5 h5
 57 |     c = _mm256_permute2f128_ps(abcd26, efgh26, (2 << 4) | 0); //a2 b2 c2 d2 e2 f2 g2 h2
 58 |     g = _mm256_permute2f128_ps(abcd26, efgh26, (3 << 4) | 1); //a6 b6 c6 d6 e6 f6 g6 h6
 59 |     d = _mm256_permute2f128_ps(abcd37, efgh37, (2 << 4) | 0); //a3 b3 c3 d3 e3 f3 g3 h3
 60 |     h = _mm256_permute2f128_ps(abcd37, efgh37, (3 << 4) | 1); //a7 b7 c7 d7 e7 f7 g7 h7
 61 | }
 62 | 
 63 | 
 64 | static __forceinline void
 65 | idct_8x8_fma3(__m256& s0, __m256& s1, __m256& s2, __m256& s3, __m256& s4, __m256& s5, __m256& s6, __m256& s7) noexcept
 66 | {
 67 |     __m256 z0 = _mm256_add_ps(s1, s7);
 68 |     __m256 z1 = _mm256_add_ps(s3, s5);
 69 |     __m256 z2 = _mm256_add_ps(s3, s7);
 70 |     __m256 z3 = _mm256_add_ps(s1, s5);
 71 |     __m256 z4 = _mm256_mul_ps(_mm256_add_ps(z0, z1), _mm256_load_ps(llm_coefs));
 72 | 
 73 |     z2 = _mm256_fmadd_ps(z2, _mm256_load_ps(llm_coefs + 8), z4);
 74 |     z3 = _mm256_fmadd_ps(z3, _mm256_load_ps(llm_coefs + 16), z4);
 75 |     z0 = _mm256_mul_ps(z0, _mm256_load_ps(llm_coefs + 24));
 76 |     z1 = _mm256_mul_ps(z1, _mm256_load_ps(llm_coefs + 32));
 77 | 
 78 |     __m256 b3 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 40), s7, _mm256_add_ps(z0, z2));
 79 |     __m256 b2 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 48), s5, _mm256_add_ps(z1, z3));
 80 |     __m256 b1 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 56), s3, _mm256_add_ps(z1, z2));
 81 |     __m256 b0 = _mm256_fmadd_ps(_mm256_load_ps(llm_coefs + 64), s1, _mm256_add_ps(z0, z3));
 82 | 
 83 |     z4 = _mm256_mul_ps(_mm256_add_ps(s2, s6), _mm256_load_ps(llm_coefs + 72));
 84 |     z0 = _mm256_add_ps(s0, s4);
 85 |     z1 = _mm256_sub_ps(s0, s4);
 86 | 
 87 |     z2 = _mm256_fmadd_ps(s6, _mm256_load_ps(llm_coefs + 80), z4);
 88 |     z3 = _mm256_fmadd_ps(s2, _mm256_load_ps(llm_coefs + 88), z4);
 89 | 
 90 |     __m256 a0 = _mm256_add_ps(z0, z3);
 91 |     __m256 a3 = _mm256_sub_ps(z0, z3);
 92 |     __m256 a1 = _mm256_add_ps(z1, z2);
 93 |     __m256 a2 = _mm256_sub_ps(z1, z2);
 94 | 
 95 |     s0 = _mm256_add_ps(a0, b0);
 96 |     s7 = _mm256_sub_ps(a0, b0);
 97 |     s1 = _mm256_add_ps(a1, b1);
 98 |     s6 = _mm256_sub_ps(a1, b1);
 99 |     s2 = _mm256_add_ps(a2, b2);
100 |     s5 = _mm256_sub_ps(a2, b2);
101 |     s3 = _mm256_add_ps(a3, b3);
102 |     s4 = _mm256_sub_ps(a3, b3);
103 | }
104 | 
105 | 
// Converts two rows of eight floats to sixteen int16 values and stores them
// at `dst` (s0 first, then s1).  Each value is multiplied by 1/8, rounded to
// integer with the current SSE rounding mode, packed to 16 bits with signed
// saturation and clamped to [-256, 255].
// `dst` is written with an aligned 256-bit store, so it must be 32-byte
// aligned.
static __forceinline void
float_to_dst_avx2(const __m256& s0, const __m256& s1, int16_t* dst) noexcept
{
    // Plain locals rather than function-local statics: a static initialised
    // from an intrinsic call may need a guarded dynamic initialisation that is
    // checked on every call, whereas these fold to simple constant loads.
    const __m256 one_eighth = _mm256_set1_ps(0.1250f);
    const __m256i minimum = _mm256_set1_epi16(-256);
    const __m256i maximum = _mm256_set1_epi16(255);

    const __m256 t0 = _mm256_mul_ps(s0, one_eighth);
    const __m256 t1 = _mm256_mul_ps(s1, one_eighth);

    // packs works per 128-bit half and yields s0[0..3] s1[0..3] | s0[4..7]
    // s1[4..7]; swapping the two middle 64-bit quarters restores row order.
    __m256i d0 = _mm256_packs_epi32(_mm256_cvtps_epi32(t0), _mm256_cvtps_epi32(t1));
    d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(3, 1, 2, 0));

    d0 = _mm256_max_epi16(_mm256_min_epi16(d0, maximum), minimum);
    _mm256_store_si256(reinterpret_cast<__m256i*>(dst), d0);
}
120 | 
121 | 
122 | void idct_llm_float_avx2(int16_t* block)
123 | {
124 |     __m256 s0 = load_and_convert_to_float_x8_avx2(block);
125 |     __m256 s1 = load_and_convert_to_float_x8_avx2(block + 8);
126 |     __m256 s2 = load_and_convert_to_float_x8_avx2(block + 16);
127 |     __m256 s3 = load_and_convert_to_float_x8_avx2(block + 24);
128 |     __m256 s4 = load_and_convert_to_float_x8_avx2(block + 32);
129 |     __m256 s5 = load_and_convert_to_float_x8_avx2(block + 40);
130 |     __m256 s6 = load_and_convert_to_float_x8_avx2(block + 48);
131 |     __m256 s7 = load_and_convert_to_float_x8_avx2(block + 56);
132 | 
133 |     transpose_8x8_avx2(s0, s1, s2, s3, s4, s5, s6, s7);
134 | 
135 |     idct_8x8_fma3(s0, s1, s2, s3, s4, s5, s6, s7);
136 | 
137 |     transpose_8x8_avx2(s0, s1, s2, s3, s4, s5, s6, s7);
138 | 
139 |     idct_8x8_fma3(s0, s1, s2, s3, s4, s5, s6, s7);
140 | 
141 |     float_to_dst_avx2(s0, s1, block + 0);
142 |     float_to_dst_avx2(s2, s3, block + 16);
143 |     float_to_dst_avx2(s4, s5, block + 32);
144 |     float_to_dst_avx2(s6, s7, block + 48);
145 | }
146 | 
147 | 
148 | void prefetch_llm_float_avx2()
149 | {
150 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs), _MM_HINT_NTA);
151 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 16), _MM_HINT_NTA);
152 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 32), _MM_HINT_NTA);
153 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 48), _MM_HINT_NTA);
154 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 64), _MM_HINT_NTA);
155 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 80), _MM_HINT_NTA);
156 | }
157 | 


--------------------------------------------------------------------------------
/src/idct_llm_float_sse2.cpp:
--------------------------------------------------------------------------------
  1 | #include <emmintrin.h>
  2 | #include "idct.h"
  3 | 
  4 | 
// Multipliers for the single-precision LLM inverse DCT in
// idct_8x4_with_transpose.  Each line is one constant repeated four times, so
// that _mm_load_ps(llm_coefs + 4 * k) yields constant k broadcast to all
// lanes.  The comments below name where that function uses each constant.
alignas(64) static const float llm_coefs[] = {
     1.175876f,  1.175876f,  1.175876f,  1.175876f, // + 0: common odd term
    -1.961571f, -1.961571f, -1.961571f, -1.961571f, // + 4: scales s3 + s7
    -0.390181f, -0.390181f, -0.390181f, -0.390181f, // + 8: scales s1 + s5
    -0.899976f, -0.899976f, -0.899976f, -0.899976f, // +12: scales s1 + s7
    -2.562915f, -2.562915f, -2.562915f, -2.562915f, // +16: scales s3 + s5
     0.298631f,  0.298631f,  0.298631f,  0.298631f, // +20: scales s7
     2.053120f,  2.053120f,  2.053120f,  2.053120f, // +24: scales s5
     3.072711f,  3.072711f,  3.072711f,  3.072711f, // +28: scales s3
     1.501321f,  1.501321f,  1.501321f,  1.501321f, // +32: scales s1
     0.541196f,  0.541196f,  0.541196f,  0.541196f, // +36: common even term (s2 + s6)
    -1.847759f, -1.847759f, -1.847759f, -1.847759f, // +40: scales s6
     0.765367f,  0.765367f,  0.765367f,  0.765367f, // +44: scales s2
};
 19 | 
 20 | 
 21 | static inline void short_to_float(const short* srcp, float* dstp) noexcept
 22 | {
 23 |     const __m128i zero = _mm_setzero_si128();
 24 | 
 25 |     for (int i = 0; i < 64; i += 8) {
 26 |         __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + i));
 27 |         __m128i mask = _mm_cmpgt_epi16(zero, s);
 28 |         __m128 d0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, mask));
 29 |         __m128 d1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, mask));
 30 |         _mm_store_ps(dstp + i, d0);
 31 |         _mm_store_ps(dstp + i + 4, d1);
 32 |     }
 33 | }
 34 | 
 35 | 
 36 | static inline void idct_8x4_with_transpose(const float* srcp, float* dstp) noexcept
 37 | {
 38 |     __m128 s0 = _mm_load_ps(srcp);
 39 |     __m128 s1 = _mm_load_ps(srcp + 8);
 40 |     __m128 s2 = _mm_load_ps(srcp + 16);
 41 |     __m128 s3 = _mm_load_ps(srcp + 24);
 42 |     _MM_TRANSPOSE4_PS(s0, s1, s2, s3);
 43 |     __m128 s4 = _mm_load_ps(srcp + 4);
 44 |     __m128 s5 = _mm_load_ps(srcp + 12);
 45 |     __m128 s6 = _mm_load_ps(srcp + 20);
 46 |     __m128 s7 = _mm_load_ps(srcp + 28);
 47 |     _MM_TRANSPOSE4_PS(s4, s5, s6, s7);
 48 | 
 49 |     __m128 z0 = _mm_add_ps(s1, s7);
 50 |     __m128 z1 = _mm_add_ps(s3, s5);
 51 |     __m128 z2 = _mm_add_ps(s3, s7);
 52 |     __m128 z3 = _mm_add_ps(s1, s5);
 53 |     __m128 z4 = _mm_mul_ps(_mm_add_ps(z0, z1), _mm_load_ps(llm_coefs));
 54 | 
 55 |     z2 = _mm_add_ps(_mm_mul_ps(z2, _mm_load_ps(llm_coefs + 4)), z4);
 56 |     z3 = _mm_add_ps(_mm_mul_ps(z3, _mm_load_ps(llm_coefs + 8)), z4);
 57 |     z0 = _mm_mul_ps(z0, _mm_load_ps(llm_coefs + 12));
 58 |     z1 = _mm_mul_ps(z1, _mm_load_ps(llm_coefs + 16));
 59 | 
 60 |     __m128 b3 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s7, _mm_load_ps(llm_coefs + 20)), z0), z2);
 61 |     __m128 b2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s5, _mm_load_ps(llm_coefs + 24)), z1), z3);
 62 |     __m128 b1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s3, _mm_load_ps(llm_coefs + 28)), z1), z2);
 63 |     __m128 b0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(s1, _mm_load_ps(llm_coefs + 32)), z0), z3);
 64 | 
 65 |     z4 = _mm_mul_ps(_mm_add_ps(s2, s6), _mm_load_ps(llm_coefs + 36));
 66 |     z0 = _mm_add_ps(s0, s4);
 67 |     z1 = _mm_sub_ps(s0, s4);
 68 | 
 69 |     z2 = _mm_add_ps(z4, _mm_mul_ps(s6, _mm_load_ps(llm_coefs + 40)));
 70 |     z3 = _mm_add_ps(z4, _mm_mul_ps(s2, _mm_load_ps(llm_coefs + 44)));
 71 | 
 72 |     s0 = _mm_add_ps(z0, z3);
 73 |     s3 = _mm_sub_ps(z0, z3);
 74 |     s1 = _mm_add_ps(z1, z2);
 75 |     s2 = _mm_sub_ps(z1, z2);
 76 | 
 77 |     _mm_store_ps(dstp, _mm_add_ps(s0, b0));
 78 |     _mm_store_ps(dstp + 56, _mm_sub_ps(s0, b0));
 79 |     _mm_store_ps(dstp + 8, _mm_add_ps(s1, b1));
 80 |     _mm_store_ps(dstp + 48, _mm_sub_ps(s1, b1));
 81 |     _mm_store_ps(dstp + 16, _mm_add_ps(s2, b2));
 82 |     _mm_store_ps(dstp + 40, _mm_sub_ps(s2, b2));
 83 |     _mm_store_ps(dstp + 24, _mm_add_ps(s3, b3));
 84 |     _mm_store_ps(dstp + 32, _mm_sub_ps(s3, b3));
 85 | }
 86 | 
 87 | 
 88 | static inline void float_to_dst_llm(const float* srcp, int16_t* dstp) noexcept
 89 | {
 90 |     static const __m128 one_eighth = _mm_set1_ps(0.1250f);
 91 |     static const __m128i minimum = _mm_set1_epi16(-256);
 92 |     static const __m128i maximum = _mm_set1_epi16(255);
 93 | 
 94 |     for (int i = 0; i < 64; i += 8) {
 95 |         __m128 s0 = _mm_load_ps(srcp + i);
 96 |         __m128 s1 = _mm_load_ps(srcp + i + 4);
 97 |         s0 = _mm_mul_ps(s0, one_eighth);
 98 |         s1 = _mm_mul_ps(s1, one_eighth);
 99 |         __m128i d = _mm_packs_epi32(_mm_cvtps_epi32(s0), _mm_cvtps_epi32(s1));
100 |         d = _mm_min_epi16(_mm_max_epi16(d, minimum), maximum);
101 |         _mm_store_si128(reinterpret_cast<__m128i*>(dstp + i), d);
102 |     }
103 | }
104 | 
105 | 
106 | void idct_llm_float_sse2(int16_t* block)
107 | {
108 |     alignas(64) float blockf[64];
109 |     alignas(64) float tmp[64];
110 | 
111 |     short_to_float(block, blockf);
112 | 
113 |     idct_8x4_with_transpose(blockf, tmp);
114 |     idct_8x4_with_transpose(blockf + 32, tmp + 4);
115 | 
116 |     idct_8x4_with_transpose(tmp, blockf);
117 |     idct_8x4_with_transpose(tmp + 32, blockf + 4);
118 | 
119 |     float_to_dst_llm(blockf, block);
120 | }
121 | 
122 | 
123 | void prefetch_llm_float_sse2()
124 | {
125 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs), _MM_HINT_NTA);
126 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 16), _MM_HINT_NTA);
127 |     _mm_prefetch(reinterpret_cast<const char*>(llm_coefs + 32), _MM_HINT_NTA);
128 | }
129 | 
130 | 


--------------------------------------------------------------------------------
/src/idct_ref_sse3.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | idct_reference_sse3.cpp
  3 | 
  4 | Rewritten as double-precision SSE3 intrinsic code.
  5 | OKA Motofumi - August 29, 2016
  6 | 
  7 | */
  8 | 
  9 | 
 10 | #include <pmmintrin.h>
 11 | #include "idct.h"
 12 | 
 13 | /*  Perform IEEE 1180 reference (64-bit floating point, separable 8x1
 14 |  *  direct matrix multiply) Inverse Discrete Cosine Transform
 15 | */
 16 | 
 17 | 
/* Cosine transform matrix for the 8x1 IDCT.
 *
 * 64 doubles laid out row-major as an 8x8 table (64-byte aligned).
 * Output sample x of a row is the dot product of table entries
 * [8*x .. 8*x+7] with the 8 input coefficients (see idct_ref_8x8_sse3).
 * The values correspond to 0.5 * c(z) * cos((2*x + 1) * z * pi / 16),
 * with c(0) = 1/sqrt(2) and c(z) = 1 for z > 0.
 */
alignas(64) static const double ref_dct_matrix_t[] = {
     3.5355339059327379e-001,  4.9039264020161522e-001,
     4.6193976625564337e-001,  4.1573480615127262e-001,
     3.5355339059327379e-001,  2.7778511650980114e-001,
     1.9134171618254492e-001,  9.7545161008064166e-002,
     3.5355339059327379e-001,  4.1573480615127262e-001,
     1.9134171618254492e-001, -9.7545161008064096e-002,
    -3.5355339059327373e-001, -4.9039264020161522e-001,
    -4.6193976625564342e-001, -2.7778511650980109e-001,
     3.5355339059327379e-001,  2.7778511650980114e-001,
    -1.9134171618254486e-001, -4.9039264020161522e-001,
    -3.5355339059327384e-001,  9.7545161008064152e-002,
     4.6193976625564326e-001,  4.1573480615127273e-001,
     3.5355339059327379e-001,  9.7545161008064166e-002,
    -4.6193976625564337e-001, -2.7778511650980109e-001,
     3.5355339059327368e-001,  4.1573480615127273e-001,
    -1.9134171618254495e-001, -4.9039264020161533e-001,
     3.5355339059327379e-001, -9.7545161008064096e-002,
    -4.6193976625564342e-001,  2.7778511650980092e-001,
     3.5355339059327384e-001, -4.1573480615127256e-001,
    -1.9134171618254528e-001,  4.9039264020161522e-001,
     3.5355339059327379e-001, -2.7778511650980098e-001,
    -1.9134171618254517e-001,  4.9039264020161522e-001,
    -3.5355339059327334e-001, -9.7545161008064013e-002,
     4.6193976625564337e-001, -4.1573480615127251e-001,
     3.5355339059327379e-001, -4.1573480615127267e-001,
     1.9134171618254500e-001,  9.7545161008064388e-002,
    -3.5355339059327356e-001,  4.9039264020161533e-001,
    -4.6193976625564320e-001,  2.7778511650980076e-001,
     3.5355339059327379e-001, -4.9039264020161522e-001,
     4.6193976625564326e-001, -4.1573480615127256e-001,
     3.5355339059327329e-001, -2.7778511650980076e-001,
     1.9134171618254478e-001, -9.7545161008064291e-002,
};
 53 | 
 54 | 
 55 | #if 0
 56 | static inline void transpose_8x8_c(const double* srcp, double* dstp) noexcept
 57 | {
 58 |     for (int y = 0; y < 8; ++y) {
 59 |         for (int x = 0; x < 8; ++x) {
 60 |             dstp[x] = srcp[8 * x + y];
 61 |         }
 62 |         dstp += 8;
 63 |     }
 64 | }
 65 | 
 66 | 
 67 | static inline void idct_ref_8x8_c(const double* srcp, double* dstp) noexcept
 68 | {
 69 |     for (int y = 0; y < 8; ++y) {
 70 |         for (int x = 0; x < 8; ++x) {
 71 |             double t = 0;
 72 |             for (int z = 0; z < 8; ++z) {
 73 |                 t += ref_dct_matrix_t[8 * x + z] * srcp[8 * y + z];
 74 |             }
 75 |             dstp[8 * y + x] = t;
 76 |         }
 77 |     }
 78 | }
 79 | 
 80 | #endif
 81 | 
 82 | 
 83 | static inline void short_to_double_sse2(const short* srcp, double* dstp) noexcept
 84 | {
 85 |     const __m128i zero = _mm_setzero_si128();
 86 |     for (int i = 0; i < 64; i += 8) {
 87 |         __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(srcp + i));
 88 |         __m128i mask = _mm_cmpgt_epi16(zero, s);
 89 |         __m128i s0 = _mm_unpacklo_epi16(s, mask);
 90 |         __m128i s1 = _mm_unpackhi_epi16(s, mask);
 91 |         __m128d d0 = _mm_cvtepi32_pd(s0);
 92 |         __m128d d1 = _mm_cvtepi32_pd(_mm_srli_si128(s0, 8));
 93 |         __m128d d2 = _mm_cvtepi32_pd(s1);
 94 |         __m128d d3 = _mm_cvtepi32_pd(_mm_srli_si128(s1, 8));
 95 |         _mm_store_pd(dstp + i, d0);
 96 |         _mm_store_pd(dstp + i + 2, d1);
 97 |         _mm_store_pd(dstp + i + 4, d2);
 98 |         _mm_store_pd(dstp + i + 6, d3);
 99 |     }
100 | }
101 | 
102 | 
// Transpose a row-major 8x8 matrix of doubles from srcp into dstp, working on
// 2x2 tiles. Aligned loads/stores are used, so both pointers must be 16-byte
// aligned.
static inline void transpose_8x8_sse2(const double* srcp, double* dstp) noexcept
{
    for (int y = 0; y < 8; y += 2) {
        const double* row0 = srcp + 8 * y;
        const double* row1 = row0 + 8;
        for (int x = 0; x < 8; x += 2) {
            const __m128d a = _mm_load_pd(row0 + x);
            const __m128d b = _mm_load_pd(row1 + x);
            double* out = dstp + 8 * x + y;
            _mm_store_pd(out, _mm_unpacklo_pd(a, b));     // column x   -> output row x
            _mm_store_pd(out + 8, _mm_unpackhi_pd(a, b)); // column x+1 -> output row x+1
        }
    }
}
117 | 
118 | 
119 | static inline void idct_ref_8x8_sse3(const double* srcp, double* dstp) noexcept
120 | {
121 |     for (int i = 0; i < 8; ++i) {
122 |         __m128d s0 = _mm_load_pd(srcp + 8 * static_cast<int64_t>(i));
123 |         __m128d s1 = _mm_load_pd(srcp + 8 * static_cast<int64_t>(i) + 2);
124 |         __m128d s2 = _mm_load_pd(srcp + 8 * static_cast<int64_t>(i) + 4);
125 |         __m128d s3 = _mm_load_pd(srcp + 8 * static_cast<int64_t>(i) + 6);
126 | 
127 |         for (int j = 0; j < 8; j += 2) {
128 |             const double* mpos = ref_dct_matrix_t + 8 * static_cast<int64_t>(j);
129 | 
130 |             __m128d m0 = _mm_mul_pd(_mm_load_pd(mpos), s0);
131 |             __m128d m1 = _mm_mul_pd(_mm_load_pd(mpos + 2), s1);
132 |             __m128d m2 = _mm_mul_pd(_mm_load_pd(mpos + 4), s2);
133 |             __m128d m3 = _mm_mul_pd(_mm_load_pd(mpos + 6), s3);
134 |             __m128d d0 = _mm_add_pd(_mm_add_pd(m0, m1), _mm_add_pd(m2, m3));
135 | 
136 |             m0 = _mm_mul_pd(_mm_load_pd(mpos + 8), s0);
137 |             m1 = _mm_mul_pd(_mm_load_pd(mpos + 10), s1);
138 |             m2 = _mm_mul_pd(_mm_load_pd(mpos + 12), s2);
139 |             m3 = _mm_mul_pd(_mm_load_pd(mpos + 14), s3);
140 |             __m128d d1 = _mm_add_pd(_mm_add_pd(m0, m1), _mm_add_pd(m2, m3));
141 | 
142 |             _mm_store_pd(dstp + 8 * static_cast<int64_t>(i) + j, _mm_hadd_pd(d0, d1));
143 |         }
144 |     }
145 | }
146 | 
147 | 
// Convert 64 doubles to 16-bit samples: convert to int32 (current SSE rounding
// mode), pack to int16 with signed saturation, then clamp to [-256, 255].
// Aligned loads/stores are used, so srcp and dst must be 16-byte aligned.
static inline void double_to_dst_sse2(const double* srcp, int16_t* dst) noexcept
{
    const __m128i lower = _mm_set1_epi16(-256);
    const __m128i upper = _mm_set1_epi16(255);

    // Convert four consecutive doubles to four int32 lanes.
    const auto to_int32x4 = [](const double* p) {
        return _mm_unpacklo_epi64(_mm_cvtpd_epi32(_mm_load_pd(p)),
                                  _mm_cvtpd_epi32(_mm_load_pd(p + 2)));
    };

    for (int row = 0; row < 8; ++row) {
        const double* in = srcp + 8 * row;
        const __m128i left = to_int32x4(in);
        const __m128i right = to_int32x4(in + 4);
        const __m128i packed = _mm_packs_epi32(left, right);
        const __m128i clamped = _mm_min_epi16(_mm_max_epi16(packed, lower), upper);
        _mm_store_si128(reinterpret_cast<__m128i*>(dst + 8 * row), clamped);
    }
}
164 | 
165 | 
166 | void idct_ref_sse3(int16_t* block)
167 | {
168 |     alignas(64) double blockf[64];
169 |     alignas(64) double tmp[64];
170 | 
171 |     short_to_double_sse2(block, blockf);
172 | 
173 |     idct_ref_8x8_sse3(blockf, tmp);
174 | 
175 |     transpose_8x8_sse2(tmp, blockf);
176 | 
177 |     idct_ref_8x8_sse3(blockf, tmp);
178 | 
179 |     transpose_8x8_sse2(tmp, blockf);
180 | 
181 |     double_to_dst_sse2(blockf, block);
182 | }
183 | 
184 | 
185 | void prefetch_ref()
186 | {
187 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 0), _MM_HINT_NTA);
188 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 8), _MM_HINT_NTA);
189 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 16), _MM_HINT_NTA);
190 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 24), _MM_HINT_NTA);
191 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 32), _MM_HINT_NTA);
192 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 40), _MM_HINT_NTA);
193 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 48), _MM_HINT_NTA);
194 |     _mm_prefetch(reinterpret_cast<const char*>(ref_dct_matrix_t + 56), _MM_HINT_NTA);
195 | }
196 | 
197 | 


--------------------------------------------------------------------------------
/src/mc.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Motion Compensation for MPEG2Dec3
  3 |  *
  4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
  5 |  *
  6 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
  7 |  *
  8 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
  9 |  *  it under the terms of the GNU General Public License as published by
 10 |  *  the Free Software Foundation; either version 2, or (at your option)
 11 |  *  any later version.
 12 |  *
 13 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
 14 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 16 |  *  GNU General Public License for more details.
 17 |  *
 18 |  *  You should have received a copy of the GNU General Public License
 19 |  *  along with GNU Make; see the file COPYING.  If not, write to
 20 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 21 |  *
 22 |  */
 23 | 
 24 | 
 25 |  // SSE2 intrinsic implementation
 26 |  // OKA Motofumi - August 23, 2016
 27 | 
 28 | 
 29 | #include <emmintrin.h>
 30 | #include "mc.h"
 31 | #ifndef _WIN32
 32 | #include "win_import_min.h"
 33 | #endif
 34 | 
 35 | 
 36 | static __forceinline __m128i loadl(const uint8_t* p)
 37 | {
 38 |     return _mm_loadl_epi64(reinterpret_cast<const __m128i*>(p));
 39 | }
 40 | 
 41 | static __forceinline __m128i loadu(const uint8_t* p)
 42 | {
 43 |     return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
 44 | }
 45 | 
 46 | static __forceinline __m128i avgu8(const __m128i& x, const __m128i& y)
 47 | {
 48 |     return _mm_avg_epu8(x, y);
 49 | }
 50 | 
 51 | static __forceinline void storel(uint8_t* p, const __m128i& x)
 52 | {
 53 |     _mm_storel_epi64(reinterpret_cast<__m128i*>(p), x);
 54 | }
 55 | 
 56 | static __forceinline void storeu(uint8_t* p, const __m128i& x)
 57 | {
 58 |     _mm_storeu_si128(reinterpret_cast<__m128i*>(p), x);
 59 | }
 60 | 
 61 | 
 62 | static void MC_put_8_c(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
 63 | {
 64 |     do {
 65 |         *reinterpret_cast<uint64_t*>(dest) = *reinterpret_cast<const uint64_t*>(ref);
 66 |         dest += stride; ref += stride;
 67 |     } while (--height > 0);
 68 | }
 69 | 
 70 | 
 71 | static void MC_put_16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
 72 | {
 73 |     do {
 74 |         storeu(dest, loadu(ref));
 75 |         ref += stride; dest += stride;
 76 |     } while (--height > 0);
 77 | }
 78 | 
 79 | 
 80 | static void MC_avg_8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
 81 | {
 82 |     do {
 83 |         storel(dest, avgu8(loadl(ref), loadl(dest)));
 84 |         ref += stride; dest += stride;
 85 |     } while (--height > 0);
 86 | }
 87 | 
 88 | 
 89 | static void MC_avg_16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
 90 | {
 91 |     do {
 92 |         storeu(dest, avgu8(loadu(ref), loadu(dest)));
 93 |         ref += stride; dest += stride;
 94 |     } while (--height > 0);
 95 | }
 96 | 
 97 | 
 98 | static void MC_put_x8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
 99 | {
100 |     do {
101 |         storel(dest, avgu8(loadl(ref), loadl(ref + 1)));
102 |         ref += stride; dest += stride;
103 |     } while (--height > 0);
104 | }
105 | 
106 | 
107 | static void MC_put_y8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
108 | {
109 |     do {
110 |         storel(dest, avgu8(loadl(ref), loadl(ref + offs)));
111 |         ref += stride; dest += stride;
112 |     } while (--height > 0);
113 | }
114 | 
115 | 
116 | static void MC_put_x16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
117 | {
118 |     do {
119 |         storeu(dest, avgu8(loadu(ref), loadu(ref + 1)));
120 |         ref += stride; dest += stride;
121 |     } while (--height > 0);
122 | }
123 | 
124 | 
125 | static void MC_put_y16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
126 | {
127 |     do {
128 |         storeu(dest, avgu8(loadu(ref), loadu(ref + offs)));
129 |         ref += stride; dest += stride;
130 |     } while (--height > 0);
131 | }
132 | 
133 | 
134 | static void MC_avg_x8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
135 | {
136 |     do {
137 |         storel(dest, avgu8(avgu8(loadl(ref), loadl(ref + 1)), loadl(dest)));
138 |         ref += stride; dest += stride;
139 |     } while (--height > 0);
140 | }
141 | 
142 | 
143 | static void MC_avg_y8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
144 | {
145 |     do {
146 |         storel(dest, avgu8(avgu8(loadl(ref), loadl(ref + offs)), loadl(dest)));
147 |         ref += stride; dest += stride;
148 |     } while (--height > 0);
149 | }
150 | 
151 | 
152 | static void MC_avg_x16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int, int height)
153 | {
154 |     do {
155 |         storeu(dest, avgu8(avgu8(loadu(ref), loadu(ref + 1)), loadu(dest)));
156 |         ref += stride; dest += stride;
157 |     } while (--height > 0);
158 | }
159 | 
160 | 
161 | static void MC_avg_y16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
162 | {
163 |     do {
164 |         storeu(dest, avgu8(avgu8(loadu(ref), loadu(ref + offs)), loadu(dest)));
165 |         ref += stride; dest += stride;
166 |     } while (--height > 0);
167 | }
168 | 
169 | 
170 | static __forceinline __m128i
171 | get_correcter(const __m128i& r0, const __m128i& r1, const __m128i& r2, const __m128i& r3,
172 |     const __m128i& avg0, const __m128i& avg1, const __m128i& one)
173 | {
174 |     __m128i t0 = _mm_or_si128(_mm_xor_si128(r0, r3), _mm_xor_si128(r1, r2));
175 |     t0 = _mm_and_si128(t0, _mm_xor_si128(avg0, avg1));
176 |     return _mm_and_si128(t0, one);
177 | }
178 | 
179 | 
180 | static void MC_put_xy8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
181 | {
182 |     static const __m128i one = _mm_set1_epi8(1);
183 |     const uint8_t* ro = ref + offs;
184 | 
185 |     do {
186 |         __m128i r0 = loadl(ref);
187 |         __m128i r1 = loadl(ref + 1);
188 |         __m128i r2 = loadl(ro);
189 |         __m128i r3 = loadl(ro + 1);
190 | 
191 |         __m128i avg0 = avgu8(r0, r3);
192 |         __m128i avg1 = avgu8(r1, r2);
193 | 
194 |         __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one);
195 | 
196 |         storel(dest, _mm_subs_epu8(avgu8(avg0, avg1), t0));
197 | 
198 |         ref += stride;
199 |         ro += stride;
200 |         dest += stride;
201 |     } while (--height > 0);
202 | }
203 | 
204 | 
205 | static void MC_put_xy16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
206 | {
207 |     static const __m128i one = _mm_set1_epi8(1);
208 |     const uint8_t* ro = ref + offs;
209 | 
210 |     do {
211 |         __m128i r0 = loadu(ref);
212 |         __m128i r1 = loadu(ref + 1);
213 |         __m128i r2 = loadu(ro);
214 |         __m128i r3 = loadu(ro + 1);
215 | 
216 |         __m128i avg0 = avgu8(r0, r3);
217 |         __m128i avg1 = avgu8(r1, r2);
218 | 
219 |         __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one);
220 | 
221 |         storeu(dest, _mm_subs_epu8(avgu8(avg0, avg1), t0));
222 | 
223 |         ref += stride;
224 |         ro += stride;
225 |         dest += stride;
226 |     } while (--height > 0);
227 | }
228 | 
229 | 
230 | static void MC_avg_xy8_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
231 | {
232 |     static const __m128i one = _mm_set1_epi8(1);
233 |     const uint8_t* ro = ref + offs;
234 | 
235 |     do {
236 |         __m128i r0 = loadl(ref);
237 |         __m128i r1 = loadl(ref + 1);
238 |         __m128i r2 = loadl(ro);
239 |         __m128i r3 = loadl(ro + 1);
240 | 
241 |         __m128i avg0 = avgu8(r0, r3);
242 |         __m128i avg1 = avgu8(r1, r2);
243 | 
244 |         __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one);
245 | 
246 |         storel(dest, avgu8(_mm_subs_epu8(avgu8(avg0, avg1), t0), loadl(dest)));
247 | 
248 |         ref += stride;
249 |         ro += stride;
250 |         dest += stride;
251 |     } while (--height > 0);
252 | }
253 | 
254 | 
255 | static void MC_avg_xy16_sse2(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height)
256 | {
257 |     static const __m128i one = _mm_set1_epi8(1);
258 |     const uint8_t* ro = ref + offs;
259 | 
260 |     do {
261 |         __m128i r0 = loadu(ref);
262 |         __m128i r1 = loadu(ref + 1);
263 |         __m128i r2 = loadu(ro);
264 |         __m128i r3 = loadu(ro + 1);
265 | 
266 |         __m128i avg0 = avgu8(r0, r3);
267 |         __m128i avg1 = avgu8(r1, r2);
268 | 
269 |         __m128i t0 = get_correcter(r0, r1, r2, r3, avg0, avg1, one);
270 | 
271 |         storeu(dest, avgu8(_mm_subs_epu8(avgu8(avg0, avg1), t0), loadu(dest)));
272 | 
273 |         ref += stride;
274 |         ro += stride;
275 |         dest += stride;
276 |     } while (--height > 0);
277 | }
278 | 
279 | 
280 | 
281 | // This project requires SSE2. MMX/MMX_EXT/3DNOW! are obsoute.
282 | // fastMC was discontinued...who cares about that?
283 | 
284 | MCFuncPtr ppppf_motion[2][2][4];
285 | 
286 | void Choose_Prediction(void)
287 | {
288 |     ppppf_motion[0][0][0] = MC_put_8_c;
289 |     ppppf_motion[0][0][1] = MC_put_y8_sse2;
290 |     ppppf_motion[0][0][2] = MC_put_x8_sse2;
291 |     ppppf_motion[0][0][3] = MC_put_xy8_sse2;
292 | 
293 |     ppppf_motion[0][1][0] = MC_put_16_sse2;
294 |     ppppf_motion[0][1][1] = MC_put_y16_sse2;
295 |     ppppf_motion[0][1][2] = MC_put_x16_sse2;
296 |     ppppf_motion[0][1][3] = MC_put_xy16_sse2;
297 | 
298 |     ppppf_motion[1][0][0] = MC_avg_8_sse2;
299 |     ppppf_motion[1][0][1] = MC_avg_y8_sse2;
300 |     ppppf_motion[1][0][2] = MC_avg_x8_sse2;
301 |     ppppf_motion[1][0][3] = MC_avg_xy8_sse2;
302 | 
303 |     ppppf_motion[1][1][0] = MC_avg_16_sse2;
304 |     ppppf_motion[1][1][1] = MC_avg_y16_sse2;
305 |     ppppf_motion[1][1][2] = MC_avg_x16_sse2;
306 |     ppppf_motion[1][1][3] = MC_avg_xy16_sse2;
307 | }
308 | 


--------------------------------------------------------------------------------
/src/mc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Motion Compensation for MPEG2Dec3
 3 |  *
 4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
 5 |  *
 6 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
 7 |  *
 8 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
 9 |  *  it under the terms of the GNU General Public License as published by
10 |  *  the Free Software Foundation; either version 2, or (at your option)
11 |  *  any later version.
12 |  *
13 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
14 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 |  *  GNU General Public License for more details.
17 |  *
18 |  *  You should have received a copy of the GNU General Public License
19 |  *  along with GNU Make; see the file COPYING.  If not, write to
20 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21 |  *
22 |  */
23 | 
24 | #ifndef MPEG2DEC_MC_H
25 | #define MPEG2DEC_MC_H
26 | 
27 | #include <cstdint>
28 | 
// Signature shared by all motion-compensation kernels:
//   dest   - destination block
//   ref    - reference (source) block
//   stride - byte distance between consecutive rows
//   offs   - byte offset of the second source position (ignored by some kernels)
//   height - number of rows to process
using MCFunc = void(uint8_t* dest, const uint8_t* ref, int stride, int offs, int height);
using MCFuncPtr = MCFunc*;

// Form prediction (motion compensation) function pointer array (GetPic.c) - Vlad59 04-20-2002
extern MCFuncPtr ppppf_motion[2][2][4];

// Fills ppppf_motion; see mc.cpp.
void Choose_Prediction(void);
35 | 
36 | #endif // MPEG2DEC_MC_H
37 | 


--------------------------------------------------------------------------------
/src/misc.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Misc Stuff for MPEG2Dec3
 3 |  *
 4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
 5 |  *
 6 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
 7 |  *
 8 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
 9 |  *  it under the terms of the GNU General Public License as published by
10 |  *  the Free Software Foundation; either version 2, or (at your option)
11 |  *  any later version.
12 |  *
13 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
14 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 |  *  GNU General Public License for more details.
17 |  *
18 |  *  You should have received a copy of the GNU General Public License
19 |  *  along with GNU Make; see the file COPYING.  If not, write to
20 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21 |  *
22 |  */
23 | 
24 | 
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <memory>

#include "misc.h"
30 | 
31 | 
32 | size_t __cdecl dprintf(char* fmt, ...)
33 | {
34 |     char printString[1024];
35 | 
36 |     va_list argp;
37 | 
38 |     va_start(argp, fmt);
39 |     vsprintf_s(printString, 1024, fmt, argp);
40 |     va_end(argp);
41 |     fprintf(stderr, "%s", printString);
42 |     return strlen(printString);
43 | }
44 | 
45 | 
// Copy a rectangle of horizontal_size x vertical_size bytes from src to dst.
//
//   src, src_stride : source pointer and byte distance between its rows
//   dst, dst_stride : destination pointer and byte distance between its rows
//   horizontal_size : bytes copied per row
//   vertical_size   : number of rows
//
// Nothing is copied when either size is zero or negative. Without that check a
// negative size would be converted to a huge unsigned byte count, or would
// make the row loop run until the counter wraps.
void __stdcall
fast_copy(const uint8_t* src, const int src_stride, uint8_t* dst,
    const int dst_stride, const int horizontal_size, int vertical_size) noexcept
{
    if (horizontal_size <= 0 || vertical_size <= 0) {
        return;
    }

    const size_t row_bytes = static_cast<size_t>(horizontal_size);

    // Rows are contiguous in both buffers: a single copy covers everything.
    if (horizontal_size == src_stride && src_stride == dst_stride) {
        std::memcpy(dst, src, row_bytes * static_cast<size_t>(vertical_size));
        return;
    }

    for (int row = 0; row < vertical_size; ++row) {
        std::memcpy(dst, src, row_bytes);
        dst += dst_stride;
        src += src_stride;
    }
}
64 | 


--------------------------------------------------------------------------------
/src/misc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Misc Stuff (profiling) for MPEG2Dec3
 3 |  *
 4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
 5 |  *
 6 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
 7 |  *
 8 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
 9 |  *  it under the terms of the GNU General Public License as published by
10 |  *  the Free Software Foundation; either version 2, or (at your option)
11 |  *  any later version.
12 |  *
13 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
14 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 |  *  GNU General Public License for more details.
17 |  *
18 |  *  You should have received a copy of the GNU General Public License
19 |  *  along with GNU Make; see the file COPYING.  If not, write to
20 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21 |  *
22 |  */
23 | 
24 | #ifndef MPEG2DECPLUS_MISC_H
25 | #define MPEG2DECPLUS_MISC_H
26 | 
27 | #ifndef _WIN32
28 | #include <avisynth.h>
29 | #include "win_import_min.h"
30 | #endif
31 | 
// Copy a rectangle of horizontal_size bytes per row, vertical_size rows, from
// src (rows src_stride bytes apart) to dst (rows dst_stride bytes apart).
// A single memcpy is used when both strides equal horizontal_size.
void __stdcall
fast_copy(const uint8_t* src, const int src_stride, uint8_t* dst,
    const int dst_stride, const int horizontal_size,
    const int vertical_size) noexcept;

// printf-style debug helper: formats into a 1024-byte buffer, writes the
// result to stderr and returns the length of the formatted string.
size_t __cdecl dprintf(char* fmt, ...);
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/src/store.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  MPEG2Dec3 : YV12 & PostProcessing
  3 |  *
  4 |  *  Copyright (C) 2002-2003 Marc Fauconneau <marc.fd@liberysurf.fr>
  5 |  *
  6 |  *  based of the intial MPEG2Dec Copyright (C) Chia-chen Kuo - April 2001
  7 |  *
  8 |  *  This file is part of MPEG2Dec3, a free MPEG-2 decoder
  9 |  *
 10 |  *  MPEG2Dec3 is free software; you can redistribute it and/or modify
 11 |  *  it under the terms of the GNU General Public License as published by
 12 |  *  the Free Software Foundation; either version 2, or (at your option)
 13 |  *  any later version.
 14 |  *
 15 |  *  MPEG2Dec3 is distributed in the hope that it will be useful,
 16 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 18 |  *  GNU General Public License for more details.
 19 |  *
 20 |  *  You should have received a copy of the GNU General Public License
 21 |  *  along with GNU Make; see the file COPYING.  If not, write to
 22 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 23 |  *
 24 |  */
 25 | 
 26 | 
 27 | #include "color_convert.h"
 28 |  //#include "postprocess.h"
 29 | #include "misc.h"
 30 | #include "MPEG2Decoder.h"
 31 | 
 32 | 
 33 | // Write 2-digits numbers in a 16x16 zone.
 34 | static void write_quants(uint8_t* dst, int stride, int mb_width, int mb_height,
 35 |     const int* qp)
 36 | {
 37 |     const uint8_t rien[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 38 |     const uint8_t nums[10][8] = {
 39 |         { 1, 4, 4, 4, 4, 4, 1, 0 },
 40 |         { 3, 3, 3, 3, 3, 3, 3, 0 },
 41 |         { 1, 3, 3, 1, 2, 2, 1, 0 },
 42 |         { 1, 3, 3, 1, 3, 3, 1, 0 },
 43 |         { 4, 4, 4, 1, 3, 3, 3, 0 },
 44 |         { 1, 2, 2, 1, 3, 3, 1, 0 },
 45 |         { 1, 2, 2, 1, 4, 4, 1, 0 },
 46 |         { 1, 3, 3, 3, 3, 3, 3, 0 },
 47 |         { 1, 4, 4, 1, 4, 4, 1, 0 },
 48 |         { 1, 4, 4, 1, 3, 3, 1, 0 },
 49 |     };
 50 | 
 51 |     auto write = [](const uint8_t* num, uint8_t* dst, const int stride) {
 52 |         for (int y = 0; y < 7; ++y) {
 53 |             if (num[y] == 1) {
 54 |                 dst[1 + y * stride] = 0xFF;
 55 |                 dst[2 + y * stride] = 0xFF;
 56 |                 dst[3 + y * stride] = 0xFF;
 57 |                 dst[4 + y * stride] = 0xFF;
 58 |             }
 59 |             if (num[y] == 2) {
 60 |                 dst[1 + y * stride] = 0xFF;
 61 |             }
 62 |             if (num[y] == 3) {
 63 |                 dst[4 + y * stride] = 0xFF;
 64 |             }
 65 |             if (num[y] == 4) {
 66 |                 dst[1 + y * stride] = 0xFF;
 67 |                 dst[4 + y * stride] = 0xFF;
 68 |             }
 69 |         }
 70 |     };
 71 | 
 72 |     for (int y = 0; y < mb_height; ++y) {
 73 |         for (int x = 0; x < mb_width; ++x) {
 74 |             int number = qp[x + y * mb_width];
 75 |             uint8_t* dstp = dst + static_cast<int64_t>(x) * 16 + static_cast<int64_t>(3) * stride;
 76 | 
 77 |             int c = (number / 100) % 10;
 78 |             const uint8_t* num = nums[c]; // x00
 79 |             if (c == 0) num = rien;
 80 |             write(num, dstp, stride);
 81 | 
 82 |             dstp += 5;
 83 |             int d = (number / 10) % 10;
 84 |             num = nums[d]; // 0x0
 85 |             if (c == 0 && d == 0) num = rien;
 86 |             write(num, dstp, stride);
 87 | 
 88 |             dstp += 5;
 89 |             num = nums[number % 10]; // 00x
 90 |             write(num, dstp, stride);
 91 |         }
 92 |         dst += static_cast<int64_t>(16) * stride;
 93 |     }
 94 | }
 95 | 
 96 | 
 97 | static void set_qparams(const int* qp, size_t mb_size, int& minquant,
 98 |     int& maxquant, int& avgquant)
 99 | {
100 |     int minq = qp[0], maxq = qp[0], sum = qp[0];
101 |     for (size_t i = 1; i < mb_size; ++i) {
102 |         int q = qp[i];
103 |         if (q < minq) minq = q;
104 |         if (q > maxq) maxq = q;
105 |         sum += q;
106 |     }
107 |     minquant = minq;
108 |     maxquant = maxq;
109 |     avgquant = static_cast<int>(static_cast<float>(sum) / mb_size + 0.5f);
110 | }
111 | 
112 | 
113 | void CMPEG2Decoder::assembleFrame(uint8_t* src[], int pf, YV12PICT& dst)
114 | {
115 |     dst.pf = pf;
116 | #if 0
117 |     if (pp_mode != 0)
118 |     {
119 |         uint8_t* ppptr[3];
120 |         if (!(upConv > 0 && chroma_format == 1))
121 |         {
122 |             ppptr[0] = dst->y;
123 |             ppptr[1] = dst->u;
124 |             ppptr[2] = dst->v;
125 |         }
126 |         else
127 |         {
128 |             ppptr[0] = dst->y;
129 |             ppptr[1] = u422;
130 |             ppptr[2] = v422;
131 |         }
132 |         bool iPPt;
133 |         if (iPP == 1 || (iPP == -1 && pf == 0)) iPPt = true;
134 |         else iPPt = false;
135 |         postprocess(src, this->Coded_Picture_Width, this->Chroma_Width,
136 |             ppptr, dst->ypitch, dst->uvpitch, this->Coded_Picture_Width,
137 |             this->Coded_Picture_Height, this->QP, this->mb_width, pp_mode, moderate_h, moderate_v,
138 |             chroma_format == 1 ? false : true, iPPt);
139 |         if (upConv > 0 && chroma_format == 1)
140 |         {
141 |             if (iCC == 1 || (iCC == -1 && pf == 0))
142 |             {
143 |                 conv420to422I(ppptr[1], dst->u, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height);
144 |                 conv420to422I(ppptr[2], dst->v, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height);
145 |             }
146 |             else
147 |             {
148 |                 conv420to422P(ppptr[1], dst->u, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height);
149 |                 conv420to422P(ppptr[2], dst->v, dst->uvpitch, dst->uvpitch, Coded_Picture_Width, Coded_Picture_Height);
150 |             }
151 |         }
152 |     }
153 |     else
154 | #endif
155 |     {
156 |         fast_copy(src[0], Coded_Picture_Width, dst.y, dst.ypitch, Coded_Picture_Width, Coded_Picture_Height);
157 |         if (upConv > 0 && chroma_format == 1) {
158 |             if (iCC == 1 || (iCC == -1 && pf == 0)) {
159 |                 conv420to422I(src[1], dst.u, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height);
160 |                 conv420to422I(src[2], dst.v, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height);
161 |             }
162 |             else {
163 |                 conv420to422P(src[1], dst.u, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height);
164 |                 conv420to422P(src[2], dst.v, Chroma_Width, dst.uvpitch, Coded_Picture_Width, Coded_Picture_Height);
165 |             }
166 |         }
167 |         else {
168 |             fast_copy(src[1], Chroma_Width, dst.u, dst.uvpitch, Chroma_Width, Chroma_Height);
169 |             fast_copy(src[2], Chroma_Width, dst.v, dst.uvpitch, Chroma_Width, Chroma_Height);
170 |         }
171 |     }
172 | 
173 |     if (has_prop || info == 1 || info == 2 || showQ) {
174 |         // Re-order quant data for display order.
175 |         const int* qp = (picture_coding_type == B_TYPE) ? auxQP : backwardQP;
176 |         if (has_prop || info == 1 || info == 2) {
177 |             set_qparams(qp, static_cast<int64_t>(mb_width) * mb_height, minquant, maxquant, avgquant);
178 |         }
179 |         if (showQ) {
180 |             write_quants(dst.y, dst.ypitch, mb_width, mb_height, qp);
181 |         }
182 |     }
183 | }
184 | 


--------------------------------------------------------------------------------
/src/win_import_min.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef WIN_IMPORT_MIN_H
 3 | #define WIN_IMPORT_MIN_H
 4 | 
 5 | /* Non-Windows stand-ins for MSVC-only names; __forceinline follows recent _mingw.h */
 6 | 
 7 | #ifdef __cplusplus
 8 | #define __forceinline inline __attribute__((__always_inline__))
 9 | #else
10 | #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__))
11 | #endif /* __cplusplus */
12 | 
13 | #ifdef __GNUC__
14 | #define _byteswap_ulong(x)           __builtin_bswap32(x)
15 | #endif
16 | 
17 | #define _read                        read
18 | #define _lseeki64                    lseek
19 | #define _close                       close
20 | 
21 | /* aligned_alloc requires 'size' to be a multiple of 'alignment', so the size is
22 |    rounded up to the next multiple.  Both arguments are parenthesised so that
23 |    expression arguments expand correctly (each one is evaluated more than once). */
24 | #define _aligned_malloc(s,a)         (                                       \
25 |                                      aligned_alloc((a),(((s)-1)/(a)+1)*(a))  \
26 |                                      )
27 | 
28 | #define _aligned_free(x)             free(x)
29 | 
30 | #define _atoi64(x)                   strtoll(x,NULL,10)
31 | #define sprintf_s(buf,...)           snprintf((buf),sizeof(buf),__VA_ARGS__) /* buf must be an array */
32 | #define strncpy_s(d,n,s,c)           strncpy(d,s,c)
33 | #define vsprintf_s(d,n,t,v)          vsnprintf((d),(n),(t),(v)) /* bounded by n instead of unbounded vsprintf */
34 | #define sscanf_s(buf,...)            sscanf((buf),__VA_ARGS__)
35 | #define fscanf_s(f,t,...)            fscanf(f,t,__VA_ARGS__)
36 | 
37 | #endif // WIN_IMPORT_MIN_H
38 | 
39 | 


--------------------------------------------------------------------------------
/src/yv12pict.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (C) Chia-chen Kuo - April 2001
  3 |  *
  4 |  *  This file is part of DVD2AVI, a free MPEG-2 decoder
  5 |  *
  6 |  *  DVD2AVI is free software; you can redistribute it and/or modify
  7 |  *  it under the terms of the GNU General Public License as published by
  8 |  *  the Free Software Foundation; either version 2, or (at your option)
  9 |  *  any later version.
 10 |  *
 11 |  *  DVD2AVI is distributed in the hope that it will be useful,
 12 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 14 |  *  GNU General Public License for more details.
 15 |  *
 16 |  *  You should have received a copy of the GNU General Public License
 17 |  *  along with GNU Make; see the file COPYING.  If not, write to
 18 |  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 19 |  *
 20 |  */
 21 | 
 22 |  // replace with one that doesn't need fixed size table - trbarry 3-22-2002
 23 | 
 24 | #include <malloc.h>
 25 | #include <stdexcept>
 26 | 
 27 | #include "yv12pict.h"
 28 | #ifndef _WIN32
 29 | #include "win_import_min.h"
 30 | #endif
 31 | 
 32 | //#define ptr_t unsigned int
 33 | 
 34 | 
 35 | 
 36 | // memory allocation for MPEG2Dec3.
 37 | //
 38 | // Changed this to handle/track both width/pitch for when
 39 | // width != pitch and it simply makes things easier to have all
 40 | // information in this struct.  It now uses 32y/16uv byte alignment
 41 | // by default, which makes internal bugs easier to catch.  This can
 42 | // easily be changed if needed.
 43 | //
 44 | // The definition of YV12PICT is in yv12pict.h
 45 | //
 46 | // tritical - May 16, 2005
 47 | 
 48 | // Change to use constructor/destructor
 49 | // chikuzen - Sep 6, 2016
 50 | 
 51 | 
 52 | // Allocating constructor: owns three aligned planes (Y: 32-byte pitch/alignment, U/V: 16-byte).
 53 | // Chroma width is halved when chroma_format < 3 and chroma height when chroma_format < 2.
 54 | // Throws std::runtime_error if any plane cannot be allocated.
 55 | YV12PICT::YV12PICT(int height, int width, int chroma_format) :
 56 |     allocated(true),
 57 |     y(nullptr), u(nullptr), v(nullptr),
 58 |     ywidth(width), uvwidth(chroma_format < 3 ? width / 2 : width),
 59 |     yheight(height), uvheight(chroma_format < 2 ? height / 2 : height)
 60 | {
 61 |     ypitch = (ywidth + 31) & ~31;
 62 |     uvpitch = (uvwidth + 15) & ~15;
 63 | 
 64 |     y = reinterpret_cast<uint8_t*>(_aligned_malloc(static_cast<int64_t>(height) * ypitch, 32));
 65 |     u = reinterpret_cast<uint8_t*>(_aligned_malloc(static_cast<int64_t>(uvheight) * uvpitch, 16));
 66 |     v = reinterpret_cast<uint8_t*>(_aligned_malloc(static_cast<int64_t>(uvheight) * uvpitch, 16));
 67 |     if (!y || !u || !v) {
 68 |         // The destructor does not run when a constructor throws, so release every
 69 |         // plane here -- including v, which is leaked if only y or u failed.
 70 |         _aligned_free(y);
 71 |         _aligned_free(u);
 72 |         _aligned_free(v);
 73 |         throw std::runtime_error("failed to new YV12PICT");
 74 |     }
 75 | }
 76 | 
 77 | // Non-owning view of `frame`: nothing is freed on destruction; chroma geometry is read from the U plane.
 78 | YV12PICT::YV12PICT(PVideoFrame& frame) : allocated(false)
 79 | {
 80 |     y = frame->GetWritePtr(PLANAR_Y);
 81 |     u = frame->GetWritePtr(PLANAR_U);
 82 |     v = frame->GetWritePtr(PLANAR_V);
 83 |     ypitch = frame->GetPitch(PLANAR_Y);      uvpitch = frame->GetPitch(PLANAR_U);
 84 |     ywidth = frame->GetRowSize(PLANAR_Y);    uvwidth = frame->GetRowSize(PLANAR_U);
 85 |     yheight = frame->GetHeight(PLANAR_Y);    uvheight = frame->GetHeight(PLANAR_U);
 86 | }
 87 | 
 88 | // Non-owning view of caller-supplied planes; pitches are the widths rounded up to 32 (Y) and 16 (U/V).
 89 | YV12PICT::YV12PICT(uint8_t* py, uint8_t* pu, uint8_t* pv, int yw, int cw, int h) : allocated(false)
 90 | {
 91 |     y = py;  u = pu;  v = pv;
 92 |     ypitch = (yw + 31) & ~31;  uvpitch = (cw + 15) & ~15;
 93 |     ywidth = yw;  uvwidth = cw;  yheight = uvheight = h;   // luma and chroma both get height h
 94 | }
 95 | 
 96 | // Frees the three planes only when this instance allocated them; the pointers are cleared either way.
 97 | YV12PICT::~YV12PICT()
 98 | {
 99 |     uint8_t** const planes[] = { &y, &u, &v };
100 |     for (uint8_t** plane : planes) {
101 |         // Views of external memory (allocated == false) must never be freed here.
102 |         if (allocated) _aligned_free(*plane);
103 |         *plane = nullptr;
104 |     }
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/src/yv12pict.h:
--------------------------------------------------------------------------------
 1 | #ifndef YV12PICT_H
 2 | #define YV12PICT_H
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | #include "avisynth.h"
 7 | 
 8 | // Planar Y/U/V picture that either owns its plane buffers or merely views external ones.
 9 | class YV12PICT {
10 |     const bool allocated;   // true only for the allocating constructor; the destructor then frees y/u/v
11 | public:
12 |     uint8_t* y, * u, * v;   // base pointers of the three planes
13 |     int ypitch, uvpitch;    // row pitch of the luma / chroma planes
14 |     int ywidth, uvwidth;    // row width of the luma / chroma planes
15 |     int yheight, uvheight;  // number of rows in the luma / chroma planes
16 |     int pf = 0;             // no constructor assigns it; zero-initialised so a read before a write is defined
17 |     // NOTE(review): copy construction is not disabled; copying an owning instance would free the planes twice.
18 |     YV12PICT(PVideoFrame& frame);                                            // non-owning view of a frame
19 |     YV12PICT(uint8_t* py, uint8_t* pu, uint8_t* pv, int yw, int cw, int h);  // non-owning view of given planes
20 |     YV12PICT(int height, int width, int chroma_format);                      // allocates aligned planes
21 |     ~YV12PICT();
22 | };
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------