├── .github
│   └── workflows
│       └── docker-image.yml
├── .gitignore
├── 0_hello_world.c
├── 2_remuxing.c
├── 3_transcoding.c
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── Makefile
├── README-cn.md
├── README-es.md
├── README-ko.md
├── README-pt.md
├── README-vn.md
├── README.md
├── build
│   └── .gitignore
├── fetch_bbb_video.sh
├── img
│   ├── adaptive-streaming.png
│   ├── boxes_fragmente_mp4.png
│   ├── boxes_normal_mp4.png
│   ├── container.png
│   ├── decoding.png
│   ├── encoding.png
│   ├── ffmpeg_libav_workflow.jpeg
│   ├── generated_frame.png
│   ├── h264_properties.png
│   ├── hello_world_frames
│   │   ├── frame0.png
│   │   ├── frame1.png
│   │   ├── frame2.png
│   │   ├── frame3.png
│   │   ├── frame4.png
│   │   └── frame5.png
│   ├── hevc_properties.png
│   ├── remuxing_libav_components.png
│   ├── transcoding.png
│   ├── transcoding_flow.png
│   ├── transmuxing.png
│   ├── transrating.png
│   └── transsizing.png
├── remuxed_small_bunny_1080p_60fps.ts
├── small_bunny_1080p_60fps.mp4
├── video_debugging.c
└── video_debugging.h

--------------------------------------------------------------------------------
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
name: Docker Image CI

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

jobs:

  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
    - name: Run hello example
      run: make make_hello
    - name: Run remuxing
      run: make make_remuxing
    - name: Run transcoding
      run: make make_transcoding

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*pgm
build/*
bunny_1080p_60fps.mp4
bunny_1s_gop.mp4
bunny_1s_gop.mp4.ts
bunny_1s_gop.mp4.webm
.vscode
.clangd
compile_commands.json

--------------------------------------------------------------------------------
/0_hello_world.c:
--------------------------------------------------------------------------------
/*
 * http://ffmpeg.org/doxygen/trunk/index.html
 *
 * Main components
 *
 * Format (Container) - a wrapper, providing sync, metadata and muxing for the streams.
 * Stream - a continuous stream (audio or video) of data over time.
 * Codec - defines how data are enCOded (from Frame to Packet)
 *         and DECoded (from Packet to Frame).
 * Packet - are the data (kind of slices of the stream data) to be decoded as raw frames.
 * Frame - a decoded raw frame (to be encoded or filtered).
 */
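/*
 * A rough sketch of how those components cooperate in this file (these are the
 * actual libav calls driven by main() and decode_packet() below):
 *
 *   avformat_open_input()    -> read the container header (Format)
 *   av_read_frame()          -> pull one compressed Packet from a Stream
 *   avcodec_send_packet()    -> feed the Packet to the Codec
 *   avcodec_receive_frame()  -> get back a raw, decoded Frame
 */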
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

// print out the steps and errors
static void logging(const char *fmt, ...);
// decode packets into frames
static int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame);
// save a frame into a .pgm file
static void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename);

int main(int argc, const char *argv[])
{

  if (argc < 2) {
    printf("You need to specify a media file.\n");
    return -1;
  }

  logging("initializing all the containers, codecs and protocols.");

  // AVFormatContext holds the header information from the format (Container)
  // Allocating memory for this component
  // http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html
  AVFormatContext *pFormatContext = avformat_alloc_context();
  if (!pFormatContext) {
    logging("ERROR could not allocate memory for Format Context");
    return -1;
  }

  logging("opening the input file (%s) and loading format (container) header", argv[1]);
  // Open the file and read its header. The codecs are not opened.
  // The function arguments are:
  // AVFormatContext (the component we allocated memory for),
  // url (filename),
  // AVInputFormat (if you pass NULL it'll do the auto detect)
  // and AVDictionary (which are options to the demuxer)
  // http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49
  if (avformat_open_input(&pFormatContext, argv[1], NULL, NULL) != 0) {
    logging("ERROR could not open the file");
    return -1;
  }

  // now we have access to some information about our file,
  // since we read its header we can say what format (container) it is
  // and some other information related to the format itself.
  logging("format %s, duration %lld us, bit_rate %lld", pFormatContext->iformat->name, pFormatContext->duration, pFormatContext->bit_rate);

  logging("finding stream info from format");
  // read Packets from the Format to get stream information
  // this function populates pFormatContext->streams
  // (of size equal to pFormatContext->nb_streams)
  // the arguments are:
  // the AVFormatContext
  // and options, which contains options for the codec corresponding to the i-th stream.
  // On return each dictionary will be filled with options that were not found.
  // https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb
  if (avformat_find_stream_info(pFormatContext, NULL) < 0) {
    logging("ERROR could not get the stream info");
    return -1;
  }

  // the component that knows how to enCOde and DECode the stream
  // it's the codec (audio or video)
  // http://ffmpeg.org/doxygen/trunk/structAVCodec.html
  AVCodec *pCodec = NULL;
  // this component describes the properties of the codec used by the stream i
  // https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html
  AVCodecParameters *pCodecParameters = NULL;
  int video_stream_index = -1;

  // loop through all the streams and print their main information
  for (int i = 0; i < pFormatContext->nb_streams; i++)
  {
    AVCodecParameters *pLocalCodecParameters = NULL;
    pLocalCodecParameters = pFormatContext->streams[i]->codecpar;
    logging("AVStream->time_base before open coded %d/%d", pFormatContext->streams[i]->time_base.num, pFormatContext->streams[i]->time_base.den);
    logging("AVStream->r_frame_rate before open coded %d/%d", pFormatContext->streams[i]->r_frame_rate.num, pFormatContext->streams[i]->r_frame_rate.den);
    logging("AVStream->start_time %" PRId64, pFormatContext->streams[i]->start_time);
    logging("AVStream->duration %" PRId64, pFormatContext->streams[i]->duration);

    logging("finding the proper decoder (CODEC)");

    AVCodec *pLocalCodec = NULL;

    // finds the registered decoder for a codec ID
    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca
    pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);

    if (pLocalCodec==NULL) {
      logging("ERROR unsupported codec!");
      // In this example if the codec is not found we just skip it
      continue;
    }

    // when the stream is a video we store its index, codec parameters and codec
    if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {
      if (video_stream_index == -1) {
        video_stream_index = i;
        pCodec = pLocalCodec;
        pCodecParameters = pLocalCodecParameters;
      }

      logging("Video Codec: resolution %d x %d", pLocalCodecParameters->width, pLocalCodecParameters->height);
    } else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {
      logging("Audio Codec: %d channels, sample rate %d", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);
    }

    // print its name, id and bitrate
    logging("\tCodec %s ID %d bit_rate %lld", pLocalCodec->name, pLocalCodec->id, pLocalCodecParameters->bit_rate);
  }

  if (video_stream_index == -1) {
    logging("File %s does not contain a video stream!", argv[1]);
    return -1;
  }

  // https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html
  AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);
  if (!pCodecContext)
  {
    logging("failed to allocate memory for AVCodecContext");
    return -1;
  }

  // Fill the codec context based on the values from the supplied codec parameters
  // https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16
  if (avcodec_parameters_to_context(pCodecContext, pCodecParameters) < 0)
  {
    logging("failed to copy codec params to codec context");
    return -1;
  }
  // Initialize the AVCodecContext to use the given AVCodec.
  // https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d
  if (avcodec_open2(pCodecContext, pCodec, NULL) < 0)
  {
    logging("failed to open codec through avcodec_open2");
    return -1;
  }

  // https://ffmpeg.org/doxygen/trunk/structAVFrame.html
  AVFrame *pFrame = av_frame_alloc();
  if (!pFrame)
  {
    logging("failed to allocate memory for AVFrame");
    return -1;
  }
  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html
  AVPacket *pPacket = av_packet_alloc();
  if (!pPacket)
  {
    logging("failed to allocate memory for AVPacket");
    return -1;
  }

  int response = 0;
  int how_many_packets_to_process = 8;

  // fill the Packet with data from the Stream
  // https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61
  while (av_read_frame(pFormatContext, pPacket) >= 0)
  {
    // if it's the video stream
    if (pPacket->stream_index == video_stream_index) {
      logging("AVPacket->pts %" PRId64, pPacket->pts);
      response = decode_packet(pPacket, pCodecContext, pFrame);
      if (response < 0)
        break;
      // stop it, otherwise we'll be saving hundreds of frames
      if (--how_many_packets_to_process <= 0) break;
    }
    // https://ffmpeg.org/doxygen/trunk/group__lavc__packet.html#ga63d5a489b419bd5d45cfd09091cbcbc2
    av_packet_unref(pPacket);
  }

  logging("releasing all the resources");

  avformat_close_input(&pFormatContext);
  av_packet_free(&pPacket);
  av_frame_free(&pFrame);
  avcodec_free_context(&pCodecContext);
  return 0;
}

static void logging(const char *fmt, ...)
{
  va_list args;
  fprintf( stderr, "LOG: " );
  va_start( args, fmt );
  vfprintf( stderr, fmt, args );
  va_end( args );
  fprintf( stderr, "\n" );
}

static int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame)
{
  // Supply raw packet data as input to a decoder
  // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3
  int response = avcodec_send_packet(pCodecContext, pPacket);

  if (response < 0) {
    logging("Error while sending a packet to the decoder: %s", av_err2str(response));
    return response;
  }

  while (response >= 0)
  {
    // Return decoded output data (into a frame) from a decoder
    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c
    response = avcodec_receive_frame(pCodecContext, pFrame);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      logging("Error while receiving a frame from the decoder: %s", av_err2str(response));
      return response;
    }

    if (response >= 0) {
      logging(
          "Frame %d (type=%c, size=%d bytes, format=%d) pts %" PRId64 " key_frame %d [coded_picture_number %d]",
          pCodecContext->frame_number,
          av_get_picture_type_char(pFrame->pict_type),
          pFrame->pkt_size,
          pFrame->format,
          pFrame->pts,
          pFrame->key_frame,
          pFrame->coded_picture_number
      );

      char frame_filename[1024];
      snprintf(frame_filename, sizeof(frame_filename), "%s-%d.pgm", "frame", pCodecContext->frame_number);
      // Check if the frame is a planar YUV 4:2:0, 12bpp
      // That is the format of the provided .mp4 file
      // RGB formats will definitely not give a gray image
      // Other YUV image may do so, but untested, so give a warning
      if (pFrame->format != AV_PIX_FMT_YUV420P)
      {
        logging("Warning: the generated file may not be a grayscale image, but could e.g. be just the R component if the video format is RGB");
      }
      // save a grayscale frame into a .pgm file
      save_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);
    }
  }
  return 0;
}
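/*
 * For reference, the file layout save_gray_frame() below produces, worked
 * through for a (hypothetical) 1920x1080 input:
 *
 *   P5\n1920 1080\n255\n   <- ASCII header: magic number, dimensions, max gray value
 *   <1920*1080 bytes>      <- one unsigned byte per pixel, row by row
 *
 * Only pFrame->data[0] (the Y/luma plane of YUV420P) is written, which is why
 * the result looks grayscale.
 */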
static void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)
{
  FILE *f;
  int i;
  f = fopen(filename,"w");
  // writing the minimal required header for a pgm file format
  // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example
  fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);

  // writing line by line
  for (i = 0; i < ysize; i++)
    fwrite(buf + i * wrap, 1, xsize, f);
  fclose(f);
}

--------------------------------------------------------------------------------
/2_remuxing.c:
--------------------------------------------------------------------------------
// based on https://ffmpeg.org/doxygen/trunk/remuxing_8c-example.html
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>

int main(int argc, char **argv)
{
  AVFormatContext *input_format_context = NULL, *output_format_context = NULL;
  AVPacket packet;
  const char *in_filename, *out_filename;
  int ret, i;
  int stream_index = 0;
  int *streams_list = NULL;
  int number_of_streams = 0;
  int fragmented_mp4_options = 0;

  if (argc < 3) {
    printf("You need to pass at least two parameters.\n");
    return -1;
  } else if (argc == 4) {
    fragmented_mp4_options = 1;
  }

  in_filename  = argv[1];
  out_filename = argv[2];

  if ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {
    fprintf(stderr, "Could not open input file '%s'", in_filename);
    goto end;
  }
  if ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {
    fprintf(stderr, "Failed to retrieve input stream information");
    goto end;
  }

  avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);
  if (!output_format_context) {
    fprintf(stderr, "Could not create output context\n");
    ret = AVERROR_UNKNOWN;
    goto end;
  }

  number_of_streams = input_format_context->nb_streams;
  streams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));

  if (!streams_list) {
    ret = AVERROR(ENOMEM);
    goto end;
  }

  for (i = 0; i < input_format_context->nb_streams; i++) {
    AVStream *out_stream;
    AVStream *in_stream = input_format_context->streams[i];
    AVCodecParameters *in_codecpar = in_stream->codecpar;
    if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
        in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
        in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
      streams_list[i] = -1;
      continue;
    }
    streams_list[i] = stream_index++;
    out_stream = avformat_new_stream(output_format_context, NULL);
    if (!out_stream) {
      fprintf(stderr, "Failed allocating output stream\n");
      ret = AVERROR_UNKNOWN;
      goto end;
    }
    ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
    if (ret < 0) {
      fprintf(stderr, "Failed to copy codec parameters\n");
      goto end;
    }
  }
  // https://ffmpeg.org/doxygen/trunk/group__lavf__misc.html#gae2645941f2dc779c307eb6314fd39f10
  av_dump_format(output_format_context, 0, out_filename, 1);

  // unless the output format is flagged as "no file" (more on that below), open
  // the output file for writing (AVIO_FLAG_WRITE); AVFMT_NOFILE is basically a way
  // to keep the muxed result in a buffer so you can store it wherever you want.
  if (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {
    ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);
    if (ret < 0) {
      fprintf(stderr, "Could not open output file '%s'", out_filename);
      goto end;
    }
  }
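  /*
   * A minimal sketch of that in-memory route (hypothetical, not exercised by
   * this example): instead of avio_open()-ing a path, point ->pb at a dynamic
   * buffer and collect the bytes yourself afterwards:
   *
   *   AVIOContext *mem_io = NULL;
   *   avio_open_dyn_buf(&mem_io);           // IO context backed by a growing buffer
   *   output_format_context->pb = mem_io;
   *   // ... write header, packets and trailer as below ...
   *   uint8_t *bytes = NULL;
   *   int size = avio_close_dyn_buf(mem_io, &bytes);
   *   // use the muxed bytes (upload, cache, ...), then av_free(bytes)
   *
   * Mind that some muxers (plain MP4, for one) want seekable output; the
   * fragmented-MP4 flags used below avoid that need.
   */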
  AVDictionary* opts = NULL;

  if (fragmented_mp4_options) {
    // https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE
    av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov+default_base_moof", 0);
  }
  // https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga18b7b10bb5b94c4842de18166bc677cb
  ret = avformat_write_header(output_format_context, &opts);
  if (ret < 0) {
    fprintf(stderr, "Error occurred when opening output file\n");
    goto end;
  }
  while (1) {
    AVStream *in_stream, *out_stream;
    ret = av_read_frame(input_format_context, &packet);
    if (ret < 0)
      break;
    in_stream  = input_format_context->streams[packet.stream_index];
    if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {
      av_packet_unref(&packet);
      continue;
    }
    packet.stream_index = streams_list[packet.stream_index];
    out_stream = output_format_context->streams[packet.stream_index];
    /* copy packet */
    packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
    packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
    packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);
    // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903
    packet.pos = -1;
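    /*
     * A worked example of the rescaling above: an MP4 video stream often has a
     * time base of 1/15360 while MPEG-TS uses 1/90000, so a packet stamped
     * pts=15360 (exactly 1 second) becomes 15360 * (1/15360) / (1/90000) = 90000.
     * av_rescale_q_rnd does that multiplication in 64 bits with explicit
     * rounding, so it neither overflows nor drifts.
     */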
    //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1
    ret = av_interleaved_write_frame(output_format_context, &packet);
    if (ret < 0) {
      fprintf(stderr, "Error muxing packet\n");
      break;
    }
    av_packet_unref(&packet);
  }
  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13
  av_write_trailer(output_format_context);
end:
  avformat_close_input(&input_format_context);
  /* close output */
  if (output_format_context && !(output_format_context->oformat->flags & AVFMT_NOFILE))
    avio_closep(&output_format_context->pb);
  avformat_free_context(output_format_context);
  av_freep(&streams_list);
  if (ret < 0 && ret != AVERROR_EOF) {
    fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
    return 1;
  }
  return 0;
}

--------------------------------------------------------------------------------
/3_transcoding.c:
--------------------------------------------------------------------------------
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "video_debugging.h"

typedef struct StreamingParams {
  char copy_video;
  char copy_audio;
  char *output_extension;
  char *muxer_opt_key;
  char *muxer_opt_value;
  char *video_codec;
  char *audio_codec;
  char *codec_priv_key;
  char *codec_priv_value;
} StreamingParams;

typedef struct StreamingContext {
  AVFormatContext *avfc;
  AVCodec *video_avc;
  AVCodec *audio_avc;
  AVStream *video_avs;
  AVStream *audio_avs;
  AVCodecContext *video_avcc;
  AVCodecContext *audio_avcc;
  int video_index;
  int audio_index;
  char *filename;
} StreamingContext;
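/*
 * A rough roadmap of how the helpers below compose (this is exactly what
 * main() drives):
 *
 *   open_media() + prepare_decoder()          -> demuxer and decoders for the input
 *   prepare_video_encoder()/prepare_audio_encoder() or prepare_copy()
 *                                             -> encoders (or passthrough streams) for the output
 *   loop: av_read_frame() -> transcode_video()/transcode_audio() or remux()
 *   flush: encode_video()/encode_audio() with a NULL frame, then av_write_trailer()
 */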
int fill_stream_info(AVStream *avs, AVCodec **avc, AVCodecContext **avcc) {
  *avc = avcodec_find_decoder(avs->codecpar->codec_id);
  if (!*avc) {logging("failed to find the codec"); return -1;}

  *avcc = avcodec_alloc_context3(*avc);
  if (!*avcc) {logging("failed to alloc memory for codec context"); return -1;}

  if (avcodec_parameters_to_context(*avcc, avs->codecpar) < 0) {logging("failed to fill codec context"); return -1;}

  if (avcodec_open2(*avcc, *avc, NULL) < 0) {logging("failed to open codec"); return -1;}
  return 0;
}

int open_media(const char *in_filename, AVFormatContext **avfc) {
  *avfc = avformat_alloc_context();
  if (!*avfc) {logging("failed to alloc memory for format"); return -1;}

  if (avformat_open_input(avfc, in_filename, NULL, NULL) != 0) {logging("failed to open input file %s", in_filename); return -1;}

  if (avformat_find_stream_info(*avfc, NULL) < 0) {logging("failed to get stream info"); return -1;}
  return 0;
}

int prepare_decoder(StreamingContext *sc) {
  for (int i = 0; i < sc->avfc->nb_streams; i++) {
    if (sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
      sc->video_avs = sc->avfc->streams[i];
      sc->video_index = i;

      if (fill_stream_info(sc->video_avs, &sc->video_avc, &sc->video_avcc)) {return -1;}
    } else if (sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
      sc->audio_avs = sc->avfc->streams[i];
      sc->audio_index = i;

      if (fill_stream_info(sc->audio_avs, &sc->audio_avc, &sc->audio_avcc)) {return -1;}
    } else {
      logging("skipping streams other than audio and video");
    }
  }

  return 0;
}

int prepare_video_encoder(StreamingContext *sc, AVCodecContext *decoder_ctx, AVRational input_framerate, StreamingParams sp) {
  sc->video_avs = avformat_new_stream(sc->avfc, NULL);

  sc->video_avc = avcodec_find_encoder_by_name(sp.video_codec);
  if (!sc->video_avc) {logging("could not find the proper codec"); return -1;}

  sc->video_avcc = avcodec_alloc_context3(sc->video_avc);
  if (!sc->video_avcc) {logging("could not allocate memory for codec context"); return -1;}

  av_opt_set(sc->video_avcc->priv_data, "preset", "fast", 0);
  if (sp.codec_priv_key && sp.codec_priv_value)
    av_opt_set(sc->video_avcc->priv_data, sp.codec_priv_key, sp.codec_priv_value, 0);

  sc->video_avcc->height = decoder_ctx->height;
  sc->video_avcc->width = decoder_ctx->width;
  sc->video_avcc->sample_aspect_ratio = decoder_ctx->sample_aspect_ratio;
  if (sc->video_avc->pix_fmts)
    sc->video_avcc->pix_fmt = sc->video_avc->pix_fmts[0];
  else
    sc->video_avcc->pix_fmt = decoder_ctx->pix_fmt;

  sc->video_avcc->bit_rate = 2 * 1000 * 1000;
  sc->video_avcc->rc_buffer_size = 4 * 1000 * 1000;
  sc->video_avcc->rc_max_rate = 2 * 1000 * 1000;
  sc->video_avcc->rc_min_rate = 2.5 * 1000 * 1000;

  sc->video_avcc->time_base = av_inv_q(input_framerate);
  sc->video_avs->time_base = sc->video_avcc->time_base;

  if (avcodec_open2(sc->video_avcc, sc->video_avc, NULL) < 0) {logging("could not open the codec"); return -1;}
  avcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);
  return 0;
}
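/*
 * Why time_base = av_inv_q(input_framerate): with a 60/1 fps input the encoder
 * time base becomes 1/60, i.e. one tick per frame, so consecutive frames carry
 * pts 0, 1, 2, ... The muxer may later rescale these (MPEG-TS, for example,
 * stores timestamps on a 1/90000 clock).
 */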
int prepare_audio_encoder(StreamingContext *sc, int sample_rate, StreamingParams sp){
  sc->audio_avs = avformat_new_stream(sc->avfc, NULL);

  sc->audio_avc = avcodec_find_encoder_by_name(sp.audio_codec);
  if (!sc->audio_avc) {logging("could not find the proper codec"); return -1;}

  sc->audio_avcc = avcodec_alloc_context3(sc->audio_avc);
  if (!sc->audio_avcc) {logging("could not allocate memory for codec context"); return -1;}

  int OUTPUT_CHANNELS = 2;
  int OUTPUT_BIT_RATE = 196000;
  sc->audio_avcc->channels       = OUTPUT_CHANNELS;
  sc->audio_avcc->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
  sc->audio_avcc->sample_rate    = sample_rate;
  sc->audio_avcc->sample_fmt     = sc->audio_avc->sample_fmts[0];
  sc->audio_avcc->bit_rate       = OUTPUT_BIT_RATE;
  sc->audio_avcc->time_base      = (AVRational){1, sample_rate};

  sc->audio_avcc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

  sc->audio_avs->time_base = sc->audio_avcc->time_base;

  if (avcodec_open2(sc->audio_avcc, sc->audio_avc, NULL) < 0) {logging("could not open the codec"); return -1;}
  avcodec_parameters_from_context(sc->audio_avs->codecpar, sc->audio_avcc);
  return 0;
}

int prepare_copy(AVFormatContext *avfc, AVStream **avs, AVCodecParameters *decoder_par) {
  *avs = avformat_new_stream(avfc, NULL);
  avcodec_parameters_copy((*avs)->codecpar, decoder_par);
  return 0;
}

int remux(AVPacket **pkt, AVFormatContext **avfc, AVRational decoder_tb, AVRational encoder_tb) {
  av_packet_rescale_ts(*pkt, decoder_tb, encoder_tb);
  if (av_interleaved_write_frame(*avfc, *pkt) < 0) { logging("error while copying stream packet"); return -1; }
  return 0;
}

int encode_video(StreamingContext *decoder, StreamingContext *encoder, AVFrame *input_frame) {
  if (input_frame) input_frame->pict_type = AV_PICTURE_TYPE_NONE;

  AVPacket *output_packet = av_packet_alloc();
  if (!output_packet) {logging("could not allocate memory for output packet"); return -1;}

  int response = avcodec_send_frame(encoder->video_avcc, input_frame);

  while (response >= 0) {
    response = avcodec_receive_packet(encoder->video_avcc, output_packet);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      logging("Error while receiving packet from encoder: %s", av_err2str(response));
      return -1;
    }

    output_packet->stream_index = decoder->video_index;
    output_packet->duration = encoder->video_avs->time_base.den / encoder->video_avs->time_base.num / decoder->video_avs->avg_frame_rate.num * decoder->video_avs->avg_frame_rate.den;

    av_packet_rescale_ts(output_packet, decoder->video_avs->time_base, encoder->video_avs->time_base);
    response = av_interleaved_write_frame(encoder->avfc, output_packet);
    if (response != 0) { logging("Error %d while writing packet: %s", response, av_err2str(response)); return -1;}
  }
  av_packet_unref(output_packet);
  av_packet_free(&output_packet);
  return 0;
}
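/*
 * The duration formula above, worked through: with an output stream time base
 * of 1/90000 and an input average frame rate of 60/1, it computes
 * 90000 / 1 / 60 * 1 = 1500, i.e. each frame lasts 1500 ticks of the 90 kHz
 * clock. (It is all integer math, so unusual rates lose some precision.)
 */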
int encode_audio(StreamingContext *decoder, StreamingContext *encoder, AVFrame *input_frame) {
  AVPacket *output_packet = av_packet_alloc();
  if (!output_packet) {logging("could not allocate memory for output packet"); return -1;}

  int response = avcodec_send_frame(encoder->audio_avcc, input_frame);

  while (response >= 0) {
    response = avcodec_receive_packet(encoder->audio_avcc, output_packet);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      logging("Error while receiving packet from encoder: %s", av_err2str(response));
      return -1;
    }

    output_packet->stream_index = decoder->audio_index;

    av_packet_rescale_ts(output_packet, decoder->audio_avs->time_base, encoder->audio_avs->time_base);
    response = av_interleaved_write_frame(encoder->avfc, output_packet);
    if (response != 0) { logging("Error %d while writing packet: %s", response, av_err2str(response)); return -1;}
  }
  av_packet_unref(output_packet);
  av_packet_free(&output_packet);
  return 0;
}

int transcode_audio(StreamingContext *decoder, StreamingContext *encoder, AVPacket *input_packet, AVFrame *input_frame) {
  int response = avcodec_send_packet(decoder->audio_avcc, input_packet);
  if (response < 0) {logging("Error while sending packet to decoder: %s", av_err2str(response)); return response;}

  while (response >= 0) {
    response = avcodec_receive_frame(decoder->audio_avcc, input_frame);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      logging("Error while receiving frame from decoder: %s", av_err2str(response));
      return response;
    }

    if (response >= 0) {
      if (encode_audio(decoder, encoder, input_frame)) return -1;
    }
    av_frame_unref(input_frame);
  }
  return 0;
}

int transcode_video(StreamingContext *decoder, StreamingContext *encoder, AVPacket *input_packet, AVFrame *input_frame) {
  int response = avcodec_send_packet(decoder->video_avcc, input_packet);
  if (response < 0) {logging("Error while sending packet to decoder: %s", av_err2str(response)); return response;}

  while (response >= 0) {
    response = avcodec_receive_frame(decoder->video_avcc, input_frame);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      logging("Error while receiving frame from decoder: %s", av_err2str(response));
      return response;
    }

    if (response >= 0) {
      if (encode_video(decoder, encoder, input_frame)) return -1;
    }
    av_frame_unref(input_frame);
  }
  return 0;
}
int main(int argc, char *argv[])
{
  /*
   * H264 -> H265
   * Audio -> remuxed (untouched)
   * MP4 -> MP4
   */
  StreamingParams sp = {0};
  sp.copy_audio = 1;
  sp.copy_video = 0;
  sp.video_codec = "libx265";
  sp.codec_priv_key = "x265-params";
  sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0";

  /*
   * H264 -> H264 (fixed gop)
   * Audio -> remuxed (untouched)
   * MP4 -> MP4
   */
  //StreamingParams sp = {0};
  //sp.copy_audio = 1;
  //sp.copy_video = 0;
  //sp.video_codec = "libx264";
  //sp.codec_priv_key = "x264-params";
  //sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";

  /*
   * H264 -> H264 (fixed gop)
   * Audio -> remuxed (untouched)
   * MP4 -> fragmented MP4
   */
  //StreamingParams sp = {0};
  //sp.copy_audio = 1;
  //sp.copy_video = 0;
  //sp.video_codec = "libx264";
  //sp.codec_priv_key = "x264-params";
  //sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
  //sp.muxer_opt_key = "movflags";
  //sp.muxer_opt_value = "frag_keyframe+empty_moov+delay_moov+default_base_moof";

  /*
   * H264 -> H264 (fixed gop)
   * Audio -> AAC
   * MP4 -> MPEG-TS
   */
  //StreamingParams sp = {0};
  //sp.copy_audio = 0;
  //sp.copy_video = 0;
  //sp.video_codec = "libx264";
  //sp.codec_priv_key = "x264-params";
  //sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
  //sp.audio_codec = "aac";
  //sp.output_extension = ".ts";

  /*
   * H264 -> VP9
   * Audio -> Vorbis
   * MP4 -> WebM
   */
  //StreamingParams sp = {0};
  //sp.copy_audio = 0;
  //sp.copy_video = 0;
  //sp.video_codec = "libvpx-vp9";
  //sp.audio_codec = "libvorbis";
  //sp.output_extension = ".webm";

  StreamingContext *decoder = (StreamingContext*) calloc(1, sizeof(StreamingContext));
  decoder->filename = argv[1];

  StreamingContext *encoder = (StreamingContext*) calloc(1, sizeof(StreamingContext));
  encoder->filename = argv[2];

  if (sp.output_extension)
    strcat(encoder->filename, sp.output_extension);

  if (open_media(decoder->filename, &decoder->avfc)) return -1;
  if (prepare_decoder(decoder)) return -1;

  avformat_alloc_output_context2(&encoder->avfc, NULL, NULL, encoder->filename);
  if (!encoder->avfc) {logging("could not allocate memory for output format"); return -1;}

  for (int i = 0; i < decoder->avfc->nb_streams; i++) {
    if (decoder->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
    {
      if (!sp.copy_video) {
        AVRational input_framerate = av_guess_frame_rate(decoder->avfc, decoder->video_avs, NULL);
        prepare_video_encoder(encoder, decoder->video_avcc, input_framerate, sp);
      } else {
        if (prepare_copy(encoder->avfc, &encoder->video_avs, decoder->video_avs->codecpar)) {return -1;}
      }
    }

    if (decoder->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
    {
      if (!sp.copy_audio) {
        if (prepare_audio_encoder(encoder, decoder->audio_avcc->sample_rate, sp)) {return -1;}
      } else {
        if (prepare_copy(encoder->avfc, &encoder->audio_avs, decoder->audio_avs->codecpar)) {return -1;}
      }
    }
  }

  if (encoder->avfc->oformat->flags & AVFMT_GLOBALHEADER)
    encoder->avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

  if (!(encoder->avfc->oformat->flags & AVFMT_NOFILE)) {
    if (avio_open(&encoder->avfc->pb, encoder->filename, AVIO_FLAG_WRITE) < 0) {
      logging("could not open the output file");
      return -1;
    }
  }

  AVDictionary* muxer_opts = NULL;

  if (sp.muxer_opt_key && sp.muxer_opt_value) {
    av_dict_set(&muxer_opts, sp.muxer_opt_key, sp.muxer_opt_value, 0);
  }

  if (avformat_write_header(encoder->avfc, &muxer_opts) < 0) {logging("an error occurred when opening output file"); return -1;}

  AVFrame *input_frame = av_frame_alloc();
  if (!input_frame) {logging("failed to allocate memory for AVFrame"); return -1;}

  AVPacket *input_packet = av_packet_alloc();
  if (!input_packet) {logging("failed to allocate memory for AVPacket"); return -1;}
  while (av_read_frame(decoder->avfc, input_packet) >= 0)
  {
    if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
      if (!sp.copy_video) {
        // TODO: refactor to be generic for audio and video (receiving a function pointer to the differences)
        if (transcode_video(decoder, encoder, input_packet, input_frame)) return -1;
        av_packet_unref(input_packet);
      } else {
        if (remux(&input_packet, &encoder->avfc, decoder->video_avs->time_base, encoder->video_avs->time_base)) return -1;
      }
    } else if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
      if (!sp.copy_audio) {
        if (transcode_audio(decoder, encoder, input_packet, input_frame)) return -1;
        av_packet_unref(input_packet);
      } else {
        if (remux(&input_packet, &encoder->avfc, decoder->audio_avs->time_base, encoder->audio_avs->time_base)) return -1;
      }
    } else {
      logging("ignoring all non video or audio packets");
    }
  }

  if (!sp.copy_video)
    if (encode_video(decoder, encoder, NULL)) return -1;
  if (!sp.copy_audio)
    if (encode_audio(decoder, encoder, NULL)) return -1;

  av_write_trailer(encoder->avfc);

  if (muxer_opts != NULL) {
    av_dict_free(&muxer_opts);
    muxer_opts = NULL;
  }

  if (input_frame != NULL) {
    av_frame_free(&input_frame);
    input_frame = NULL;
  }

  if (input_packet != NULL) {
    av_packet_free(&input_packet);
    input_packet = NULL;
  }

  avformat_close_input(&decoder->avfc);

  avformat_free_context(decoder->avfc); decoder->avfc = NULL;
  avformat_free_context(encoder->avfc); encoder->avfc = NULL;

  avcodec_free_context(&decoder->video_avcc); decoder->video_avcc = NULL;
  avcodec_free_context(&decoder->audio_avcc); decoder->audio_avcc = NULL;

  free(decoder); decoder = NULL;
  free(encoder); encoder = NULL;
  return 0;
}

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.17)
project(libav_tutorial)

# set out directory
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# set ffmpeg root directory
if(NOT FFMPEG_DEV_ROOT)
    message(FATAL_ERROR "set FFMPEG_DEV_ROOT to use ffmpeg libraries")
endif()

# set ffmpeg develop environment
include_directories(${FFMPEG_DEV_ROOT}/include)
link_directories(${FFMPEG_DEV_ROOT}/lib)
link_libraries(
    avcodec
    avformat
    avfilter
    avdevice
    swresample
    swscale
    avutil
)

# copy dlls
file(GLOB ffmpeg_shared_libries ${FFMPEG_DEV_ROOT}/bin/*dll)
file(COPY ${ffmpeg_shared_libries} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})

# copy test file
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/small_bunny_1080p_60fps.mp4 DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})


# add library
set(debug_src ${CMAKE_CURRENT_SOURCE_DIR}/video_debugging.c)
add_library(video_debug ${debug_src})
link_libraries(video_debug)

# add project/executables
file(GLOB srcs *.c)
list(REMOVE_ITEM srcs ${debug_src})
foreach(src ${srcs})
    get_filename_component(TARGET ${src} NAME)
    add_executable(${TARGET} ${src})
    message(STATUS "${TARGET} added")
endforeach()
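# Example invocation (hypothetical path: point FFMPEG_DEV_ROOT at a directory
# containing FFmpeg's include/, lib/ and, on Windows, bin/ with the DLLs):
#
#   cmake -S . -B build -DFFMPEG_DEV_ROOT=/opt/ffmpeg
#   cmake --build build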
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# ffmpeg - http://ffmpeg.org/download.html
#
# From https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu
#
# https://hub.docker.com/r/jrottenberg/ffmpeg/
#
#
FROM ubuntu:20.04 AS base

WORKDIR /tmp/workdir

RUN apt-get -yqq update && \
    apt-get install -yq --no-install-recommends ca-certificates expat libgomp1 && \
    apt-get autoremove -y && \
    apt-get clean -y

FROM base as build

ENV FFMPEG_VERSION=4.4 \
    AOM_VERSION=v1.0.0 \
    FDKAAC_VERSION=0.1.5 \
    FONTCONFIG_VERSION=2.12.4 \
    FREETYPE_VERSION=2.10.4 \
    FRIBIDI_VERSION=0.19.7 \
    KVAZAAR_VERSION=2.0.0 \
    LAME_VERSION=3.100 \
    LIBASS_VERSION=0.13.7 \
    LIBPTHREAD_STUBS_VERSION=0.4 \
    LIBVIDSTAB_VERSION=1.1.0 \
    LIBXCB_VERSION=1.13.1 \
    XCBPROTO_VERSION=1.13 \
    OGG_VERSION=1.3.2 \
    OPENCOREAMR_VERSION=0.1.5 \
    OPUS_VERSION=1.2 \
    OPENJPEG_VERSION=2.1.2 \
    THEORA_VERSION=1.1.1 \
    VORBIS_VERSION=1.3.5 \
    VPX_VERSION=1.8.0 \
    WEBP_VERSION=1.0.2 \
    X264_VERSION=20170226-2245-stable \
    X265_VERSION=3.4 \
    XAU_VERSION=1.0.9 \
    XORG_MACROS_VERSION=1.19.2 \
    XPROTO_VERSION=7.0.31 \
    XVID_VERSION=1.3.4 \
    LIBXML2_VERSION=2.9.10 \
    LIBBLURAY_VERSION=1.1.2 \
    LIBZMQ_VERSION=4.3.2 \
    LIBSRT_VERSION=1.4.1 \
    LIBARIBB24_VERSION=1.0.3 \
    LIBPNG_VERSION=1.6.9 \
    LIBVMAF_VERSION=2.1.1 \
    SRC=/usr/local

ARG FREETYPE_SHA256SUM="5eab795ebb23ac77001cfb68b7d4d50b5d6c7469247b0b01b2c953269f658dac freetype-2.10.4.tar.gz"
ARG FRIBIDI_SHA256SUM="3fc96fa9473bd31dcb5500bdf1aa78b337ba13eb8c301e7c28923fea982453a8 0.19.7.tar.gz"
ARG LIBASS_SHA256SUM="8fadf294bf701300d4605e6f1d92929304187fca4b8d8a47889315526adbafd7 0.13.7.tar.gz"
ARG LIBVIDSTAB_SHA256SUM="14d2a053e56edad4f397be0cb3ef8eb1ec3150404ce99a426c4eb641861dc0bb v1.1.0.tar.gz"
ARG OGG_SHA256SUM="e19ee34711d7af328cb26287f4137e70630e7261b17cbe3cd41011d73a654692 libogg-1.3.2.tar.gz"
ARG OPUS_SHA256SUM="77db45a87b51578fbc49555ef1b10926179861d854eb2613207dc79d9ec0a9a9 opus-1.2.tar.gz"
ARG THEORA_SHA256SUM="40952956c47811928d1e7922cda3bc1f427eb75680c3c37249c91e949054916b libtheora-1.1.1.tar.gz"
ARG VORBIS_SHA256SUM="6efbcecdd3e5dfbf090341b485da9d176eb250d893e3eb378c428a2db38301ce libvorbis-1.3.5.tar.gz"
ARG XVID_SHA256SUM="4e9fd62728885855bc5007fe1be58df42e5e274497591fec37249e1052ae316f xvidcore-1.3.4.tar.gz"
ARG LIBXML2_SHA256SUM="f07dab13bf42d2b8db80620cce7419b3b87827cc937c8bb20fe13b8571ee9501 libxml2-v2.9.10.tar.gz"
ARG LIBBLURAY_SHA256SUM="a3dd452239b100dc9da0d01b30e1692693e2a332a7d29917bf84bb10ea7c0b42 libbluray-1.1.2.tar.bz2"
ARG LIBZMQ_SHA256SUM="02ecc88466ae38cf2c8d79f09cfd2675ba299a439680b64ade733e26a349edeb v4.3.2.tar.gz"
ARG LIBARIBB24_SHA256SUM="f61560738926e57f9173510389634d8c06cabedfa857db4b28fb7704707ff128 v1.0.3.tar.gz"
ARG LIBVMAF_SHA256SUM="e7fc00ae1322a7eccfcf6d4f1cdf9c67eec8058709887c8c6c3795c617326f77 v2.1.1.tar.gz"


ARG LD_LIBRARY_PATH=/opt/ffmpeg/lib
ARG MAKEFLAGS="-j2"
ARG PKG_CONFIG_PATH="/opt/ffmpeg/share/pkgconfig:/opt/ffmpeg/lib/pkgconfig:/opt/ffmpeg/lib64/pkgconfig"
ARG PREFIX=/opt/ffmpeg
ARG LD_LIBRARY_PATH="/opt/ffmpeg/lib:/opt/ffmpeg/lib64"


ARG DEBIAN_FRONTEND=noninteractive

RUN buildDeps="autoconf \
    automake \
    cmake \
    curl \
    bzip2 \
    libexpat1-dev \
    g++ \
    gcc \
    git \
    gperf \
    libtool \
    make \
    meson \
    nasm \
    perl \
    pkg-config \
    python \
    libssl-dev \
    yasm \
    zlib1g-dev" && \
    apt-get -yqq update && \
    apt-get install -yq --no-install-recommends ${buildDeps}
## libvmaf https://github.com/Netflix/vmaf
RUN \
    if which meson || false; then \
    echo "Building VMAF." && \
    DIR=/tmp/vmaf && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/Netflix/vmaf/archive/v${LIBVMAF_VERSION}.tar.gz && \
    tar -xz --strip-components=1 -f v${LIBVMAF_VERSION}.tar.gz && \
    cd /tmp/vmaf/libvmaf && \
    meson build --buildtype release --prefix=${PREFIX} && \
    ninja -vC build && \
    ninja -vC build install && \
    mkdir -p ${PREFIX}/share/model/ && \
    cp -r /tmp/vmaf/model/* ${PREFIX}/share/model/ && \
    rm -rf ${DIR}; \
    else \
    echo "VMAF skipped."; \
    fi

## opencore-amr https://sourceforge.net/projects/opencore-amr/
RUN \
    DIR=/tmp/opencore-amr && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://versaweb.dl.sourceforge.net/project/opencore-amr/opencore-amr/opencore-amr-${OPENCOREAMR_VERSION}.tar.gz | \
    tar -zx --strip-components=1 && \
    ./configure --prefix="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
## x264 http://www.videolan.org/developers/x264.html
RUN \
    DIR=/tmp/x264 && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://download.videolan.org/pub/videolan/x264/snapshots/x264-snapshot-${X264_VERSION}.tar.bz2 | \
    tar -jx --strip-components=1 && \
    ./configure --prefix="${PREFIX}" --enable-shared --enable-pic --disable-cli && \
    make && \
    make install && \
    rm -rf ${DIR}
### x265 http://x265.org/
RUN \
    DIR=/tmp/x265 && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://github.com/videolan/x265/archive/refs/tags/${X265_VERSION}.tar.gz | \
    tar -zx && \
    cd x265-${X265_VERSION}/build/linux && \
    sed -i "/-DEXTRA_LIB/ s/$/ -DCMAKE_INSTALL_PREFIX=\${PREFIX}/" multilib.sh && \
    sed -i "/^cmake/ s/$/ -DENABLE_CLI=OFF/" multilib.sh && \
    ./multilib.sh && \
    make -C 8bit install && \
    rm -rf ${DIR}
### libogg https://www.xiph.org/ogg/
RUN \
    DIR=/tmp/ogg && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO http://downloads.xiph.org/releases/ogg/libogg-${OGG_VERSION}.tar.gz && \
    echo ${OGG_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f libogg-${OGG_VERSION}.tar.gz && \
    ./configure --prefix="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
### libopus https://www.opus-codec.org/
RUN \
    DIR=/tmp/opus && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://archive.mozilla.org/pub/opus/opus-${OPUS_VERSION}.tar.gz && \
    echo ${OPUS_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f opus-${OPUS_VERSION}.tar.gz && \
    autoreconf -fiv && \
    ./configure --prefix="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
### libvorbis https://xiph.org/vorbis/
RUN \
    DIR=/tmp/vorbis && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO http://downloads.xiph.org/releases/vorbis/libvorbis-${VORBIS_VERSION}.tar.gz && \
    echo ${VORBIS_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f libvorbis-${VORBIS_VERSION}.tar.gz && \
    ./configure --prefix="${PREFIX}" --with-ogg="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
### libtheora http://www.theora.org/
RUN \
    DIR=/tmp/theora && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO http://downloads.xiph.org/releases/theora/libtheora-${THEORA_VERSION}.tar.gz && \
    echo ${THEORA_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f libtheora-${THEORA_VERSION}.tar.gz && \
    ./configure --prefix="${PREFIX}" --with-ogg="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
### libvpx https://www.webmproject.org/code/
RUN \
    DIR=/tmp/vpx && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://codeload.github.com/webmproject/libvpx/tar.gz/v${VPX_VERSION} | \
    tar -zx --strip-components=1 && \
    ./configure --prefix="${PREFIX}" --enable-vp8 --enable-vp9 --enable-vp9-highbitdepth --enable-pic --enable-shared \
    --disable-debug --disable-examples --disable-docs --disable-install-bins && \
    make && \
    make install && \
    rm -rf ${DIR}
### libwebp https://developers.google.com/speed/webp/
RUN \
    DIR=/tmp/vebp && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-${WEBP_VERSION}.tar.gz | \
    tar -zx --strip-components=1 && \
    ./configure --prefix="${PREFIX}" --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
### libmp3lame http://lame.sourceforge.net/
RUN \
    DIR=/tmp/lame && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://versaweb.dl.sourceforge.net/project/lame/lame/$(echo ${LAME_VERSION} | sed -e 's/[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)/\1.\2/')/lame-${LAME_VERSION}.tar.gz | \
    tar -zx --strip-components=1 && \
    ./configure --prefix="${PREFIX}" --bindir="${PREFIX}/bin" --enable-shared --enable-nasm --disable-frontend && \
    make && \
    make install && \
    rm -rf ${DIR}
### xvid https://www.xvid.com/
RUN \
    DIR=/tmp/xvid && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://xvid.com/downloads/xvidcore-${XVID_VERSION}.tar.gz && \
    echo ${XVID_SHA256SUM} | sha256sum --check && \
    tar -zx -f xvidcore-${XVID_VERSION}.tar.gz && \
    cd xvidcore/build/generic && \
    ./configure --prefix="${PREFIX}" --bindir="${PREFIX}/bin" && \
    make && \
    make install && \
    rm -rf ${DIR}
### fdk-aac https://github.com/mstorsjo/fdk-aac
RUN \
    DIR=/tmp/fdk-aac && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://github.com/mstorsjo/fdk-aac/archive/v${FDKAAC_VERSION}.tar.gz | \
    tar -zx --strip-components=1 && \
    autoreconf -fiv && \
    ./configure --prefix="${PREFIX}" --enable-shared --datadir="${DIR}" && \
    make && \
    make install && \
    rm -rf ${DIR}
## openjpeg https://github.com/uclouvain/openjpeg
RUN \
    DIR=/tmp/openjpeg && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sL https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz | \
    tar -zx --strip-components=1 && \
    cmake -DBUILD_THIRDPARTY:BOOL=ON -DCMAKE_INSTALL_PREFIX="${PREFIX}" . && \
    make && \
    make install && \
    rm -rf ${DIR}
## freetype https://www.freetype.org/
RUN \
    DIR=/tmp/freetype && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://download.savannah.gnu.org/releases/freetype/freetype-${FREETYPE_VERSION}.tar.gz && \
    echo ${FREETYPE_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f freetype-${FREETYPE_VERSION}.tar.gz && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
## libvstab https://github.com/georgmartius/vid.stab
RUN \
    DIR=/tmp/vid.stab && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/georgmartius/vid.stab/archive/v${LIBVIDSTAB_VERSION}.tar.gz && \
    echo ${LIBVIDSTAB_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f v${LIBVIDSTAB_VERSION}.tar.gz && \
    cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" . && \
    make && \
    make install && \
    rm -rf ${DIR}
## fridibi https://www.fribidi.org/
RUN \
    DIR=/tmp/fribidi && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/fribidi/fribidi/archive/${FRIBIDI_VERSION}.tar.gz && \
    echo ${FRIBIDI_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f ${FRIBIDI_VERSION}.tar.gz && \
    sed -i 's/^SUBDIRS =.*/SUBDIRS=gen.tab charset lib bin/' Makefile.am && \
    ./bootstrap --no-config --auto && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make -j1 && \
    make install && \
    rm -rf ${DIR}
## fontconfig https://www.freedesktop.org/wiki/Software/fontconfig/
RUN \
    DIR=/tmp/fontconfig && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://www.freedesktop.org/software/fontconfig/release/fontconfig-${FONTCONFIG_VERSION}.tar.bz2 && \
    tar -jx --strip-components=1 -f fontconfig-${FONTCONFIG_VERSION}.tar.bz2 && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
## libass https://github.com/libass/libass
RUN \
    DIR=/tmp/libass && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/libass/libass/archive/${LIBASS_VERSION}.tar.gz && \
    echo ${LIBASS_SHA256SUM} | sha256sum --check && \
    tar -zx --strip-components=1 -f ${LIBASS_VERSION}.tar.gz && \
    ./autogen.sh && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}
## kvazaar https://github.com/ultravideo/kvazaar
RUN \
    DIR=/tmp/kvazaar && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/ultravideo/kvazaar/archive/v${KVAZAAR_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f v${KVAZAAR_VERSION}.tar.gz && \
    ./autogen.sh && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/aom && \
    git clone --branch ${AOM_VERSION} --depth 1 https://aomedia.googlesource.com/aom ${DIR} ; \
    cd ${DIR} ; \
    rm -rf CMakeCache.txt CMakeFiles ; \
    mkdir -p ./aom_build ; \
    cd ./aom_build ; \
    cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" -DBUILD_SHARED_LIBS=1 ..; \
    make ; \
    make install ; \
    rm -rf ${DIR}

## libxcb (and supporting libraries) for screen capture https://xcb.freedesktop.org/
RUN \
    DIR=/tmp/xorg-macros && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://www.x.org/archive//individual/util/util-macros-${XORG_MACROS_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f util-macros-${XORG_MACROS_VERSION}.tar.gz && \
    ./configure --srcdir=${DIR} --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/xproto && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://www.x.org/archive/individual/proto/xproto-${XPROTO_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f xproto-${XPROTO_VERSION}.tar.gz && \
    ./configure --srcdir=${DIR} --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/libXau && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://www.x.org/archive/individual/lib/libXau-${XAU_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f libXau-${XAU_VERSION}.tar.gz && \
    ./configure --srcdir=${DIR} --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/libpthread-stubs && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://xcb.freedesktop.org/dist/libpthread-stubs-${LIBPTHREAD_STUBS_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f libpthread-stubs-${LIBPTHREAD_STUBS_VERSION}.tar.gz && \
    ./configure --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/libxcb-proto && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://xcb.freedesktop.org/dist/xcb-proto-${XCBPROTO_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f xcb-proto-${XCBPROTO_VERSION}.tar.gz && \
    ACLOCAL_PATH="${PREFIX}/share/aclocal" ./autogen.sh && \
    ./configure --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

RUN \
    DIR=/tmp/libxcb && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://xcb.freedesktop.org/dist/libxcb-${LIBXCB_VERSION}.tar.gz && \
    tar -zx --strip-components=1 -f libxcb-${LIBXCB_VERSION}.tar.gz && \
    ACLOCAL_PATH="${PREFIX}/share/aclocal" ./autogen.sh && \
    ./configure --prefix="${PREFIX}" --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}

## libxml2 - for libbluray
RUN \
    DIR=/tmp/libxml2 && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://gitlab.gnome.org/GNOME/libxml2/-/archive/v${LIBXML2_VERSION}/libxml2-v${LIBXML2_VERSION}.tar.gz && \
    echo ${LIBXML2_SHA256SUM} | sha256sum --check && \
    tar -xz --strip-components=1 -f libxml2-v${LIBXML2_VERSION}.tar.gz && \
    ./autogen.sh --prefix="${PREFIX}" --with-ftp=no --with-http=no --with-python=no && \
    make && \
    make install && \
    rm -rf ${DIR}

## libbluray - Requires libxml, freetype, and fontconfig
RUN \
    DIR=/tmp/libbluray && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://download.videolan.org/pub/videolan/libbluray/${LIBBLURAY_VERSION}/libbluray-${LIBBLURAY_VERSION}.tar.bz2 && \
    echo ${LIBBLURAY_SHA256SUM} | sha256sum --check && \
    tar -jx --strip-components=1 -f libbluray-${LIBBLURAY_VERSION}.tar.bz2 && \
    ./configure --prefix="${PREFIX}" --disable-examples --disable-bdjava-jar --disable-static --enable-shared && \
    make && \
    make install && \
    rm -rf ${DIR}

## libzmq https://github.com/zeromq/libzmq/
RUN \
    DIR=/tmp/libzmq && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/zeromq/libzmq/archive/v${LIBZMQ_VERSION}.tar.gz && \
    echo ${LIBZMQ_SHA256SUM} | sha256sum --check && \
    tar -xz --strip-components=1 -f v${LIBZMQ_VERSION}.tar.gz && \
    ./autogen.sh && \
    ./configure --prefix="${PREFIX}" && \
    make && \
    make check && \
    make install && \
    rm -rf ${DIR}

## libsrt https://github.com/Haivision/srt
RUN \
    DIR=/tmp/srt && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/Haivision/srt/archive/v${LIBSRT_VERSION}.tar.gz && \
    tar -xz --strip-components=1 -f v${LIBSRT_VERSION}.tar.gz && \
    cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" . && \
    make && \
    make install && \
    rm -rf ${DIR}

## libpng
RUN \
    DIR=/tmp/png && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    git clone https://git.code.sf.net/p/libpng/code ${DIR} -b v${LIBPNG_VERSION} --depth 1 && \
    ./autogen.sh && \
    ./configure --prefix="${PREFIX}" && \
    make check && \
    make install && \
    rm -rf ${DIR}

## libaribb24
RUN \
    DIR=/tmp/b24 && \
    mkdir -p ${DIR} && \
    cd ${DIR} && \
    curl -sLO https://github.com/nkoriyama/aribb24/archive/v${LIBARIBB24_VERSION}.tar.gz && \
    echo ${LIBARIBB24_SHA256SUM} | sha256sum --check && \
    tar -xz --strip-components=1 -f v${LIBARIBB24_VERSION}.tar.gz && \
    autoreconf -fiv && \
    ./configure CFLAGS="-I${PREFIX}/include -fPIC" --prefix="${PREFIX}" && \
    make && \
    make install && \
    rm -rf ${DIR}

## ffmpeg https://ffmpeg.org/
RUN \
    DIR=/tmp/ffmpeg && mkdir -p ${DIR} && cd ${DIR} && \
    curl -sLO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.bz2 && \
    tar -jx --strip-components=1 -f ffmpeg-${FFMPEG_VERSION}.tar.bz2



RUN \
    DIR=/tmp/ffmpeg && mkdir -p ${DIR} && cd ${DIR} && \
    ./configure \
    --disable-debug \
    --disable-doc \
    --disable-ffplay \
    --enable-shared \
    --enable-avresample \
    --enable-libopencore-amrnb \
    --enable-libopencore-amrwb \
    --enable-gpl \
    --enable-libass \
    --enable-fontconfig \
    --enable-libfreetype \
    --enable-libvidstab \
    --enable-libmp3lame \
    --enable-libopus \
    --enable-libtheora \
    --enable-libvorbis \
    --enable-libvpx \
    --enable-libwebp \
    --enable-libxcb \
    --enable-libx265 \
    --enable-libxvid \
    --enable-libx264 \
    --enable-nonfree \
    --enable-openssl \
    --enable-libfdk_aac \
    --enable-postproc \
    --enable-small \
    --enable-version3 \
    --enable-libbluray \
    --enable-libzmq \
    --extra-libs=-ldl \
    --prefix="${PREFIX}" \
    --enable-libopenjpeg \
    --enable-libkvazaar \
    --enable-libaom \
    --extra-libs=-lpthread \
    --enable-libsrt \
    --enable-libaribb24 \
    --enable-libvmaf \
    --extra-cflags="-I${PREFIX}/include" \
    --extra-ldflags="-L${PREFIX}/lib" && \
    make && \
    make install && \
    make tools/zmqsend && cp tools/zmqsend ${PREFIX}/bin/ && \
    make distclean && \
    hash -r && \
    cd tools && \
    make qt-faststart && cp qt-faststart ${PREFIX}/bin/

# Let's make sure the app built correctly
# Convenient to verify on https://hub.docker.com/r/jrottenberg/ffmpeg/builds/ console output

FROM base AS release
ENV LD_LIBRARY_PATH /opt/ffmpeg/lib:/usr/local/lib
RUN apt-get -yqq update && \
    apt-get install -yq --no-install-recommends build-essential && \
    apt-get autoremove -y && \
    apt-get clean -y

COPY --from=build /opt/ffmpeg /opt/ffmpeg
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | usage: 2 | echo "make fetch_small_bunny_video && make run_hello" 3 | 4 | all: clean fetch_bbb_video make_hello run_hello make_remuxing run_remuxing_ts run_remuxing_fragmented_mp4 make_transcoding 5 | .PHONY: all 6 | 7 | clean: 8 | @rm -rf ./build/* 9 | 10 | fetch_small_bunny_video: 11 | ./fetch_bbb_video.sh 12 | 13 | make_hello: clean 14 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \ 15 | gcc -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/0_hello_world.c \ 16 | -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \ 17 | -o /files/build/hello 18 | 19 | run_hello: make_hello 20 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/hello /files/small_bunny_1080p_60fps.mp4 21 | 22 | make_remuxing: clean 23 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \ 24 | gcc -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/2_remuxing.c \ 25 | -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \ 26 | -o /files/build/remuxing 27 | 28 | run_remuxing_ts: make_remuxing 29 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/remuxing /files/small_bunny_1080p_60fps.mp4 /files/remuxed_small_bunny_1080p_60fps.ts 30 | 31 | run_remuxing_fragmented_mp4: make_remuxing 32 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/remuxing /files/small_bunny_1080p_60fps.mp4 /files/fragmented_small_bunny_1080p_60fps.mp4 fragmented 33 | 34 | make_transcoding: clean 35 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \ 36 | gcc -g -Wall -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/3_transcoding.c /files/video_debugging.c \ 37 | -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \ 38 | -o /files/build/3_transcoding 39 | 40 | run_transcoding: make_transcoding 41 | docker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 ./build/3_transcoding /files/small_bunny_1080p_60fps.mp4 /files/bunny_1s_gop.mp4 42 | -------------------------------------------------------------------------------- /README-cn.md: -------------------------------------------------------------------------------- 1 | [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg) 2 | 3 | 起初我在寻找可以学习使用FFmpeg库(又名 libav)的教程或书籍,然后找到了名为["如何在1k行代码内实现视频播放器"](http://dranger.com/ffmpeg/)的指南。但该项目已经停止维护,因此我决定撰写此教程。 4 | 5 | 此项目主要使用C语言开发,**但请不用担心**:项目内容非常通俗易懂。FFmpeg libav具有许多其他语言的实现,例如[python](https://pyav.org/),[go](https://github.com/imkira/go-libav)。即使其中没有你熟悉的编程语言,仍然可以通过 `ffi` 为它提供支持(这是一个 [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua) 的示例)。 6 | 7 | 下文将会简单介绍什么是视频、音频、编解码和容器,然后我们将尝试使用 FFmpeg 命令行工具,最终使用代码实现一些功能。如果你拥有一些经验,可以随时跳过这些内容,直接阅读 [笨办法学 FFmpeg libav](#笨办法学-FFmpeg-libav) 章节。 8 | 9 | 许多人认为网络视频流媒体是传统 TV 的未来。无论如何,FFmpeg 值得我们深入学习。 10 | 11 | __目录__ 12 | 13 | * [介绍](#介绍) 14 | * [视频 - 目光所见](#视频---目光所见) 15 | * [音频 - 耳朵所听](#音频---耳朵所听) 16 | * [编解码 - 压缩数据](#编解码---压缩数据) 17 | * [容器 - 整合音频和视频](#容器---整合音视频) 18 | * [FFmpeg - 命令行](#FFmpeg---命令行) 19 | * [FFmpeg 命令行工具 101](#FFmpeg-命令行工具-101) 20 | * [通用视频操作](#通用视频操作) 21 | * [转码](#转码) 22 | * [转封装](#转封装) 23 | * [转码率](#转码率) 24 | * [转分辨率](#转分辨率) 25 | * [自适应流](#自适应流) 26 | * 
[更多](#更多) 27 | * [笨办法学 FFmpeg libav](#笨办法学-FFmpeg-libav) 28 | * [章节0 - 臭名昭著的 hello world](#章节0---臭名昭著的-hello-world) 29 | * [FFmpeg libav 架构](#FFmpeg-libav-架构) 30 | * [章节1 - 音视频同步](#章节-1---音视频同步) 31 | * [章节2 - 重新封装](#章节-2---重新封装) 32 | * [章节3 - 转码](#章节-3---转码) 33 | 34 | # 介绍 35 | 36 | ## 视频 - 目光所见 37 | 38 | 如果以一定的频率播放一组图片([比如每秒24张图片](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)),人将会产生[视觉暂留现象](https://en.wikipedia.org/wiki/Persistence_of_vision)。 39 | 概括来讲,视频的本质就是: **以给定频率播放的一系列图片/帧**. 40 | 41 | 42 | 43 | 当代插画 (1886) 44 | 45 | ## 音频 - 耳朵所听 46 | 47 | 尽管一个没有声音的视频也可以表达很多感受和情绪,但加入音频会带来更多的体验乐趣。 48 | 49 | 声音是指以压力波形式通过空气或其他介质(例如气体、液体或者固体)传播的振动。 50 | 51 | > 在数字音频系统中,麦克风将声音转换为模拟电信号,然后通常使用脉冲编码调制([PCM](https://en.wikipedia.org/wiki/Pulse-code_modulation))的模数转换器(ADC)将模拟信号转换为数字信号。 52 | 53 | ![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png "audio analog to digital") 54 | 55 | >[图片来源](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg) 56 | 57 | ## 编解码 - 压缩数据 58 | 59 | > CODEC是用于压缩或解压缩数字音频/视频的硬件或软件。 它提供将原始(未压缩的)数字音频/视频与压缩格式相互转换的能力。 60 | > 61 | > https://en.wikipedia.org/wiki/Video_codec 62 | 63 | 如果我们选择打包数百万张图片来生成一个视频文件,那么该文件的大小将会非常惊人。让我们来计算一下: 64 | 65 | 假如我们创建一个 `1080x1920` (高x宽)的视频,每个像素占用 `3 bytes` 对颜色进行编码(或使用 [24 bit](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29) 真色彩, 这可以提供 16,777,216 种不同的颜色),每秒 24 帧,视频时长为 30 分钟。 66 | 67 | ```c 68 | toppf = 1080 * 1920 // 每帧所有的像素点 69 | cpp = 3 // 每个像素的大小(bytes) 70 | tis = 30 * 60 // 时长(秒) 71 | fps = 24 // 每秒帧数 72 | 73 | required_storage = tis * fps * toppf * cpp 74 | ``` 75 | 76 | 计算结果显示,此视频需要大约 `250.28G` 的存储空间或 `1.19Gbps` 的带宽。这就是我们为什么需要使用 [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work) 的原因。 77 | 78 | ## 容器 - 整合音视频 79 | 80 | > 容器或者封装格式描述了不同的数据元素和元数据是如何在计算机文件中共存的。 81 | > https://en.wikipedia.org/wiki/Digital_container_format 82 | 83 | **单个这样的文件包含所有的流**(主要是音频和视频),并提供**同步和通用元数据**,比如标题、分辨率等等。 84 | 85 | 一般我们可以通过文件的后缀来判断文件格式:比如 video.webm 通常是一个使用 [`webm`](https://www.webmproject.org/) 容器格式的视频。 86 | 87 | ![container](/img/container.png) 88 | 89 | # FFmpeg - 命令行 90 | 91 | > 这是一个完整的跨平台解决方案,可用于音视频的录制、转换和流式传输等。 92 | 93 | 我们使用非常优秀的工具/库 [FFmpeg](https://www.ffmpeg.org/) 来处理多媒体文件。你可能对它有些了解,也可能已经直接或者间接的在使用它了(你用过 [Chrome](https://www.chromium.org/developers/design-documents/video) 吗?) 94 | 95 | `ffmpeg` 是该方案中简单而强大的命令行工具。例如,可以通过以下命令将一个 `mp4` 文件转换成 `avi` 格式: 96 | 97 | ```bash 98 | $ ffmpeg -i input.mp4 output.avi 99 | ``` 100 | 101 | 通过上述步骤,我们做了一次重新封装,从一个容器转换为另外一个容器。FFmpeg 也可以用于转码,我们稍后再针对它进行讨论。 102 | 103 | ## **FFmpeg 命令行工具 101** 104 | 105 | FFmpeg 有一个非常完善的[文档](https://www.ffmpeg.org/ffmpeg.html)来介绍它是如何工作的。 106 | 107 | 简单来说,FFmpeg 命令行程序需要以下参数格式来执行操作: `ffmpeg {1} {2} -i {3} {4} {5}`,分别是: 108 | 109 | 1. 全局参数 110 | 2. 输入文件参数 111 | 3. 输入文件 112 | 4. 输出文件参数 113 | 5. 
输出文件 114 | 115 | 选项 2、3、4、5 可以根据自己的需求进行添加。以下是一个易于理解的示例: 116 | 117 | ``` bash 118 | # 警告:这个文件大约 300MB 119 | $ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4 120 | 121 | $ ffmpeg \ 122 | -y \ # 全局参数 123 | -c:a libfdk_aac \ # 输入文件参数 124 | -i bunny_1080p_60fps.mp4 \ # 输入文件 125 | -c:v libvpx-vp9 -c:a libvorbis \ # 输出文件参数 126 | bunny_1080p_60fps_vp9.webm # 输出文件 127 | ``` 128 | 129 | 这个命令行作用是将一个 `mp4` 文件(包含了 `aac` 格式的音频流,`h264` 编码格式的视频流)转换为 `webm`,同时改变了音视频的编码格式。 130 | 131 | 我们可以简化上述命令行,但请注意 FFmpeg 会猜测或采用默认值。例如我们仅输入 `ffmpeg -i input.avi output.mp4` 时,FFmpeg 会使用哪种音频/视频编码来生成 `output.mp4` 呢? 132 | 133 | Werner Robitza 写了一篇 [关于 ffmpeg 编码和编辑的教程](https://slhck.info/ffmpeg-encoding-course/#/)。 134 | 135 | # 通用视频操作 136 | 137 | 在处理音频/视频时,我们通常会执行一系列操作。 138 | 139 | ## 转码 140 | 141 | ![transcoding](/img/transcoding.png) 142 | 143 | **是什么?** 将其中一个流(视频流或音频流)从一种编码格式转换成另一种 144 | 145 | **为什么?** 有时候有些设备(TV,智能手机等等)不支持 X ,但是支持 Y 和一些更新的编码方式,这些方式能提供更好的压缩比 146 | 147 | **如何做?** 转换 `H264`(AVC)视频为 `H265`(HEVC) 148 | 149 | ```bash 150 | $ ffmpeg \ 151 | -i bunny_1080p_60fps.mp4 \ 152 | -c:v libx265 \ 153 | bunny_1080p_60fps_h265.mp4 154 | ``` 155 | 156 | ## 转封装 157 | 158 | ![transmuxing](/img/transmuxing.png) 159 | 160 | **是什么?** 将视频/音频从某一种格式(容器)转换成另一种 161 | 162 | **为什么?** 有时候有些设备(TV,智能手机等等)不支持 X ,但是支持 Y 和一些新的容器,这些格式提供了更现代的功能/特征 163 | 164 | **如何做?** 转换一个 `mp4` 为 `ts` 165 | 166 | ```bash 167 | $ ffmpeg \ 168 | -i bunny_1080p_60fps.mp4 \ 169 | -c copy \ # 令 ffmpeg 跳过编解码过程 170 | bunny_1080p_60fps.ts 171 | ``` 172 | 173 | ## 转码率 174 | 175 | ![transrating](/img/transrating.png) 176 | 177 | **是什么?** 改变码率或生成其他版本。 178 | 179 | **为什么?** 有的人使用较为落后的智能手机通过 `2G` (edge) 的网络连接来观看视频,有些人使用 4K 电视及光纤网络来观看视频,因此我们需要提供不同的码率的视频来满足不同的需求。 180 | 181 | **如何做?** 生成码率介于 964K 和 3856K 之间(缓冲区大小为 2000K)的版本。 182 | 183 | ```bash 184 | $ ffmpeg \ 185 | -i bunny_1080p_60fps.mp4 \ 186 | -minrate 964K -maxrate 3856K -bufsize 2000K \ 187 | bunny_1080p_60fps_transrating_964_3856.mp4 188 | ``` 189 | 190 | 我们通常会同时使用改变码率和分辨率的操作。Werner Robitza 写了另一篇关于 [FFmpeg 码率控制](https://slhck.info/posts/) 的必知必会系列文章。 191 | 192 | ## 转分辨率 193 | 194 | ![transsizing](/img/transsizing.png) 195 | 196 | **是什么?** 将视频从一种分辨率转为其他分辨率的操作。正如上文所述,改变分辨率的操作通常与改变码率的操作同时使用。 197 | 198 | **为什么?** 原因与转码率相同,需要满足不同情况下的不同需求。 199 | 200 | **如何做?** 将视频从 `1080p` 转换为 `480p` 201 | 202 | ```bash 203 | $ ffmpeg \ 204 | -i bunny_1080p_60fps.mp4 \ 205 | -vf scale=480:-1 \ 206 | bunny_1080p_60fps_transsizing_480.mp4 207 | ``` 208 | 209 | ## 自适应流 210 | 211 | ![adaptive streaming](/img/adaptive-streaming.png) 212 | 213 | **是什么?** 生成很多不同分辨率/码率的视频并分块,通过 http 进行传输。 214 | 215 | **为什么?** 为了在不同的终端和网络环境下提供更加灵活的观看体验,比如低端智能手机或者4K电视。这也使得扩展和部署更为简单方便,但是会增加延迟。 216 | 217 | **如何做?** 用 DASH 创建一个自适应的 WebM。 218 | 219 | ```bash 220 | # 视频流 221 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm 222 | 223 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm 224 | 225 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm 226 | 227 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm 228 | 229 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm 230
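# 补充说明:以上各档视频都使用相同的关键帧间隔(-keyint_min 150 -g 150),
# 这样各码率版本的分段在时间上对齐,播放器才能在不同档位之间无缝切换。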
| 231 | # 音频流 232 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm 233 | 234 | # DASH 格式 235 | $ ffmpeg \ 236 | -f webm_dash_manifest -i video_160x90_250k.webm \ 237 | -f webm_dash_manifest -i video_320x180_500k.webm \ 238 | -f webm_dash_manifest -i video_640x360_750k.webm \ 239 | -f webm_dash_manifest -i video_640x360_1000k.webm \ 240 | -f webm_dash_manifest -i video_1280x720_1500k.webm \ 241 | -f webm_dash_manifest -i audio_128k.webm \ 242 | -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \ 243 | -f webm_dash_manifest \ 244 | -adaptation_sets "id=0,streams=0,1,2,3,4 id=1,streams=5" \ 245 | manifest.mpd 246 | ``` 247 | 248 | PS: 该样例借鉴自 [使用 DASH 播放自适应 WebM](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash) 249 | 250 | ## 更多 251 | 252 | FFmpeg 还有很多[其他用法](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly)。我会利用 FFmpeg 结合 iMovie 为 YouTube 编辑视频,你当然也可以更专业地使用它。 253 | 254 | # 笨办法学 FFmpeg libav 255 | 256 | > Don't you wonder sometimes 'bout sound and vision? 257 | > **David Robert Jones** 258 | 259 | 既然 [FFmpeg](#ffmpeg---command-line) 作为命令行工具对多媒体文件进行基本处理这么有效,那么我们如何在自己的程序里使用它呢? 260 | 261 | FFmpeg 是由几个可以集成到程序里的[lib库](https://www.ffmpeg.org/doxygen/trunk/index.html)组成的。通常在安装FFmpeg时,会自动安装这些库。我们将这些库统一叫做 **FFmpeg libav**。 262 | 263 | > 这个标题是对 Zed Shaw 的[笨办法学XX](https://learncodethehardway.org/)系列丛书的致敬,特别是笨办法学C语言。 264 | 265 | ## 章节0 - 臭名昭著的 hello world 266 | 267 | 这里说的 hello world 实际上不是在终端里输出 “hello world” :tongue:,而是**输出视频信息**,例如:格式、时长、分辨率、音频轨道,最后我们将**解码一些帧,并保存为图片**。 268 | 269 | 270 | ### FFmpeg libav 架构 271 | 272 | 在我们开始之前,我们需要先了解一下**FFmpeg libav 架构**的工作流程和各个组件之间的工作方式。 273 | 274 | 下面是一张视频解码的处理流程图: 275 | 276 | ![ffmpeg libav architecture - decoding process](/img/decoding.png) 277 | 278 | 首先,我们需要加载媒体文件到 [AVFormatContext](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 组件(为便于理解,容器看作是文件格式即可)。这个过程并不是加载整个文件,它通常只是加载了文件头。 279 | 280 | 我们加载**容器的头部信息**后,就可以访问媒体文件流(流可以认为是基本的音频和视频数据)。每个流在 [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html) 组件中可用。 281 | 282 | > 流是数据流的一个昵称 283 | 284 | 假设我们的视频文件包含两个流:一个是 [AAC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) 音频流,一个是 [H264(AVC)](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC)视频流。我们可以从每一个流中提取出被称为数据包的数据片段(切片),这些数据包将被加载到 [AVPacket](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 组件中。 285 | 286 | **数据包中的数据仍然是被编码的**(被压缩),为了解码这些数据,我们需要将这些数据给到 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html)。 287 | 288 | `AVCodec` 将解码这些数据到 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html),最后我们将得到**解码后的帧**。注意,视频流和音频流共用此处理流程。 289 | 290 | ### 构建要求 291 | 292 | 由于有些人编译或者运行示例时会遇到许多[问题](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling),因此我们使用 `Docker` 来构建开发/运行环境。我们将使用一个 Big Buck Bunny 的视频来作为示例,如果你没有这个视频,运行 `make fetch_small_bunny_video` 来获取。 293 | 294 | ### 章节 0 - 代码一览 295 | 296 | > 展示[代码](/0_hello_world.c)并执行。 297 | > 298 | > ```bash 299 | > $ make run_hello 300 | > ``` 301 | 302 | 我们将跳过一些细节,不过不用担心,[代码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/blob/master/0_hello_world.c)都在Github上维护。 303 | 304 | 我们首先为 [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 分配内存,利用它可以获得相关格式(容器)的信息。 305 | 306 | ```c 307 | AVFormatContext *pFormatContext = avformat_alloc_context(); 308 | ``` 309 | 310 | 我们将打开一个文件并读取文件的头信息,利用相关格式的简要信息填充 `AVFormatContext`(注意,编解码器通常不会被打开)。需要使用
[`avformat_open_input`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49) 函数,该函数需要 `AVFormatContext`、文件名和两个可选参数:[`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html)(如果为NULL,FFmpeg将猜测格式)、[`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html)(解封装参数)。 311 | 312 | ```c 313 | avformat_open_input(&pFormatContext, filename, NULL, NULL); 314 | ``` 315 | 316 | 可以输出视频的格式和时长: 317 | 318 | ```c 319 | printf("Format %s, duration %lld us", pFormatContext->iformat->long_name, pFormatContext->duration); 320 | ``` 321 | 322 | 为了访问数据流,我们需要从媒体文件中读取数据。需要利用函数 [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) 完成此步骤。`pFormatContext->nb_streams` 保存着流的数量,通过 `pFormatContext->streams[i]` 可以获取第 `i` 个数据流([`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html))。 323 | 324 | ```c 325 | avformat_find_stream_info(pFormatContext, NULL); 326 | ``` 327 | 328 | 可以使用循环来获取所有流数据: 329 | 330 | ```c 331 | for (int i = 0; i < pFormatContext->nb_streams; i++) 332 | { 333 | // 334 | } 335 | ``` 336 | 337 | 针对每个流维护一个对应的 [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html),该结构体描述了被编码流的各种属性。 338 | 339 | ```c 340 | AVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar; 341 | ``` 342 | 343 | 通过codec id和 [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) 函数可以找到对应已经注册的解码器,返回 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html) 指针,该组件能让我们知道如何编解码这个流。 344 | 345 | ```c 346 | AVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id); 347 | ``` 348 | 349 | 现在可以输出一些编解码信息。 350 | 351 | ```c 352 | // 用于视频和音频 353 | if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) { 354 | printf("Video Codec: resolution %d x %d", pLocalCodecParameters->width, pLocalCodecParameters->height); 355 | } else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) { 356 | printf("Audio Codec: %d channels, sample rate %d", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate); 357 | } 358 | // 通用 359 | printf("\tCodec %s ID %d bit_rate %lld", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate); 360 | ``` 361 | 362 | 利用刚刚获取的 `AVCodec` 为 [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html) 分配内存,它将维护解码/编码过程的上下文。 然后需要使用 [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16)和被编码流的参数(`AVCodecParameters`) 来填充 `AVCodecContext`。 363 | 364 | 完成上下文填充后,使用 [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) 来打开解码器。 365 | 366 | ```c 367 | AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec); 368 | avcodec_parameters_to_context(pCodecContext, pCodecParameters); 369 | avcodec_open2(pCodecContext, pCodec, NULL); 370 | ``` 371 | 372 | 现在我们将从流中读取数据包并将它们解码为帧。但首先,需要为 [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 和 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) 分配内存。 373 | 374 | ```c 375 | AVPacket *pPacket = av_packet_alloc(); 376 | AVFrame *pFrame = av_frame_alloc(); 377 | ``` 378 | 379 | 使用函数 [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) 读取帧数据来填充数据包。 380 | 381 | ```c 382 | while (av_read_frame(pFormatContext, pPacket) >= 0) { 383 | //...
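// 循环体内的工作(下文逐一展开):先用 avcodec_send_packet 把压缩数据包送入解码器,
// 再用 avcodec_receive_frame 从解码器取回解码后的帧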
384 | } 385 | ``` 386 | 387 | 使用函数 [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 来把**原始数据包**(未解压的帧)发送给解码器。 388 | 389 | ```c 390 | avcodec_send_packet(pCodecContext, pPacket); 391 | ``` 392 | 393 | 使用函数 [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 从解码器接受原始数据帧(解压后的帧)。 394 | 395 | ```c 396 | avcodec_receive_frame(pCodecContext, pFrame); 397 | ``` 398 | 399 | 可以输出 frame 编号、[PTS](https://en.wikipedia.org/wiki/Presentation_timestamp)、DTS、[frame 类型](https://en.wikipedia.org/wiki/Video_compression_picture_types)等其他信息。 400 | 401 | ```c 402 | printf( 403 | "Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]", 404 | av_get_picture_type_char(pFrame->pict_type), 405 | pCodecContext->frame_number, 406 | pFrame->pts, 407 | pFrame->pkt_dts, 408 | pFrame->key_frame, 409 | pFrame->coded_picture_number, 410 | pFrame->display_picture_number 411 | ); 412 | ``` 413 | 414 | 最后,我们可以将解码后的帧保存为[灰度图](https://en.wikipedia.org/wiki/Netpbm#PGM_example)。处理过程非常简单,使用 `pFrame->data`,它的索引与 [Y, Cb 和 Cr 分量](https://en.wikipedia.org/wiki/YCbCr) 相关联。我们只选择 `0`(Y 分量)数据保存灰度图。 415 | 416 | ```c 417 | save_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename); 418 | 419 | static void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename) 420 | { 421 | FILE *f; 422 | int i; 423 | f = fopen(filename,"w"); 424 | // 编写 pgm 格式所需的最小文件头 425 | // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example 426 | fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255); 427 | 428 | // 逐行写入 429 | for (i = 0; i < ysize; i++) 430 | fwrite(buf + i * wrap, 1, xsize, f); 431 | fclose(f); 432 | } 433 | ``` 434 | 435 | 现在将得到一张2MB大小的灰度图: 436 | 437 | ![saved frame](/img/generated_frame.png) 438 | 439 | ## 章节 1 - 音视频同步 440 | 441 | > **Be the player** - 一个年轻 JS 开发者开发的新 MSE 视频播放器。 442 | 443 | 在我们学习 [重新封装](#章节-2---重新封装) 之前,我们来谈谈timing(时机/时间点),或者说播放器如何知道在正确的时间来播放每一帧。 444 | 445 | 在上一个例子中,我们保存了一些帧: 446 | 447 | ![frame 0](/img/hello_world_frames/frame0.png) 448 | ![frame 1](/img/hello_world_frames/frame1.png) 449 | ![frame 2](/img/hello_world_frames/frame2.png) 450 | ![frame 3](/img/hello_world_frames/frame3.png) 451 | ![frame 4](/img/hello_world_frames/frame4.png) 452 | ![frame 5](/img/hello_world_frames/frame5.png) 453 | 454 | 当我们在设计一个播放器的时候,需要**以给定的速度播放每一帧**。否则,我们很难获得好的体验,因为在观看的过程中很可能播放得太快或者太慢。 455 | 456 | 因此我们需要引入一些机制来流畅地播放每一帧。每一帧都将拥有一个**播放时间戳**(PTS)。它是一个将**timebase**(时基,FFmpeg中一种特殊的时间度量单位,**timescale**可以认为是它的倒数)作为单位的递增数字。 457 | 458 | 我们来模仿几个场景,通过以下示例可以更迅速地理解。 459 | 460 | 例如 `fps=60/1` , `timebase=1/60000`,PTS 将以 `timescale / fps = 1000` 进行递增,因此每一帧对应的 PTS 如下(假设开始为0): 461 | 462 | * `frame=0, PTS = 0, PTS_TIME = 0` 463 | * `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016` 464 | * `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033` 465 | 466 | 相同情况下,将 timebase 修改为 `1/60`: 467 | 468 | * `frame=0, PTS = 0, PTS_TIME = 0` 469 | * `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016` 470 | * `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033` 471 | * `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050` 472 | 473 | 如 `fps=25`,`timebase=1/75`,PTS 将以 `timescale / fps = 3` 进行递增,因此每一帧对应的 PTS 如下(假设开始为0): 474 | 475 | * `frame=0, PTS = 0, PTS_TIME = 0` 476 | * `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04` 477 | * `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08` 478 | * 
`frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12` 479 | * ... 480 | * `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96` 481 | * ... 482 | * `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56` 483 | 484 | 通过 `pts_time`,我们可以找到一种方式,将它的渲染与音频的 `pts_time` 或系统时钟同步。FFmpeg libav 提供了获取这些信息的接口: 485 | 486 | - fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad) 487 | - tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad) 488 | - tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6) 489 | 490 | 被保存的帧按照 DTS 顺序发送(frames:1,6,4,2,3,5),按照 PTS 顺序播放(frames:1,2,3,4,5)。同时,我们可以发现B帧相对于P帧和I帧压缩率更高,更加节省空间。 491 | 492 | ``` 493 | LOG: AVStream->r_frame_rate 60/1 494 | LOG: AVStream->time_base 1/60000 495 | ... 496 | LOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0] 497 | LOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3] 498 | LOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4] 499 | LOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2] 500 | LOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5] 501 | LOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1] 502 | ``` 503 | 504 | ## 章节 2 - 重新封装 505 | 506 | 重新封装是将文件从一种格式转换为另一种格式。例如:我们可以非常容易地利用 FFmpeg 将 [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) 格式的视频转换成 [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) 格式。 507 | 508 | ```bash 509 | ffmpeg -i input.mp4 -c copy output.ts 510 | ``` 511 | 512 | 以上命令将在不编解码的情况下(`-c copy`)来对 mp4 做解封装,然后重新封装为 `mpegts` 文件。如果不用 `-f` 参数来指定格式的话,ffmpeg 会根据文件扩展名来进行猜测。 513 | 514 | FFmpeg 或 libav 的一般用法遵循以下模式/架构或工作流: 515 | 516 | * **[协议层](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - 接收一个输入(例如一个文件,也可以是 `rtmp` 或 `http`) 517 | * **[格式层](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 解封装数据内容,暴露出元数据和流信息 518 | * **[编码层](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 解码原始数据流 *可选* 519 | * **[像素层](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - 可以对原始帧应用一些 `filters`(例如调整大小)*可选* 520 | * 然后反过来做相同的操作 521 | * **[编码层](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 编码(重新编码或者转码)原始帧 *可选* 522 | * **[格式层](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 封装(或重新封装)原始数据流(压缩数据) 523 | * **[协议层](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - 将封装后数据输出 (另外的文件或远程服务器) 524 | 525 | ![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg) 526 | 527 | > 这张图的灵感来自 [Leixiaohua's](https://leixiaohua1020.github.io/#ffmpeg-development-examples) 和 [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9) 的作品。 528 | 529 | 现在我们将使用 libav 编写一个示例,完成与此命令行相同的效果: `ffmpeg -i input.mp4 -c copy output.ts` 530 | 531 | 我们读取一个输入文件(`input_format_context`),并且将修改保存至输出(`output_format_context`)。 532 | 533 | ```c 534 | AVFormatContext *input_format_context = NULL; 535 | AVFormatContext *output_format_context = NULL; 536 | ``` 537 | 538 | 通常我们的做法是分配内存并打开输入文件。对于这个示例,我们将打开一个输入文件并为一个输出文件分配内存。 539 | 540 | ```c 541 | if ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) { 542 | fprintf(stderr, "Could not open input file '%s'", in_filename); 543 | goto end; 544 | } 545 | if ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) { 546 | fprintf(stderr, "Failed to retrieve input stream information"); 547 | goto end; 548 | } 549 | 550 |
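// 下面根据输出文件名(例如 .ts、.mp4)推断封装格式,并为输出分配 AVFormatContext: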
avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename); 551 | if (!output_format_context) { 552 | fprintf(stderr, "Could not create output context\n"); 553 | ret = AVERROR_UNKNOWN; 554 | goto end; 555 | } 556 | ``` 557 | 558 | 我们将重新封装视频、音频、字幕流,因此需要将用到的这些流存入一个数组中。 559 | 560 | ```c 561 | number_of_streams = input_format_context->nb_streams; 562 | streams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list)); 563 | ``` 564 | 565 | 分配完所需要的内存之后,我们将遍历所有的流,然后利用 [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 为每一个流创建一个对应的输出流。注意,当前只需要针对视频、音频、字幕流进行处理。 566 | 567 | ```c 568 | for (i = 0; i < input_format_context->nb_streams; i++) { 569 | AVStream *out_stream; 570 | AVStream *in_stream = input_format_context->streams[i]; 571 | AVCodecParameters *in_codecpar = in_stream->codecpar; 572 | if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && 573 | in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO && 574 | in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { 575 | streams_list[i] = -1; 576 | continue; 577 | } 578 | streams_list[i] = stream_index++; 579 | out_stream = avformat_new_stream(output_format_context, NULL); 580 | if (!out_stream) { 581 | fprintf(stderr, "Failed allocating output stream\n"); 582 | ret = AVERROR_UNKNOWN; 583 | goto end; 584 | } 585 | ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar); 586 | if (ret < 0) { 587 | fprintf(stderr, "Failed to copy codec parameters\n"); 588 | goto end; 589 | } 590 | } 591 | ``` 592 | 593 | 现在,我们需要创建一个输出文件。 594 | 595 | ```c 596 | if (!(output_format_context->oformat->flags & AVFMT_NOFILE)) { 597 | ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE); 598 | if (ret < 0) { 599 | fprintf(stderr, "Could not open output file '%s'", out_filename); 600 | goto end; 601 | } 602 | } 603 | 604 | ret = avformat_write_header(output_format_context, NULL); 605 | if (ret < 0) { 606 | fprintf(stderr, "Error occurred when opening output file\n"); 607 | goto end; 608 | } 609 | ``` 610 | 611 | 完成上述操作之后,我们就可以将输入流逐个数据包复制到输出流。我们通过 `av_read_frame` 循环读取每一个数据包。对于每一个数据包,我们都要重新计算 PTS 和 DTS,最终通过 `av_interleaved_write_frame` 写入输出格式的上下文。 612 | 613 | ```c 614 | while (1) { 615 | AVStream *in_stream, *out_stream; 616 | ret = av_read_frame(input_format_context, &packet); 617 | if (ret < 0) 618 | break; 619 | in_stream = input_format_context->streams[packet.stream_index]; 620 | if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) { 621 | av_packet_unref(&packet); 622 | continue; 623 | } 624 | packet.stream_index = streams_list[packet.stream_index]; 625 | out_stream = output_format_context->streams[packet.stream_index]; 626 | /* 复制数据包(换算时间戳) */ 627 | packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); 628 | packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); 629 | packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base); 630 | // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903 631 | packet.pos = -1; 632 | 633 | //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1 634 | ret = av_interleaved_write_frame(output_format_context, &packet); 635 | if (ret < 0) { 636 | fprintf(stderr, "Error muxing packet\n"); 637 | break; 638 | } 639 | av_packet_unref(&packet); 640 | } 641 |
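// 当 av_read_frame 返回负值(通常是 AVERROR_EOF)时循环结束;
// 所有数据包写完之后,还需要写入文件尾(见下文的 av_write_trailer)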
``` 642 | 643 | 最后我们要使用函数 [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) 输出文件尾。 644 | 645 | ```c 646 | av_write_trailer(output_format_context); 647 | ``` 648 | 649 | 现在可以进行测试了,首先我们将文件从 MP4 转换成 MPEG-TS 格式。使用 libav 来代替命令行 `ffmpeg -i input.mp4 -c copy output.ts` 的作用。 650 | 651 | ```bash 652 | make run_remuxing_ts 653 | ``` 654 | 655 | 它起作用了!!!不相信吗?我们可以使用 ffprobe 来检测一下: 656 | 657 | ```bash 658 | ffprobe -i remuxed_small_bunny_1080p_60fps.ts 659 | 660 | Input #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts': 661 | Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s 662 | Program 1 663 | Metadata: 664 | service_name : Service01 665 | service_provider: FFmpeg 666 | Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc 667 | Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s 668 | ``` 669 | 670 | 下图中总结了我们所做的工作,我们可以回顾一下之前关于[libav如何工作](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture)的介绍。但图中也表明我们跳过了编解码的部分。 671 | 672 | ![remuxing libav components](/img/remuxing_libav_components.png) 673 | 674 | 在结束本章之前,我想展示一下重新封装中的一个重要功能:**使用选项**。比如我们想要 [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) 格式的文件,需要使用 [fragmented mp4](https://stackoverflow.com/a/35180327)(有时称为fmp4)而不是 MPEG-TS 或者普通的 MPEG-4。 675 | 676 | 使用[命令行](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting)可以简单地实现该功能: 677 | 678 | ``` 679 | ffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4 680 | ``` 681 | 682 | 使用 libav 进行实现也非常简单,只需要在写入输出头时(复制数据包之前),传递相应选项即可。 683 | 684 | ```c 685 | AVDictionary* opts = NULL; 686 | av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov+default_base_moof", 0); 687 | ret = avformat_write_header(output_format_context, &opts); 688 | ``` 689 | 690 | 现在可以生成 fragmented mp4 文件: 691 | 692 | ```bash 693 | make run_remuxing_fragmented_mp4 694 | ``` 695 | 696 | 可以使用非常优秀的 [gpac/mp4box.js](https://gpac.github.io/mp4box.js/),或者在线工具 [http://mp4parser.com/](http://mp4parser.com/) 来对比差异。首先加载普通mp4: 697 | 698 | ![mp4 boxes](/img/boxes_normal_mp4.png) 699 | 700 | 如你所见,`mdat` atom/box 是**存放实际音视频帧数据**的地方。现在我们加载 fragmented mp4,看看它是如何组织 `mdat` 的。 701 | 702 | ![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png) 703 | 704 | ## 章节 3 - 转码 705 | 706 | > #### 展示代码并执行 707 | > 708 | > ```bash 709 | > $ make run_transcoding 710 | > ``` 711 | > 712 | > 我们将跳过一些细节,但是请不用担心:[代码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/blob/master/3_transcoding.c)维护在 github。 713 | 714 | 在这一章,我们将用 C 写一个精简的转码器,使用 **FFmpeg/libav库**,特别是[libavcodec](https://ffmpeg.org/libavcodec.html)、libavformat 和 libavutil,将 H264 编码的视频转换为 H265。 715 | 716 | ![media transcoding flow](/img/transcoding_flow.png) 717 | 718 | > 简单回顾:[**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 是多媒体文件格式的抽象(例如:MKV,MP4,Webm,TS)。 [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) 代表给定格式的数据类型(例如:音频,视频,字幕,元数据)。 [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) 是从 `AVStream` 获得的压缩数据的切片,可由 [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html)(例如av1,h264,vp9,hevc)解码,从而生成称为
[**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html) 的原始数据。 719 | 720 | ### 转封装 721 | 722 | 我们将从简单的转封装操作开始,然后在此代码基础上进行构建,第一步需要**加载输入文件**。 723 | 724 | ```c 725 | // 为 AVFormatContext 分配内存 726 | avfc = avformat_alloc_context(); 727 | // 打开一个输入流并读取头信息 728 | avformat_open_input(&avfc, in_filename, NULL, NULL); 729 | // 读取文件数据包以获取流信息 730 | avformat_find_stream_info(avfc, NULL); 731 | ``` 732 | 733 | 现在需要设置解码器,`AVFormatContext` 将使我们能够访问所有 `AVStream` 组件,获取它们的 `AVCodec` 并创建特定的 `AVCodecContext`,最后我们可以打开给定的编解码器进行解码。 734 | 735 | > [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) 保存相关媒体文件的数据,包括:码率、帧率、采样率、通道、高度等等。 736 | 737 | ```c 738 | for (int i = 0; i < avfc->nb_streams; i++) 739 | { 740 | AVStream *avs = avfc->streams[i]; 741 | AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id); 742 | AVCodecContext *avcc = avcodec_alloc_context3(avc); 743 | avcodec_parameters_to_context(avcc, avs->codecpar); 744 | avcodec_open2(avcc, avc, NULL); 745 | } 746 | ``` 747 | 748 | 现在我们需要准备输出文件,首先为 `AVFormatContext` **分配内存**。我们为输出的格式创建**每一个流**。为了正确打包这些流,我们从解码器中**复制编解码参数**。 749 | 750 | 通过设置 `AV_CODEC_FLAG_GLOBAL_HEADER` 来告诉编码器可以使用这个全局头信息,最终打开输出文件写入文件头。 751 | 752 | ```c 753 | avformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename); 754 | 755 | AVStream *avs = avformat_new_stream(encoder_avfc, NULL); 756 | avcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar); 757 | 758 | if (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER) 759 | encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; 760 | 761 | avio_open(&encoder_avfc->pb, out_filename, AVIO_FLAG_WRITE); 762 | avformat_write_header(encoder_avfc, &muxer_opts); 763 | 764 | ``` 765 | 766 | 我们从解码器获得 `AVPacket`,调整时间戳后写到输出文件。尽管 `av_interleaved_write_frame` 从函数名上来看是 “写入帧信息”,但我们实际是在存储数据包。最后通过写入文件尾来结束转封装操作。 767 | 768 | ```c 769 | AVFrame *input_frame = av_frame_alloc(); 770 | AVPacket *input_packet = av_packet_alloc(); 771 | 772 | while (av_read_frame(decoder_avfc, input_packet) >= 0) 773 | { 774 | av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base); 775 | av_interleaved_write_frame(encoder_avfc, input_packet); 776 | } 777 | 778 | av_write_trailer(encoder_avfc); 779 | ``` 780 | 781 | ### 转码 782 | 783 | 前面的章节展示了一个转封装的程序,现在我们将添加对文件做编码的功能,具体是将视频从 `h264` 编码转换为 `h265`。 784 | 785 | 在我们设置解码器之后及准备输出文件之前,需要设置编码器。 786 | 787 | * 使用 [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 和编码器创建 `AVStream` 788 | * 使用名为 `libx265` 的 `AVCodec`,利用 [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37) 获取 789 | * 利用 [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315) 及编解码器创建 `AVCodecContext` 790 | * 为编解码设置基础属性 791 | * 打开编解码器,使用 [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) 和 [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe) 将参数从上下文复制到流中 792 | 793 | ```c 794 | AVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL); 795 | AVStream *video_avs = avformat_new_stream(encoder_avfc, NULL); 796 | 797 | char *codec_name = "libx265"; 798 | char *codec_priv_key = "x265-params"; 799 | // 我们将对 x265 使用内置的参数 800 | // 禁用场景切换并且把 GOP 调整为 60 帧 801 | char *codec_priv_value = "keyint=60:min-keyint=60:scenecut=0"; 802 | 803 | AVCodec *video_avc = avcodec_find_encoder_by_name(codec_name); 804 | AVCodecContext *video_avcc = avcodec_alloc_context3(video_avc); 805 | // 编码参数 806 | av_opt_set(video_avcc->priv_data, codec_priv_key, codec_priv_value, 0); 807 | video_avcc->height = decoder_video_avcc->height; 808 | video_avcc->width = decoder_video_avcc->width; 809 | video_avcc->pix_fmt = video_avc->pix_fmts[0]; 810 | // 控制码率 811 | video_avcc->bit_rate = 2 * 1000 * 1000; 812 | video_avcc->rc_buffer_size = 4 * 1000 * 1000; 813 | video_avcc->rc_max_rate = 2 * 1000 * 1000; 814 | video_avcc->rc_min_rate = 2 * 1000 * 1000; // 注意:rc_min_rate 不应高于 rc_max_rate 815 | // 时间基数 816 | video_avcc->time_base = av_inv_q(input_framerate); 817 | video_avs->time_base = video_avcc->time_base; 818 | 819 | avcodec_open2(video_avcc, video_avc, NULL); 820 | avcodec_parameters_from_context(video_avs->codecpar, video_avcc); 821 | ``` 822 | 823 | 为了视频流转码,我们需要拓展解码的步骤: 824 | 825 | - 利用 [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 把(压缩的)`AVPacket` 发送给解码器 826 | - 利用 [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 接收未压缩的 `AVFrame` 827 | - 开始转码原始数据 828 | - 使用 [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169) 发送原始数据 829 | - 基于编解码器和 `AVPacket`,利用 [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6) 接收编码数据 830 | - 设置时间戳,调用 [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e) 831 | - 写入输出文件 [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1) 832 | 833 | ```c 834 | AVFrame *input_frame = av_frame_alloc(); 835 | AVPacket *input_packet = av_packet_alloc(); 836 | 837 | while (av_read_frame(decoder_avfc, input_packet) >= 0) 838 | { 839 | int response = avcodec_send_packet(decoder_video_avcc, input_packet); 840 | while (response >= 0) { 841 | response = avcodec_receive_frame(decoder_video_avcc, input_frame); 842 | if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) { 843 | break; 844 | } else if (response < 0) { 845 | return response; 846 | } 847 | if (response >= 0) { 848 | encode(encoder_avfc, decoder_video_avs, encoder_video_avs, video_avcc, input_frame, input_packet->stream_index); 849 | } 850 | av_frame_unref(input_frame); 851 | } 852 | av_packet_unref(input_packet); 853 | } 854 | av_write_trailer(encoder_avfc); 855 | 856 | // 上面用到的 encode 函数 857 | int encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext *video_avcc, AVFrame *input_frame, int index) { 858 | AVPacket *output_packet = av_packet_alloc(); 859 | int response = avcodec_send_frame(video_avcc, input_frame); 860 | 861 | while (response >= 0) { 862 | response = avcodec_receive_packet(video_avcc, output_packet); 863 | if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) { 864 | break; 865 | } else if (response < 0) { 866 | return -1; 867 | } 868 | 869 | output_packet->stream_index = index; 870 | output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den; 871 | 872 | av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base); 873 | response = av_interleaved_write_frame(avfc,
output_packet); 874 | } 875 | av_packet_unref(output_packet); 876 | av_packet_free(&output_packet); 877 | return 0; 878 | } 879 | 880 | ``` 881 | 882 | 我们将媒体流从 `h264` 编码转换为 `h265`,和预期的一样,`h265` 编码的文件相较于 h264 更小。本次[创建的程序](/3_transcoding.c)能够完成以下转换: 883 | 884 | ```c 885 | /* 886 | * H264 -> H265 887 | * Audio -> remuxed (untouched) 888 | * MP4 - MP4 889 | */ 890 | StreamingParams sp = {0}; 891 | sp.copy_audio = 1; 892 | sp.copy_video = 0; 893 | sp.video_codec = "libx265"; 894 | sp.codec_priv_key = "x265-params"; 895 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0"; 896 | 897 | /* 898 | * H264 -> H264 (fixed gop) 899 | * Audio -> remuxed (untouched) 900 | * MP4 - MP4 901 | */ 902 | StreamingParams sp = {0}; 903 | sp.copy_audio = 1; 904 | sp.copy_video = 0; 905 | sp.video_codec = "libx264"; 906 | sp.codec_priv_key = "x264-params"; 907 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1"; 908 | 909 | /* 910 | * H264 -> H264 (fixed gop) 911 | * Audio -> remuxed (untouched) 912 | * MP4 - fragmented MP4 913 | */ 914 | StreamingParams sp = {0}; 915 | sp.copy_audio = 1; 916 | sp.copy_video = 0; 917 | sp.video_codec = "libx264"; 918 | sp.codec_priv_key = "x264-params"; 919 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1"; 920 | sp.muxer_opt_key = "movflags"; 921 | sp.muxer_opt_value = "frag_keyframe+empty_moov+delay_moov+default_base_moof"; 922 | 923 | /* 924 | * H264 -> H264 (fixed gop) 925 | * Audio -> AAC 926 | * MP4 - MPEG-TS 927 | */ 928 | StreamingParams sp = {0}; 929 | sp.copy_audio = 0; 930 | sp.copy_video = 0; 931 | sp.video_codec = "libx264"; 932 | sp.codec_priv_key = "x264-params"; 933 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1"; 934 | sp.audio_codec = "aac"; 935 | sp.output_extension = ".ts"; 936 | 937 | /* WIP :P -> it's not playing on VLC, the final bit rate is huge 938 | * H264 -> VP9 939 | * Audio -> Vorbis 940 | * MP4 - WebM 941 | */ 942 | //StreamingParams sp = {0}; 943 | //sp.copy_audio = 0; 944 | //sp.copy_video = 0; 945 | //sp.video_codec = "libvpx-vp9"; 946 | //sp.audio_codec = "libvorbis"; 947 | //sp.output_extension = ".webm"; 948 | ``` 949 | 950 | > 老实说,完成这个教程[比我想象中的难](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54),必须深入理解 [FFmpeg 命令行源码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749)并进行大量测试。而且我想我肯定遗漏了一些细节,因为我必须强制执行 `force-cfr` 才能使 h264 正常工作,并且现在仍然会出现一些 warning 信息,例如 `warning messages (forced frame type (5) at 80 was changed to frame type (3))`。 951 | -------------------------------------------------------------------------------- /README-ko.md: -------------------------------------------------------------------------------- 1 | [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg) 2 | 3 | [FFmpeg](https://www.ffmpeg.org/)을 라이브러리처럼(a.k.a. libav) 사용하려면 어떻게 시작해야할지 알려줄만할 튜토리얼/책을 찾아봤었습니다. 그리고는 ["How to write a video player in less than 1k lines"](http://dranger.com/ffmpeg/) 라는 튜토리얼을 찾았죠. 4 | 하지만 안타깝게도 그건 더이상 관리가 안되고 있어서 이 글을 쓰기로 결정했습니다. 5 | 6 | 여기서 사용된 대부분의 코드는 C로 되어있습니다. **하지만 걱정하지 마세요**: 당신도 쉽게 이해할 것이고 선호하는 언어에도 적용하실 수 있을겁니다. 7 | FFmpeg libav는 [python](https://pyav.org/), [go](https://github.com/imkira/go-libav)와 같은 다양한 언어로 된 많은 bindings을 제공합니다. 만약 사용하려는 언어에 그것이 없다면 `ffi`를 통해서도 지원할 수 있습니다. 
([Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua) 예시) 8 | 9 | 우리는 비디오와 오디오, 코덱, 컨테이너가 무엇인지에 대해 빠르게 학습한 후에 `FFmpeg` 명령을 어떻게 사용하는지 대해서 파헤쳐보고 마지막으로 코드도 작성해볼 것입니다, [삽질하면서 FFmpeg libav 배우기](#삽질하면서-FFmpeg-libav-배우기) 섹션으로 바로 넘어가셔도 좋습니다. 10 | 11 | 혹자는 인터넷 비디오 스트리밍이 전통적인 TV의 미래라고 이야기하기도 합니다. 어떻게 되든 FFmpeg은 공부해둘만한 가치가 있는 것입니다. 12 | 13 | __목차__ 14 | 15 | * [소개](#intro) 16 | * [비디오 - 당신이 무엇을 보는지!](#비디오---당신이-무엇을-보는지!) 17 | * [오디오 - 당신이 무엇을 듣는지!](#오디오---당신이-무엇을-듣는지!) 18 | * [코덱 - 데이터를 줄이기](#코덱---데이터를-줄이기) 19 | * [컨테이너 - 오디오와 비디오의 안식처](#[컨테이너---오디오와-비디오의-안식처) 20 | * [FFmpeg - 명령줄 도구](#FFmpeg---명령줄-도구) 21 | * [FFmpeg 명령줄 도구 101](#FFmpeg-명령줄-도구-101) 22 | * [공통 비디오 연산](#공통-비디오-연산) 23 | * [트랜스코딩 (Transcoding)](#트랜스코딩-(Transcoding)) 24 | * [트랜스먹싱 (Transmuxing)](#트랜스먹싱-(Transmuxing)) 25 | * [트랜스레이팅 (Transrating)](#트랜스레이팅-(Transrating)) 26 | * [트랜스사이징 (Transsizing)](#트랜스사이징-(Transsizing)) 27 | * [보너스: 적응형 스트리밍 (Adaptive Streaming)](#보너스:-적응형-스트리밍-(Adaptive-Streaming)) 28 | * [더 들어가기](#더-들어가기) 29 | * [삽질하면서 FFmpeg libav 배우기](#삽질하면서-FFmpeg-libav-배우기) 30 | * [챕터 0 - 악명 높은 hello world](#챕터-0---악명-높은-hello-world) 31 | * [FFmpeg libav 아키텍처](#FFmpeg-libav-아키텍처) 32 | * [챕터 1 - 타이밍 (timing)](#챕터-1---오디오와-비디오-동기화) 33 | * [챕터 2 - 리먹싱 (remuxing)](#챕터-2---리먹싱-(remuxing)) 34 | * [챕터 3 - 트랜스코딩 (transcoding)](#챕터-3---트랜스코딩-(transcoding)) 35 | 36 | # 소개 37 | 38 | ## 비디오 - 당신이 무엇을 보는지! 39 | 40 | 만약 당신이 여러 연속된 이미지들을 가지고 있고 이것들을 주어진 주파수에 맞게 변화시킨다면 (이를테면 [초당 24장의 이미지](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), [움직임의 잔상](https://en.wikipedia.org/wiki/Persistence_of_vision)을 만들게 될 것입니다. 41 | 요약하면 이게 비디오라는 것의 가장 기본적인 아이디어입니다: **정해진 속도에 맞게 돌아가는 연속된 사진들 / 프레임들**. 42 | 43 | 44 | 45 | Zeitgenössische Illustration (1886) 46 | 47 | ## 오디오 - 당신이 무엇을 듣는지! 48 | 49 | 음소거된 비디오만으로도 다양한 감정들을 표현할 수는 있지만 여기에 소리를 더해준다면 훨씬 더 즐거운 경험을 가져다 줄 것입니다. 50 | 51 | 소리는 공기 혹은 가스, 액체, 고체와 같은 다른 매체들을 통해 압력의 파동 형태로 전파되는 진동입니다. 52 | 53 | > 디지털 오디오 시스템에서는 마이크가 소리를 아날로그 전기 신호로 전환하고, 아날로그-디지털 변환기 (ADC) - 보통 [펄스-부호 변조 (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation)를 이용하여 - 아날로그 신호를 디지탈 신호로 변환합니다. 54 | 55 | ![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png "audio analog to digital") 56 | >[원문](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg) 57 | 58 | ## 코덱 - 데이터를 줄이기 59 | 60 | > CODEC은 **디지털 오디오/비디오를 압축하거나 압축해제하는** 전자회로나 소프트웨어입니다. 이것은 raw (압축이안된) 디지털 오디오/비디오를 압축된 형태로 혹은 그 반대로 변환합니다. 61 | > https://en.wikipedia.org/wiki/Video_codec 62 | 63 | 만약 우리가 수많은 이미지들을 차곡차곡 채워서 영화라고 부르는 하나의 파일로 만든다면, 결과적으로 엄청나게 큰 하나의 파일을 접하게 될 것 입니다. 한번 계산해봅시다: 64 | 65 | 한번 가정해봅시다. 해상도가 `1080 x 1920` (높이 x 너비)인 비디오를 하나 만들건데 색을 인코딩하는데 픽셀당 `3 bytes` (화면의 최소 화소)를 쓸 것입니다. (혹은 [24비트 컬러](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), 16,777,216개의 다른 색상을 제공) 그리고 이 비디오는 `초당 24프레임`으로 재생되고 `30분` 정도 길이입니다. 66 | 67 | ```c 68 | toppf = 1080 * 1920 //total_of_pixels_per_frame 69 | cpp = 3 //cost_per_pixel 70 | tis = 30 * 60 //time_in_seconds 71 | fps = 24 //frames_per_second 72 | 73 | required_storage = tis * fps * toppf * cpp 74 | ``` 75 | 76 | 이 비디오는 거의 `250.28GB`의 저장 용량이 필요하며 `1.19Gbps`의 대역폭이 요구됩니다! 이것이 바로 우리가 [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work)을 사용해야하는 이유입니다. 77 | 78 | ## 컨테이너 - 오디오와 비디오의 안식처 79 | 80 | > 컨테이너 혹은 래퍼(wrapper) 포맷은 데이터와 메타데이터의 다양한 요소들이 어떻게 하나의 컴퓨터 파일에 구성되어있는지를 기술하는 스펙을 담은 메타파일 포맷입니다. 
81 | > https://en.wikipedia.org/wiki/Digital_container_format 82 | 83 | **하나의 파일이 모든 스트림을 담고 있고** (주로 오디오와 비디오) 이것은 또 동기화와 제목, 해상도 등과 같은 일반적인 메타데이터도 제공합니다. 84 | 85 | 보통 우리는 파일의 확장자를 보고 포맷을 유추할 수 있습니다: 예를들면 `video.webm`은 아마도 [`webm`](https://www.webmproject.org/)를 컨테이너로 사용하는 비디오겠죠. 86 | 87 | ![container](/img/container.png) 88 | 89 | # FFmpeg - 명령줄 도구 90 | 91 | > 오디오와 비디오를 녹화하고 변환하고 스트리밍할 수 있는 완전한 크로스-플랫폼 솔루션. 92 | 93 | 멀티미디어 작업을 한다면 우리는 [FFmpeg](https://www.ffmpeg.org/)이라고 하는 정말 쩌는 툴/라이브러리를 사용할 수 있습니다. 아마도 여러분도 이것을 직간접적으로 알고있거나/사용했던 기회가 있었을 것입니다. ([Chrome](https://www.chromium.org/developers/design-documents/video) 사용시죠?). 94 | 95 | 이것은 `ffmpeg`이라고하는 아주 단순하지만 파워풀한 바이너리 형태의 명려줄 프로그램도 제공합니다. 96 | 예를들어, 아래 명령을 치는 것만으로도 컨테이너를 `mp4`에서 `avi`로 변환할 수 있습니다: 97 | 98 | ```bash 99 | $ ffmpeg -i input.mp4 output.avi 100 | ``` 101 | 102 | 우리는 방금 어떤 컨테이너에서 다른 컨테이너로 변환하는 과정인 **remuxing**을 해보았습니다. 103 | 기술적으로 FFmpeg은 트랜스코딩(transcoding)도 할 수 있습니다만 이것들에 대해서는 뒤에서 다시 이야기하겠습니다. 104 | 105 | ## FFmpeg 명령줄 도구 101 106 | 107 | FFmpeg이 어떻게 동작하는지를 아주 잘 설명하고 있는 [문서](https://www.ffmpeg.org/ffmpeg.html)가 있습니다. 108 | 109 | 간단히 정리하면, FFmpeg 명령줄 프로그램은 실행하기 위해 다음과 같은 형식의 인자를 갖춰야합니다 `ffmpeg {1} {2} -i {3} {4} {5}`, 여기서: 110 | 111 | 1. 전역 옵션 112 | 2. 입력 파일 옵션 113 | 3. 입력 url 114 | 4. 출력 파일 옵션 115 | 5. 출력 url 116 | 117 | 2, 3, 4, 5 부분은 필요한만큼 많아질 수 있습니다. 118 | 실제로 수행해보면 이 인자 형식을 더 쉽게 이해할 수 있습니다: 119 | 120 | ``` bash 121 | # WARNING: this file is around 300MB 122 | $ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4 123 | 124 | $ ffmpeg \ 125 | -y \ # 전역 옵션 126 | -c:a libfdk_aac \ # 입력 파일 옵션 127 | -i bunny_1080p_60fps.mp4 \ # 입력 url 128 | -c:v libvpx-vp9 -c:a libvorbis \ # 출력 파일 옵션 129 | bunny_1080p_60fps_vp9.webm # 출력 url 130 | ``` 131 | 이 명령은 두개의 스트림(`aac` 코덱으로 인코딩된 오디오와 `h264` 코덱으로 인코딩된 비디오)을 포함하는 `mp4`를 입력 파일로 받고 이를 `webm`으로 변환합니다, 물론 그 안의 오디오와 비디오 코덱들도 변환하고 있죠. 132 | 133 | 위의 명령을 더 단순화할 수도 있는데 그러면 FFmpeg이 기본값들을 사용하거나 추측하게될 것입니다. 134 | 예를들어 `ffmpeg -i input.avi output.mp4` 이렇게만 친다면 어떤 오디오/비디오 코덱이 `output.mp4`를 만들기 위해 사용될까요? 135 | 136 | Werner Robitza가 작성한 꼭 읽고/실행해볼만한 [FFmpeg으로 인코딩하고 편집하는 것에 대한 튜토리얼](http://slhck.info/ffmpeg-encoding-course/#/)이 있습니다. 137 | 138 | # 공통 비디오 연산 139 | 140 | 오디오/비디오 작업 중 보통 미디어에 대해 일련의 작업을 수행하게 됩니다. 141 | 142 | ## 트랜스코딩 (Transcoding) 143 | 144 | ![transcoding](/img/transcoding.png) 145 | 146 | **무엇인가?** 스트림 (오디오 또는 비디오) 중에 하나를 기존 코덱에서 다른 코덱으로 변환하는 작업. 147 | 148 | **왜?** 가끔 어떤 장치들은 (텔레비전, 스마트폰, 콘솔 등) X는 지원하지 않지만 Y를 지원합니다. 그리고 더 새로운 코덱들은 더 나은 압축률을 제공하기도 합니다. 149 | 150 | **어떻게?** `H264` (AVC) 비디오를 `H265` (HEVC)로 변환하기. 151 | ```bash 152 | $ ffmpeg \ 153 | -i bunny_1080p_60fps.mp4 \ 154 | -c:v libx265 \ 155 | bunny_1080p_60fps_h265.mp4 156 | ``` 157 | 158 | ## 트랜스먹싱 (Transmuxing) 159 | 160 | ![transmuxing](/img/transmuxing.png) 161 | 162 | **무엇인가?** 하나의 포맷을 (컨테이너) 다른 포맷으로 변환하는 작업. 163 | 164 | **왜?** 가끔 어떤 장치들은 (텔레비전, 스마트폰, 콘솔 등) X는 지원하지 않지만 Y를 지원합니다. 그리고 때때로 더 새로운 컨테이터들은 최신으로 요구되는 피처들을 제공합니다. 165 | 166 | **어떻게?** `mp4`에서 `webm`으로 변환하기. 167 | ```bash 168 | $ ffmpeg \ 169 | -i bunny_1080p_60fps.mp4 \ 170 | -c copy \ # just saying to ffmpeg to skip encoding 171 | bunny_1080p_60fps.webm 172 | ``` 173 | 174 | ## 트랜스레이팅 (Transrating) 175 | 176 | ![transrating](/img/transrating.png) 177 | 178 | **무엇인가?** 비트레이트를 변환하거나 다른 변환본(renditions)을 만드는 작업. 179 | 180 | **왜?** 사람들은 `2G` (edge)가 연결된 저사양의 스마트폰에서든 `광통신` 인터넷이 연결된 4K 텔레비전에든 당신의 비디오 볼 것이다. 그래서 같은 비디오라도 여러 비트레이트를 가진 하나 이상의 변환본을 제공해야합니다. 181 | 182 | **어떻게?** 3856K와 2000K 사이의 비트레이트를 가진 변환본을 생성하기. 
183 | ```bash 184 | $ ffmpeg \ 185 | -i bunny_1080p_60fps.mp4 \ 186 | -minrate 964K -maxrate 3856K -bufsize 2000K \ 187 | bunny_1080p_60fps_transrating_964_3856.mp4 188 | ``` 189 | 190 | 보통 트랜스레이팅(transrating)은 트랜스사이징(transsizing)과 함께 사용합니다. Werner Robitza가 작성한 또 다른 필독/실행물 [FFmpeg rate 제어에 대한 연재 포스팅](http://slhck.info/posts/)가 있습니다. 191 | 192 | ## 트랜스사이징 (Transsizing) 193 | 194 | ![transsizing](/img/transsizing.png) 195 | 196 | **무엇인가?** 하나의 해상도에서 다른 것으로 변환하는 작업. 이전에 언급한 것처럼 트랜스사이징(transsizing)은 주로 트랜스레이팅(transrating)과 함께 사용됩니다. 197 | 198 | **왜?** 트랜스레이팅(transrating)에서의 이유와 동일함. 199 | 200 | **어떻게?** `1080p`의 해상도를 `480p`로 변환하기. 201 | ```bash 202 | $ ffmpeg \ 203 | -i bunny_1080p_60fps.mp4 \ 204 | -vf scale=480:-1 \ 205 | bunny_1080p_60fps_transsizing_480.mp4 206 | ``` 207 | 208 | ## 보너스: 적응형 스트리밍 (Adaptive Streaming) 209 | 210 | ![adaptive streaming](/img/adaptive-streaming.png) 211 | 212 | **무엇인가?** 다양한 (비트레이트의) 해상도를 생성하고 미디어들을 여러 청크로 나눠서 http를 통해 서비스하는 작업. 213 | 214 | **왜?** 저사양 스마트폰 혹은 4K TV에서 시청할 수 있는 유연한 미디어를 제공하기 위해. 또한 이렇게 하면 확장이나 배포하기가 쉽습니다. 다만 지연시간이 생길 수 있습니다. 215 | 216 | **어떻게?** DASH를 이용하여 적응형 WebM을 생성하기. 217 | ```bash 218 | # video streams 219 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm 220 | 221 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm 222 | 223 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm 224 | 225 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm 226 | 227 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm 228 | 229 | # audio streams 230 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm 231 | 232 | # the DASH manifest 233 | $ ffmpeg \ 234 | -f webm_dash_manifest -i video_160x90_250k.webm \ 235 | -f webm_dash_manifest -i video_320x180_500k.webm \ 236 | -f webm_dash_manifest -i video_640x360_750k.webm \ 237 | -f webm_dash_manifest -i video_640x360_1000k.webm \ 238 | -f webm_dash_manifest -i video_1280x720_1500k.webm \ 239 | -f webm_dash_manifest -i audio_128k.webm \ 240 | -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \ 241 | -f webm_dash_manifest \ 242 | -adaptation_sets "id=0,streams=0,1,2,3,4 id=1,streams=5" \ 243 | manifest.mpd 244 | ``` 245 | 246 | PS: 저는 이 예제를 [DASH를 이용한 Adaptive WebM 재생에 대한 지침](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)에서 가져왔습니다. 247 | 248 | ## 더 들어가기 249 | 250 | [FFmpeg에 대한 아주 수많은 다른 사용방법들이](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly) 있습니다. 251 | 저는 이걸 YouTube 용 동영상들을 만들고/편집하는데 *iMovie*와 함께 사용합니다. 물론 여러분도 프로페셔널처럼 사용하실 수 있습니다. 252 | 253 | # 삽질하면서 FFmpeg libav 배우기 254 | 255 | > 가끔 '소리나는 것과 보이는 것이' 궁금하지 않으세요? 256 | > **David Robert Jones** 257 | 258 | [FFmpeg](#ffmpeg---command-line)는 미디어 파일들에 대한 필수 작업들을 수행하는 명령줄 도구로써 매우 유용합니다. 어떻게 우리의 프로그램에 이용할 수 있을까요? 259 | 260 | FFmpeg는 우리의 프로그램에 통합될 수 있는 [여러 라이브러리들로 구성](https://www.ffmpeg.org/doxygen/trunk/index.html)되어있습니다. 261 | 보통, FFmpeg을 설치할때 이 모든 라이브러리들도 자동으로 설치됩니다. 이 라이브러리 모음들을 **FFmpeg libav**라고 해보죠.
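본격적으로 코드를 보기 전에, 이 라이브러리들이 실제로 링크되는지 확인하는 최소한의 C 프로그램 스케치를 하나 적어봅니다 (pkg-config와 FFmpeg 개발 헤더가 설치되어 있다고 가정한 예시이며, 파일 이름 `check.c`는 임의로 정한 것입니다):

```c
#include <stdio.h>
// FFmpeg libav는 C 라이브러리입니다 (C++에서 쓸 때는 extern "C"로 감싸야 합니다).
#include <libavformat/avformat.h>

int main(void)
{
  // 링크가 제대로 되었는지 libavformat의 버전만 출력해 봅니다.
  printf("libavformat version: %u\n", avformat_version());
  return 0;
}
```

컴파일 예시: `gcc check.c $(pkg-config --cflags --libs libavformat) -o check`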
262 | 263 | > 이 제목은 Zed Shaw의 [Learn X the Hard Way](https://learncodethehardway.org/) 시리즈, 특히 그의 책 Learn C the Hard Way에 대한 오마주입니다. 264 | 265 | ## 챕터 0 - 악명 높은 hello world 266 | 이 hello world는 실제로 `"hello world"` 메시지를 터미널에 보여주진 않습니다. :tongue: 267 | 대신 우리는 **비디오의 정보를 출력**할 것입니다. 비디오의 포맷 (컨테이너), 길이, 해상도, 오디오 채널들 같은 것들을 말이죠. 그리고 마지막으로 **몇몇 프레임들을 디코딩하고 이미지 파일로 저장**해보겠습니다. 268 | 269 | ### FFmpeg libav 아키텍처 270 | 271 | 하지만 코딩을 시작하기 전에, **FFmpeg libav 아키텍처**가 어떻게 동작하는지 이것들의 컴포넌트들이 서로 어떻게 통신하는지를 배워봅시다. 272 | 273 | 여기 비디오가 디코딩되는 프로세스를 담은 다이어그램이 하나 있습니다. 274 | 275 | ![ffmpeg libav architecture - decoding process](/img/decoding.png) 276 | 277 | 우선 여러분의 미디어 파일을 [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (비디오 컨테이너는 포맷이라고도 합니다)라고 불리는 컴포넌트로 불러올 필요가 있습니다. 278 | 이건 사실 파일 전체를 불러오는건 아닙니다: 종종 헤더만을 읽죠. 279 | 280 | 일단 최소한의 **컨테이너 헤더**를 불러왔다면, 우리는 이것의 스트림 (기본적이고 필수적인 오디오와 비디오 데이터라고 간주하시면 됩니다)에 접근할 수 있습니다. 281 | 각 스트림은 [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)라고 하는 컴포넌트로 접근 가능합니다. 282 | 283 | > 스트림은 데이터의 연속적인 흐름을 의미하는 fancy한 이름입니다. 284 | 285 | 비디오가 두개의 스트림을 가지고 있다고 해봅시다: 오디오는 [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding)로 인코딩되어있고 비디오는 [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC)로 인코딩되어있습니다. 각 스트림으로부터 [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 컴포넌트로 로드될 패킷이라 칭하는 **데이터의 조각들**을 추출할 수 있습니다. 286 | 287 | **패킷안의 데이터는 여전히 인코딩되어 있습니다** (압축된상태). 이 패킷을 디코딩하기 위해서 우리는 이것들을 특정한 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html)에 넘겨야합니다. 288 | 289 | `AVCodec`은 그것들을 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html)으로 디코딩하며 최종적으로 우리에게 **압축 해제된 프레임**을 넘겨줍니다. 오디오 및 비디오 스트림에서 동일한 용어/프로세스가 사용된다는 점을 유의하십시오. 290 | 291 | ### 요구 사항 292 | 293 | 간혹 예제를 컴파일하고 실행하는데 이슈들을 겪는 분들이 계셔서 **우리의 개발/실행 환경으로 [`Docker`](https://docs.docker.com/install/)를 사용할 것입니다,** 우리는 또한 big buck bunny 비디오를 사용할 것인데 따로 로컬에 가지고 있지 않다면 `make fetch_small_bunny_video` 명령만 실행해주시면 됩니다. 294 | 295 | ### 챕터 0 - 몸풀기 코드 296 | 297 | > #### TLDR; [코드](/0_hello_world.c)랑 실행하는거나 보여주세요. 298 | > ```bash 299 | > $ make run_hello 300 | > ``` 301 | > 좀 상세한 부분은 넘어가겠습니다. 그러나 걱정하진 마세요: [소스 코드는 github에 있습니다](/0_hello_world.c). 302 | 303 | 포맷 (컨테이너)에 관한 정보를 담고 있는 [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 컴포넌트에게 메모리를 할당합니다. 304 | 305 | ```c 306 | AVFormatContext *pFormatContext = avformat_alloc_context(); 307 | ``` 308 | 309 | 이제 우리는 파일을 열고 헤더를 읽어서 `AVFormatContext`에 포맷에 관한 기본적인 정보를 채워줄 것입니다 (보통 코덱은 열리지 않음). 310 | 이를 위해 사용할 함수는 [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49)입니다. 이 함수는 `AVFormatContext`와 `filename` 두개의 옵셔널 인자를 받습니다: [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (`NULL`을 넘기면 FFmpeg이 포맷을 추측)과 [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (demuxer에 대한 옵션) 311 | 312 | ```c 313 | avformat_open_input(&pFormatContext, filename, NULL, NULL); 314 | ``` 315 | 316 | 포맷 이름과 미디어 길이를 출력할 수 있습니다: 317 | 318 | ```c 319 | printf("Format %s, duration %lld us", pFormatContext->iformat->long_name, pFormatContext->duration); 320 | ``` 321 | 322 | `streams`에 접근하기 위해서는, 미디어로부터 데이터를 읽어야합니다. [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) 함수가 그 일을 하죠. 
323 | `pFormatContext->nb_streams`가 스트림의 개수를 가지고 있고 `pFormatContext->streams[i]`는 `i`번째 스트림 ([`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html))을 반환합니다.
324 | 
325 | ```c
326 | avformat_find_stream_info(pFormatContext, NULL);
327 | ```
328 | 
329 | 이제 모든 스트림에 대해 루프를 돌아보겠습니다.
330 | 
331 | ```c
332 | for (int i = 0; i < pFormatContext->nb_streams; i++)
333 | {
334 |   //
335 | }
336 | ```
337 | 
338 | 각 스트림에 대해서, `i`번째 스트림에 사용된 코덱 속성들을 담고있는 [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html)를 가져오겠습니다.
339 | 
340 | ```c
341 | AVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;
342 | ```
343 | 
344 | 이 코덱 속성을 이용하여 [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) 함수로 적절한 코덱을 찾을 수 있습니다. 이 함수는 코덱 id에 맞는 등록된 디코더를 찾아, 스트림을 어떻게 en**CO**de하고 **DEC**ode할지 알고 있는 [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html) 컴포넌트를 반환합니다.
345 | 
346 | ```c
347 | AVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);
348 | ```
349 | 
350 | 이제 코덱에 관한 정보를 출력할 수 있습니다.
351 | 
352 | ```c
353 | // specific for video and audio
354 | if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {
355 |   printf("Video Codec: resolution %d x %d", pLocalCodecParameters->width, pLocalCodecParameters->height);
356 | } else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {
357 |   printf("Audio Codec: %d channels, sample rate %d", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);
358 | }
359 | // general
360 | printf("\tCodec %s ID %d bit_rate %lld", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);
361 | ```
362 | 
363 | 이 코덱을 기반으로 디코딩/인코딩 프로세스에 대한 컨텍스트를 담고있는 [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html)의 메모리를 할당할 수 있습니다. 그 다음 코덱 파라미터로 코덱 컨텍스트를 채워줍니다; [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16)로 가능합니다.
364 | 
365 | 일단 코덱 컨텍스트를 채웠다면 이제 코덱을 열 수 있습니다. [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d)로 가능합니다.
366 | 
367 | ```c
368 | AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);
369 | avcodec_parameters_to_context(pCodecContext, pCodecParameters);
370 | avcodec_open2(pCodecContext, pCodec, NULL);
371 | ```
372 | 
373 | 이제 스트림으로부터 패킷을 읽고 디코딩하여 프레임으로 만들어볼 예정입니다. 그러나 우선, [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html)과 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) 두 컴포넌트에 대해 메모리 할당이 필요합니다.
374 | 
375 | ```c
376 | AVPacket *pPacket = av_packet_alloc();
377 | AVFrame *pFrame = av_frame_alloc();
378 | ```
379 | 
380 | 패킷이 존재하는 동안 루프를 돌면서 [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) 함수를 이용해 스트림으로부터 패킷을 받아오겠습니다.
381 | 
382 | ```c
383 | while (av_read_frame(pFormatContext, pPacket) >= 0) {
384 |   //...
385 | }
386 | ```
387 | 
388 | [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 함수를 이용해, 코덱 컨텍스트를 통해 디코더에 **raw 데이터 패킷 (압축된 프레임)을 보내**봅시다.
389 | 
390 | ```c
391 | avcodec_send_packet(pCodecContext, pPacket);
392 | ```
393 | 
394 | 그리고 마찬가지로 [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 함수를 이용해, 같은 코덱 컨텍스트를 통해 디코더로부터 **raw 데이터 프레임 (압축 해제된 프레임)을 받아**봅시다.
395 | 
396 | ```c
397 | avcodec_receive_frame(pCodecContext, pFrame);
398 | ```
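
본문에서는 보내기/받기를 한 번씩만 보여줬지만, 패킷 하나가 프레임 0개 이상으로 디코딩될 수 있습니다. 그래서 실제 디코딩 루프는 [0_hello_world.c](/0_hello_world.c)처럼 `AVERROR(EAGAIN)`(디코더가 새 입력을 더 원함)과 `AVERROR_EOF`(스트림 끝)를 처리해야 합니다. 대략적인 형태는 이렇습니다:

```c
// 패킷 하나를 보내고, 그 패킷에서 나올 수 있는 모든 프레임을 받아옵니다.
int response = avcodec_send_packet(pCodecContext, pPacket);
while (response >= 0) {
  response = avcodec_receive_frame(pCodecContext, pFrame);
  if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
    break; // 새 패킷이 필요하거나(EAGAIN) 스트림이 끝남(EOF)
  } else if (response < 0) {
    return response; // 실제 디코딩 에러
  }
  // 여기서 pFrame을 사용합니다 (예: 이미지로 저장)
  av_frame_unref(pFrame);
}
```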
399 | 
400 | 프레임 번호, [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [프레임 타입](https://en.wikipedia.org/wiki/Video_compression_picture_types) 등을 출력해볼 수 있습니다.
401 | 
402 | ```c
403 | printf(
404 |     "Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]",
405 |     av_get_picture_type_char(pFrame->pict_type),
406 |     pCodecContext->frame_number,
407 |     pFrame->pts,
408 |     pFrame->pkt_dts,
409 |     pFrame->key_frame,
410 |     pFrame->coded_picture_number,
411 |     pFrame->display_picture_number
412 | );
413 | ```
414 | 
415 | 마지막으로 디코딩된 프레임을 [심플 흑백 이미지](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example)로 저장해볼 수 있습니다. 이 과정은 매우 단순합니다. 인덱스가 [planes Y, Cb, Cr](https://en.wikipedia.org/wiki/YCbCr)을 가리키는 `pFrame->data`를 사용하며, 흑백 이미지를 저장하기 위해 `0` (Y) 인덱스를 선택했습니다.
416 | 
417 | ```c
418 | save_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);
419 | 
420 | static void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)
421 | {
422 |     FILE *f;
423 |     int i;
424 |     f = fopen(filename,"w");
425 |     // writing the minimal required header for a pgm file format
426 |     // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example
427 |     fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
428 | 
429 |     // writing line by line
430 |     for (i = 0; i < ysize; i++)
431 |         fwrite(buf + i * wrap, 1, xsize, f);
432 |     fclose(f);
433 | }
434 | ```
435 | 
436 | voilà! 이제 우리는 2MB짜리 흑백 이미지를 얻어냈습니다:
437 | 
438 | ![saved frame](/img/generated_frame.png)
439 | 
440 | ## 챕터 1 - 오디오와 비디오 동기화
441 | 
442 | > **플레이어가 되세요** - 신규 MSE 비디오 플레이어를 작성 중인 젊은 JS 개발자
443 | 
444 | [트랜스코딩 예제 코드](#챕터-3---트랜스코딩-transcoding)로 넘어가기 전에 **타이밍**, 혹은 어떻게 비디오 플레이어가 하나의 프레임을 제시간에 재생하는지에 대해서 이야기해봅시다.
445 | 
446 | 지난 예제에서, 우리는 이렇게 보이는 프레임들을 저장했습니다.
447 | 
448 | ![frame 0](/img/hello_world_frames/frame0.png)
449 | ![frame 1](/img/hello_world_frames/frame1.png)
450 | ![frame 2](/img/hello_world_frames/frame2.png)
451 | ![frame 3](/img/hello_world_frames/frame3.png)
452 | ![frame 4](/img/hello_world_frames/frame4.png)
453 | ![frame 5](/img/hello_world_frames/frame5.png)
454 | 
455 | 비디오 플레이어를 디자인할 때는 **각 프레임을 주어진 속도로 재생**해야 합니다. 그렇지 않으면 너무 빠르거나 너무 느리게 재생되어 비디오를 제대로 즐기기 어려울 것입니다.
456 | 
457 | 그래서 프레임을 원활하게 재생할 수 있는 로직을 도입할 필요가 있습니다. 이를 위해 각 프레임은 **프리젠테이션 타임스탬프**(PTS)를 갖습니다. PTS는 **타임베이스(timebase)** 라는 유리수(분모는 **타임스케일(timescale)** 이라고 합니다)를 단위로 증가하는 숫자이며, 그 증가폭은 타임스케일을 **프레임속도(fps)** 로 나눈 값으로 정해집니다.
458 | 
459 | 예제를 좀 본다면 이해가 더 쉬울 것입니다. 몇 개의 시나리오를 시뮬레이션해보죠.
460 | 
461 | `fps=60/1` 이고 `timebase=1/60000` 라면 각 PTS는 `timescale / fps = 1000`씩 증가할 것입니다. 그래서 각 프레임의 **PTS 실제 시간**은 이렇게 됩니다 (0부터 시작한다고 하면):
462 | 
463 | * `frame=0, PTS = 0, PTS_TIME = 0`
464 | * `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`
465 | * `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`
466 | 
467 | 거의 동일한 시나리오에서 타임베이스만 `1/60`이라면:
468 | 
469 | * `frame=0, PTS = 0, PTS_TIME = 0`
470 | * `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`
471 | * `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`
472 | * `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`
473 | 
474 | `fps=25/1`과 `timebase=1/75`에 대해서는 각 PTS가 `timescale / fps = 3`만큼 증가할 것이고 PTS 시간은 이렇게 될 것입니다:
475 | 
476 | * `frame=0, PTS = 0, PTS_TIME = 0`
477 | * `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`
478 | * `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`
479 | * `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`
480 | * ...
481 | * `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`
482 | * ...
483 | * `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`
484 | 
485 | 이제 이 `pts_time`을 이용해 오디오의 `pts_time` 혹은 시스템 시간과 동기화해서 재생할 방법을 찾을 수 있습니다. FFmpeg libav는 그 정보들을 아래 API를 통해 제공합니다.
486 | 
487 | - fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)
488 | - tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)
489 | - tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)
490 | 
491 | 호기심에 살펴보자면, 우리가 저장했던 프레임들은 DTS 순서 (frames: 1,6,4,2,3,5)로 전송되었지만 재생은 PTS 순서 (frames: 1,2,3,4,5,6)로 되었습니다. 또한, B-프레임이 P 혹은 I-프레임 대비 얼마나 저렴한지도 알 수 있죠.
492 | 
493 | ```
494 | LOG: AVStream->r_frame_rate 60/1
495 | LOG: AVStream->time_base 1/60000
496 | ...
497 | LOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]
498 | LOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]
499 | LOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]
500 | LOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]
501 | LOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]
502 | LOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]
503 | ```
504 | 
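위 계산(`PTS_TIME = PTS * timebase`)을 코드로 옮기면, 유리수 타임베이스를 [`av_q2d`](https://ffmpeg.org/doxygen/trunk/group__lavu__math__rational.html)로 `double`로 바꿔 곱하면 됩니다. 아래는 가상의 변수명(`st`, `pFrame`)을 사용한 간단한 스케치입니다:

```c
// st는 AVStream*, pFrame은 방금 디코딩된 AVFrame*이라고 가정합니다.
double pts_time = pFrame->pts * av_q2d(st->time_base);  // PTS -> 초 단위
double frame_delay = 1.0 / av_q2d(st->avg_frame_rate);  // 프레임 간 간격(초)
```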
505 | ## 챕터 2 - 리먹싱 (remuxing)
506 | 
507 | Remuxing은 하나의 포맷 (컨테이너)에서 다른 것으로 변경하는 작업입니다. 다음 예제처럼 FFmpeg을 쓰면 별로 어렵지 않게 [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) 비디오를 [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream)로 변경할 수 있습니다:
508 | 
509 | ```bash
510 | ffmpeg input.mp4 -c copy output.ts
511 | ```
512 | 
513 | 이것은 mp4를 demux하지만 (`-c copy`) 디코딩이나 인코딩은 하지 않고, 최종적으로 `mpegts` 파일로 mux합니다. 만약 포맷을 의미하는 `-f`를 제공하지 않으면 ffmpeg은 파일 확장자로 포맷을 추측할 것입니다.
514 | 
515 | FFmpeg 혹은 libav의 일반적인 사용법은 아래 패턴/아키텍처 또는 워크플로우를 따릅니다:
516 | * **[프로토콜 레이어](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - `input`을 받음 (예를들면 `file`이지만 `rtmp` 또는 `HTTP` 입력도 가능).
517 | * **[포맷 레이어](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 컨텐츠를 `demux`하여 대부분의 메타데이터와 스트림을 드러냄
518 | * **[코덱 레이어](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 압축된 스트림 데이터를 `decode` *optional*
519 | * **[픽셀 레이어](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - raw 프레임에 대해 (리사이징 같은) `filter`를 적용할 수도 있음 *optional*
520 | * 그리고나서 역방향 경로를 수행합니다
521 | * **[코덱 레이어](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - raw 프레임을 `encode` (또는 `re-encode` 혹은 `transcode` 까지도) *optional*
522 | * **[포맷 레이어](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - raw 스트림 (압축된 데이터)를 `mux` (또는 `remux`)
523 | * **[프로토콜 레이어](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - 그리고 마지막으로 mux된 데이터를 `output`으로 전송 (또다른 파일 혹은 네트워크 원격 서버일 수도 있음)
524 | 
525 | ![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)
526 | > 이 그래프는 [Leixiaohua](http://leixiaohua1020.github.io/#ffmpeg-development-examples)와 [Slhck](https://slhck.info/ffmpeg-encoding-course/#/9)의 작업으로부터 큰 영감을 받은 것입니다.
527 | 
528 | 자 이제 `ffmpeg input.mp4 -c copy output.ts`와 동일한 효과를 제공할 수 있도록 libav를 이용한 예제를 하나 구현해봅시다.
529 | 
530 | 입력 (`input_format_context`)으로부터 읽은 것을 다른 출력 (`output_format_context`)으로 변환해보겠습니다.
531 | 
532 | ```c
533 | AVFormatContext *input_format_context = NULL;
534 | AVFormatContext *output_format_context = NULL;
535 | ```
536 | 
537 | 일반적으로 하듯 메모리를 할당하고 입력 포맷을 여는 것부터 시작합니다. 이번같은 특정한 경우에는, 입력 파일을 열고나서 출력 파일을 위한 메모리를 할당하겠습니다.
538 | 
539 | ```c
540 | if ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {
541 |   fprintf(stderr, "Could not open input file '%s'", in_filename);
542 |   goto end;
543 | }
544 | if ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {
545 |   fprintf(stderr, "Failed to retrieve input stream information");
546 |   goto end;
547 | }
548 | 
549 | avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);
550 | if (!output_format_context) {
551 |   fprintf(stderr, "Could not create output context\n");
552 |   ret = AVERROR_UNKNOWN;
553 |   goto end;
554 | }
555 | ```
556 | 
557 | 비디오, 오디오, 자막 타입의 스트림만 remux할 것이므로, 사용하게 될 스트림의 인덱스를 배열에 담아두겠습니다.
558 | 
559 | ```c
560 | number_of_streams = input_format_context->nb_streams;
561 | streams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));
562 | ```
563 | 
564 | 필요한만큼의 메모리를 할당한 후, 모든 스트림에 대해 각각 루프를 돌면서 [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 함수를 통해 출력 포맷 컨텍스트에 새로운 출력 스트림을 생성해야합니다. 비디오, 오디오, 자막이 아닌 모든 스트림들에 대해서는 마킹을 해서 나중에 스킵할 수 있게 하겠습니다.
565 | 
566 | ```c
567 | for (i = 0; i < input_format_context->nb_streams; i++) {
568 |   AVStream *out_stream;
569 |   AVStream *in_stream = input_format_context->streams[i];
570 |   AVCodecParameters *in_codecpar = in_stream->codecpar;
571 |   if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
572 |       in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
573 |       in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
574 |     streams_list[i] = -1;
575 |     continue;
576 |   }
577 |   streams_list[i] = stream_index++;
578 |   out_stream = avformat_new_stream(output_format_context, NULL);
579 |   if (!out_stream) {
580 |     fprintf(stderr, "Failed allocating output stream\n");
581 |     ret = AVERROR_UNKNOWN;
582 |     goto end;
583 |   }
584 |   ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
585 |   if (ret < 0) {
586 |     fprintf(stderr, "Failed to copy codec parameters\n");
587 |     goto end;
588 |   }
589 | }
590 | ```
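
여기서 한 가지 짚고 넘어갈 점: FFmpeg이 공식적으로 배포하는 remuxing 예제(`doc/examples`)는 코덱 파라미터를 복사한 직후 `codec_tag`를 0으로 초기화합니다. 코덱 태그(FOURCC)는 컨테이너마다 다를 수 있어서, 입력 컨테이너의 태그를 그대로 가져가면 출력 muxer가 거부할 수 있기 때문입니다. 본문 코드에는 없는 부분이지만, 루프 안에 아래 한 줄을 추가하는 것을 고려해볼 만합니다:

```c
// 입력 컨테이너의 FOURCC를 버리고, 출력 muxer가 컨테이너에 맞는 태그를 고르게 합니다.
out_stream->codecpar->codec_tag = 0;
```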
591 | 
592 | 이제 출력 파일을 생성할 수 있습니다.
593 | 
594 | ```c
595 | if (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {
596 |   ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);
597 |   if (ret < 0) {
598 |     fprintf(stderr, "Could not open output file '%s'", out_filename);
599 |     goto end;
600 |   }
601 | }
602 | 
603 | ret = avformat_write_header(output_format_context, NULL);
604 | if (ret < 0) {
605 |   fprintf(stderr, "Error occurred when opening output file\n");
606 |   goto end;
607 | }
608 | ```
609 | 
610 | 그런 후에, 입력 스트림에서 패킷을 하나씩 출력 스트림으로 복사하겠습니다. 패킷이 존재하는 동안 (`av_read_frame`) 루프를 돌면서, 각 패킷에 대해 PTS와 DTS를 다시 계산하고 마지막으로 포맷 컨텍스트에 (`av_interleaved_write_frame`) 씁니다.
611 | 
612 | ```c
613 | while (1) {
614 |   AVStream *in_stream, *out_stream;
615 |   ret = av_read_frame(input_format_context, &packet);
616 |   if (ret < 0)
617 |     break;
618 |   in_stream = input_format_context->streams[packet.stream_index];
619 |   if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {
620 |     av_packet_unref(&packet);
621 |     continue;
622 |   }
623 |   packet.stream_index = streams_list[packet.stream_index];
624 |   out_stream = output_format_context->streams[packet.stream_index];
625 |   /* copy packet */
626 |   packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
627 |   packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
628 |   packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);
629 |   // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903
630 |   packet.pos = -1;
631 | 
632 |   // https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1
633 |   ret = av_interleaved_write_frame(output_format_context, &packet);
634 |   if (ret < 0) {
635 |     fprintf(stderr, "Error muxing packet\n");
636 |     break;
637 |   }
638 |   av_packet_unref(&packet);
639 | }
640 | ```
641 | 
642 | 마무리를 위해 [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) 함수를 통해 스트림 트레일러(trailer)를 출력 미디어 파일에 씁니다.
643 | 
644 | ```c
645 | av_write_trailer(output_format_context);
646 | ```
647 | 
648 | 이제 테스트할 준비가 되었습니다. 첫번째 테스트는 MP4에서 MPEG-TS 비디오 파일로의 포맷 (비디오 컨테이너) 변환입니다. 우리는 기본적으로 `ffmpeg input.mp4 -c copy output.ts` 명령줄을 libav를 이용해 만든 것입니다.
649 | 
650 | ```bash
651 | make run_remuxing_ts
652 | ```
653 | 
654 | 동작합니다!!! 절 믿지 않았나요?! 그러시면 안되죠, `ffprobe`로 한번 확인해보겠습니다:
655 | 
656 | ```bash
657 | ffprobe -i remuxed_small_bunny_1080p_60fps.ts
658 | 
659 | Input #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':
660 |   Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s
661 |   Program 1
662 |     Metadata:
663 |       service_name    : Service01
664 |       service_provider: FFmpeg
665 |     Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc
666 |     Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s
667 | ```
668 | 
669 | 우리가 했던 것을 그래프로 정리하기 위해, 초반 [libav의 동작 방식에 대한 아이디어](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture)를 다시 한번 살펴보면 코덱 부분만 건너뛴걸 볼 수 있습니다.
670 | 
671 | ![remuxing libav components](/img/remuxing_libav_components.png)
672 | 
673 | 이 챕터를 끝내기 전에 리먹싱(remuxing) 프로세스의 중요한 부분을 보여드리고자 합니다. 바로 **muxer에 옵션을 줄 수 있다**는 것인데요.
만약에 전송을 [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) 포맷으로 하고 싶다면 MPEG-TS나 기본 MPEG-4 대신 (`fmp4`라고 부르는) [fragmented mp4](https://stackoverflow.com/a/35180327)를 사용해야합니다.
674 | 
675 | [명령줄로는 이렇게 쉽게 할 수 있습니다](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).
676 | 
677 | ```
678 | ffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4
679 | ```
680 | 
681 | libav 버전도 거의 명령줄 만큼이나 쉽습니다. 패킷을 복사하기 바로 전, 출력 헤더를 쓸 때 해당 옵션을 넘겨주기만 하면 됩니다.
682 | 
683 | ```c
684 | AVDictionary* opts = NULL;
685 | av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov+default_base_moof", 0);
686 | ret = avformat_write_header(output_format_context, &opts);
687 | ```
688 | 
689 | 이제 fragmented mp4 파일을 생성할 수 있습니다:
690 | 
691 | ```bash
692 | make run_remuxing_fragmented_mp4
693 | ```
694 | 
695 | 제가 여러분께 거짓말하고 있지 않다는걸 보여드리죠. 결과물의 차이를 확인하기 위해 [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) 혹은 [http://mp4parser.com/](http://mp4parser.com/) 같은 아주 훌륭한 사이트/툴을 이용할 수 있습니다. 일단 "common" mp4 파일을 로드해보세요.
696 | 
697 | ![mp4 boxes](/img/boxes_normal_mp4.png)
698 | 
699 | 보시다시피 단 하나의 `mdat` 박스(atom)가 있습니다. **여기에 비디오와 오디오 프레임이 담겨있습니다**. 이번엔 fragmented mp4를 로드해서 `mdat` 박스가 어떻게 흩어져있는지 보시겠습니다.
700 | 
701 | ![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)
702 | 
703 | ## 챕터 3 - 트랜스코딩 (transcoding)
704 | 
705 | > #### TLDR; [코드](/3_transcoding.c)랑 실행하는거나 보여주세요.
706 | > ```bash
707 | > $ make run_transcoding
708 | > ```
709 | > 좀 상세한 부분은 넘어가겠습니다. 그러나 걱정하진 마세요: [소스 코드는 github에 있습니다](/3_transcoding.c).
710 | 
711 | 이번 챕터에서는 C로 작성된 아주 간단한 트랜스코더를 만들어보겠습니다. 이것으로 H264로 인코딩된 비디오를 H265로 변환할 수 있을겁니다. **FFmpeg/libav** 라이브러리, 특히 [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, libavutil을 이용하겠습니다.
712 | 
713 | ![media transcoding flow](/img/transcoding_flow.png)
714 | 
715 | > _빠르게 복습해보면:_ [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html)는 컨테이너 (ex: MKV, MP4, Webm, TS) 같은 미디어 파일 포맷에 대한 추상화 구조체입니다. [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html)은 주어진 포맷에서 각 데이터 유형 (ex: 오디오, 비디오, 자막, 메타데이터)을 나타냅니다. [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html)은 `AVStream`으로부터 얻어진 압축된 데이터의 조각입니다. 그리고 이것은 [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ex: av1, h264, vp9, hevc)에 의해 디코딩되어 [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html)이라고 불리는 raw 데이터로 만들어집니다.
716 | 
717 | ### 트랜스먹싱 (Transmuxing)
718 | 
719 | 간단한 트랜스먹싱 작업부터 시작해봅시다. 그런 다음 이 코드를 기반으로 확장해나갈 수 있을 겁니다. 첫번째 단계는 **입력 파일 로드하기**입니다.
720 | 
721 | ```c
722 | // Allocate an AVFormatContext
723 | avfc = avformat_alloc_context();
724 | // Open an input stream and read the header.
725 | avformat_open_input(&avfc, in_filename, NULL, NULL);
726 | // Read packets of a media file to get stream information.
727 | avformat_find_stream_info(avfc, NULL);
728 | ```
729 | 
730 | 이제 디코더를 설정할 것입니다. `AVFormatContext`가 모든 `AVStream` 컴포넌트에 접근할 수 있게 해주는데, 각각의 스트림에 대해서 `AVCodec`을 가져와 특정 `AVCodecContext`를 생성하고, 마지막으로 해당 코덱을 열면 디코딩 프로세스를 수행할 수 있습니다.
731 | 
732 | > [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html)는 비트레이트, 프레임 속도, 샘플레이트, 채널, 높이 등과 같은 미디어 설정에 대한 데이터를 가지고 있습니다.
733 | 
734 | ```c
735 | for (int i = 0; i < avfc->nb_streams; i++)
736 | {
737 |   AVStream *avs = avfc->streams[i];
738 |   AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);
739 |   AVCodecContext *avcc = avcodec_alloc_context3(avc);
740 |   avcodec_parameters_to_context(avcc, avs->codecpar);
741 |   avcodec_open2(avcc, avc, NULL);
742 | }
743 | ```
744 | 
745 | 트랜스먹싱에서도 마찬가지로 출력 미디어 파일을 준비해둬야합니다. 우선 출력 `AVFormatContext`에 대해 **메모리를 할당**하고, 이 출력 포맷에 **각 스트림**을 생성합니다. 스트림을 제대로 채우기 위해 디코더로부터 **코덱 파라미터를 복사**합니다.
746 | 
747 | 인코더가 글로벌 헤더를 사용할 수 있도록 지정하는 `AV_CODEC_FLAG_GLOBAL_HEADER` **플래그를 설정**합니다. 그리고 출력으로 **쓰기 위한 파일**을 열고 헤더를 저장합니다.
748 | 
749 | ```c
750 | avformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);
751 | 
752 | AVStream *avs = avformat_new_stream(encoder_avfc, NULL);
753 | avcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);
754 | 
755 | if (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)
756 |   encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
757 | 
758 | avio_open(&encoder_avfc->pb, out_filename, AVIO_FLAG_WRITE);
759 | avformat_write_header(encoder_avfc, NULL);
760 | 
761 | ```
762 | 
763 | 디코더로부터 `AVPacket`을 얻어서, 타임스탬프를 조정하고, 패킷을 출력 파일에 제대로 씁니다. `av_interleaved_write_frame`은 함수 이름이 "write frame"이라고 되어있긴 하지만 실제로는 패킷을 저장합니다. 이제 파일에 스트림 트레일러를 쓰면서 트랜스먹싱 프로세스를 마무리합니다.
764 | 
765 | ```c
766 | AVFrame *input_frame = av_frame_alloc();
767 | AVPacket *input_packet = av_packet_alloc();
768 | 
769 | while (av_read_frame(decoder_avfc, input_packet) >= 0)
770 | {
771 |   av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);
772 |   av_interleaved_write_frame(encoder_avfc, input_packet);
773 | }
774 | 
775 | av_write_trailer(encoder_avfc);
776 | ```
777 | 
778 | ### 트랜스코딩 (Transcoding)
779 | 
780 | 이전 섹션에서 간단한 트랜스먹서 프로그램을 봤는데요, 이번엔 여기에 인코딩 기능을 추가해보겠습니다. 특히, `h264`에서 `h265`로 비디오를 트랜스코딩할 수 있게 하겠습니다.
781 | 
782 | 디코더를 준비한 후, 그리고 출력 미디어 파일을 다루기 전에 인코더를 설정할 것입니다.
783 | 
784 | * 인코더에 비디오 `AVStream`을 생성합니다, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)
785 | * `libx265`라고 하는 `AVCodec`을 사용합니다, [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)
786 | * 생성한 코덱을 기반으로 `AVCodecContext`를 생성합니다, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)
787 | * 트랜스코딩 세션에 대해 기본적인 속성들을 설정합니다, 그리고
788 | * 코덱을 열고 컨텍스트에서 스트림으로 파라미터를 복사합니다. [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d), [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)
789 | 
790 | ```c
791 | AVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);
792 | AVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);
793 | 
794 | char *codec_name = "libx265";
795 | char *codec_priv_key = "x265-params";
796 | // we're going to use internal options for the x265
797 | // it disables the scene change detection and fixes the
798 | // GOP to 60 frames.
799 | char *codec_priv_value = "keyint=60:min-keyint=60:scenecut=0";
800 | 
801 | AVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);
802 | AVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);
803 | // encoder codec params
804 | av_opt_set(video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);
805 | video_avcc->height = decoder_video_avcc->height;
806 | video_avcc->width = decoder_video_avcc->width;
807 | video_avcc->pix_fmt = video_avc->pix_fmts[0];
808 | // control rate
809 | video_avcc->bit_rate = 2 * 1000 * 1000;
810 | video_avcc->rc_buffer_size = 4 * 1000 * 1000;
811 | video_avcc->rc_max_rate = 2 * 1000 * 1000;
812 | video_avcc->rc_min_rate = 2.5 * 1000 * 1000;
813 | // time base
814 | video_avcc->time_base = av_inv_q(input_framerate);
815 | video_avs->time_base = video_avcc->time_base;
816 | 
817 | avcodec_open2(video_avcc, video_avc, NULL);
818 | avcodec_parameters_from_context(video_avs->codecpar, video_avcc);
819 | ```
820 | 
821 | 비디오 스트림의 트랜스코딩을 위해 디코딩 루프를 확장해야 합니다.
822 | 
823 | * 디코더에 `AVPacket`을 전송합니다, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)
824 | * 압축이 해제된 `AVFrame`을 받아옵니다, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)
825 | * 이 raw 프레임의 트랜스코딩을 시작합니다,
826 | * raw 프레임을 (인코더에) 보내고, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)
827 | * 코덱에 맞게 압축된 `AVPacket`을 받아옵니다, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)
828 | * 타임스탬프를 설정하고, [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)
829 | * 패킷을 출력 파일에 씁니다. [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)
830 | 
831 | ```c
832 | AVFrame *input_frame = av_frame_alloc();
833 | AVPacket *input_packet = av_packet_alloc();
834 | 
835 | while (av_read_frame(decoder_avfc, input_packet) >= 0)
836 | {
837 |   int response = avcodec_send_packet(decoder_video_avcc, input_packet);
838 |   while (response >= 0) {
839 |     response = avcodec_receive_frame(decoder_video_avcc, input_frame);
840 |     if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
841 |       break;
842 |     } else if (response < 0) {
843 |       return response;
844 |     }
845 |     if (response >= 0) {
846 |       encode(encoder_avfc, decoder_video_avs, encoder_video_avs, video_avcc, input_frame, input_packet->stream_index);
847 |     }
848 |     av_frame_unref(input_frame);
849 |   }
850 |   av_packet_unref(input_packet);
851 | }
852 | av_write_trailer(encoder_avfc);
853 | 
854 | // used function
855 | int encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext *video_avcc, AVFrame *input_frame, int index) {
856 |   AVPacket *output_packet = av_packet_alloc();
857 |   int response = avcodec_send_frame(video_avcc, input_frame);
858 | 
859 |   while (response >= 0) {
860 |     response = avcodec_receive_packet(video_avcc, output_packet);
861 |     if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
862 |       break;
863 |     } else if (response < 0) {
864 |       return -1;
865 |     }
866 | 
867 |     output_packet->stream_index = index;
868 |     output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;
869 | 
870 |     av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);
871 |     response = av_interleaved_write_frame(avfc, output_packet);
872 |   }
873 |   av_packet_unref(output_packet);
874 |   av_packet_free(&output_packet);
875 |   return 0;
876 | }
877 | 
878 | ```
879 | 
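한 가지 덧붙이면, 위 스케치는 스트림이 끝났을 때 인코더 내부에 남아 있는 프레임을 비워주는(flush) 단계를 생략하고 있습니다. send 계열 함수에 `NULL`을 넘기면 드레이닝(draining) 모드로 들어가는데, `av_write_trailer`를 호출하기 전에 대략 아래처럼 처리할 수 있습니다 (변수명은 위 예제를 따른다고 가정한 스케치입니다):

```c
// NULL 프레임을 보내 인코더에 입력이 끝났음을 알립니다 (드레이닝 시작).
AVPacket *output_packet = av_packet_alloc();
avcodec_send_frame(video_avcc, NULL);
// 남아 있는 패킷을 모두 받아, 위 encode()와 같은 방식으로 타임스탬프를 맞춘 뒤 씁니다.
while (avcodec_receive_packet(video_avcc, output_packet) >= 0) {
  av_packet_rescale_ts(output_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);
  av_interleaved_write_frame(encoder_avfc, output_packet);
}
av_packet_free(&output_packet);
```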
880 | 아시다시피 `h265` 버전의 미디어 파일이 `h264`보다 사이즈가 작기 때문에 미디어 스트림을 `h264`에서 `h265`로 변환했습니다. 하지만 [작성한 프로그램](/3_transcoding.c)은 다음의 작업들도 수행할 수 있습니다:
881 | 
882 | ```c
883 | 
884 | /*
885 |  * H264 -> H265
886 |  * Audio -> remuxed (untouched)
887 |  * MP4 - MP4
888 |  */
889 | StreamingParams sp = {0};
890 | sp.copy_audio = 1;
891 | sp.copy_video = 0;
892 | sp.video_codec = "libx265";
893 | sp.codec_priv_key = "x265-params";
894 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0";
895 | 
896 | /*
897 |  * H264 -> H264 (fixed gop)
898 |  * Audio -> remuxed (untouched)
899 |  * MP4 - MP4
900 |  */
901 | StreamingParams sp = {0};
902 | sp.copy_audio = 1;
903 | sp.copy_video = 0;
904 | sp.video_codec = "libx264";
905 | sp.codec_priv_key = "x264-params";
906 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
907 | 
908 | /*
909 |  * H264 -> H264 (fixed gop)
910 |  * Audio -> remuxed (untouched)
911 |  * MP4 - fragmented MP4
912 |  */
913 | StreamingParams sp = {0};
914 | sp.copy_audio = 1;
915 | sp.copy_video = 0;
916 | sp.video_codec = "libx264";
917 | sp.codec_priv_key = "x264-params";
918 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
919 | sp.muxer_opt_key = "movflags";
920 | sp.muxer_opt_value = "frag_keyframe+empty_moov+delay_moov+default_base_moof";
921 | 
922 | /*
923 |  * H264 -> H264 (fixed gop)
924 |  * Audio -> AAC
925 |  * MP4 - MPEG-TS
926 |  */
927 | StreamingParams sp = {0};
928 | sp.copy_audio = 0;
929 | sp.copy_video = 0;
930 | sp.video_codec = "libx264";
931 | sp.codec_priv_key = "x264-params";
932 | sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
933 | sp.audio_codec = "aac";
934 | sp.output_extension = ".ts";
935 | 
936 | /* WIP :P -> it's not playing on VLC, the final bit rate is huge
937 |  * H264 -> VP9
938 |  * Audio -> Vorbis
939 |  * MP4 - WebM
940 |  */
941 | //StreamingParams sp = {0};
942 | //sp.copy_audio = 0;
943 | //sp.copy_video = 0;
944 | //sp.video_codec = "libvpx-vp9";
945 | //sp.audio_codec = "libvorbis";
946 | //sp.output_extension = ".webm";
947 | 
948 | ```
949 | 
950 | > 이제서야 솔직히 말하자면, [제가 생각했던 것보다 더 삽질](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/53)했는데요. [FFmpeg 명령줄 소스코드](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749)를 파봐야했고 테스트도 엄청 돌려봤습니다. 그리고 제가 뭔가 놓치는게 있는 것 같은데요, 왜냐하면 `force-cfr`을 강제로 넣어줘야지만 `h264`가 작동하고 `warning messages (forced frame type (5) at 80 was changed to frame type (3))` 같은 경고 메시지도 여전히 나고 있기 때문이죠.
951 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [🇨🇳](/README-cn.md "Simplified Chinese")
2 | [🇰🇷](/README-ko.md "Korean")
3 | [🇪🇸](/README-es.md "Spanish")
4 | [🇻🇳](/README-vn.md "Vietnamese")
5 | [🇧🇷](/README-pt.md "Portuguese")
6 | 
7 | [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)
8 | 
9 | I was looking for a tutorial/book that would teach me how to start to use [FFmpeg](https://www.ffmpeg.org/) as a library (a.k.a. libav) and then I found the ["How to write a video player in less than 1k lines"](http://dranger.com/ffmpeg/) tutorial.
10 | Unfortunately it was deprecated, so I decided to write this one.
11 | 
12 | Most of the code in here will be in C **but don't worry**: you can easily understand and apply it to your preferred language.
13 | FFmpeg libav has lots of bindings for many languages like [python](https://pyav.org/), [go](https://github.com/imkira/go-libav) and even if your language doesn't have it, you can still support it through the `ffi` (here's an example with [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).
14 | 
15 | We'll start with a quick lesson about what is video, audio, codec and container and then we'll go to a crash course on how to use `FFmpeg` command line and finally we'll write code, feel free to skip directly to[ ](http://newmediarockstars.com/wp-content/uploads/2015/11/nintendo-direct-iwata.jpg)the section [Learn FFmpeg libav the Hard Way.](#learn-ffmpeg-libav-the-hard-way)
16 | 
17 | Some people used to say that Internet video streaming is the future of traditional TV; in any case, FFmpeg is something that is worth studying.
18 | 
19 | __Table of Contents__
20 | 
21 | * [Intro](#intro)
22 |   * [video - what you see!](#video---what-you-see)
23 |   * [audio - what you listen!](#audio---what-you-listen)
24 |   * [codec - shrinking data](#codec---shrinking-data)
25 |   * [container - a comfy place for audio and video](#container---a-comfy-place-for-audio-and-video)
26 | * [FFmpeg - command line](#ffmpeg---command-line)
27 |   * [FFmpeg command line tool 101](#ffmpeg-command-line-tool-101)
28 | * [Common video operations](#common-video-operations)
29 |   * [Transcoding](#transcoding)
30 |   * [Transmuxing](#transmuxing)
31 |   * [Transrating](#transrating)
32 |   * [Transsizing](#transsizing)
33 |   * [Bonus Round: Adaptive Streaming](#bonus-round-adaptive-streaming)
34 |   * [Going beyond](#going-beyond)
35 | * [Learn FFmpeg libav the Hard Way](#learn-ffmpeg-libav-the-hard-way)
36 |   * [Chapter 0 - The infamous hello world](#chapter-0---the-infamous-hello-world)
37 |     * [FFmpeg libav architecture](#ffmpeg-libav-architecture)
38 |   * [Chapter 1 - timing](#chapter-1---syncing-audio-and-video)
39 |   * [Chapter 2 - remuxing](#chapter-2---remuxing)
40 |   * [Chapter 3 - transcoding](#chapter-3---transcoding)
41 | 
42 | # Intro
43 | 
44 | ## video - what you see!
45 | 
46 | If you have a series of images and change them at a given frequency (let's say [24 images per second](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), you will create an [illusion of movement](https://en.wikipedia.org/wiki/Persistence_of_vision).
47 | In summary this is the very basic idea behind a video: **a series of pictures / frames running at a given rate**.
48 | 
49 | 
50 | 
51 | Zeitgenössische Illustration (1886)
52 | 
53 | ## audio - what you listen!
54 | 
55 | Although a muted video can express a variety of feelings, adding sound to it brings more pleasure to the experience.
56 | 
57 | Sound is the vibration that propagates as a wave of pressure, through the air or any other transmission medium, such as a gas, liquid or solid.
58 | 
59 | > In a digital audio system, a microphone converts sound to an analog electrical signal, then an analog-to-digital converter (ADC) — typically using [pulse-code modulation (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation) - converts the analog signal into a digital signal.
60 | 
61 | ![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png "audio analog to digital")
62 | >[Source](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)
63 | 
64 | ## codec - shrinking data
65 | 
66 | > CODEC is an electronic circuit or software that **compresses or decompresses digital audio/video.** It converts raw (uncompressed) digital audio/video to a compressed format or vice versa.
67 | > https://en.wikipedia.org/wiki/Video_codec
68 | 
69 | But if we chose to pack millions of images in a single file and called it a movie, we might end up with a huge file. Let's do the math:
70 | 
71 | Suppose we are creating a video with a resolution of `1080 x 1920` (height x width) and that we'll spend `3 bytes` per pixel (the minimal point at a screen) to encode the color (or [24 bit color](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), which gives us 16,777,216 different colors) and this video runs at `24 frames per second` and it is `30 minutes` long.
72 | 
73 | ```c
74 | toppf = 1080 * 1920 //total_of_pixels_per_frame
75 | cpp = 3 //cost_per_pixel
76 | tis = 30 * 60 //time_in_seconds
77 | fps = 24 //frames_per_second
78 | 
79 | required_storage = tis * fps * toppf * cpp
80 | ```
81 | 
82 | This video would require approximately `250.28GB` of storage or `1.19 Gbps` of bandwidth! That's why we need to use a [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).
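
If you want to double-check that arithmetic, here is a tiny standalone C program (not part of the tutorial's sources) that prints both numbers; the `250.28GB` figure comes from dividing the byte count by `1024^3`:

```c
#include <stdio.h>

int main(void) {
  long long toppf = 1080LL * 1920; // total of pixels per frame
  long long cpp = 3;               // cost (bytes) per pixel
  long long tis = 30 * 60;         // time in seconds
  long long fps = 24;              // frames per second

  long long required_storage = tis * fps * toppf * cpp; // in bytes
  double gb = required_storage / (1024.0 * 1024.0 * 1024.0);
  double gbps = (required_storage / (double)tis) * 8.0 / 1e9;

  printf("storage: %lld bytes (~%.2f GB)\n", required_storage, gb);
  printf("bandwidth: ~%.2f Gbps\n", gbps);
  return 0;
}
```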
83 | 
84 | ## container - a comfy place for audio and video
85 | 
86 | > A container or wrapper format is a metafile format whose specification describes how different elements of data and metadata coexist in a computer file.
87 | > https://en.wikipedia.org/wiki/Digital_container_format
88 | 
89 | A container is a **single file that contains all the streams** (mostly the audio and video) and also provides **synchronization and general metadata**, such as title, resolution, etc.
90 | 
91 | Usually we can infer the format of a file by looking at its extension: for instance a `video.webm` is probably a video using the container [`webm`](https://www.webmproject.org/).
92 | 
93 | ![container](/img/container.png)
94 | 
95 | # FFmpeg - command line
96 | 
97 | > A complete, cross-platform solution to record, convert and stream audio and video.
98 | 
99 | To work with multimedia we can use the AMAZING tool/library called [FFmpeg](https://www.ffmpeg.org/). Chances are you already know/use it directly or indirectly (do you use [Chrome?](https://www.chromium.org/developers/design-documents/video)).
100 | 
101 | It has a command line program called `ffmpeg`, a very simple yet powerful binary.
102 | For instance, you can convert from `mp4` to the container `avi` just by typing the following command:
103 | 
104 | ```bash
105 | $ ffmpeg -i input.mp4 output.avi
106 | ```
107 | 
108 | We just made a **remuxing** here, which is converting from one container to another one.
109 | Technically FFmpeg could also be doing a transcoding but we'll talk about that later.
110 | 
111 | ## FFmpeg command line tool 101
112 | 
113 | FFmpeg does have a [documentation](https://www.ffmpeg.org/ffmpeg.html) that does a great job of explaining how it works.
114 | 
115 | ```bash
116 | # you can also look for the documentation using the command line
117 | 
118 | ffmpeg -h full | grep -A 10 -B 10 avoid_negative_ts
119 | ```
120 | 
121 | To make things short, the FFmpeg command line program expects the following argument format to perform its actions `ffmpeg {1} {2} -i {3} {4} {5}`, where:
122 | 
123 | 1. global options
124 | 2. input file options
125 | 3. input url
126 | 4. output file options
127 | 5. output url
128 | 
129 | The parts 2, 3, 4 and 5 can be as many as you need.
130 | It's easier to understand this argument format in action:
131 | 
132 | ``` bash
133 | # WARNING: this file is around 300MB
134 | $ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4
135 | 
136 | $ ffmpeg \
137 | -y \ # global options
138 | -c:a libfdk_aac \ # input options
139 | -i bunny_1080p_60fps.mp4 \ # input url
140 | -c:v libvpx-vp9 -c:a libvorbis \ # output options
141 | bunny_1080p_60fps_vp9.webm # output url
142 | ```
143 | This command takes an input file `mp4` containing two streams (an audio encoded with `aac` CODEC and a video encoded using `h264` CODEC) and converts it to `webm`, changing its audio and video CODECs too.
144 | 
145 | We could simplify the command above but then be aware that FFmpeg will adopt or guess the default values for you.
146 | For instance when you just type `ffmpeg -i input.avi output.mp4` what audio/video CODEC does it use to produce the `output.mp4`?
147 | 
148 | Werner Robitza wrote a must read/execute [tutorial about encoding and editing with FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/).
149 | 
150 | # Common video operations
151 | 
152 | While working with audio/video we usually do a set of tasks with the media.
153 | 
154 | ## Transcoding
155 | 
156 | ![transcoding](/img/transcoding.png)
157 | 
158 | **What?** the act of converting one of the streams (audio or video) from one CODEC to another one.
159 | 
160 | **Why?** sometimes some devices (TVs, smartphones, consoles, etc.) don't support X but do support Y, and newer CODECs provide better compression rates.
161 | 
162 | **How?** converting an `H264` (AVC) video to an `H265` (HEVC).
163 | ```bash
164 | $ ffmpeg \
165 | -i bunny_1080p_60fps.mp4 \
166 | -c:v libx265 \
167 | bunny_1080p_60fps_h265.mp4
168 | ```
169 | 
170 | ## Transmuxing
171 | 
172 | ![transmuxing](/img/transmuxing.png)
173 | 
174 | **What?** the act of converting from one format (container) to another one.
175 | 
176 | **Why?** sometimes some devices (TVs, smartphones, consoles, etc.) don't support X but do support Y, and sometimes newer containers provide modern required features.
177 | 
178 | **How?** converting a `mp4` to a `ts`.
179 | ```bash
180 | $ ffmpeg \
181 | -i bunny_1080p_60fps.mp4 \
182 | -c copy \ # just saying to ffmpeg to skip encoding
183 | bunny_1080p_60fps.ts
184 | ```
185 | 
186 | ## Transrating
187 | 
188 | ![transrating](/img/transrating.png)
189 | 
190 | **What?** the act of changing the bit rate, or producing other renditions.
191 | 
192 | **Why?** people will try to watch your video in a `2G` (edge) connection using a less powerful smartphone or in a `fiber` Internet connection on their 4K TVs, therefore you should offer more than one rendition of the same video with different bit rates.
193 | 
194 | **How?** producing a rendition with bit rate between 964K and 3856K.
195 | ```bash
196 | $ ffmpeg \
197 | -i bunny_1080p_60fps.mp4 \
198 | -minrate 964K -maxrate 3856K -bufsize 2000K \
199 | bunny_1080p_60fps_transrating_964_3856.mp4
200 | ```
201 | 
202 | Usually we'll be using transrating with transsizing. Werner Robitza wrote another must read/execute [series of posts about FFmpeg rate control](http://slhck.info/posts/).
203 | 
204 | ## Transsizing
205 | 
206 | ![transsizing](/img/transsizing.png)
207 | 
208 | **What?** the act of converting from one resolution to another one. As said before transsizing is often used with transrating.
209 | 
210 | **Why?** reasons are about the same as for the transrating.
211 | 
212 | **How?** converting a `1080p` to a `480p` resolution.
213 | ```bash
214 | $ ffmpeg \
215 | -i bunny_1080p_60fps.mp4 \
216 | -vf scale=480:-1 \
217 | bunny_1080p_60fps_transsizing_480.mp4
218 | ```
219 | 
220 | ## Bonus Round: Adaptive Streaming
221 | 
222 | ![adaptive streaming](/img/adaptive-streaming.png)
223 | 
224 | **What?** the act of producing many renditions (with different resolutions / bit rates), splitting the media into chunks and serving them via http.
225 | 
226 | **Why?** to provide flexible media that can be watched on a low end smartphone or on a 4K TV; it's also easy to scale and deploy, but it can add latency.
227 | 
228 | **How?** creating an adaptive WebM using DASH.
229 | ```bash
230 | # video streams
231 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm
232 | 
233 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm
234 | 
235 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm
236 | 
237 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm
238 | 
239 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm
240 | 
241 | # audio streams
242 | $ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm
243 | 
244 | # the DASH manifest
245 | $ ffmpeg \
246 | -f webm_dash_manifest -i video_160x90_250k.webm \
247 | -f webm_dash_manifest -i video_320x180_500k.webm \
248 | -f webm_dash_manifest -i video_640x360_750k.webm \
249 | -f webm_dash_manifest -i video_640x360_1000k.webm \
250 | -f webm_dash_manifest -i video_1280x720_1500k.webm \
251 | -f webm_dash_manifest -i audio_128k.webm \
252 | -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \
253 | -f webm_dash_manifest \
254 | -adaptation_sets "id=0,streams=0,1,2,3,4 id=1,streams=5" \
255 | manifest.mpd
256 | ```
257 | 
258 | PS: I stole this example from the [Instructions to playback Adaptive WebM using DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)
259 | 
260 | ## Going beyond
261 | 
262 | There are [many, many other usages for FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).
263 | I use it in conjunction with *iMovie* to produce/edit some videos for YouTube and you can certainly use it professionally.
264 | 
265 | # Learn FFmpeg libav the Hard Way
266 | 
267 | > Don't you wonder sometimes 'bout sound and vision?
268 | > **David Robert Jones**
269 | 
270 | Since [FFmpeg](#ffmpeg---command-line) is so useful as a command line tool for essential tasks over media files, how can we use it in our programs?
271 | 
272 | FFmpeg is [composed of several libraries](https://www.ffmpeg.org/doxygen/trunk/index.html) that can be integrated into our own programs.
273 | Usually, when you install FFmpeg, it installs automatically all these libraries. I'll be referring to the set of these libraries as **FFmpeg libav**.
274 | 
275 | > This title is a homage to Zed Shaw's series [Learn X the Hard Way](https://learncodethehardway.org/), particularly his book Learn C the Hard Way.
276 | 
277 | ## Chapter 0 - The infamous hello world
278 | This hello world actually won't show the message `"hello world"` in the terminal :tongue:
279 | Instead we're going to **print out information about the video**, things like its format (container), duration, resolution, audio channels and, in the end, we'll **decode some frames and save them as image files**.
280 | 
281 | ### FFmpeg libav architecture
282 | 
283 | But before we start to code, let's learn how the **FFmpeg libav architecture** works and how its components communicate with each other.
284 | 
285 | Here's a diagram of the process of decoding a video:
286 | 
287 | ![ffmpeg libav architecture - decoding process](/img/decoding.png)
288 | 
289 | You'll first need to load your media file into a component called [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (the video container is also known as format).
290 | It actually doesn't fully load the whole file: it often only reads the header.
291 | 
292 | Once we loaded the minimal **header of our container**, we can access its streams (think of them as rudimentary audio and video data).
293 | Each stream will be available in a component called [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).
294 | 
295 | > Stream is a fancy name for a continuous flow of data.
296 | 
297 | Suppose our video has two streams: an audio encoded with [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) and a video encoded with [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). From each stream we can extract **pieces (slices) of data** called packets that will be loaded into components named [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).
298 | 
299 | The **data inside the packets are still coded** (compressed) and in order to decode the packets, we need to pass them to a specific [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html).
300 | 
301 | The `AVCodec` will decode them into [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) and finally, this component gives us **the uncompressed frame**. Notice that the same terminology/process applies to both audio and video streams.
302 | 
303 | ### Requirements
304 | 
305 | Since some people were [facing issues while compiling or running the examples](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling), **we're going to use [`Docker`](https://docs.docker.com/install/) as our development/runner environment.** We'll also use the big buck bunny video, so if you don't have it locally, just run the command `make fetch_small_bunny_video`.
306 | 
307 | ### Chapter 0 - code walkthrough
308 | 
309 | > #### TLDR; show me the [code](/0_hello_world.c) and execution.
310 | > ```bash
311 | > $ make run_hello
312 | > ```
313 | 
314 | We'll skip some details, but don't worry: the [source code is available at github](/0_hello_world.c).
315 | 
316 | We're going to allocate memory to the component [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) that will hold information about the format (container).
317 | 
318 | ```c
319 | AVFormatContext *pFormatContext = avformat_alloc_context();
320 | ```
321 | 
322 | Now we're going to open the file, read its header and fill the `AVFormatContext` with minimal information about the format (notice that usually the codecs are not opened).
323 | The function used to do this is [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). It expects an `AVFormatContext`, a `filename` and two optional arguments: the [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (if you pass `NULL`, FFmpeg will guess the format) and the [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (which are the options to the demuxer).
324 | 
325 | ```c
326 | avformat_open_input(&pFormatContext, filename, NULL, NULL);
327 | ```
328 | 
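By the way, the examples in this walkthrough skip most return-value checks for brevity. In real code you should check the negative error codes that libav functions return and turn them into readable messages with [`av_strerror`](https://ffmpeg.org/doxygen/trunk/group__lavu__error.html). A minimal sketch of that, reusing the variable names from this chapter:

```c
// Checking the return code of the setup call used above.
int ret = avformat_open_input(&pFormatContext, filename, NULL, NULL);
if (ret < 0) {
  char err[AV_ERROR_MAX_STRING_SIZE] = {0};
  av_strerror(ret, err, sizeof(err)); // error code -> human readable message
  printf("ERROR avformat_open_input: %s\n", err);
  return -1;
}
```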
329 | We can print the format name and the media duration:
330 | 
331 | ```c
332 | printf("Format %s, duration %lld us", pFormatContext->iformat->long_name, pFormatContext->duration);
333 | ```
334 | 
335 | To access the `streams`, we need to read data from the media. The function [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) does that.
336 | Now, the `pFormatContext->nb_streams` will hold the amount of streams and `pFormatContext->streams[i]` will give us the `i`-th stream (an [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)).
337 | 
338 | ```c
339 | avformat_find_stream_info(pFormatContext, NULL);
340 | ```
341 | 
342 | Now we'll loop through all the streams.
343 | 
344 | ```c
345 | for (int i = 0; i < pFormatContext->nb_streams; i++)
346 | {
347 |   //
348 | }
349 | ```
350 | 
351 | For each stream, we're going to keep the [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html), which describes the properties of the codec used by the stream `i`.
352 | 
353 | ```c
354 | AVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;
355 | ```
356 | 
357 | With the codec properties we can look up the proper CODEC with the function [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca), which finds the registered decoder for the codec id and returns an [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html), the component that knows how to en**CO**de and **DEC**ode the stream.
358 | ```c
359 | AVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);
360 | ```
361 | 
362 | Now we can print information about the codecs.
363 | 
364 | ```c
365 | // specific for video and audio
366 | if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {
367 |   printf("Video Codec: resolution %d x %d", pLocalCodecParameters->width, pLocalCodecParameters->height);
368 | } else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {
369 |   printf("Audio Codec: %d channels, sample rate %d", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);
370 | }
371 | // general
372 | printf("\tCodec %s ID %d bit_rate %lld", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);
373 | ```
374 | 
375 | With the codec, we can allocate memory for the [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html), which will hold the context for our decode/encode process, but then we need to fill this codec context with CODEC parameters; we do that with [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).
376 | 
377 | Once we filled the codec context, we need to open the codec. We call the function [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) and then we can use it.
378 | 
379 | ```c
380 | AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);
381 | avcodec_parameters_to_context(pCodecContext, pCodecParameters);
382 | avcodec_open2(pCodecContext, pCodec, NULL);
383 | ```
384 | 
385 | Now we're going to read the packets from the stream and decode them into frames, but first we need to allocate memory for both components, the [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) and the [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).
386 | 
387 | ```c
388 | AVPacket *pPacket = av_packet_alloc();
389 | AVFrame *pFrame = av_frame_alloc();
390 | ```
391 | 
392 | Let's read the packets from the stream with the function [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) while there are packets left.
393 | 
394 | ```c
395 | while (av_read_frame(pFormatContext, pPacket) >= 0) {
396 |   //...
397 | }
398 | ```
399 | 
400 | Let's **send the raw data packet** (compressed frame) to the decoder, through the codec context, using the function [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).
401 | 
402 | ```c
403 | avcodec_send_packet(pCodecContext, pPacket);
404 | ```
405 | 
406 | And let's **receive the raw data frame** (uncompressed frame) from the decoder, through the same codec context, using the function [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).
407 | 
408 | ```c
409 | avcodec_receive_frame(pCodecContext, pFrame);
410 | ```
411 | 
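One packet can decode into zero or more frames, so production code (see [0_hello_world.c](/0_hello_world.c)) wraps these two calls in a loop that handles `AVERROR(EAGAIN)` (the decoder wants more input) and `AVERROR_EOF` (end of stream). Roughly:

```c
// Send one packet, then drain every frame it may produce.
int response = avcodec_send_packet(pCodecContext, pPacket);
while (response >= 0) {
  response = avcodec_receive_frame(pCodecContext, pFrame);
  if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
    break; // the decoder needs a new packet (EAGAIN) or the stream ended (EOF)
  } else if (response < 0) {
    return response; // a real decoding error
  }
  // use pFrame here (e.g. save it as an image)
  av_frame_unref(pFrame);
}
```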
412 | We can print the frame number, the [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [frame type](https://en.wikipedia.org/wiki/Video_compression_picture_types), etc.
413 | 
414 | ```c
415 | printf(
416 |     "Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]",
417 |     av_get_picture_type_char(pFrame->pict_type),
418 |     pCodecContext->frame_number,
419 |     pFrame->pts,
420 |     pFrame->pkt_dts,
421 |     pFrame->key_frame,
422 |     pFrame->coded_picture_number,
423 |     pFrame->display_picture_number
424 | );
425 | ```
426 | 
427 | Finally we can save our decoded frame into a [simple gray image](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). The process is very simple: we'll use `pFrame->data`, where the index is related to the [planes Y, Cb and Cr](https://en.wikipedia.org/wiki/YCbCr), and we just picked `0` (Y) to save our gray image.
428 | 
429 | ```c
430 | save_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);
431 | 
432 | static void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)
433 | {
434 |     FILE *f;
435 |     int i;
436 |     f = fopen(filename,"w");
437 |     // writing the minimal required header for a pgm file format
438 |     // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example
439 |     fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
440 | 
441 |     // writing line by line
442 |     for (i = 0; i < ysize; i++)
443 |         fwrite(buf + i * wrap, 1, xsize, f);
444 |     fclose(f);
445 | }
446 | ```
447 | 
448 | And voilà! Now we have a 2MB grayscale image:
449 | 
450 | ![saved frame](/img/generated_frame.png)
451 | 
452 | ## Chapter 1 - syncing audio and video
453 | 
454 | > **Be the player** - a young JS developer writing a new MSE video player.
455 | 
456 | Before we move to [code a transcoding example](#chapter-3---transcoding) let's talk about **timing**, or how a video player knows the right time to play a frame.
457 | 
458 | In the last example, we saved some frames that can be seen here:
459 | 
460 | ![frame 0](/img/hello_world_frames/frame0.png)
461 | ![frame 1](/img/hello_world_frames/frame1.png)
462 | ![frame 2](/img/hello_world_frames/frame2.png)
463 | ![frame 3](/img/hello_world_frames/frame3.png)
464 | ![frame 4](/img/hello_world_frames/frame4.png)
465 | ![frame 5](/img/hello_world_frames/frame5.png)
466 | 
467 | When we're designing a video player we need to **play each frame at a given pace**, otherwise it would be hard to enjoy the video, because it would play too fast or too slow.
468 | 
469 | Therefore we need to introduce some logic to play each frame smoothly. For that matter, each frame has a **presentation timestamp** (PTS), an increasing number counted in units of a rational **timebase** (whose denominator is known as the **timescale**); the step between consecutive PTS values is derived from the **frame rate (fps)** and the timescale.
470 | 
471 | It's easier to understand when we look at some examples, let's simulate some scenarios.
472 | 
473 | For a `fps=60/1` and `timebase=1/60000` each PTS will increase by `timescale / fps = 1000`, therefore the **PTS real time** for each frame could be (supposing it started at 0):
474 | 
475 | * `frame=0, PTS = 0, PTS_TIME = 0`
476 | * `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`
477 | * `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`
478 | 
479 | For almost the same scenario but with a timebase equal to `1/60`:
480 | 
481 | * `frame=0, PTS = 0, PTS_TIME = 0`
482 | * `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`
483 | * `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`
484 | * `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`
485 | 
486 | For a `fps=25/1` and `timebase=1/75` each PTS will increase by `timescale / fps = 3` and the PTS time could be:
487 | 
488 | * `frame=0, PTS = 0, PTS_TIME = 0`
489 | * `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`
490 | * `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`
491 | * `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`
492 | * ...
493 | * `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`
494 | * ...
495 | * `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`
496 | 
497 | Now with the `pts_time` we can find a way to render this synched with the audio `pts_time` or with a system clock. FFmpeg libav provides this info through its API:
498 | 
499 | - fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)
500 | - tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)
501 | - tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)
502 | 
503 | Just out of curiosity, the frames we saved were sent in a DTS order (frames: 1,6,4,2,3,5) but played back in a PTS order (frames: 1,2,3,4,5,6). Also, notice how cheap B-Frames are in comparison to P or I-Frames.
504 | 
505 | ```
506 | LOG: AVStream->r_frame_rate 60/1
507 | LOG: AVStream->time_base 1/60000
508 | ...
509 | LOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]
510 | LOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]
511 | LOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]
512 | LOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]
513 | LOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]
514 | LOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]
515 | ```
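
To turn the formula `PTS_TIME = PTS * timebase` into code, you can convert the rational timebase to a `double` with [`av_q2d`](https://ffmpeg.org/doxygen/trunk/group__lavu__math__rational.html). A small sketch, assuming `st` is the `AVStream*` and `pFrame` a freshly decoded `AVFrame*`:

```c
// PTS -> seconds, and the expected delay between two frames.
double pts_time = pFrame->pts * av_q2d(st->time_base);
double frame_delay = 1.0 / av_q2d(st->avg_frame_rate);
```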
516 | 
517 | ## Chapter 2 - remuxing
518 | 
519 | Remuxing is the act of changing from one format (container) to another, for instance, we can change a [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) video to a [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) one without much pain using FFmpeg:
520 | 
521 | ```bash
522 | ffmpeg input.mp4 -c copy output.ts
523 | ```
524 | 
525 | It'll demux the mp4 but it won't decode or encode it (`-c copy`) and in the end, it'll mux it into a `mpegts` file. If you don't provide the format with `-f`, ffmpeg will try to guess it based on the file's extension.
526 | 
527 | The general usage of FFmpeg or the libav follows a pattern/architecture or workflow:
528 | * **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - it accepts an `input` (a `file` for instance but it could be a `rtmp` or `HTTP` input as well)
529 | * **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - it `demuxes` its content, revealing mostly metadata and its streams
530 | * **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - it `decodes` its compressed streams data *optional*
531 | * **[pixel layer](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - it can also apply some `filters` to the raw frames (like resizing) *optional*
532 | * and then it does the reverse path
533 | * **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - it `encodes` (or `re-encodes` or even `transcodes`) the raw frames *optional*
534 | * **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - it `muxes` (or `remuxes`) the raw streams (the compressed data)
535 | * **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - and finally the muxed data is sent to an `output` (another file or maybe a network remote server)
536 | 
537 | ![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)
538 | > This graph is strongly inspired by [Leixiaohua's](http://leixiaohua1020.github.io/#ffmpeg-development-examples) and [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9) works.
Now let's code an example using libav to produce the same effect as `ffmpeg -i input.mp4 -c copy output.ts`.

We're going to read from an input (`input_format_context`) and write it to another output (`output_format_context`).

```c
AVFormatContext *input_format_context = NULL;
AVFormatContext *output_format_context = NULL;
```

We start with the usual steps: allocate memory and open the input format. For this specific case, we open an input file and allocate memory for an output file.

```c
if ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {
  fprintf(stderr, "Could not open input file '%s'", in_filename);
  goto end;
}
if ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {
  fprintf(stderr, "Failed to retrieve input stream information");
  goto end;
}

avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);
if (!output_format_context) {
  fprintf(stderr, "Could not create output context\n");
  ret = AVERROR_UNKNOWN;
  goto end;
}
```

We're going to remux only the video, audio, and subtitle streams, so we keep track of which streams we'll use in an array of indexes.

```c
number_of_streams = input_format_context->nb_streams;
streams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));
```

Right after allocating the required memory, we loop through all the streams, and for each one we create a new output stream in our output format context using the [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) function. Notice that we mark all the streams that aren't video, audio, or subtitle so we can skip them later.

```c
for (i = 0; i < input_format_context->nb_streams; i++) {
  AVStream *out_stream;
  AVStream *in_stream = input_format_context->streams[i];
  AVCodecParameters *in_codecpar = in_stream->codecpar;
  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
    streams_list[i] = -1;
    continue;
  }
  streams_list[i] = stream_index++;
  out_stream = avformat_new_stream(output_format_context, NULL);
  if (!out_stream) {
    fprintf(stderr, "Failed allocating output stream\n");
    ret = AVERROR_UNKNOWN;
    goto end;
  }
  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
  if (ret < 0) {
    fprintf(stderr, "Failed to copy codec parameters\n");
    goto end;
  }
}
```

Now we can create the output file.

```c
if (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {
  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);
  if (ret < 0) {
    fprintf(stderr, "Could not open output file '%s'", out_filename);
    goto end;
  }
}

ret = avformat_write_header(output_format_context, NULL);
if (ret < 0) {
  fprintf(stderr, "Error occurred when opening output file\n");
  goto end;
}
```

After that, we can copy the streams, packet by packet, from our input to our output streams.
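One detail before the loop: each container counts time in its own timebase, so when a packet crosses containers its timestamps have to be rescaled proportionally. A minimal sketch of the arithmetic (values chosen for illustration; compile with `-lavutil`):

```c
#include <stdio.h>
#include <inttypes.h>
#include <libavutil/mathematics.h>

int main(void)
{
    int64_t pts = 6000;             // a timestamp expressed in the input's timebase
    AVRational mp4_tb = {1, 60000}; // our MP4 video stream's timebase
    AVRational ts_tb  = {1, 90000}; // MPEG-TS uses a 90kHz clock
    // av_rescale_q computes pts * mp4_tb / ts_tb with overflow protection:
    // 6000 * 90000 / 60000 = 9000 -> the same instant, in 1/90000 units
    printf("rescaled pts = %" PRId64 "\n", av_rescale_q(pts, mp4_tb, ts_tb));
    return 0;
}
```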
We loop for as long as there are packets (`av_read_frame`); for each packet we re-calculate the PTS and DTS and finally write it (`av_interleaved_write_frame`) to our output format context.

```c
while (1) {
  AVStream *in_stream, *out_stream;
  ret = av_read_frame(input_format_context, &packet);
  if (ret < 0)
    break;
  // skip the streams we marked as unwanted (checking bounds before indexing)
  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {
    av_packet_unref(&packet);
    continue;
  }
  in_stream = input_format_context->streams[packet.stream_index];
  packet.stream_index = streams_list[packet.stream_index];
  out_stream = output_format_context->streams[packet.stream_index];
  /* copy packet */
  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);
  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903
  packet.pos = -1;

  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1
  ret = av_interleaved_write_frame(output_format_context, &packet);
  if (ret < 0) {
    fprintf(stderr, "Error muxing packet\n");
    break;
  }
  av_packet_unref(&packet);
}
```

To finish, we write the stream trailer to the output media file with the [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) function.

```c
av_write_trailer(output_format_context);
```

Now we're ready to test it, and the first test will be a format (video container) conversion from an MP4 to an MPEG-TS video file. We're basically reproducing the command line `ffmpeg -i input.mp4 -c copy output.ts` with libav.

```bash
make run_remuxing_ts
```

It's working!!! Don't you trust me?! You shouldn't; we can check it with `ffprobe`:

```bash
ffprobe -i remuxed_small_bunny_1080p_60fps.ts

Input #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':
  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s
  Program 1
    Metadata:
      service_name    : Service01
      service_provider: FFmpeg
    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc
    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s
```

To sum up what we did here in a graph, we can revisit our initial [idea about how libav works](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture) but showing that we skipped the codec part.

![remuxing libav components](/img/remuxing_libav_components.png)

Before we end this chapter I'd like to show an important part of the remuxing process: **you can pass options to the muxer**.
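(As an aside: if you're wondering which options a given muxer accepts, the ffmpeg CLI can list them; shown here for the mp4 muxer, but the same works for any other.)

```bash
ffmpeg -h muxer=mp4
```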
Let's say we want to deliver the [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) format; for that we need to use [fragmented mp4](https://stackoverflow.com/a/35180327) (sometimes referred to as `fmp4`) instead of MPEG-TS or plain MPEG-4.

With the [command line we can do that easily](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).

```
ffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4
```

The libav version is almost as easy as the command line: we just pass the options when writing the output header, right before the packet copy.

```c
AVDictionary* opts = NULL;
av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov+default_base_moof", 0);
ret = avformat_write_header(output_format_context, &opts);
```

We can now generate this fragmented mp4 file:

```bash
make run_remuxing_fragmented_mp4
```

To make sure I'm not lying to you, you can use the amazing site/tool [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) or the site [http://mp4parser.com/](http://mp4parser.com/) to see the differences; first load up the "common" mp4.

![mp4 boxes](/img/boxes_normal_mp4.png)

As you can see, it has a single `mdat` atom/box: **this is the place where the video and audio frames live**. Now load the fragmented mp4 to see how it spreads the `mdat` boxes.

![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)

## Chapter 3 - transcoding

> #### TLDR; show me the [code](/3_transcoding.c) and execution.
> ```bash
> $ make run_transcoding
> ```
> We'll skip some details, but don't worry: the [source code is available at github](/3_transcoding.c).

In this chapter, we're going to create a minimalist transcoder, written in C, that can convert videos coded in H264 to H265 using the **FFmpeg/libav** libraries, specifically [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, and libavutil.

![media transcoding flow](/img/transcoding_flow.png)

> _Just a quick recap:_ The [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) is the abstraction for the format of the media file, aka container (ex: MKV, MP4, Webm, TS). The [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) represents each type of data for a given format (ex: audio, video, subtitle, metadata). The [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) is a slice of compressed data obtained from the `AVStream` that can be decoded by an [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ex: av1, h264, vp9, hevc), generating raw data called an [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).

### Transmuxing

Let's start with the simple transmuxing operation and then we can build upon this code; the first step is to **load the input file**.

```c
// Allocate an AVFormatContext.
avfc = avformat_alloc_context();
// Open an input stream and read the header.
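// (an aside: avformat_open_input takes an AVFormatContext** and will allocate
// the context itself if the pointed-to context is NULL, so the explicit
// avformat_alloc_context() above is mostly for clarity)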
avformat_open_input(&avfc, in_filename, NULL, NULL);
// Read packets of a media file to get stream information.
avformat_find_stream_info(avfc, NULL);
```

Now we're going to set up the decoder. The `AVFormatContext` gives us access to all the `AVStream` components; for each one of them we can get its `AVCodec`, create the particular `AVCodecContext`, and finally open the codec so we can proceed to the decoding process.

> The [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) holds data about media configuration such as bit rate, frame rate, sample rate, channels, height, and many others.

```c
for (int i = 0; i < avfc->nb_streams; i++)
{
  AVStream *avs = avfc->streams[i];
  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);
  AVCodecContext *avcc = avcodec_alloc_context3(avc);
  avcodec_parameters_to_context(avcc, avs->codecpar);
  avcodec_open2(avcc, avc, NULL);
}
```

We need to prepare the output media file for transmuxing as well: we first **allocate memory** for the output `AVFormatContext`, then we create **each stream** in the output format. In order to pack the stream properly, we **copy the codec parameters** from the decoder.

We **set the flag** `AV_CODEC_FLAG_GLOBAL_HEADER`, which tells the encoder that it can use global headers, and finally we open the output **file for writing** and persist the headers.

```c
avformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);

AVStream *avs = avformat_new_stream(encoder_avfc, NULL);
avcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);

if (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)
  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

AVDictionary *muxer_opts = NULL;
avio_open(&encoder_avfc->pb, out_filename, AVIO_FLAG_WRITE);
avformat_write_header(encoder_avfc, &muxer_opts);
```

We get the `AVPacket`s from the decoder, adjust the timestamps, and write each packet properly to the output file. Even though the function `av_interleaved_write_frame` says "write frame", we are storing the packet. We finish the transmuxing process by writing the stream trailer to the file.

```c
AVPacket *input_packet = av_packet_alloc();

while (av_read_frame(decoder_avfc, input_packet) >= 0)
{
  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);
  if (av_interleaved_write_frame(encoder_avfc, input_packet) < 0) { /* handle the error */ }
  av_packet_unref(input_packet);
}

av_write_trailer(encoder_avfc);
```

### Transcoding

The previous section showed a simple transmuxer program; now we're going to add the capability to encode files. Specifically, we're going to enable it to transcode videos from `h264` to `h265`.
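One practical note before we wire it up: the encoder we'll use, `libx265`, is only present if your FFmpeg build was configured with it. A quick way to check (it prints the encoder's options if available, or an error otherwise):

```bash
ffmpeg -h encoder=libx265
```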
After we prepared the decoder, but before we arrange the output media file, we're going to set up the encoder. We:

* Create the video `AVStream` in the encoder, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)
* Use the `AVCodec` called `libx265`, [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)
* Create the `AVCodecContext` based on the found codec, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)
* Set up basic attributes for the transcoding session, and
* Open the codec and copy parameters from the context to the stream, [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) and [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)

```c
AVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);
AVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);

char *codec_name = "libx265";
char *codec_priv_key = "x265-params";
// we're going to use internal options for x265:
// disable scene-change detection and fix the GOP at 60 frames
char *codec_priv_value = "keyint=60:min-keyint=60:scenecut=0";

AVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);
AVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);
// encoder codec params
av_opt_set(video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);
video_avcc->height = decoder_video_avcc->height;
video_avcc->width = decoder_video_avcc->width;
video_avcc->pix_fmt = video_avc->pix_fmts[0];
// rate control (max must be >= min)
video_avcc->bit_rate = 2 * 1000 * 1000;
video_avcc->rc_buffer_size = 4 * 1000 * 1000;
video_avcc->rc_max_rate = 2.5 * 1000 * 1000;
video_avcc->rc_min_rate = 2 * 1000 * 1000;
// time base
video_avcc->time_base = av_inv_q(input_framerate);
video_avs->time_base = video_avcc->time_base;

avcodec_open2(video_avcc, video_avc, NULL);
avcodec_parameters_from_context(video_avs->codecpar, video_avcc);
```

We need to expand our decoding loop for the video stream transcoding:

* Send the compressed `AVPacket` to the decoder, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)
* Receive the uncompressed `AVFrame`, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)
* Start to transcode this raw frame,
* Send the raw frame to the encoder, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)
* Receive the compressed (according to our codec) `AVPacket`, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)
* Set up the timestamps, [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)
* And write it to the output file, [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)
```c
AVFrame *input_frame = av_frame_alloc();
AVPacket *input_packet = av_packet_alloc();

while (av_read_frame(decoder_avfc, input_packet) >= 0)
{
  int response = avcodec_send_packet(decoder_video_avcc, input_packet);
  while (response >= 0) {
    response = avcodec_receive_frame(decoder_video_avcc, input_frame);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      return response;
    }
    if (response >= 0) {
      // note: the encoder context and the decoded frame are passed explicitly
      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, encoder_video_avcc, input_frame, input_packet->stream_index);
    }
    av_frame_unref(input_frame);
  }
  av_packet_unref(input_packet);
}
av_write_trailer(encoder_avfc);

// used function
int encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext *video_avcc, AVFrame *input_frame, int index) {
  AVPacket *output_packet = av_packet_alloc();
  int response = avcodec_send_frame(video_avcc, input_frame);

  while (response >= 0) {
    response = avcodec_receive_packet(video_avcc, output_packet);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      break;
    } else if (response < 0) {
      return -1;
    }

    output_packet->stream_index = index;
    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;

    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);
    response = av_interleaved_write_frame(avfc, output_packet);
  }
  av_packet_unref(output_packet);
  av_packet_free(&output_packet);
  return 0;
}
```

We converted the media stream from `h264` to `h265`; as expected, the `h265` version of the media file is smaller than the `h264` one. Beyond that, the [created program](/3_transcoding.c) is capable of:

```c
/*
 * H264 -> H265
 * Audio -> remuxed (untouched)
 * MP4 -> MP4
 */
StreamingParams sp = {0};
sp.copy_audio = 1;
sp.copy_video = 0;
sp.video_codec = "libx265";
sp.codec_priv_key = "x265-params";
sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0";

/*
 * H264 -> H264 (fixed gop)
 * Audio -> remuxed (untouched)
 * MP4 -> MP4
 */
StreamingParams sp = {0};
sp.copy_audio = 1;
sp.copy_video = 0;
sp.video_codec = "libx264";
sp.codec_priv_key = "x264-params";
sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";

/*
 * H264 -> H264 (fixed gop)
 * Audio -> remuxed (untouched)
 * MP4 -> fragmented MP4
 */
StreamingParams sp = {0};
sp.copy_audio = 1;
sp.copy_video = 0;
sp.video_codec = "libx264";
sp.codec_priv_key = "x264-params";
sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
sp.muxer_opt_key = "movflags";
sp.muxer_opt_value = "frag_keyframe+empty_moov+delay_moov+default_base_moof";

/*
 * H264 -> H264 (fixed gop)
 * Audio -> AAC
 * MP4 -> MPEG-TS
 */
StreamingParams sp = {0};
sp.copy_audio = 0;
sp.copy_video = 0;
sp.video_codec = "libx264";
sp.codec_priv_key = "x264-params";
sp.codec_priv_value = "keyint=60:min-keyint=60:scenecut=0:force-cfr=1";
sp.audio_codec = "aac";
sp.output_extension = ".ts";

/* WIP :P -> it's not playing on VLC, the final bit rate is huge
 * H264 -> VP9
 * Audio -> Vorbis
 * MP4 -> WebM
 */
//StreamingParams sp = {0};
//sp.copy_audio = 0;
//sp.copy_video = 0;
//sp.video_codec = "libvpx-vp9";
//sp.audio_codec = "libvorbis";
//sp.output_extension = ".webm";
```

> Now, to be honest, this was [harder than I thought](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54) it'd be, and I had to dig into the [FFmpeg command line source code](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) and test it a lot. I think I'm missing something, because I had to enforce `force-cfr` for `h264` to work, and I'm still seeing warning messages like `forced frame type (5) at 80 was changed to frame type (3)`.
--------------------------------------------------------------------------------
/build/.gitignore:
--------------------------------------------------------------------------------
# Ignore everything in this directory
*
# Except this file
!.gitignore
--------------------------------------------------------------------------------
/fetch_bbb_video.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# the link doesn't work anymore
# wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4
# ffmpeg -y -i bunny_1080p_60fps.mp4 -ss 00:01:24 -t 00:00:10 small_bunny_1080p_60fps.mp4

echo "the small_bunny_1080p_60fps.mp4 is already provided"
--------------------------------------------------------------------------------
/img/adaptive-streaming.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/adaptive-streaming.png
--------------------------------------------------------------------------------
/img/boxes_fragmente_mp4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/boxes_fragmente_mp4.png
--------------------------------------------------------------------------------
/img/boxes_normal_mp4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/boxes_normal_mp4.png
--------------------------------------------------------------------------------
/img/container.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/container.png
--------------------------------------------------------------------------------
/img/decoding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/decoding.png
--------------------------------------------------------------------------------
/img/encoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/encoding.png -------------------------------------------------------------------------------- /img/ffmpeg_libav_workflow.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/ffmpeg_libav_workflow.jpeg -------------------------------------------------------------------------------- /img/generated_frame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/generated_frame.png -------------------------------------------------------------------------------- /img/h264_properties.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/h264_properties.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame0.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame1.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame2.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame3.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame4.png -------------------------------------------------------------------------------- /img/hello_world_frames/frame5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hello_world_frames/frame5.png -------------------------------------------------------------------------------- /img/hevc_properties.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/hevc_properties.png
--------------------------------------------------------------------------------
/img/remuxing_libav_components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/remuxing_libav_components.png
--------------------------------------------------------------------------------
/img/transcoding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/transcoding.png
--------------------------------------------------------------------------------
/img/transcoding_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/transcoding_flow.png
--------------------------------------------------------------------------------
/img/transmuxing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/transmuxing.png
--------------------------------------------------------------------------------
/img/transrating.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/transrating.png
--------------------------------------------------------------------------------
/img/transsizing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/img/transsizing.png
--------------------------------------------------------------------------------
/remuxed_small_bunny_1080p_60fps.ts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/remuxed_small_bunny_1080p_60fps.ts
--------------------------------------------------------------------------------
/small_bunny_1080p_60fps.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandromoreira/ffmpeg-libav-tutorial/8263d4d83e69620fb9028354e26f22c891e6770c/small_bunny_1080p_60fps.mp4
--------------------------------------------------------------------------------
/video_debugging.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdarg.h>
#include <inttypes.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/timestamp.h>
#include "video_debugging.h"

void logging(const char *fmt, ...)
{
  va_list args;
  fprintf( stderr, "LOG: " );
  va_start( args, fmt );
  vfprintf( stderr, fmt, args );
  va_end( args );
  fprintf( stderr, "\n" );
}

void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
  AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

  logging("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d",
          av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
          av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
          av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
          pkt->stream_index);
}

void print_timing(char *name, AVFormatContext *avf, AVCodecContext *avc, AVStream *avs) {
  logging("=================================================");
  logging("%s", name);

  logging("\tAVFormatContext");
  if (avf != NULL) {
    // these fields are int64_t, hence the PRId64 format specifiers
    logging("\t\tstart_time=%" PRId64 " duration=%" PRId64 " bit_rate=%" PRId64 " start_time_realtime=%" PRId64,
            avf->start_time, avf->duration, avf->bit_rate, avf->start_time_realtime);
  } else {
    logging("\t\t->NULL");
  }

  logging("\tAVCodecContext");
  if (avc != NULL) {
    logging("\t\tbit_rate=%" PRId64 " ticks_per_frame=%d width=%d height=%d gop_size=%d keyint_min=%d sample_rate=%d profile=%d level=%d ",
            avc->bit_rate, avc->ticks_per_frame, avc->width, avc->height, avc->gop_size, avc->keyint_min, avc->sample_rate, avc->profile, avc->level);
    logging("\t\tavc->time_base=num/den %d/%d", avc->time_base.num, avc->time_base.den);
    logging("\t\tavc->framerate=num/den %d/%d", avc->framerate.num, avc->framerate.den);
    logging("\t\tavc->pkt_timebase=num/den %d/%d", avc->pkt_timebase.num, avc->pkt_timebase.den);
  } else {
    logging("\t\t->NULL");
  }

  logging("\tAVStream");
  if (avs != NULL) {
    logging("\t\tindex=%d start_time=%" PRId64 " duration=%" PRId64 " ", avs->index, avs->start_time, avs->duration);
    logging("\t\tavs->time_base=num/den %d/%d", avs->time_base.num, avs->time_base.den);
    logging("\t\tavs->sample_aspect_ratio=num/den %d/%d", avs->sample_aspect_ratio.num, avs->sample_aspect_ratio.den);
    logging("\t\tavs->avg_frame_rate=num/den %d/%d", avs->avg_frame_rate.num, avs->avg_frame_rate.den);
    logging("\t\tavs->r_frame_rate=num/den %d/%d", avs->r_frame_rate.num, avs->r_frame_rate.den);
  } else {
    logging("\t\t->NULL");
  }

  logging("=================================================");
}
--------------------------------------------------------------------------------
/video_debugging.h:
--------------------------------------------------------------------------------
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>

void logging(const char *fmt, ...);
void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt);
void print_timing(char *name, AVFormatContext *avf, AVCodecContext *avc, AVStream *avs);
--------------------------------------------------------------------------------