PTS:Presentation timestamp 最终渲染时用的时间戳
DTS:Decoding timestamp 解码用 的什么有
I(intra) B(bidirectional) P(predicted)压缩原理,帧内压缩 gop 第一帧是关键帧 B帧可以向前向后参考一帧,也可以多少帧,一般设置为3帧。只有有B帧时,pts和dts才不一样。
从哪获得pts呢,从解复用的流中的AVPacket中有pts,解码后的AVFrame中也有pts。两个地方的值相同,特殊时候AVFrame中没有pts,但是我们一般拉取流后都会解码,因此用AVFrame值方便,但是如果没有我们也可以推算,手工推算比较复杂,但是ffmpeg给我们提供了函数可以帮我们算出,av_frame_get_best_effort_timestamp()
tbr:帧率 time base of rate 帧率的时间基,比如25帧视频 tbr是1/25
tbn:time base of stream 流的时间基
tbc:time base of codec 解码的时间戳
用ffplay播放一个视频就可以看到这三个时间基
音视频同步的三种方法:
视频同步到音频:视频在音频之前那么就播放,在音频之后就等待,等待两者pts之差 推荐这种方法。
音频同步到视频:涉及到音频的补帧和丢帧,如果音频时间总和大于视频总和则需要丢音频帧,如果小于则需要补音频帧,非常不方便。
音频和视频都同步到系统时钟
音频的时间总长是固定的,且每个时间刻度都有声音。视频总长可以不固定,且都是固定时刻值。音频就像线段_____,视频就像一个个小点…
一种视频同步到音频的方法,先播放第一帧视频,然后再拿到第二个视屏,如果这个视频在当前的音频pts之前,那么就直接播放,如果在这个音频后面,那么就播放这个音频,并且设置个计时器,等待这帧音频和视频两者pts之差的时间后,再把这帧视频渲染。
#include <stdio.h> #include <assert.h> #include <math.h> #include <SDL.h> #include <libavcodec/avcodec.h> #include <libavformat/avformat.h> #include <libswscale/swscale.h> #include <libswresample/swresample.h> // compatibility with newer API #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1) #define av_frame_alloc avcodec_alloc_frame #define av_frame_free avcodec_free_frame #endif #define SDL_AUDIO_BUFFER_SIZE 1024 #define MAX_AUDIO_FRAME_SIZE 192000 #define MAX_AUDIOQ_SIZE (5 * 16 * 1024) #define MAX_VIDEOQ_SIZE (5 * 256 * 1024) #define AV_SYNC_THRESHOLD 0.01 #define AV_NOSYNC_THRESHOLD 10.0 #define FF_REFRESH_EVENT (SDL_USEREVENT) #define FF_QUIT_EVENT (SDL_USEREVENT + 1) #define VIDEO_PICTURE_QUEUE_SIZE 1 typedef struct PacketQueue { AVPacketList *first_pkt, *last_pkt; int nb_packets; int size; SDL_mutex *mutex; SDL_cond *cond; } PacketQueue; typedef struct VideoPicture { AVPicture *bmp; int width, height; /* source height & width */ int allocated; double pts; } VideoPicture; typedef struct VideoState { AVFormatContext *pFormatCtx; int videoStream, audioStream; AVStream *audio_st; AVCodecContext *audio_ctx; PacketQueue audioq; uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2]; unsigned int audio_buf_size; unsigned int audio_buf_index; AVFrame audio_frame; AVPacket audio_pkt; uint8_t *audio_pkt_data; int audio_pkt_size; int audio_hw_buf_size; struct SwrContext *audio_swr_ctx; double audio_clock; double video_clock; ///<pts of last decoded frame / predicted pts of next decoded frame double frame_timer; double frame_last_pts; double frame_last_delay; AVStream *video_st; AVCodecContext *video_ctx; PacketQueue videoq; struct SwsContext *video_sws_ctx; VideoPicture pictq[VIDEO_PICTURE_QUEUE_SIZE]; int pictq_size, pictq_rindex, pictq_windex; SDL_mutex *pictq_mutex; SDL_cond *pictq_cond; SDL_Thread *parse_tid; SDL_Thread *video_tid; char filename[1024]; int quit; } VideoState; SDL_mutex *text_mutex; SDL_Window *win; SDL_Renderer *renderer; SDL_Texture *texture; /* Since we only have one decoding thread, the Big Struct can be global in case we need it. */ VideoState *global_video_state; void packet_queue_init(PacketQueue *q) { memset(q, 0, sizeof(PacketQueue)); q->mutex = SDL_CreateMutex(); q->cond = SDL_CreateCond(); } int packet_queue_put(PacketQueue *q, AVPacket *pkt) { AVPacketList *pkt1; if(av_dup_packet(pkt) < 0) { return -1; } pkt1 = av_malloc(sizeof(AVPacketList)); if (!pkt1) return -1; pkt1->pkt = *pkt; pkt1->next = NULL; SDL_LockMutex(q->mutex); if (!q->last_pkt) q->first_pkt = pkt1; else q->last_pkt->next = pkt1; q->last_pkt = pkt1; q->nb_packets++; q->size += pkt1->pkt.size; SDL_CondSignal(q->cond); SDL_UnlockMutex(q->mutex); return 0; } int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block) { AVPacketList *pkt1; int ret; SDL_LockMutex(q->mutex); for(;;) { if(global_video_state->quit) { ret = -1; break; } pkt1 = q->first_pkt; if (pkt1) { q->first_pkt = pkt1->next; if (!q->first_pkt) q->last_pkt = NULL; q->nb_packets--; q->size -= pkt1->pkt.size; *pkt = pkt1->pkt; av_free(pkt1); ret = 1; break; } else if (!block) { ret = 0; break; } else { SDL_CondWait(q->cond, q->mutex); } } SDL_UnlockMutex(q->mutex); return ret; } double get_audio_clock(VideoState *is) { double pts; int hw_buf_size, bytes_per_sec, n; pts = is->audio_clock; /* maintained in the audio thread */ hw_buf_size = is->audio_buf_size - is->audio_buf_index; bytes_per_sec = 0; n = is->audio_ctx->channels * 2; if(is->audio_st) { bytes_per_sec = is->audio_ctx->sample_rate * n; } if(bytes_per_sec) { pts -= (double)hw_buf_size / bytes_per_sec; } return pts; } int audio_decode_frame(VideoState *is, uint8_t *audio_buf, int buf_size, double *pts_ptr) { int len1, data_size = 0; AVPacket *pkt = &is->audio_pkt; double pts; int n; for(;;) { while(is->audio_pkt_size > 0) { int got_frame = 0; len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt); if(len1 < 0) { /* if error, skip frame */ is->audio_pkt_size = 0; break; } data_size = 0; if(got_frame) { /* data_size = av_samples_get_buffer_size(NULL, is->audio_ctx->channels, is->audio_frame.nb_samples, is->audio_ctx->sample_fmt, 1); */ data_size = 2 * is->audio_frame.nb_samples * 2; assert(data_size <= buf_size); swr_convert(is->audio_swr_ctx, &audio_buf, MAX_AUDIO_FRAME_SIZE*3/2, (const uint8_t **)is->audio_frame.data, is->audio_frame.nb_samples); //memcpy(audio_buf, is->audio_frame.data[0], data_size); } is->audio_pkt_data += len1; is->audio_pkt_size -= len1; if(data_size <= 0) { /* No data yet, get more frames */ continue; } pts = is->audio_clock; *pts_ptr = pts; n = 2 * is->audio_ctx->channels; is->audio_clock += (double)data_size / (double)(n * is->audio_ctx->sample_rate); /* We have data, return it and come back for more later */ return data_size; } if(pkt->data) av_free_packet(pkt); if(is->quit) { return -1; } /* next packet */ if(packet_queue_get(&is->audioq, pkt, 1) < 0) { return -1; } is->audio_pkt_data = pkt->data; is->audio_pkt_size = pkt->size; /* if update, update the audio clock w/pts */ if(pkt->pts != AV_NOPTS_VALUE) { is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts; } } } void audio_callback(void *userdata, Uint8 *stream, int len) { VideoState *is = (VideoState *)userdata; int len1, audio_size; double pts; SDL_memset(stream, 0, len); while(len > 0) { if(is->audio_buf_index >= is->audio_buf_size) { /* We have already sent all our data; get more */ audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts); if(audio_size < 0) { /* If error, output silence */ is->audio_buf_size = 1024 * 2 * 2; memset(is->audio_buf, 0, is->audio_buf_size); } else { is->audio_buf_size = audio_size; } is->audio_buf_index = 0; } len1 = is->audio_buf_size - is->audio_buf_index; if(len1 > len) len1 = len; SDL_MixAudio(stream,(uint8_t *)is->audio_buf + is->audio_buf_index, len1, SDL_MIX_MAXVOLUME); //memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1); len -= len1; stream += len1; is->audio_buf_index += len1; } } static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) { SDL_Event event; event.type = FF_REFRESH_EVENT; event.user.data1 = opaque; SDL_PushEvent(&event); return 0; /* 0 means stop timer */ } /* schedule a video refresh in 'delay' ms */ static void schedule_refresh(VideoState *is, int delay) { SDL_AddTimer(delay, sdl_refresh_timer_cb, is); } void video_display(VideoState *is) { SDL_Rect rect; VideoPicture *vp; float aspect_ratio; int w, h, x, y; int i; vp = &is->pictq[is->pictq_rindex]; if(vp->bmp) { SDL_UpdateYUVTexture( texture, NULL, vp->bmp->data[0], vp->bmp->linesize[0], vp->bmp->data[1], vp->bmp->linesize[1], vp->bmp->data[2], vp->bmp->linesize[2]); rect.x = 0; rect.y = 0; rect.w = is->video_ctx->width; rect.h = is->video_ctx->height; SDL_LockMutex(text_mutex); SDL_RenderClear( renderer ); SDL_RenderCopy( renderer, texture, NULL, &rect); SDL_RenderPresent( renderer ); SDL_UnlockMutex(text_mutex); } } void video_refresh_timer(void *userdata) { VideoState *is = (VideoState *)userdata; VideoPicture *vp; double actual_delay, delay, sync_threshold, ref_clock, diff; if(is->video_st) { if(is->pictq_size == 0) { schedule_refresh(is, 1); } else { vp = &is->pictq[is->pictq_rindex]; delay = vp->pts - is->frame_last_pts; /* the pts from last time */ if(delay <= 0 || delay >= 1.0) { /* if incorrect delay, use previous one */ delay = is->frame_last_delay; } /* save for next time */ is->frame_last_delay = delay; is->frame_last_pts = vp->pts; /* update delay to sync to audio */ ref_clock = get_audio_clock(is); diff = vp->pts - ref_clock; /* Skip or repeat the frame. Take delay into account FFPlay still doesn't "know if this is the best guess." */ sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD; if(fabs(diff) < AV_NOSYNC_THRESHOLD) { if(diff <= -sync_threshold) { delay = 0; } else if(diff >= sync_threshold) { delay = 2 * delay; } } is->frame_timer += delay; /* computer the REAL delay */ actual_delay = is->frame_timer - (av_gettime() / 1000000.0); if(actual_delay < 0.010) { /* Really it should skip the picture instead */ actual_delay = 0.010; } schedule_refresh(is, (int)(actual_delay * 1000 + 0.5)); /* show the picture! */ video_display(is); /* update queue for next picture! */ if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) { is->pictq_rindex = 0; } SDL_LockMutex(is->pictq_mutex); is->pictq_size--; SDL_CondSignal(is->pictq_cond); SDL_UnlockMutex(is->pictq_mutex); } } else { schedule_refresh(is, 100); } } void alloc_picture(void *userdata) { int ret = -1; VideoState *is = (VideoState *)userdata; VideoPicture *vp; vp = &is->pictq[is->pictq_windex]; if(vp->bmp) { // we already have one make another, bigger/smaller avpicture_free(vp->bmp); free(vp->bmp); vp->bmp = NULL; } // Allocate a place to put our YUV image on that screen SDL_LockMutex(text_mutex); vp->bmp = (AVPicture*)malloc(sizeof(AVPicture)); ret = avpicture_alloc(vp->bmp, AV_PIX_FMT_YUV420P, is->video_ctx->width, is->video_ctx->height); if (ret < 0) { fprintf(stderr, "Could not allocate temporary picture: %s\n", av_err2str(ret)); } SDL_UnlockMutex(text_mutex); vp->width = is->video_ctx->width; vp->height = is->video_ctx->height; vp->allocated = 1; } int queue_picture(VideoState *is, AVFrame *pFrame, double pts) { VideoPicture *vp; /* wait until we have space for a new pic */ SDL_LockMutex(is->pictq_mutex); while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE && !is->quit) { SDL_CondWait(is->pictq_cond, is->pictq_mutex); } SDL_UnlockMutex(is->pictq_mutex); if(is->quit) return -1; // windex is set to 0 initially vp = &is->pictq[is->pictq_windex]; /* allocate or resize the buffer! */ if(!vp->bmp || vp->width != is->video_ctx->width || vp->height != is->video_ctx->height) { vp->allocated = 0; alloc_picture(is); if(is->quit) { return -1; } } /* We have a place to put our picture on the queue */ if(vp->bmp) { vp->pts = pts; // Convert the image into YUV format that SDL uses sws_scale(is->video_sws_ctx, (uint8_t const * const *)pFrame->data, pFrame->linesize, 0, is->video_ctx->height, vp->bmp->data, vp->bmp->linesize); /* now we inform our display thread that we have a pic ready */ if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) { is->pictq_windex = 0; } SDL_LockMutex(is->pictq_mutex); is->pictq_size++; SDL_UnlockMutex(is->pictq_mutex); } return 0; } double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) { double frame_delay; if(pts != 0) { /* if we have pts, set video clock to it */ is->video_clock = pts; } else { /* if we aren't given a pts, set it to the clock */ pts = is->video_clock; } /* update the video clock */ frame_delay = av_q2d(is->video_ctx->time_base); /* if we are repeating a frame, adjust clock accordingly */ frame_delay += src_frame->repeat_pict * (frame_delay * 0.5); is->video_clock += frame_delay; return pts; } int decode_video_thread(void *arg) { VideoState *is = (VideoState *)arg; AVPacket pkt1, *packet = &pkt1; int frameFinished; AVFrame *pFrame; double pts; pFrame = av_frame_alloc(); for(;;) { if(packet_queue_get(&is->videoq, packet, 1) < 0) { // means we quit getting packets break; } pts = 0; // Decode video frame avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet); if((pts = av_frame_get_best_effort_timestamp(pFrame)) == AV_NOPTS_VALUE) { pts = 0; } pts *= av_q2d(is->video_st->time_base); // Did we get a video frame? if(frameFinished) { pts = synchronize_video(is, pFrame, pts); if(queue_picture(is, pFrame, pts) < 0) { break; } } av_free_packet(packet); } av_frame_free(&pFrame); return 0; } int stream_component_open(VideoState *is, int stream_index) { AVFormatContext *pFormatCtx = is->pFormatCtx; AVCodecContext *codecCtx = NULL; AVCodec *codec = NULL; SDL_AudioSpec wanted_spec, spec; if(stream_index < 0 || stream_index >= pFormatCtx->nb_streams) { return -1; } codecCtx = avcodec_alloc_context3(NULL); int ret = avcodec_parameters_to_context(codecCtx, pFormatCtx->streams[stream_index]->codecpar); if (ret < 0) return -1; codec = avcodec_find_decoder(codecCtx->codec_id); if(!codec) { fprintf(stderr, "Unsupported codec!\n"); return -1; } if(codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) { // Set audio settings from codec info wanted_spec.freq = codecCtx->sample_rate; wanted_spec.format = AUDIO_S16SYS; wanted_spec.channels = 2;//codecCtx->channels; wanted_spec.silence = 0; wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE; wanted_spec.callback = audio_callback; wanted_spec.userdata = is; if(SDL_OpenAudio(&wanted_spec, &spec) < 0) { fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError()); return -1; } is->audio_hw_buf_size = spec.size; } if(avcodec_open2(codecCtx, codec, NULL) < 0) { fprintf(stderr, "Unsupported codec!\n"); return -1; } switch(codecCtx->codec_type) { case AVMEDIA_TYPE_AUDIO: is->audioStream = stream_index; is->audio_st = pFormatCtx->streams[stream_index]; is->audio_ctx = codecCtx; is->audio_buf_size = 0; is->audio_buf_index = 0; memset(&is->audio_pkt, 0, sizeof(is->audio_pkt)); packet_queue_init(&is->audioq); //Out Audio Param uint64_t out_channel_layout=AV_CH_LAYOUT_STEREO; //AAC:1024 MP3:1152 int out_nb_samples= is->audio_ctx->frame_size; //AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16; int out_sample_rate=is->audio_ctx->sample_rate; int out_channels=av_get_channel_layout_nb_channels(out_channel_layout); //Out Buffer Size /* int out_buffer_size=av_samples_get_buffer_size(NULL, out_channels, out_nb_samples, AV_SAMPLE_FMT_S16, 1); */ //uint8_t *out_buffer=(uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE*2); int64_t in_channel_layout=av_get_default_channel_layout(is->audio_ctx->channels); struct SwrContext *audio_convert_ctx; audio_convert_ctx = swr_alloc(); swr_alloc_set_opts(audio_convert_ctx, out_channel_layout, AV_SAMPLE_FMT_S16, out_sample_rate, in_channel_layout, is->audio_ctx->sample_fmt, is->audio_ctx->sample_rate, 0, NULL); fprintf(stderr, "swr opts: out_channel_layout:%lld, out_sample_fmt:%d, out_sample_rate:%d, in_channel_layout:%lld, in_sample_fmt:%d, in_sample_rate:%d", out_channel_layout, AV_SAMPLE_FMT_S16, out_sample_rate, in_channel_layout, is->audio_ctx->sample_fmt, is->audio_ctx->sample_rate); swr_init(audio_convert_ctx); is->audio_swr_ctx = audio_convert_ctx; SDL_PauseAudio(0); break; case AVMEDIA_TYPE_VIDEO: is->videoStream = stream_index; is->video_st = pFormatCtx->streams[stream_index]; is->video_ctx = codecCtx; is->frame_timer = (double)av_gettime() / 1000000.0; is->frame_last_delay = 40e-3; packet_queue_init(&is->videoq); is->video_sws_ctx = sws_getContext(is->video_ctx->width, is->video_ctx->height, is->video_ctx->pix_fmt, is->video_ctx->width, is->video_ctx->height, AV_PIX_FMT_YUV420P, SWS_BILINEAR, NULL, NULL, NULL ); is->video_tid = SDL_CreateThread(decode_video_thread, "decode_video_thread", is); break; default: break; } } int demux_thread(void *arg) { Uint32 pixformat; VideoState *is = (VideoState *)arg; AVFormatContext *pFormatCtx; AVPacket pkt1, *packet = &pkt1; int video_index = -1; int audio_index = -1; int i; is->videoStream=-1; is->audioStream=-1; global_video_state = is; // Open video file if(avformat_open_input(&pFormatCtx, is->filename, NULL, NULL)!=0) return -1; // Couldn't open file is->pFormatCtx = pFormatCtx; // Retrieve stream information if(avformat_find_stream_info(pFormatCtx, NULL)<0) return -1; // Couldn't find stream information // Dump information about file onto standard error av_dump_format(pFormatCtx, 0, is->filename, 0); // Find the first video stream for(i=0; i<pFormatCtx->nb_streams; i++) { if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO && video_index < 0) { video_index=i; } if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO && audio_index < 0) { audio_index=i; } } if(audio_index >= 0) { stream_component_open(is, audio_index); } if(video_index >= 0) { stream_component_open(is, video_index); } if(is->videoStream < 0 || is->audioStream < 0) { fprintf(stderr, "%s: could not open codecs\n", is->filename); goto fail; } win = SDL_CreateWindow("Media Player", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, is->video_ctx->width, is->video_ctx->height, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); renderer = SDL_CreateRenderer(win, -1, 0); pixformat = SDL_PIXELFORMAT_IYUV; texture = SDL_CreateTexture(renderer, pixformat, SDL_TEXTUREACCESS_STREAMING, is->video_ctx->width, is->video_ctx->height); // main decode loop for(;;) { if(is->quit) { SDL_CondSignal(is->videoq.cond); SDL_CondSignal(is->audioq.cond); break; } // seek stuff goes here if(is->audioq.size > MAX_AUDIOQ_SIZE || is->videoq.size > MAX_VIDEOQ_SIZE) { SDL_Delay(10); continue; } if(av_read_frame(is->pFormatCtx, packet) < 0) { if(is->pFormatCtx->pb->error == 0) { SDL_Delay(100); /* no error; wait for user input */ continue; } else { break; } } // Is this a packet from the video stream? if(packet->stream_index == is->videoStream) { packet_queue_put(&is->videoq, packet); } else if(packet->stream_index == is->audioStream) { packet_queue_put(&is->audioq, packet); } else { av_free_packet(packet); } } /* all done - wait for it */ while(!is->quit) { SDL_Delay(100); } fail: if(1){ SDL_Event event; event.type = FF_QUIT_EVENT; event.user.data1 = is; SDL_PushEvent(&event); } return 0; } int main(int argc, char *argv[]) { int ret = -1; SDL_Event event; VideoState *is; is = av_mallocz(sizeof(VideoState)); if(argc < 2) { fprintf(stderr, "Usage: test <file>\n"); exit(1); } // Register all formats and codecs av_register_all(); if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) { fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError()); exit(1); } text_mutex = SDL_CreateMutex(); av_strlcpy(is->filename, argv[1], sizeof(is->filename)); is->pictq_mutex = SDL_CreateMutex(); is->pictq_cond = SDL_CreateCond(); schedule_refresh(is, 40); is->parse_tid = SDL_CreateThread(demux_thread, "demux_thread", is); if(!is->parse_tid) { av_free(is); goto __FAIL; } for(;;) { SDL_WaitEvent(&event); switch(event.type) { case FF_QUIT_EVENT: case SDL_QUIT: is->quit = 1; //SDL_Quit(); //return 0; goto __QUIT; break; case FF_REFRESH_EVENT: video_refresh_timer(event.user.data1); break; default: break; } } __QUIT: ret = 0; __FAIL: SDL_Quit(); /* if(audiofd){ fclose(audiofd); } if(audiofd1){ fclose(audiofd1); } */ return ret; }