基于RTP协议的H264播放器
1. 概述
前面记录了一篇基于RTP协议的H264的推流器、接收器的实现过程,但是没有加上解码播放,这里记录一下如何实现解码和播放,也是在前面的基础之上实现的。前一篇的记录为【开源项目】基于RTP协议的H264码流发送器和接收器
在前文中,接收器将接收到的一系列数据包进行解析,并分成了一个个完整的帧,存储在内存之中。下面要将这些完整的帧进行解码成为yuv,并且播放。因此,需要添加解码和播放部分的代码。工程的代码结构为
工程的核心函数是udp_receive_packet(),这个函数的主要工作流程为:
(1)使用recvfrom()来接收数据流
(2)使用check_fragment()对数据流进行解析,并且拷贝到本地内存中
(3)使用h264_parse_packet()来解码所获取的数据,并且使用SDL进行播放
在处理接收的数据流时,我是按照一整个压缩帧进行存储的,例如存储一个完整的Intra帧或者一个完整的P帧,不包含后续帧的信息。然而,使用av_parser_parse2()进行分析时,会首先去寻找下一帧的起始地址来确定当前帧是否已经完整输入,如果没有找到,则很多分析流程不会执行。我在这里用了一个小技巧,在数据内存的最末尾加上一个伪起始码,让av_parser_parse2()确认已经接收了所有的数据,从而进行后续的分析
PS:不过其实这样写并不通用,只是为了配合我的整帧存储方式进行的微调。如果要实现通用的解码,在接收时去掉RTP的头,送入到av_parser_parse2()就可以了
2.工程
头文件的定义中,包括rtp header和rtp packet的定义,还定义了一个全局上下文信息结构体rtp_sdl_context_t
#pragma once #include <stdio.h> #include <WinSock2.h> #include <string.h> #include <stdint.h> #include <inttypes.h> extern "C" { #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libswscale/swscale.h" #include "libavutil/imgutils.h" #include "SDL2/SDL.h" }; #undef main #define RECV_DATA_SIZE 10000 #define MAX_BUFF_SIZE 32 * 1024 * 1024 #define RTP_MAX_PKT_SIZE 1400 // RTP数据包最大为 #define RTP_HEADER_SIZE 12 #define RTP_PADDING_SIZE 64 #define RTP_PACKET_START 1 #define RTP_PACKET_FRAGMENT 2 #define RTP_PACKET_END 3 #define RECV_STREAM_DOWNLOAD 0 #define RECV_YUV_DOWNLOAD 0 typedef struct rtp_header { // 存储时高位存储的是version /* byte 0 */ uint8_t csrc_len : 4; /* expect 0 */ uint8_t extension : 1; /* expect 1 */ uint8_t padding : 1; /* expect 0 */ uint8_t version : 2; /* expect 2 */ /* byte 1 */ uint8_t payload_type : 7; uint8_t marker : 1; /* expect 1 */ /* bytes 2, 3 */ uint16_t seq_num; /* bytes 4-7 */ uint32_t timestamp; /* bytes 8-11 */ uint32_t ssrc; /* stream number is used here. */ }rtp_header_t; typedef struct rtp_packet { rtp_header_t rtp_h; uint8_t rtp_data[RTP_MAX_PKT_SIZE + RTP_PADDING_SIZE]; }rtp_packet_t; typedef struct rtp_context { int rtp_packet_cnt; int rtp_buffer_size; int rtp_frame_cnt; int packet_loc; // uint8_t* rtp_buffer_data; }rtp_context_t; typedef struct rtp_sdl_context { // video param const AVCodec* codec; AVCodecContext* codec_ctx; AVCodecParserContext* parser_ctx ; AVFrame* frame; SwsContext* img_convert_ctx; // SDL param SDL_Window* window; SDL_Renderer* render; SDL_Texture* texture; SDL_Rect rect; }rtp_sdl_context_t;
cpp文件的定义和前文类似,只是增加了一些FFmpeg解码函数和SDL播放函数,重要部分有注释
#pragma warning(disable:4996) #pragma comment(lib,"ws2_32.lib") #include "include/udp_rtp_decode_sdl.h" FILE* fp_yuv; //int avc_init(const AVCodec* codec, AVCodecContext* codec_ctx, AVCodecParserContext* parser, AVFrame* frame) int avc_init(rtp_sdl_context_t* rsc) { AVCodecID codec_id = AV_CODEC_ID_H264; rsc->codec = avcodec_find_decoder(codec_id); if (!rsc->codec) { printf("find decoder failed\n"); return -1; } rsc->codec_ctx = avcodec_alloc_context3(rsc->codec); if (!rsc->codec_ctx) { printf("alloc context3 failed\n"); return -1; } rsc->parser_ctx = av_parser_init(codec_id); if (!rsc->parser_ctx) { printf("parser ctx init failed\n"); return -1; } rsc->frame = av_frame_alloc(); if (!rsc->frame) { printf("alloc frame failed\n"); return -1; } if (avcodec_open2(rsc->codec_ctx, rsc->codec, NULL) < 0) { printf("Could not open codec\n"); return -1; } return 0; } int sdl_init(rtp_sdl_context_t* rsc) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) { printf("could not init sdl\n"); return -1; } const int screen_w = 1280, screen_h = 720; const int pixel_w = 1280, pixel_h = 720; //SDL 2.0 Support for multiple windows rsc->window = SDL_CreateWindow("Play", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, screen_w, screen_h, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); if (!rsc->window) { printf("SDL: could not create window - exiting:%s\n", SDL_GetError()); return -1; } rsc->render = SDL_CreateRenderer(rsc->window, -1, 0); int pixformat = SDL_PIXELFORMAT_IYUV; rsc->texture = SDL_CreateTexture(rsc->render, pixformat, SDL_TEXTUREACCESS_STREAMING, pixel_w, pixel_h); int border = 0; rsc->rect.x = 0 + border; rsc->rect.y = 0 + border; rsc->rect.w = screen_w - border * 2; rsc->rect.h = screen_h - border * 2; } void av_free_all(rtp_sdl_context_t* rsc) { avcodec_free_context(&rsc->codec_ctx); av_parser_close(rsc->parser_ctx); av_frame_free(&rsc->frame); } int check_nalu_header(uint8_t data0) { int forbidden_zero_bit = data0 & 0x80; // 1bit int nal_ref_idc = data0 & 
0x60; // 2 bit int nal_unit_type = data0 & 0x1F; // 5bit if ((data0 & 0x80) == 1) { printf("forbidden zero bit should be 0\n"); return -1; } // printf("forbidden_zero_bit:%d, nal_ref_idc:%d, nal_unit_type:%d\n", forbidden_zero_bit, nal_ref_idc, nal_unit_type); return nal_unit_type; } int check_fragment_nalu_header(rtp_context_t* rtp_ctx, uint8_t data0, uint8_t data1) { int nal_unit_type = check_nalu_header(data0); int s, e, type; int pos; if (nal_unit_type == 28) // H264 { s = data1 & 0x80; // S e = data1 & 0x40; // E type = data1 & 0x1F; // type pos = data1 & 0xC0; // 1100 0000 switch (pos) { case 0x80: rtp_ctx->packet_loc = RTP_PACKET_START; break; case 0x40: rtp_ctx->packet_loc = RTP_PACKET_END; break; case 0x00: rtp_ctx->packet_loc = RTP_PACKET_FRAGMENT; break; default: // error printf("invalid packet loc\n"); return -1; break; } } return 0; } int find_nal_unit(uint8_t* buf, int size, int* nal_start, int* nal_end) { int i; // find start *nal_start = 0; *nal_end = 0; i = 0; while ( //( next_bits( 24 ) != 0x000001 && next_bits( 32 ) != 0x00000001 ) (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) && (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0 || buf[i + 3] != 0x01) ) { i++; // skip leading zero if (i + 4 >= size) { return 0; } // did not find nal start } if (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) // ( next_bits( 24 ) != 0x000001 ) { i++; } if (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) { /* error, should never happen */ return 0; } i += 3; *nal_start = i; while ( //( next_bits( 24 ) != 0x000000 && next_bits( 24 ) != 0x000001 ) (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0) && (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) ) { i++; // FIXME the next line fails when reading a nal that ends exactly at the end of the data if (i + 3 >= size) { *nal_end = size; return -1; } // did not find nal end, stream ended first } *nal_end = i; return (*nal_end - *nal_start); } void set_default_rtp_context(rtp_context_t* rtp_ctx) 
{ memset(rtp_ctx->rtp_buffer_data, 0, sizeof(rtp_ctx->rtp_buffer_size)); rtp_ctx->rtp_packet_cnt = 0; rtp_ctx->rtp_buffer_size = 0; rtp_ctx->packet_loc = 0; } // Check the data is fragment or not, if fragment, try to concate int check_fragment(rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt, uint8_t* data, int size) { int nal_start, nal_end; int ret = 0; int data_size = size - RTP_HEADER_SIZE; find_nal_unit(data, data_size, &nal_start, &nal_end); // check NALU split pos uint8_t data0 = data[nal_start]; uint8_t data1 = data[nal_start + 1]; uint8_t fu_indicator, fu_header; if (nal_start > 0 && nal_start < 5) // single-fragment, maybe SPS, PPS or small size frame { fu_indicator = 0; fu_header = 0; ret = check_nalu_header(data0); // update nalu_type rtp_ctx->rtp_buffer_data = (uint8_t*)realloc(rtp_ctx->rtp_buffer_data, (rtp_ctx->rtp_buffer_size + data_size) * sizeof(uint8_t)); memcpy(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, data, data_size); #if STREAM_DOWNLOAD fwrite(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, 1, data_size, fp_in); #endif fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, data_size); rtp_ctx->rtp_frame_cnt++; rtp_ctx->rtp_buffer_size += data_size; } else // multi-fragment { fu_indicator = data[0]; fu_header = data[1]; ret = check_fragment_nalu_header(rtp_ctx, fu_indicator, fu_header); if (ret < 0) { printf("invalid nalu header\n"); return -1; } int real_data_size = data_size - 2; rtp_ctx->rtp_buffer_data = (uint8_t*)realloc(rtp_ctx->rtp_buffer_data, (rtp_ctx->rtp_buffer_size + real_data_size) * sizeof(uint8_t)); if (!rtp_ctx->rtp_buffer_data) { printf("realloc rtp_buffer_data failed\n"); return -1; } memcpy(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, data + 2, real_data_size); // plus 2 to skip fu_indicator and fu_header #if STREAM_DOWNLOAD fwrite(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, 1, real_data_size, fp_in); fflush(fp_in); #endif rtp_ctx->rtp_packet_cnt++; 
rtp_ctx->rtp_buffer_size += real_data_size; if (rtp_ctx->packet_loc == RTP_PACKET_END) // end of packet { fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, rtp_ctx->rtp_buffer_size); rtp_ctx->rtp_frame_cnt++; } } return 0; } // 伪造起始码 int forge_end_code(uint8_t* data, int size) { data = (uint8_t*)realloc(data, (size + 6) * sizeof(uint8_t)); if (!data) { printf("realloc end code failed\n"); return -1; } data[size] = 0x00; data[size + 1] = 0x00; data[size + 2] = 0x00; data[size + 3] = 0x01; data[size + 4] = 0x41; data[size + 5] = 0x9A; size += 6; return size; } int h264_parse_packet(rtp_sdl_context_t* rsc, rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt) { AVPacket* packet; int ret = 0; packet = av_packet_alloc(); if (!packet) { printf("alloc packet failed\n"); return -1; } // 添加伪起始码 uint8_t* buf_data = rtp_ctx->rtp_buffer_data; int data_size = rtp_ctx->rtp_buffer_size; data_size = forge_end_code(buf_data, data_size); ret = av_parser_parse2(rsc->parser_ctx, rsc->codec_ctx, &packet->data, &packet->size, buf_data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0); if (ret < 0) { printf("parse packet failed, err:%d\n", ret); return -1; } ret = avcodec_send_packet(rsc->codec_ctx, packet); if (ret < 0) { printf("send packet failed\n"); return -1; } rsc->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P; rsc->img_convert_ctx = sws_getContext(rsc->codec_ctx->width, rsc->codec_ctx->height, rsc->codec_ctx->pix_fmt, rsc->codec_ctx->width, rsc->codec_ctx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL); while (ret >= 0) { ret = avcodec_receive_frame(rsc->codec_ctx, rsc->frame); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) return -1; else if (ret < 0) { fprintf(stderr, "Error during decoding\n"); exit(1); } //printf("saving frame:%d\n", rsc->codec_ctx->frame_num); fflush(stdout); #if RECV_YUV_DOWNLOAD int size = rsc->frame->width * rsc->frame->height; fwrite(rsc->frame->data[0], 1, size, fp_yuv);//Y fwrite(rsc->frame->data[1], 1, size / 4, 
fp_yuv);//U fwrite(rsc->frame->data[2], 1, size / 4, fp_yuv);//V fflush(fp_yuv); #endif sws_scale(rsc->img_convert_ctx, (const unsigned char* const*)rsc->frame->data, rsc->frame->linesize, 0, rsc->codec_ctx->height, rsc->frame->data, rsc->frame->linesize); // SDL播放 SDL_UpdateYUVTexture(rsc->texture, &rsc->rect, rsc->frame->data[0], rsc->frame->linesize[0], rsc->frame->data[1], rsc->frame->linesize[1], rsc->frame->data[2], rsc->frame->linesize[2]); SDL_RenderClear(rsc->render); SDL_RenderCopy(rsc->render, rsc->texture, NULL, &rsc->rect); SDL_RenderPresent(rsc->render); SDL_Delay(40); // delay 40ms } return 0; } // 接收数据包 int udp_recevie_packet(rtp_sdl_context_t* rsc, const char* url, int port) { WSADATA wsaData; WORD sockVersion = MAKEWORD(2, 2); int cnt = 0; if (WSAStartup(sockVersion, &wsaData) != 0) { return 0; } SOCKET ser_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); if (ser_socket == INVALID_SOCKET) { ERROR("Invalid socket"); return -1; } int on = 1; setsockopt(ser_socket, SOL_SOCKET, SO_REUSEADDR, (const char*)& on, sizeof(on)); sockaddr_in ser_addr; ser_addr.sin_family = AF_INET; ser_addr.sin_port = htons(port); ser_addr.sin_addr.s_addr = inet_addr(url); if (bind(ser_socket, (sockaddr*)& ser_addr, sizeof(ser_addr)) == SOCKET_ERROR) { printf("Bind socket addr error\n"); closesocket(ser_socket); return -1; } sockaddr_in remote_addr; int addr_len = sizeof(remote_addr); char recv_data[RECV_DATA_SIZE]; rtp_context_t* rtp_ctx = (rtp_context_t*)calloc(1, sizeof(rtp_context_t)); if (!rtp_ctx) { printf("alloc rtp_ctx failed\n"); return -1; } rtp_packet_t* rtp_pkt = (rtp_packet_t*)calloc(1, sizeof(rtp_packet_t)); if (!rtp_pkt) { printf("alloc rtp_pkt failed\n"); return -1; } fprintf(stdout, "Listening on port:%d\n", port); while (1) { // recvfrom接收传输过来的数据 int pkt_size = recvfrom(ser_socket, recv_data, RECV_DATA_SIZE, 0, (sockaddr*)& remote_addr, &addr_len); if (pkt_size > 0) { memcpy(rtp_pkt, recv_data, pkt_size); check_fragment(rtp_ctx, rtp_pkt, 
rtp_pkt->rtp_data, pkt_size); // check pkt data is fragment or not rtp_header_t rtp_h = rtp_pkt->rtp_h; char payload = rtp_h.payload_type; if (rtp_ctx->packet_loc == RTP_PACKET_END) { switch (payload) { case 33: // mpegts // mpegts_packet_parse((uint8_t*)rtp_data, parse_mpegts, payload, rtp_data_size); // TODO: add mpegts parser printf("MPEGTS type\n"); break; case 96: // h264 //printf("payload type:%s\n", "H264"); // 进行h264码流的解析 h264_parse_packet(rsc, rtp_ctx, rtp_pkt); break; default: printf("Unknown type\n"); break; } // printf("[RTP PKT] %5d| %5s | %10u| %5d| %5d\n", cnt, payload_str, timestamp, seq_num, pkt_size); set_default_rtp_context(rtp_ctx); // set default rtp ctx value } } } } int main() { rtp_sdl_context_t* rsc = (rtp_sdl_context_t*)malloc(sizeof(rtp_sdl_context_t)); if (!rsc) { printf("malloc rsc failed\n"); return -1; } memset(rsc, 0, sizeof(rtp_sdl_context_t)); // 初始化参数 avc_init(rsc); sdl_init(rsc); // 如果要存储yuv信息可以设置为1 #if RECV_YUV_DOWNLOAD fp_yuv = fopen("rtp_receive_yuv.yuv", "wb"); #endif // 开始接收数据包 udp_recevie_packet(rsc, "127.0.0.1", 8880); av_free_all(rsc); #if RECV_YUV_DOWNLOAD fclose(fp_yuv); #endif return 0; }
3.测试
发送端
接收端
接收端播放正常,感觉可以后续改一改SDL的逻辑,让窗口变成可移动和可缩放的
4.小结
总体来说,这个功能的实现是比较简单的,不过使用了一个小的技巧,伪造了一个起始码,如果代码格式要求不严格,可以凑合着用。如果要做成大的工程,应该将多个packet直接送入解码器,这样比较合理,也更符合FFmpeg的设计原则