AAC ADTS格式分析
目标:从 MP4 格式的视频文件中抽取 AAC 音频流,并为每一帧加上 ADTS 头后单独保存为文件。

采样率

image.png
把采样率按其对应编号(即 ADTS 头中的 sampling_frequency_index)依次排列,整理为一个数组,数组下标就是该编号:

/*
 * Sample rates in Hz. The position of a rate in this table is the value
 * encode_adts_header() stores in the 4-bit sampling-frequency-index field,
 * so the order must not change. Indices 11 (8000 Hz) and 12 (7350 Hz)
 * complete the MPEG-4 audio table so low-rate streams are accepted too.
 */
static const int samplingFrequency[] = {
    96000, /* 0 */
    88200, /* 1 */
    64000, /* 2 */
    48000, /* 3 */
    44100, /* 4 */
    32000, /* 5 */
    24000, /* 6 */
    22050, /* 7 */
    16000, /* 8 */
    12000, /* 9 */
    11025, /* 10 */
    8000,  /* 11 */
    7350,  /* 12 */
};

组织 ADTS 头

image.png
image.png

void encode_adts_header(char* buf, const int data_length, int profile, int samplerate, int channels) {
    int sampling_frequency_index = 3;  // 默认48000频率
    int adtsLen = data_length + 7; // 包长+头长=总长
    
    int frequencies_size = sizeof(samplingFrequency) / sizeof(samplingFrequency[0]);
    int i = 0;
    // 匹配采样评率
    for (i = 0; i < frequencies_size; i++) {
        if (samplingFrequency[i] == samplerate) {
            sampling_frequency_index = i;
            break;
        }
    }
    if (i >= frequencies_size) {
        printf("unsupport samplerate: %d\n", samplerate);
        return -1;
    }

    buf[0] = 0xFF; // syncword 高8位
    buf[1] = 0xF0; // syncword 第8位
    buf[1] |= (0 << 3); // ID 0表示MPEG-4 
    buf[1] |= (0 << 2); // layer 00
    buf[1] |= (0 << 1); 
    buf[1] |= 1; // protection_absent 1 表示没有校验

    buf[2] = (profile << 6); // profile 支持哪个级别的AAC
    buf[2] |= (sampling_frequency_index & 0x0F) << 2; // 采样频率
    buf[2] |= (0 << 1); // private 0
    buf[2] |= (channels & 0x04) >> 2; // channels 声道 高1位

    buf[3] = (channels & 0x03) << 6; // channels 声道 低2位
    buf[3] |= (0 << 5);
    buf[3] |= (0 << 4);
    buf[3] |= (0 << 3);
    buf[3] |= (0 << 2);
    buf[3] |= (adtsLen & 0x1800) >> 11; // 包长高2位

    buf[4] = (uint8_t)((adtsLen & 0x7F8) >> 3); // 包长3-10位
    buf[5] = (uint8_t)((adtsLen & 0x7) << 5); // 包长最后三位
	
	// 0x7FF 可变码流
    buf[5] |= 0x1F;
    buf[6] = 0xFC;

    return 0;
}

读取 packet

// 运行带两个参数 分别时候输入文件和输出文件
if (argc < 3) {
        return -1;
    }

    const char* in_filepath = argv[1];
    const char* out_filepath = argv[2];

    int errbuf[1024] = {0};

    FILE* acc_fp = NULL;

    int ret = -1;

    AVFormatContext* ifmt_ctx = NULL; // 解码器上下文
    AVPacket pkt;

    av_log_set_level(AV_LOG_DEBUG); // 设置日志级别
    acc_fp = fopen(out_filepath, "wb"); // 打开文件准备写入
    if (!acc_fp) {
        av_log(NULL, AV_LOG_ERROR, "Open out_file error\n");
        return ret;
    }

    ret = avformat_open_input(&ifmt_ctx, in_filepath, NULL, NULL); // 输入源
    if (ret < 0) {
        av_strerror(ret, errbuf, 1024);
        av_log(NULL, AV_LOG_ERROR, "Could not open source file: %s, %d(%s)", in_filepath, ret, errbuf);
        return ret;
    }

    ret = avformat_find_stream_info(ifmt_ctx, NULL);
    if (ret < 0) {
        av_strerror(ret, errbuf, 1024);
        av_log(NULL, AV_LOG_ERROR, "Find stream error: %d(%s)\n", ret, errbuf);
        return ret;
    }

    av_dump_format(ifmt_ctx, NULL, in_filepath, NULL); // dump文件到上下文

    // pkt = av_packet_alloc();
    av_init_packet(&pkt);
	
	 // 找到音频流
    int audio_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);

    if(audio_index < 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n",
               av_get_media_type_string(AVMEDIA_TYPE_AUDIO),
               in_filepath);
        return AVERROR(EINVAL);
    }
	
	// 判断音频流是不是aac
    if (ifmt_ctx->streams[audio_index]->codecpar->codec_id != AV_CODEC_ID_AAC) {
        printf("The media file no contain AAC stream, it's codec_id is %d\n", ifmt_ctx->streams[audio_index]->codecpar->codec_id);
        goto failed;
    }

写入文件

// Copy every packet of the audio stream to the output, each one preceded by its own ADTS header.
while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == audio_index) {
            char adts_header_buf[7] = {0};

            encode_adts_header(adts_header_buf, pkt.size,
                                ifmt_ctx->streams[audio_index]->codecpar->profile,
                                ifmt_ctx->streams[audio_index]->codecpar->sample_rate,
                                ifmt_ctx->streams[audio_index]->codecpar->channels);

            // encode_adts_header() leaves the zeroed buffer untouched when it
            // rejects the stream parameters, so a missing 0xFF sync byte means
            // no header was produced; writing it would corrupt the output.
            if ((unsigned char)adts_header_buf[0] != 0xFF) {
                av_log(NULL, AV_LOG_ERROR, "Could not build ADTS header, stop writing\n");
                ret = AVERROR(EINVAL);
                av_packet_unref(&pkt);
                break;
            }

            // fwrite() returns size_t; check the header as well as the payload.
            size_t header_written = fwrite(adts_header_buf, 1, sizeof(adts_header_buf), acc_fp);
            size_t payload_written = fwrite(pkt.data, 1, (size_t)pkt.size, acc_fp);

            if (header_written != sizeof(adts_header_buf) || payload_written != (size_t)pkt.size) {
                av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal\n");
            }
        }

        av_packet_unref(&pkt);
    }

完整代码

#include <stdio.h>
#include <libavformat/avformat.h>

/*
 * Sample rates in Hz. The position of a rate in this table is the value
 * encode_adts_header() stores in the 4-bit sampling-frequency-index field,
 * so the order must not change. Indices 11 (8000 Hz) and 12 (7350 Hz)
 * complete the MPEG-4 audio table so low-rate streams are accepted too.
 */
static const int samplingFrequency[] = {
    96000, /* 0 */
    88200, /* 1 */
    64000, /* 2 */
    48000, /* 3 */
    44100, /* 4 */
    32000, /* 5 */
    24000, /* 6 */
    22050, /* 7 */
    16000, /* 8 */
    12000, /* 9 */
    11025, /* 10 */
    8000,  /* 11 */
    7350,  /* 12 */
};

void encode_adts_header(char* buf, const int data_length, int profile, int samplerate, int channels) {
    int sampling_frequency_index = 3;
    int adtsLen = data_length + 7;
    
    int frequencies_size = sizeof(samplingFrequency) / sizeof(samplingFrequency[0]);
    int i = 0;
    for (i = 0; i < frequencies_size; i++) {
        if (samplingFrequency[i] == samplerate) {
            sampling_frequency_index = i;
            break;
        }
    }
    if (i >= frequencies_size) {
        printf("unsupport samplerate: %d\n", samplerate);
        return -1;
    }

    buf[0] = 0xFF;
    buf[1] = 0xF0;
    buf[1] |= (0 << 3);
    buf[1] |= (0 << 2);
    buf[1] |= (0 << 1);
    buf[1] |= 1;

    buf[2] = (profile << 6);
    buf[2] |= (sampling_frequency_index & 0x0F) << 2;
    buf[2] |= (0 << 1);
    buf[2] |= (channels & 0x04) >> 2;

    buf[3] = (channels & 0x03) << 6;
    buf[3] |= (0 << 5);
    buf[3] |= (0 << 4);
    buf[3] |= (0 << 3);
    buf[3] |= (0 << 2);
    buf[3] |= (adtsLen & 0x1800) >> 11;

    buf[4] = (uint8_t)((adtsLen & 0x7F8) >> 3);
    buf[5] = (uint8_t)((adtsLen & 0x7) << 5);

    buf[5] |= 0x1F;
    buf[6] = 0xFC;

    return 0;
}

int main(int argc, char* argv[]) {
    
    if (argc < 3) {
        return -1;
    }

    const char* in_filepath = argv[1];
    const char* out_filepath = argv[2];

    int errbuf[1024] = {0};

    FILE* acc_fp = NULL;

    int ret = -1;

    AVFormatContext* ifmt_ctx = NULL;
    AVPacket pkt;

    av_log_set_level(AV_LOG_DEBUG);

    acc_fp = fopen(out_filepath, "wb");
    if (!acc_fp) {
        av_log(NULL, AV_LOG_ERROR, "Open out_file error\n");
        return ret;
    }

    ret = avformat_open_input(&ifmt_ctx, in_filepath, NULL, NULL);
    if (ret < 0) {
        av_strerror(ret, errbuf, 1024);
        av_log(NULL, AV_LOG_ERROR, "Could not open source file: %s, %d(%s)", in_filepath, ret, errbuf);
        return ret;
    }

    ret = avformat_find_stream_info(ifmt_ctx, NULL);
    if (ret < 0) {
        av_strerror(ret, errbuf, 1024);
        av_log(NULL, AV_LOG_ERROR, "Find stream error: %d(%s)\n", ret, errbuf);
        return ret;
    }

    av_dump_format(ifmt_ctx, NULL, in_filepath, NULL);

    // pkt = av_packet_alloc();
    av_init_packet(&pkt);

    int audio_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);

    if(audio_index < 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n",
               av_get_media_type_string(AVMEDIA_TYPE_AUDIO),
               in_filepath);
        return AVERROR(EINVAL);
    }

    if (ifmt_ctx->streams[audio_index]->codecpar->codec_id != AV_CODEC_ID_AAC) {
        printf("The media file no contain AAC stream, it's codec_id is %d\n", ifmt_ctx->streams[audio_index]->codecpar->codec_id);
        goto failed;
    }

    while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
        /* code */
        if (pkt.stream_index == audio_index) {
            char adts_header_buf[7] = {0};
            // adts_header
            encode_adts_header(adts_header_buf, pkt.size,
                                ifmt_ctx->streams[audio_index]->codecpar->profile,
                                ifmt_ctx->streams[audio_index]->codecpar->sample_rate,
                                ifmt_ctx->streams[audio_index]->codecpar->channels);
            fwrite(adts_header_buf, 1, 7, acc_fp);
            int len = fwrite(pkt.data, 1, pkt.size, acc_fp);

            if (len != pkt.size) {
                av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal\n");
            }
        }

        av_packet_unref(&pkt);
    }
    
failed:
    if (ifmt_ctx) {
        avformat_close_input(&ifmt_ctx);
    }
    if (acc_fp) {
        fclose(acc_fp);
    }
}