0
点赞
收藏
分享

微信扫一扫

【作业】AVI视频格式分析

王远洋 2022-03-21 阅读 82
音视频c++

AVI视频格式简介

(以Microsoft AVI格式为准)
‎Microsoft AVI 文件格式是一种 RIFF 文件规范,用于捕获、编辑和播放音频-视频序列的应用程序。通常,AVI文件包含不同类型数据的多个流。大多数AVI序列同时使用音频和视频流。AVI序列的简单变体使用视频数据,不需要音频流。‎
AVI视频中的数据以容器的形式存储,分为LIST和CHUNK两种,其中LIST可以认为是结构化的数据,LIST中可以嵌套LIST和CHUNK。
以下是一个格式示例:

RIFF ('AVI '
      LIST ('hdrl'
            'avih'(<Main AVI Header>)
            LIST ('strl'
                  'strh'(<Stream header>)
                  'strf'(<Stream format>)
                  [ 'strd'(<Additional header data>) ]
                  [ 'strn'(<Stream name>) ]
                  ...
                 )
             ...
           )
      LIST ('movi'
            {SubChunk | LIST ('rec '
                              SubChunk1
                              SubChunk2
                              ...
                             )
               ...
            }
            ...
           )
      ['idx1' (<AVI Index>) ]
     )

文件先以RIFF开头,紧跟一个dhrl的LIST,这个LIST存放了绝大部分元数据:avih存储了这个视频的主要头信息,紧跟着的一个strl的LIST则存储了视频中的媒体流信息,即一个或几个视频流、音频流。
avih表明是AVI的主要头文件,下面是AVI Main Header的结构体:

typedef struct {
    FOURCC fcc;
    DWORD cb;
    DWORD dwMicroSecPerFrame;

    DWORD dwMaxBytesPerSec;

    DWORD dwPaddingGranularity;
    DWORD dwFlags;
    
    DWORD dwTotalFrames;
    DWORD dwInitialFrames;

    DWORD dwStreams;

    DWORD dsSuggestedBufferSize;

    DWORD dwWidth;
    DWORD dwHeight;
    DWORD reserve[4];
} MainAVIHeader;

LIST strl(stream list)存储了视频/音频的信息。音频和视频存放在不同的LIST strl。
其中音频的数据
首先是一个Stream header,表明这个视频/音频所拥有的流的格式和基本信息。

typedef struct {
    FOURCC fccType;
    FOURCC fccHandler;
    DWORD dwFlags;
    WORD wProirity;
    WORD wLanguage;
    DWORD dwInitialFrames;
    DWORD dwScale;
    DWORD dwRate;
    DWORD dwStart;
    DWORD dwLength;
    DWORD dwSuggestedBufferSize;
    DWORD dwQuality;
    DWORD dwSampleSize;
    RECT rcFrame;
} AVIStreamHeader;

流格式信息,音频和视频的结构不同。
音频:

typedef struct {
  WORD  wFormatTag;
  WORD  nChannels;
  DWORD nSamplesPerSec;
  DWORD nAvgBytesPerSec;
  WORD  nBlockAlign;
  WORD  wBitsPerSample;
  WORD  cbSize;
} WAVEFORMATEX;

视频:

typedef struct tagBITMAPINFOHEADER {
  DWORD biSize;
  LONG  biWidth;
  LONG  biHeight;
  WORD  biPlanes;
  WORD  biBitCount;
  DWORD biCompression;
  DWORD biSizeImage;
  LONG  biXPelsPerMeter;
  LONG  biYPelsPerMeter;
  DWORD biClrUsed;
  DWORD biClrImportant;
} BITMAPINFOHEADER, *LPBITMAPINFOHEADER, *PBITMAPINFOHEADER;

在之后还有一些可选的附加数据。

实际媒体数据存放在之后LIST里,类型为’movi’,音频和视频流的组织形式在不同视频中不一定相同,有分开存储,有交织存储的。
在movi LIST中,每段数据分片前有一个两位数字(使用ASCII码)+两字节类型名,对流进行了编号。它们在LIST中顺序存储。

Two-character codeDescription
dbUncompressed video frame
dcCompressed video frame
pcPalette change
wbAudio data

如编号为0的压缩视频流为00dc,注意这是一个4字节的字符串。
虽然在这个编号之后的接下来4字节表明了这分片的大小,但是这个分片之间可能有空隙,因此可能无法直接一次读出所有分片。为了读出所有分片,在视频流的末尾有一个索引列表(AVI Index),给出了这些分片的准确位置。

这个索引的格式如下:

typedef struct {
    DWORD dwChunkId;
    DWORD dwFlags;
    DWORD dwOffset;
    DWORD dwSize;
  } _avioldindex_entry, AVIOLDINDEX_ENTRY;

typedef struct _avioldindex {
  FOURCC             fcc;
  DWORD              cb;
  _avioldindex_entry aIndex[];
} AVIOLDINDEX;

此外,文件中还有若干端标记为JUNK的意义不明的空白。

代码

下面是读出完整文件结构的C代码:
class3.h

typedef unsigned char BYTE;
typedef short WORD;
typedef int DWORD;
typedef DWORD LONG;
typedef struct {
    DWORD dwWidth;
    DWORD dwHeight;
} RECT;

typedef union {
    char ch[4];
    DWORD tag;
} FOURCC;

typedef struct {
    FOURCC RIFF;
    unsigned int fileSize;
    FOURCC fileType;
} RIFFHeader;

typedef struct {
    FOURCC LIST;
    unsigned int listSize;
    FOURCC listType;
} LISTHeader;

typedef struct {
    FOURCC fcc;
    DWORD cb;
    DWORD dwMicroSecPerFrame;

    DWORD dwMaxBytesPerSec;

    DWORD dwPaddingGranularity;
    DWORD dwFlags;
    
    DWORD dwTotalFrames;
    DWORD dwInitialFrames;

    DWORD dwStreams;

    DWORD dsSuggestedBufferSize;

    DWORD dwWidth;
    DWORD dwHeight;
    DWORD reserve[4];
} MainAVIHeader;

typedef struct {
    FOURCC fccType;
    FOURCC fccHandler;
    DWORD dwFlags;
    WORD wProirity;
    WORD wLanguage;
    DWORD dwInitialFrames;
    DWORD dwScale;
    DWORD dwRate;
    DWORD dwStart;
    DWORD dwLength;
    DWORD dwSuggestedBufferSize;
    DWORD dwQuality;
    DWORD dwSampleSize;
    RECT rcFrame;
} AVIStreamHeader;

typedef struct {
    FOURCC type;
    DWORD size;
} LISTItem;

typedef struct {
  WORD  wFormatTag;
  WORD  nChannels;
  DWORD nSamplesPerSec;
  DWORD nAvgBytesPerSec;
  WORD  nBlockAlign;
  WORD  wBitsPerSample;
  WORD  cbSize;
} WAVEFORMATEX;

typedef struct tagBITMAPINFOHEADER {
  DWORD biSize;
  LONG  biWidth;
  LONG  biHeight;
  WORD  biPlanes;
  WORD  biBitCount;
  DWORD biCompression;
  DWORD biSizeImage;
  LONG  biXPelsPerMeter;
  LONG  biYPelsPerMeter;
  DWORD biClrUsed;
  DWORD biClrImportant;
} BITMAPINFOHEADER, *LPBITMAPINFOHEADER, *PBITMAPINFOHEADER;

typedef struct _tag_size_combine {
    FOURCC name;
    DWORD size;
} TagHeader;

typedef struct {
    DWORD dwChunkId;
    DWORD dwFlags;
    DWORD dwOffset;
    DWORD dwSize;
  } _avioldindex_entry, AVIOLDINDEX_ENTRY;

typedef struct _avioldindex {
  FOURCC             fcc;
  DWORD              cb;
  _avioldindex_entry aIndex[];
} AVIOLDINDEX;


void resolve_list(LISTHeader *list_header, BYTE ** bufp);
void next_section(BYTE **bufp);

class3.c

#include<stdio.h>
#include<malloc.h>
#include<string.h>
#include "class3.h"

FOURCC hdrl, strl, strh, strf, LIST, vids, auds, movi, idx1;
void init() {
    memcpy(&hdrl, "hdrl", 4);
    memcpy(&strl, "strl", 4);
    memcpy(&strh, "strh", 4);
    memcpy(&strf, "strf", 4);
    memcpy(&LIST, "LIST", 4);
    memcpy(&vids, "vids", 4);
    memcpy(&auds, "auds", 4);
    memcpy(&movi, "movi", 4);
    memcpy(&idx1, "idx1", 4);
}

int read_data_header(BYTE ** bufp, LISTHeader *list_header){
    memcpy(list_header, *bufp, sizeof(LISTHeader));
    *bufp += sizeof(LISTHeader);
}

void read_main_avi_header(BYTE ** bufp, MainAVIHeader *mahp){
    memcpy(mahp, *bufp, sizeof(MainAVIHeader));
    *bufp = *bufp + sizeof(MainAVIHeader);
}

void read_stream_header(BYTE ** bufp, AVIStreamHeader *sh) {
    memcpy(sh, *bufp, sizeof(AVIStreamHeader));
    *bufp = *bufp + sizeof(AVIStreamHeader);
}

void read_bitmap_info_header(BYTE *bufp, BITMAPINFOHEADER *bih){
    memcpy(bih, bufp, sizeof(BITMAPINFOHEADER));
    //*bufp = *bufp + sizeof(BITMAPINFOHEADER);
}

void read_wave_format_ex(BYTE *bufp, WAVEFORMATEX *wfe){
    memcpy(wfe, bufp, sizeof(WAVEFORMATEX));
}

void print_main_avi_header(MainAVIHeader * mahp){
    printf("\tType: %.4s\n", mahp->fcc.ch);
    printf("\tMicroSecPerFrame: %dns -> %0.4f fps\n", mahp->dwMicroSecPerFrame, 1e6/mahp->dwMicroSecPerFrame);
    printf("\tMaxBytesPerSec: %d\n", mahp->dwMaxBytesPerSec);
    
    printf("\tTotalFrames: %d\n", mahp->dwTotalFrames);
    printf("\tInitialFrames: %d\n", mahp->dwInitialFrames);
    printf("\tStreams: %d\n", mahp->dwStreams);
    printf("\tSuggestedBufferSize: %d\n", mahp->dsSuggestedBufferSize);

    printf("\tWidth: %d, Height: %d\n", mahp->dwWidth, mahp->dwHeight);
}


void print_stream_header(AVIStreamHeader *sh) {
    printf("\tType: %.4s\n"
           "\tHandler: %.4s\n"
           "\tRate: %d, Scale: %d\n"
           "\tLength: %d\n"
           "\tSampleSize: %d\n"
           "\tRect: (w: %d, h: %d)\n"
            , sh->fccType.ch, sh->fccHandler.ch, sh->dwRate, sh->dwScale,
            sh->dwLength, sh->dwSampleSize, sh->rcFrame.dwWidth, sh->rcFrame.dwHeight
        );
}

void print_bitmap_info_header(BITMAPINFOHEADER *bih) {
    printf("\tSize: %d, Widht: %d, Height: %d\n", bih->biSize, bih->biWidth, bih->biHeight);
}

void print_wave_format_ex(WAVEFORMATEX *wfe) {
    printf("\tFormatTag: %d, Channls: %d, SamplePersec: %d, AvgBytesPerSec: %d, Size: %d\n", wfe->wFormatTag, wfe->nChannels, wfe->nSamplesPerSec, wfe->nAvgBytesPerSec, wfe->cbSize);
}

void resolve_hdrl(BYTE ** bufp, int size) {
    MainAVIHeader main_avi_header;
    printf("Found AVI Main Header: \t\t\n");
    read_main_avi_header(bufp, &main_avi_header);
    print_main_avi_header(&main_avi_header);
}

FOURCC resolve_strh(BYTE **bufp){
    AVIStreamHeader avi_stream_header;
    printf("Found Stream Header: \t\t\n");
    read_stream_header(bufp, &avi_stream_header);
    print_stream_header(&avi_stream_header);
    return avi_stream_header.fccType;
}

void resolve_vids_strf(BYTE *buf) {
    BITMAPINFOHEADER bih;
    read_bitmap_info_header(buf, &bih);
    print_bitmap_info_header(&bih);
}

void resolve_auds_strf(BYTE *buf) {
    WAVEFORMATEX wm;
    read_wave_format_ex(buf, &wm);
    print_wave_format_ex(&wm);
}
void resolve_strl(BYTE ** bufp, int size){
    BYTE * end = *bufp + size;
    FOURCC type, next_tag; 
    int next_size;
    while(*bufp < end) {
        int rest = end - *bufp;
        int readd = size - rest;
        next_tag = * (FOURCC *) (*bufp);
        next_size = * (int *) (*bufp + 4);
        (*bufp) += 8;
        printf("Readd buffer: %d/%d/%d B (+%d)", readd, rest, size, next_size);
        if(next_tag.tag == strh.tag) {
            printf(" + 8 + %ld\n", sizeof(AVIStreamHeader));
            type = resolve_strh(bufp);
        } else if(next_tag.tag == strf.tag){
            printf(" + 8 + %ld or %ld\n", sizeof(BITMAPINFOHEADER), sizeof(WAVEFORMATEX));
            printf("Found Stream Format: \t\t\n");
            if(type.tag==vids.tag){
                resolve_vids_strf(*bufp);   
                *bufp += next_size;
            }
            else if(type.tag==auds.tag){
                resolve_auds_strf(*bufp);
                *bufp += next_size;
            }
            else {
                *bufp += next_size;
                printf("\tUndefined stream format %.4s\n", type.ch);
            }
        }
        else{
            printf(" + 8 + %d\n", next_size);
            printf("\tUndefined Stream tag %.4s(%.8X), skiped.\n", next_tag.ch, next_tag.tag);
            if(next_tag.tag==LIST.tag) {
                *bufp -= 8;
                next_section(bufp);
            }
            *bufp += next_size;

            //break;
        }
    }
    printf("strl resolved.\n");
}

void resolve_movi(BYTE **bufp, int size) {
    printf("<MOVI LIST>\n");
    BYTE * buf = *bufp;
    BYTE * end = buf + size;
    TagHeader *tag;
    while(buf < end) {
        tag = (TagHeader *) (buf);
        printf("tag %.4s size %d  (%ld/%d)\n", tag->name.ch, tag->size, buf - *bufp, size);
        buf += tag->size + 8;
    }
    *bufp += size - 4;
}

void resolve_idx(BYTE **bufp) {
    TagHeader tag = *(TagHeader *) *bufp;
    BYTE * buf = *bufp;
    BYTE * end = *bufp + tag.size;
    printf("<AVI INDEX> %.4s size: %d\t\t\n", tag.name.ch, tag.size);
    buf += 8;
    int counts = (tag.size - sizeof(AVIOLDINDEX))/sizeof(AVIOLDINDEX_ENTRY);
    AVIOLDINDEX idx = *(AVIOLDINDEX *) buf;
    printf("\tFcc: %.4s, cb:%d,  idx counts: %d\n", idx.fcc.ch, idx.cb, counts);
    printf("\tno.\tChunkId\tFlags\tOffset   \tSize\n");
    for(int i=1; i<= counts; i += 1){
        AVIOLDINDEX_ENTRY * aidx = (AVIOLDINDEX_ENTRY *) buf + i;
        if(i<5) printf("\t%d\t%.4s\t%d\t%0.8x\t%d\n", i, (BYTE *) &(aidx->dwChunkId), aidx->dwFlags, aidx->dwOffset, aidx->dwSize);
        if(i> (counts - 5)) printf("\t%d\t%.4s\t%d\t%d\t%d\n", i, (BYTE *) &(aidx->dwChunkId), aidx->dwFlags, aidx->dwOffset, aidx->dwSize);
    }
}

void resolve_junk(BYTE ** bufp) {}

void resolve_list(LISTHeader *list_header, BYTE ** bufp) {
    printf("'LIST':%.4s, listSize: %d, listType: %.4s\n",
                list_header->LIST.ch, list_header->listSize, list_header->listType.ch);

    if(list_header->listType.tag == hdrl.tag){
        resolve_hdrl(bufp, list_header->listSize);
    } else if(list_header->listType.tag == strl.tag) {
        resolve_strl(bufp, list_header->listSize);
    } else if(list_header->listType.tag == movi.tag) {
        resolve_movi(bufp, list_header->listSize);
    } 
    else {
        printf("Undefined list type %.4s, with size %d\n", list_header->listType.ch, list_header->listSize);
        *bufp += list_header->listSize - 4;
        //return ;
    }
}


void next_section(BYTE **bufp) {
    BYTE * buf = *bufp;
    TagHeader th;
    memcpy(&th, buf, sizeof(th));
    FOURCC next_tag = th.name;
    int size = th.size;
    static int loop =0;
    if(next_tag.tag == LIST.tag) {
        LISTHeader next_list_header;
        read_data_header(bufp, &next_list_header);
        resolve_list(&next_list_header, bufp);
        next_section(bufp);
    } 
    else if(next_tag.tag == idx1.tag) {
        resolve_idx(bufp);
    }
    else {
        printf("Next is not LIST: %.4s size %d.\n", next_tag.ch, size);
        *bufp = *bufp + size + 8;
        loop ++ ;
        if(loop<5) next_section(bufp);
    }
}


int main() {
    FILE * fp;
    RIFFHeader riff_header;
    LISTHeader list_header;
    MainAVIHeader main_avi_header;
    AVIStreamHeader stream_header;
    init();
    fp = fopen("./Bear In The Woods.avi", "r");
    BYTE * buf;
    fread(&riff_header, 1, sizeof(RIFFHeader), fp);
    printf("'RIFF':%.4s, fileSize: %d, fileType: %.4s\n", riff_header.RIFF.ch, riff_header.fileSize, riff_header.fileType.ch);
    
    int size = riff_header.fileSize; //list_header.listSize - 4;
    BYTE *bufs = buf = (BYTE *) malloc(size);
    fread(buf, 1, size, fp);

    //fread(&list_header, 1, sizeof(LISTHeader), fp);
    next_section(&buf);
    return 0;
}

下面是读出的所有信息,可以看到,文件里分别有一个音频流和一个视频流,它们在movi LIST中交织存储,且之间有空隙,顺序读取会出现错误,但是可以在最后的AVI Index里全部找到。

'RIFF':RIFF, fileSize: 28985984, fileType: AVI 
'LIST':LIST, listSize: 8926, listType: hdrl
Found AVI Main Header: 
        Type: avih
        MicroSecPerFrame: 33366ns -> 29.9706 fps
        MaxBytesPerSec: 41000
        TotalFrames: 825
        InitialFrames: 0
        Streams: 2
        SuggestedBufferSize: 1048576
        Width: 1920, Height: 1080
'LIST':LIST, listSize: 4320, listType: strl
Readd buffer: 0/4320/4320 B (+56) + 8 + 56
Found Stream Header: 
        Type: vids
        Handler: MP42
        Rate: 30000, Scale: 1001
        Length: 825
        SampleSize: 0
        Rect: (w: 0, h: 70780800)
Readd buffer: 64/4256/4320 B (+40) + 8 + 40 or 20
Found Stream Format: 
        Size: 40, Widht: 1920, Height: 1080
Readd buffer: 112/4208/4320 B (+4120) + 8 + 4120
        Undefined Stream tag JUNK(4B4E554A), skiped.
Readd buffer: 4240/80/4320 B (+68) + 8 + 68
        Undefined Stream tag vprp(70727076), skiped.
Readd buffer: 4316/4/4320 B (+4254) + 8 + 4254
        Undefined Stream tag LIST(5453494C), skiped.
'LIST':LIST, listSize: 4254, listType: strl
Readd buffer: 0/4254/4254 B (+56) + 8 + 56
Found Stream Header: 
        Type: auds
        Handler: 
        Rate: 11025, Scale: 253
        Length: 1202
        SampleSize: 1024
        Rect: (w: 0, h: 0)
Readd buffer: 64/4190/4254 B (+50) + 8 + 40 or 20
Found Stream Format: 
        FormatTag: 2, Channls: 2, SamplePersec: 44100, AvgBytesPerSec: 16000, Size: 32
Readd buffer: 122/4132/4254 B (+4120) + 8 + 4120
        Undefined Stream tag JUNK(4B4E554A), skiped.
Readd buffer: 4250/4/4254 B (+260) + 8 + 260
        Undefined Stream tag JUNK(4B4E554A), skiped.
strl resolved.
'LIST':LIST, listSize: 26, listType: INFO
Undefined list type INFO, with size 26
Next is not LIST: JUNK size 1016.
'LIST':LIST, listSize: 28943540, listType: movi
<MOVI LIST>
tag 00dc size 66996  (0/28943540)
tag 01wb size 1024  (67004/28943540)
tag 01wb size 1024  (68036/28943540)
tag 00dc size 70591  (69068/28943540)
tag  size 262242  (139667/28943540)
tag �
�� size 865395139  (401917/28943540)
<AVI INDEX> idx1 size: 32432
        Fcc: 00dc, cb:16,  idx counts: 2026
        no.     ChunkId Flags   Offset          Size
        1       01wb    16      000105c0        1024
        2       01wb    16      000109c8        1024
        3       00dc    0       00010dd0        70591
        4       01wb    16      00022198        1024
        2022    01wb    16      28925376        1024
        2023    00dc    0       28926408        14028
        2024    01wb    16      28940444        1024
        2025    01wb    16      28941476        1024
        2026    01wb    16      28942508        1024
strl resolved.
Next is not LIST:  size 67108937.
举报

相关推荐

0 条评论