AVI视频格式简介
(以Microsoft AVI格式为准)
Microsoft AVI 文件格式是一种 RIFF 文件规范,供捕获、编辑和播放音视频序列的应用程序使用。通常,AVI文件包含不同类型数据的多个流。大多数AVI序列同时使用音频和视频流。AVI序列的简单变体只使用视频数据,不需要音频流。
AVI视频中的数据以容器的形式存储,分为LIST和CHUNK两种,其中LIST可以认为是结构化的数据,LIST中可以嵌套LIST和CHUNK。
以下是一个格式示例:
RIFF ('AVI '
LIST ('hdrl'
'avih'(<Main AVI Header>)
LIST ('strl'
'strh'(<Stream header>)
'strf'(<Stream format>)
[ 'strd'(<Additional header data>) ]
[ 'strn'(<Stream name>) ]
...
)
...
)
LIST ('movi'
{SubChunk | LIST ('rec '
SubChunk1
SubChunk2
...
)
...
}
...
)
['idx1' (<AVI Index>) ]
)
文件先以RIFF开头,紧跟一个hdrl的LIST,这个LIST存放了绝大部分元数据:avih存储了这个视频的主要头信息,紧跟着的strl的LIST则存储了视频中的媒体流信息,每个流(视频流或音频流)对应一个strl。
avih表明是AVI的主要头文件,下面是AVI Main Header的结构体:
/*
 * Main AVI header: the 'avih' chunk stored first inside the 'hdrl' LIST.
 * The struct starts with the chunk's own tag and size fields.
 */
typedef struct {
FOURCC fcc; /* chunk tag; prints as "avih" in the sample output */
DWORD cb; /* chunk size field that follows the tag */
DWORD dwMicroSecPerFrame; /* frame duration in microseconds (fps = 1e6 / this value) */
DWORD dwMaxBytesPerSec;
DWORD dwPaddingGranularity;
DWORD dwFlags;
DWORD dwTotalFrames; /* 825 in the sample file */
DWORD dwInitialFrames;
DWORD dwStreams; /* number of streams; 2 in the sample (one video, one audio) */
DWORD dsSuggestedBufferSize;
DWORD dwWidth; /* 1920 in the sample */
DWORD dwHeight; /* 1080 in the sample */
DWORD reserve[4];
} MainAVIHeader;
LIST strl(stream list)存储了视频/音频的信息。音频和视频存放在不同的LIST strl。
每个 strl 中,首先是一个Stream header(strh),表明这个视频/音频流的格式和基本信息。
/*
 * Stream header: the payload of a 'strh' chunk.
 * The chunk's tag and size are NOT part of this struct.
 */
typedef struct {
FOURCC fccType; /* stream kind: 'vids' or 'auds' in the sample output */
FOURCC fccHandler; /* handler FOURCC, e.g. 'MP42' for the sample video stream */
DWORD dwFlags;
WORD wProirity;
WORD wLanguage;
DWORD dwInitialFrames;
/* NOTE(review): dwRate / dwScale presumably gives samples per second
 * (30000 / 1001 for the 29.97 fps sample video) - confirm. */
DWORD dwScale;
DWORD dwRate;
DWORD dwStart;
DWORD dwLength; /* 825 for the sample video stream, equal to the main header's frame count */
DWORD dwSuggestedBufferSize;
DWORD dwQuality;
DWORD dwSampleSize;
RECT rcFrame;
} AVIStreamHeader;
流格式信息,音频和视频的结构不同。
音频:
/*
 * Audio stream format: the payload of the 'strf' chunk of an 'auds' stream.
 * NOTE(review): the members add up to 18 bytes; a compiler will normally pad
 * the struct to 20, so sizeof() is larger than the fixed on-disk part.
 */
typedef struct {
WORD wFormatTag; /* 2 in the sample output */
WORD nChannels; /* 2 in the sample output */
DWORD nSamplesPerSec; /* 44100 in the sample output */
DWORD nAvgBytesPerSec;
WORD nBlockAlign;
WORD wBitsPerSample;
/* Presumably the number of extra format bytes that follow: the sample's
 * 50-byte chunk is 18 fixed bytes + cbSize (32) - confirm. */
WORD cbSize;
} WAVEFORMATEX;
视频:
/*
 * Video stream format: the payload of the 'strf' chunk of a 'vids' stream.
 */
typedef struct tagBITMAPINFOHEADER {
DWORD biSize; /* 40 in the sample output, the same as the chunk size */
LONG biWidth; /* 1920 in the sample output */
LONG biHeight; /* 1080 in the sample output */
WORD biPlanes;
WORD biBitCount;
DWORD biCompression;
DWORD biSizeImage;
LONG biXPelsPerMeter;
LONG biYPelsPerMeter;
DWORD biClrUsed;
DWORD biClrImportant;
} BITMAPINFOHEADER, *LPBITMAPINFOHEADER, *PBITMAPINFOHEADER;
在之后还有一些可选的附加数据。
实际媒体数据存放在其后的LIST里,类型为'movi'。音频和视频流的组织形式在不同视频中不一定相同:有的分开存储,有的交织存储。
在movi LIST中,每段数据分片前有一个两位数字(使用ASCII码)+两字节类型名,对流进行了编号。它们在LIST中顺序存储。
Two-character code | Description |
---|---|
db | Uncompressed video frame |
dc | Compressed video frame |
pc | Palette change |
wb | Audio data |
如编号为0的压缩视频流为00dc,注意这是一个4字节的字符串。
在这个编号之后的4字节表明了该分片数据的大小。需要注意的是,RIFF要求每个分片从偶数字节边界开始:当数据长度为奇数时,其后会有1个填充字节,而这个填充字节不计入分片大小,因此只按“大小+8”顺序跳转可能会错位,无法直接读出所有分片。为了便于定位所有分片,在movi LIST之后还有一个索引列表(AVI Index),给出了这些分片的准确位置。
这个索引的格式如下:
/*
 * One record of the 'idx1' index; each record describes one data chunk.
 */
typedef struct {
DWORD dwChunkId; /* four-character chunk id, e.g. '00dc' or '01wb' */
DWORD dwFlags;
DWORD dwOffset; /* position of the chunk; what it is relative to is not established here */
DWORD dwSize; /* chunk data size, e.g. 1024 for the sample's '01wb' chunks */
} _avioldindex_entry, AVIOLDINDEX_ENTRY;
/*
 * The whole 'idx1' chunk: tag, size, then a variable number of entries.
 */
typedef struct _avioldindex {
FOURCC fcc; /* chunk tag ('idx1') */
DWORD cb; /* chunk size field that follows the tag */
_avioldindex_entry aIndex[]; /* flexible array member: the index records */
} AVIOLDINDEX;
此外,文件中还有若干段标记为JUNK的空白数据块,通常用作对齐填充或预留空间,解析时可以直接跳过。
代码
下面是读出完整文件结构的C代码:
class3.h
/*
 * Fixed-width aliases for the fields of the on-disk structures.
 * WORD and DWORD are unsigned: a signed type turns any value with the top bit
 * set (large chunk sizes, format tags such as 0xFFFE) into a negative number.
 * LONG stays signed. Assumes the common ABI where short is 16 bits and int is
 * 32 bits.
 */
typedef unsigned char BYTE;   /* raw byte */
typedef unsigned short WORD;  /* 16-bit unsigned */
typedef unsigned int DWORD;   /* 32-bit unsigned */
typedef int LONG;             /* 32-bit signed */
/*
 * Eight-byte stand-in for the rcFrame member of the stream header.
 * NOTE(review): the sample output prints "h: 70780800" (0x04380780) for a
 * 1920x1080 stream, i.e. 1920 and 1080 packed into one DWORD. Presumably the
 * on-disk rectangle is four 16-bit values (left, top, right, bottom) rather
 * than two DWORDs - confirm against the AVISTREAMHEADER documentation.
 */
typedef struct {
DWORD dwWidth;
DWORD dwHeight;
} RECT;
/*
 * Four-character code. The same four bytes can be read as characters
 * (printed with "%.4s") or as one DWORD (compared with ==).
 */
typedef union {
char ch[4]; /* not NUL-terminated; always print with a 4-character limit */
DWORD tag;
} FOURCC;
/*
 * First 12 bytes of the file: the 'RIFF' tag, a size, and the file type.
 */
typedef struct {
FOURCC RIFF; /* prints as "RIFF" in the sample output */
unsigned int fileSize; /* main() uses this as the number of bytes to load */
FOURCC fileType; /* prints as "AVI " in the sample output */
} RIFFHeader;
/*
 * Header of a LIST: the 'LIST' tag, a size, and the list type.
 */
typedef struct {
FOURCC LIST; /* the literal tag 'LIST' */
/* resolve_list skips an unknown list with listSize - 4 once this header has
 * been consumed, i.e. the size is treated as including the 4-byte listType. */
unsigned int listSize;
FOURCC listType; /* e.g. 'hdrl', 'strl', 'movi', 'INFO' */
} LISTHeader;
/*
 * Main AVI header: the 'avih' chunk stored first inside the 'hdrl' LIST.
 * The struct starts with the chunk's own tag and size fields, so it is
 * copied from the position of the tag itself.
 */
typedef struct {
FOURCC fcc; /* chunk tag; prints as "avih" */
DWORD cb; /* chunk size field that follows the tag */
DWORD dwMicroSecPerFrame; /* frame duration in microseconds; the printer derives fps as 1e6 / this */
DWORD dwMaxBytesPerSec;
DWORD dwPaddingGranularity;
DWORD dwFlags;
DWORD dwTotalFrames;
DWORD dwInitialFrames;
DWORD dwStreams; /* number of streams; 2 in the sample (one video, one audio) */
DWORD dsSuggestedBufferSize; /* name is referenced by print_main_avi_header */
DWORD dwWidth;
DWORD dwHeight;
DWORD reserve[4];
} MainAVIHeader;
/*
 * Stream header: the payload of a 'strh' chunk. The chunk's tag and size are
 * NOT part of this struct; resolve_strl skips those 8 bytes before this is
 * copied.
 */
typedef struct {
FOURCC fccType; /* stream kind; resolve_strl compares it with 'vids' and 'auds' */
FOURCC fccHandler; /* handler FOURCC, e.g. 'MP42' for the sample video stream */
DWORD dwFlags;
WORD wProirity;
WORD wLanguage;
DWORD dwInitialFrames;
/* NOTE(review): dwRate / dwScale presumably gives samples per second
 * (30000 / 1001 for the 29.97 fps sample video) - confirm. */
DWORD dwScale;
DWORD dwRate;
DWORD dwStart;
DWORD dwLength;
DWORD dwSuggestedBufferSize;
DWORD dwQuality;
DWORD dwSampleSize;
RECT rcFrame;
} AVIStreamHeader;
/*
 * A tag followed by a size. Not referenced by the code visible in this file.
 */
typedef struct {
FOURCC type;
DWORD size;
} LISTItem;
/*
 * Audio stream format: the payload of the 'strf' chunk of an 'auds' stream.
 * NOTE(review): the members add up to 18 bytes, but the parser prints
 * sizeof(WAVEFORMATEX) as 20 (tail padding), so copying sizeof() bytes reads
 * 2 bytes beyond the fixed fields.
 */
typedef struct {
WORD wFormatTag;
WORD nChannels;
DWORD nSamplesPerSec;
DWORD nAvgBytesPerSec;
WORD nBlockAlign;
WORD wBitsPerSample;
/* Presumably the number of extra format bytes that follow: the sample's
 * 50-byte chunk is 18 fixed bytes + cbSize (32) - confirm. */
WORD cbSize;
} WAVEFORMATEX;
/*
 * Video stream format: the payload of the 'strf' chunk of a 'vids' stream.
 */
typedef struct tagBITMAPINFOHEADER {
DWORD biSize; /* 40 in the sample output, the same as the chunk size */
LONG biWidth;
LONG biHeight;
WORD biPlanes;
WORD biBitCount;
DWORD biCompression;
DWORD biSizeImage;
LONG biXPelsPerMeter;
LONG biYPelsPerMeter;
DWORD biClrUsed;
DWORD biClrImportant;
} BITMAPINFOHEADER, *LPBITMAPINFOHEADER, *PBITMAPINFOHEADER;
/*
 * Generic 8-byte chunk header: a four-character tag followed by a size.
 * Used by next_section, resolve_movi and resolve_idx to peek at what comes next.
 */
typedef struct _tag_size_combine {
FOURCC name;
DWORD size;
} TagHeader;
/*
 * One record of the 'idx1' index; each record describes one data chunk.
 */
typedef struct {
DWORD dwChunkId; /* four-character chunk id; resolve_idx prints it with "%.4s" */
DWORD dwFlags;
DWORD dwOffset; /* position of the chunk; what it is relative to is not established here */
DWORD dwSize; /* chunk data size */
} _avioldindex_entry, AVIOLDINDEX_ENTRY;
/*
 * The whole 'idx1' chunk: tag, size, then a variable number of entries.
 * sizeof(AVIOLDINDEX) covers only fcc and cb (the array has no fixed length).
 */
typedef struct _avioldindex {
FOURCC fcc; /* chunk tag ('idx1') */
DWORD cb; /* chunk size field that follows the tag */
_avioldindex_entry aIndex[]; /* flexible array member: the index records */
} AVIOLDINDEX;
/* Prints the given LIST header and hands the list body at *bufp to the
 * handler for its listType ('hdrl', 'strl', 'movi'); other lists are skipped. */
void resolve_list(LISTHeader *list_header, BYTE ** bufp);
/* Looks at the tag at *bufp and parses what follows: a LIST, the 'idx1'
 * index, or any other chunk (which is skipped). Advances *bufp as it goes. */
void next_section(BYTE **bufp);
class3.c
#include<stdio.h>
#include<malloc.h>
#include<string.h>
#include "class3.h"
/* Four-character codes the parser compares against; filled in by init(). */
FOURCC hdrl, strl, strh, strf, LIST, vids, auds, movi, idx1;

/*
 * Fill every global FOURCC with its four-character spelling.
 * main() calls this once before any parsing starts.
 */
void init() {
    static const struct {
        FOURCC *slot;
        const char *text;
    } codes[] = {
        { &hdrl, "hdrl" },
        { &strl, "strl" },
        { &strh, "strh" },
        { &strf, "strf" },
        { &LIST, "LIST" },
        { &vids, "vids" },
        { &auds, "auds" },
        { &movi, "movi" },
        { &idx1, "idx1" },
    };

    for (size_t k = 0; k < sizeof codes / sizeof codes[0]; k++) {
        /* Copy exactly four bytes: a FOURCC carries no terminating NUL. */
        memcpy(codes[k].slot, codes[k].text, 4);
    }
}
/*
 * Copy one LIST header from *bufp into *list_header and advance *bufp past it.
 *
 * Returns the number of bytes consumed, sizeof(LISTHeader). The function is
 * declared int but previously fell off the end without returning a value.
 */
int read_data_header(BYTE ** bufp, LISTHeader *list_header){
    memcpy(list_header, *bufp, sizeof(LISTHeader));
    *bufp += sizeof(LISTHeader);
    return (int) sizeof(LISTHeader);
}
/*
 * Copy a MainAVIHeader out of the buffer at *bufp and move *bufp to the byte
 * right after it.
 */
void read_main_avi_header(BYTE ** bufp, MainAVIHeader *mahp){
    const size_t nbytes = sizeof *mahp;

    memcpy(mahp, *bufp, nbytes);
    *bufp += nbytes;
}
/*
 * Copy an AVIStreamHeader out of the buffer at *bufp and move *bufp to the
 * byte right after it.
 */
void read_stream_header(BYTE ** bufp, AVIStreamHeader *sh) {
    BYTE *from = *bufp;

    memcpy(sh, from, sizeof *sh);
    *bufp = from + sizeof *sh;
}
/*
 * Copy a BITMAPINFOHEADER out of the buffer at bufp.
 * The pointer is passed by value, so the caller's read position is untouched.
 */
void read_bitmap_info_header(BYTE *bufp, BITMAPINFOHEADER *bih){
    const size_t nbytes = sizeof *bih;

    memcpy(bih, bufp, nbytes);
}
/*
 * Copy a WAVEFORMATEX out of the buffer at bufp.
 * The pointer is passed by value, so the caller's read position is untouched.
 */
void read_wave_format_ex(BYTE *bufp, WAVEFORMATEX *wfe){
    const size_t nbytes = sizeof *wfe;

    memcpy(wfe, bufp, nbytes);
}
/*
 * Print the interesting fields of the main AVI header, one per line.
 *
 * dwMicroSecPerFrame is a duration in microseconds, so it is labelled "us"
 * (it was labelled "ns"). The frame rate is 1e6 / dwMicroSecPerFrame; a zero
 * duration is reported as 0 fps instead of dividing by zero.
 */
void print_main_avi_header(MainAVIHeader * mahp){
    double fps = 0.0;

    if (mahp->dwMicroSecPerFrame != 0) {
        fps = 1e6 / mahp->dwMicroSecPerFrame;
    }

    printf("\tType: %.4s\n", mahp->fcc.ch);
    printf("\tMicroSecPerFrame: %dus -> %0.4f fps\n", mahp->dwMicroSecPerFrame, fps);
    printf("\tMaxBytesPerSec: %d\n", mahp->dwMaxBytesPerSec);
    printf("\tTotalFrames: %d\n", mahp->dwTotalFrames);
    printf("\tInitialFrames: %d\n", mahp->dwInitialFrames);
    printf("\tStreams: %d\n", mahp->dwStreams);
    printf("\tSuggestedBufferSize: %d\n", mahp->dsSuggestedBufferSize);
    printf("\tWidth: %d, Height: %d\n", mahp->dwWidth, mahp->dwHeight);
}
/*
 * Print the interesting fields of a stream header.
 *
 * The frame rectangle is decoded as four 16-bit values (left, top, right,
 * bottom) taken from the raw bytes of rcFrame. Reading it as two DWORDs
 * printed "w: 0, h: 70780800" for the 1920x1080 sample stream; 70780800 is
 * 0x04380780, i.e. right = 1920 and bottom = 1080 packed into one DWORD.
 * Like the rest of the parser this assumes a little-endian host.
 */
void print_stream_header(AVIStreamHeader *sh) {
    short edges[4] = { 0, 0, 0, 0 };   /* left, top, right, bottom */
    size_t nbytes = sizeof edges;
    int width;
    int height;

    /* Never read more than the rcFrame member actually holds. */
    if (sizeof sh->rcFrame < nbytes) {
        nbytes = sizeof sh->rcFrame;
    }
    memcpy(edges, &sh->rcFrame, nbytes);
    width = edges[2] - edges[0];
    height = edges[3] - edges[1];

    printf("\tType: %.4s\n"
           "\tHandler: %.4s\n"
           "\tRate: %d, Scale: %d\n"
           "\tLength: %d\n"
           "\tSampleSize: %d\n"
           "\tRect: (w: %d, h: %d)\n"
           , sh->fccType.ch, sh->fccHandler.ch, sh->dwRate, sh->dwScale,
           sh->dwLength, sh->dwSampleSize, width, height
    );
}
/*
 * Print the header size and the picture dimensions of a video stream format.
 * Fixes the misspelled "Widht" label in the output.
 */
void print_bitmap_info_header(BITMAPINFOHEADER *bih) {
    printf("\tSize: %d, Width: %d, Height: %d\n", bih->biSize, bih->biWidth, bih->biHeight);
}
/*
 * Print the main fields of an audio stream format on one line.
 * Fixes the misspelled "Channls" and "SamplePersec" labels in the output.
 */
void print_wave_format_ex(WAVEFORMATEX *wfe) {
    printf("\tFormatTag: %d, Channels: %d, SamplesPerSec: %d, AvgBytesPerSec: %d, Size: %d\n",
           wfe->wFormatTag, wfe->nChannels, wfe->nSamplesPerSec, wfe->nAvgBytesPerSec, wfe->cbSize);
}
/*
 * Handle the start of an 'hdrl' list: read the main AVI header found at *bufp,
 * print it, and leave *bufp just after it. The size argument is not used.
 */
void resolve_hdrl(BYTE ** bufp, int size) {
    MainAVIHeader avih;

    (void) size;

    printf("Found AVI Main Header: \t\t\n");

    /* Same copy-and-advance that read_main_avi_header performs. */
    memcpy(&avih, *bufp, sizeof avih);
    *bufp += sizeof avih;

    print_main_avi_header(&avih);
}
/*
 * Read the stream header at *bufp, print it, and advance *bufp past it.
 * Returns the stream's fccType so the caller can tell which format follows.
 */
FOURCC resolve_strh(BYTE **bufp){
    AVIStreamHeader header;

    printf("Found Stream Header: \t\t\n");

    /* Same copy-and-advance that read_stream_header performs. */
    memcpy(&header, *bufp, sizeof header);
    *bufp += sizeof header;

    print_stream_header(&header);
    return header.fccType;
}
/*
 * Decode the video stream format located at buf and print it.
 * Does not move any read position; the caller skips the chunk itself.
 */
void resolve_vids_strf(BYTE *buf) {
    BITMAPINFOHEADER format;

    /* Same copy that read_bitmap_info_header performs. */
    memcpy(&format, buf, sizeof format);
    print_bitmap_info_header(&format);
}
/*
 * Decode the audio stream format located at buf and print it.
 * Does not move any read position; the caller skips the chunk itself.
 */
void resolve_auds_strf(BYTE *buf) {
    WAVEFORMATEX format;

    /* Same copy that read_wave_format_ex performs. */
    memcpy(&format, buf, sizeof format);
    print_wave_format_ex(&format);
}
/*
 * Walk the chunks of one 'strl' list and print the stream header ('strh') and
 * stream format ('strf'); every other chunk is reported and skipped.
 *
 * bufp: in/out read position; on entry it points just after the 12-byte LIST
 *       header, on return it points at the first byte after the list.
 * size: the list's listSize field.
 *
 * Fixes relative to the previous version:
 *  - listSize already counts the 4-byte list type consumed with the LIST
 *    header, so the list body is size - 4 bytes. Using size made the loop run
 *    4 bytes too far and parse the header of whatever followed the list.
 *  - `type` has a defined value even if 'strf' appears before 'strh'.
 *  - chunk headers are read with memcpy instead of casting a byte pointer.
 *  - every chunk is left by jumping to its declared end (plus the pad byte
 *    RIFF inserts after odd-sized data), regardless of how much the handler
 *    consumed; a chunk that claims more than what remains stops the walk.
 *  - sizeof values are printed with %zu.
 * A chunk tagged LIST inside the list is skipped as a whole like any other
 * unknown chunk.
 */
void resolve_strl(BYTE ** bufp, int size){
    int payload = size >= 4 ? size - 4 : 0;
    BYTE * end = *bufp + payload;
    FOURCC type, next_tag;
    int next_size;

    memset(&type, 0, sizeof type);

    while (end - *bufp >= 8) {
        int rest = (int) (end - *bufp);
        int readd = payload - rest;
        BYTE * data;
        BYTE * next;

        memcpy(&next_tag, *bufp, 4);
        memcpy(&next_size, *bufp + 4, 4);
        data = *bufp + 8;

        if (next_size < 0 || next_size > rest - 8) {
            printf("\tChunk %.4s claims %d bytes but only %d remain, stop.\n",
                   next_tag.ch, next_size, rest - 8);
            break;
        }

        /* Chunks start on even offsets: odd-sized data is followed by one pad byte. */
        next = data + next_size + (next_size & 1);
        if (next > end) {
            next = end;
        }

        *bufp = data;
        printf("Readd buffer: %d/%d/%d B (+%d)", readd, rest, payload, next_size);
        if (next_tag.tag == strh.tag) {
            printf(" + 8 + %zu\n", sizeof(AVIStreamHeader));
            type = resolve_strh(bufp);
        } else if (next_tag.tag == strf.tag) {
            printf(" + 8 + %zu or %zu\n", sizeof(BITMAPINFOHEADER), sizeof(WAVEFORMATEX));
            printf("Found Stream Format: \t\t\n");
            if (type.tag == vids.tag) {
                resolve_vids_strf(data);
            } else if (type.tag == auds.tag) {
                resolve_auds_strf(data);
            } else {
                printf("\tUndefined stream format %.4s\n", type.ch);
            }
        } else {
            printf(" + 8 + %d\n", next_size);
            printf("\tUndefined Stream tag %.4s(%.8X), skiped.\n", next_tag.ch, next_tag.tag);
        }

        *bufp = next;
    }

    /* Always resume right after the list, even if the walk stopped early. */
    *bufp = end;
    printf("strl resolved.\n");
}
/*
 * List every chunk stored in the 'movi' list (tag, size, offset inside the
 * list body) and move *bufp to the first byte after the list.
 *
 * bufp: in/out read position; on entry it points just after the 12-byte LIST
 *       header.
 * size: the list's listSize field.
 *
 * Fixes relative to the previous version:
 *  - RIFF chunks start on even offsets, so odd-sized data is followed by one
 *    pad byte that is not counted in the chunk size. Not skipping it put the
 *    walk one byte off after the first odd-sized chunk (70591 bytes in the
 *    sample), which produced the garbage tags in the sample output.
 *  - listSize already counts the 4-byte list type, so the body is size - 4
 *    bytes; the loop used to run 4 bytes past the list.
 *  - headers are copied with memcpy instead of casting a byte pointer, and a
 *    chunk that claims more data than remains stops the walk.
 */
void resolve_movi(BYTE **bufp, int size) {
    int payload = size >= 4 ? size - 4 : 0;
    BYTE * buf = *bufp;
    BYTE * end = buf + payload;
    TagHeader tag;

    printf("<MOVI LIST>\n");
    while ((size_t) (end - buf) >= sizeof tag) {
        size_t left;
        size_t step;

        memcpy(&tag, buf, sizeof tag);
        printf("tag %.4s size %u (%ld/%d)\n", tag.name.ch, (unsigned int) tag.size,
               (long) (buf - *bufp), payload);

        left = (size_t) (end - buf) - sizeof tag;
        step = (unsigned int) tag.size;
        if (step > left) {
            printf("\tchunk overruns the movi list, stop.\n");
            break;
        }
        step += step & 1u;      /* pad byte after odd-sized data */
        if (step > left) {
            step = left;        /* last chunk without its pad byte */
        }
        buf += sizeof tag + step;
    }
    *bufp += payload;
}
/*
 * Print a summary of the 'idx1' index chunk at *bufp: its tag and size, the
 * number of entries, then the first four and the last five entries.
 * On return *bufp points just after the chunk.
 *
 * Fixes relative to the previous version:
 *  - the fcc/cb pair of the index is the chunk's own 8-byte header; it used to
 *    be read from the first entry, which is why the sample output shows
 *    "Fcc: 00dc, cb:16".
 *  - the chunk size does not include that 8-byte header, so the entry count
 *    is size / sizeof(entry); subtracting the header dropped one entry.
 *  - entry number i is stored at index i - 1; the loop used to skip entry 0.
 *  - each entry is printed at most once (short indexes used to print twice)
 *    and the offset column uses the same hexadecimal format everywhere.
 *  - entries are copied with memcpy instead of casting a byte pointer.
 */
void resolve_idx(BYTE **bufp) {
    TagHeader tag;
    AVIOLDINDEX_ENTRY entry;
    BYTE * entries;
    unsigned int bytes;
    int counts;

    memcpy(&tag, *bufp, sizeof tag);
    entries = *bufp + sizeof tag;
    bytes = (unsigned int) tag.size;
    counts = (int) (bytes / sizeof entry);

    printf("<AVI INDEX> %.4s size: %u\t\t\n", tag.name.ch, bytes);
    printf("\tFcc: %.4s, cb:%u, idx counts: %d\n", tag.name.ch, bytes, counts);
    printf("\tno.\tChunkId\tFlags\tOffset \tSize\n");
    for (int i = 1; i <= counts; i += 1) {
        /* Only the head (1..4) and the tail (last five) are shown. */
        if (i >= 5 && i <= counts - 5) {
            continue;
        }
        memcpy(&entry, entries + (size_t) (i - 1) * sizeof entry, sizeof entry);
        printf("\t%d\t%.4s\t%u\t%0.8x\t%u\n", i, (char *) &entry.dwChunkId,
               (unsigned int) entry.dwFlags, (unsigned int) entry.dwOffset,
               (unsigned int) entry.dwSize);
    }

    /* Consume the chunk, including the pad byte after odd-sized data. */
    *bufp = entries + bytes + (bytes & 1u);
}
/*
 * Placeholder for JUNK chunks: does nothing and leaves *bufp untouched.
 */
void resolve_junk(BYTE ** bufp) {
    (void) bufp;
}
/*
 * Print a LIST header and hand the list body at *bufp to the matching
 * handler. 'hdrl', 'strl' and 'movi' have handlers; any other list type is
 * reported and skipped by advancing *bufp by listSize - 4.
 */
void resolve_list(LISTHeader *list_header, BYTE ** bufp) {
    const DWORD kind = list_header->listType.tag;
    const unsigned int length = list_header->listSize;

    printf("'LIST':%.4s, listSize: %d, listType: %.4s\n",
           list_header->LIST.ch, length, list_header->listType.ch);

    if (kind == hdrl.tag) {
        resolve_hdrl(bufp, length);
        return;
    }
    if (kind == strl.tag) {
        resolve_strl(bufp, length);
        return;
    }
    if (kind == movi.tag) {
        resolve_movi(bufp, length);
        return;
    }

    /* No handler for this list type: step over its body. */
    printf("Undefined list type %.4s, with size %d\n", list_header->listType.ch, length);
    *bufp += length - 4;
}
/*
 * Parse top-level items starting at *bufp, advancing *bufp as it goes:
 *  - a LIST is read and dispatched through resolve_list, then parsing goes on;
 *  - the 'idx1' index is printed and parsing ends;
 *  - any other chunk is reported and skipped. After five such chunks in total
 *    (counted across all calls) parsing ends, because this function has no way
 *    to know where the buffer stops.
 *
 * Changes relative to the previous version:
 *  - a skipped chunk now also steps over the pad byte that RIFF inserts after
 *    odd-sized data, so the next header is read from the right position;
 *  - the tail recursion is replaced by a loop with the same order of calls.
 */
void next_section(BYTE **bufp) {
    static int loop = 0;

    for (;;) {
        TagHeader th;
        unsigned int payload;

        memcpy(&th, *bufp, sizeof(th));

        if (th.name.tag == LIST.tag) {
            LISTHeader next_list_header;

            read_data_header(bufp, &next_list_header);
            resolve_list(&next_list_header, bufp);
            continue;
        }

        if (th.name.tag == idx1.tag) {
            resolve_idx(bufp);
            return;
        }

        payload = (unsigned int) th.size;
        printf("Next is not LIST: %.4s size %d.\n", th.name.ch, (int) th.size);
        /* 8-byte header + data + pad byte when the data size is odd. */
        *bufp += 8 + (size_t) payload + (payload & 1u);

        loop++;
        if (loop >= 5) {
            return;
        }
    }
}
/*
 * Load an AVI file into memory and print its structure.
 *
 * The file is argv[1] when given, otherwise "./Bear In The Woods.avi".
 * Returns 0 on success and 1 when the file cannot be opened, is not a RIFF
 * file, or memory cannot be allocated.
 *
 * Fixes relative to the previous version:
 *  - the file is opened in binary mode ("rb"); text mode corrupts the data on
 *    platforms that translate line endings;
 *  - fopen, fread and the allocation are checked;
 *  - the buffer is zero-filled and slightly over-allocated, because the
 *    parser reads chunk headers without knowing where the data ends;
 *  - the buffer is freed and the file closed; unused locals are gone.
 */
int main(int argc, char **argv) {
    const char * path = argc > 1 ? argv[1] : "./Bear In The Woods.avi";
    RIFFHeader riff_header;
    FILE * fp;
    BYTE * bufs;
    BYTE * buf;
    size_t size;
    size_t got;

    init();

    fp = fopen(path, "rb");
    if (fp == NULL) {
        perror(path);
        return 1;
    }

    if (fread(&riff_header, 1, sizeof(RIFFHeader), fp) != sizeof(RIFFHeader)
        || memcmp(riff_header.RIFF.ch, "RIFF", 4) != 0) {
        fprintf(stderr, "%s: not a RIFF file\n", path);
        fclose(fp);
        return 1;
    }
    printf("'RIFF':%.4s, fileSize: %d, fileType: %.4s\n", riff_header.RIFF.ch, riff_header.fileSize, riff_header.fileType.ch);

    /* fileSize also counts the 4-byte file type read above, so at most
     * fileSize - 4 bytes follow; a short read is expected and harmless. */
    size = riff_header.fileSize;
    bufs = buf = calloc(1, size + 16);
    if (buf == NULL) {
        fprintf(stderr, "%s: cannot allocate %u bytes\n", path, riff_header.fileSize);
        fclose(fp);
        return 1;
    }

    got = fread(buf, 1, size, fp);
    fclose(fp);
    if (got == 0) {
        fprintf(stderr, "%s: no data after the RIFF header\n", path);
        free(bufs);
        return 1;
    }

    next_section(&buf);

    free(bufs);
    return 0;
}
下面是读出的所有信息。可以看到,文件里分别有一个音频流和一个视频流,它们在movi LIST中交织存储。顺序读取在第一个奇数长度的分片(00dc,70591字节)之后出现错位:该分片后面有1个对齐填充字节,而产生这份输出的程序没有跳过它。不过所有分片都可以在最后的AVI Index里找到。
'RIFF':RIFF, fileSize: 28985984, fileType: AVI
'LIST':LIST, listSize: 8926, listType: hdrl
Found AVI Main Header:
Type: avih
MicroSecPerFrame: 33366ns -> 29.9706 fps
MaxBytesPerSec: 41000
TotalFrames: 825
InitialFrames: 0
Streams: 2
SuggestedBufferSize: 1048576
Width: 1920, Height: 1080
'LIST':LIST, listSize: 4320, listType: strl
Readd buffer: 0/4320/4320 B (+56) + 8 + 56
Found Stream Header:
Type: vids
Handler: MP42
Rate: 30000, Scale: 1001
Length: 825
SampleSize: 0
Rect: (w: 0, h: 70780800)
Readd buffer: 64/4256/4320 B (+40) + 8 + 40 or 20
Found Stream Format:
Size: 40, Widht: 1920, Height: 1080
Readd buffer: 112/4208/4320 B (+4120) + 8 + 4120
Undefined Stream tag JUNK(4B4E554A), skiped.
Readd buffer: 4240/80/4320 B (+68) + 8 + 68
Undefined Stream tag vprp(70727076), skiped.
Readd buffer: 4316/4/4320 B (+4254) + 8 + 4254
Undefined Stream tag LIST(5453494C), skiped.
'LIST':LIST, listSize: 4254, listType: strl
Readd buffer: 0/4254/4254 B (+56) + 8 + 56
Found Stream Header:
Type: auds
Handler:
Rate: 11025, Scale: 253
Length: 1202
SampleSize: 1024
Rect: (w: 0, h: 0)
Readd buffer: 64/4190/4254 B (+50) + 8 + 40 or 20
Found Stream Format:
FormatTag: 2, Channls: 2, SamplePersec: 44100, AvgBytesPerSec: 16000, Size: 32
Readd buffer: 122/4132/4254 B (+4120) + 8 + 4120
Undefined Stream tag JUNK(4B4E554A), skiped.
Readd buffer: 4250/4/4254 B (+260) + 8 + 260
Undefined Stream tag JUNK(4B4E554A), skiped.
strl resolved.
'LIST':LIST, listSize: 26, listType: INFO
Undefined list type INFO, with size 26
Next is not LIST: JUNK size 1016.
'LIST':LIST, listSize: 28943540, listType: movi
<MOVI LIST>
tag 00dc size 66996 (0/28943540)
tag 01wb size 1024 (67004/28943540)
tag 01wb size 1024 (68036/28943540)
tag 00dc size 70591 (69068/28943540)
tag size 262242 (139667/28943540)
tag �
�� size 865395139 (401917/28943540)
<AVI INDEX> idx1 size: 32432
Fcc: 00dc, cb:16, idx counts: 2026
no. ChunkId Flags Offset Size
1 01wb 16 000105c0 1024
2 01wb 16 000109c8 1024
3 00dc 0 00010dd0 70591
4 01wb 16 00022198 1024
2022 01wb 16 28925376 1024
2023 00dc 0 28926408 14028
2024 01wb 16 28940444 1024
2025 01wb 16 28941476 1024
2026 01wb 16 28942508 1024
strl resolved.
Next is not LIST: size 67108937.