访问个人博客也许可以获得无广告体验与更好的文章排版：codinglover.top/2022/03/20/嵌入式linux入门3-文件io/

文件描述符

每个打开的Linux文件都有一个对应的文件描述符，文件描述符为一个非负整数。我们可以通过调用open()函数获取文件描述符。

当shell开启一个进程时，此进程默认会继承三个文件描述符，称之为标准文件描述符，如下所示：

文件描述符	用途	POSIX名称	stdio流
0	标准输入	STDIN_FILENO	stdin
1	标准输出	STDOUT_FILENO	stdout
2	标准错误	STDERR_FILENO	stderr

在程序中指代这些文件描述符时，可以直接使用数字，也可使用<unistd.h>中定义的POSIX名称**（推荐）**。但是stdio流对应的文件描述符并不是一成不变的，这只是它们的初始值，调用freopen()函数可能会改变stdio流对应的文件描述符数值，参考如下引用：

文件I/O函数

执行文件I/O的4个主要系统调用函数如下（语言和软件包通常会利用二次封装的I/O函数来间接调用它们，比如C库中的文件操作类函数：fopen(), fread(), fwrite(), fclose()）：

int open(const char *pathname, int flags, mode_t mode);
// 打开pathname所指定的文件，并返回文件描述符，后续的函数使用文件描述符指代打开的文件，可以通过设置掩
// 码参数flags的方式在文件不存在时创建之, mode参数用于指定创建文件时文件的访问权限。

ssize_t read(int fd, void *buf, size_t count);
// 从fd文件描述符所指代的文件中读取最多count字节的数据到缓冲buf，函数返回实际读取的字节数，如果读到文
// 件末尾，则返回0。

ssize_t write(int fd, const void *buf, size_t count);
// 从buf缓冲中读取count字节的数据并写入fd文件描述符所指代的文件中，函数返回实际写入文件的字节数。

int close(int fd);
//关闭文件描述符fd，此函数会释放文件描述符fd以及与之相关的内核资源。

如下是一个简单的cp命令的源代码，演示了上面四个函数的实际应用。

/*
 * file : main.c
 * 
 */

#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

#ifndef BUF_SIZE /* Allow "cc -D" to override definition */
#define BUF_SIZE 1024
#endif

/*
 * Minimal cp: copy the file named by argv[1] to the file named by argv[2].
 *
 * Usage: <prog> old-file new-file
 *
 * The destination is created if it does not exist (requested mode
 * rw-rw-rw-, which open() further restricts by the process umask) and is
 * truncated if it does. All diagnostics are written to stderr so they never
 * mix with data when an argument such as /dev/tty is used.
 *
 * Exits with EXIT_SUCCESS when the whole input was copied and both
 * descriptors closed cleanly; exits with EXIT_FAILURE on bad usage or on any
 * open/read/write/close error.
 */
int main(int argc, char *argv[])
{
    int inputFd, outputFd, openFlags;
    mode_t filePerms;
    ssize_t numRead;
    char buf[BUF_SIZE];

    /* argc is tested first, so argv[1] is only inspected when it exists */
    if (argc != 3 || strcmp(argv[1], "--help") == 0)
    {
        fprintf(stderr, "[usage]: %s old-file new-file\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* open input and output files */

    inputFd = open(argv[1], O_RDONLY);
    if (inputFd == -1)
    {
        fprintf(stderr, "[error]: opening file %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    openFlags = O_CREAT | O_WRONLY | O_TRUNC; /* O_TRUNC discards existing content */
    filePerms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP |
                S_IROTH | S_IWOTH; /* rw-rw-rw- */
    outputFd = open(argv[2], openFlags, filePerms);
    if (outputFd == -1)
    {
        fprintf(stderr, "[error]: opening file %s\n", argv[2]);
        exit(EXIT_FAILURE);
    }

    /* transfer data until we encounter end of input or an error */

    while ((numRead = read(inputFd, buf, BUF_SIZE)) > 0)
    {
        ssize_t numDone = 0;

        /*
         * write() may legitimately transfer fewer bytes than requested
         * (e.g. on pipes or terminals), so keep writing the remainder
         * instead of treating a short write as a failure.
         */
        while (numDone < numRead)
        {
            ssize_t numWritten = write(outputFd, buf + numDone,
                                       (size_t)(numRead - numDone));
            if (numWritten <= 0)
            {
                fprintf(stderr, "[error]: couldn't write whole buffer\n");
                exit(EXIT_FAILURE);
            }
            numDone += numWritten;
        }
    }

    if (numRead == -1)
    {
        fprintf(stderr, "[error]: read\n");
        exit(EXIT_FAILURE);
    }

    if (close(inputFd) == -1)
    {
        fprintf(stderr, "[error]: close input\n");
        exit(EXIT_FAILURE);
    }

    /* close() on the output can report deferred write errors, so check it */
    if (close(outputFd) == -1)
    {
        fprintf(stderr, "[error]: close output\n");
        exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}

通用I/O

UNIX I/O模型的显著特点之一是其输入/输出的通用性概念，也就是我们常说的万物皆文件，使用上面所示的系统I/O调用函数可以对任何文件进行I/O操作，比如我们可以这样用上面的示例代码：

// 假设代码编译成可执行文件cp1，并且cp1位于当前目录下
./cp1 /dev/tty 1.txt // 从终端读取输入并写入文件1.txt
./cp1 1.txt /dev/tty // 将文件1.txt的内容打印到终端

其他常用函数

lseek()

off_t lseek(int fd, off_t offset, int whence);
// 以whence所描述的位置为起点，将fd文件描述符的读写指针移动offset个字节。
// whence可以为SEEK_SET（文件开头），SEEK_CUR（当前位置），SEEK_END（文件结尾后的第一个字节）
// 函数返回调整后文件读写指针所指位置相对文件开头的偏移值

//常用使用技巧

lseek(fd, 0, SEEK_SET);   		/* Start of file */
lseek(fd, 0, SEEK_END); 		/* Next byte after the end of the file */
lseek(fd, -1, SEEK_END); 		/* Last byte of file */
lseek(fd, -10, SEEK_CUR); 		/* Ten bytes prior to current location */
lseek(fd, 10000, SEEK_END);		/* 10001 bytes past last byte of file */

此函数用于改变文件描述符所对应的文件读写指针的偏移量。对于每个打开的文件，系统内核会记录其文件偏移量，并将其作为下一次write()或read()操作的起始位置。

lseek()并不适用于所有类型的文件：它不能用于管道、FIFO、socket或者终端。当用于上述文件时，调用会失败并将errno全局变量设置为ESPIPE。

如果程序的文件偏移量越过了文件结尾，然后执行I/O操作，read()函数会返回0表示文件结尾，而write()函数则可以在文件结尾后的任意位置写入数据，**从文件结尾后到新写入数据的这段空间称为文件空洞。**对于程序而言这段空间是一段填充了0的文件内容，而对于磁盘来说，这段空间是不占用任何磁盘空间的（所以会出现文件大小大于占用磁盘空间大小的情况）。

以下示例程序演示了lseek()与read()、write()配合使用时的用法。《UNIX系统编程手册》里的例程用了很多作者自己写的库函数，我对它们做了简单替换，这样不用下载源码就可以直接运行，当然也就不那么“健壮”了……

/*
 * file : lseek_io.c
 */

// 该程序的第一个命令行参数为要打开的文件名称
// 余下的参数则指定了在文件上执行的输入/输出操作。每个表示操作的参数都以一个字母开头，
// 紧跟操作相关的值（中间不需要空格分割）。
// s<offset> : 将文件偏移量定位（seek）到距文件开头offset字节的位置
// r<length> : 在当前文件偏移量处读取length字节内容并以文本形式显示
// R<length> : 在当前文件偏移量处读取length字节内容并以十六进制形式显示
// w<str>    : 在当前文件偏移量处向文件写入str指定的字符串

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Report a command-line usage error and terminate.
 *
 * Writes "Usage: " followed by the printf-style formatted message to
 * stderr, then exits with EXIT_FAILURE. stdout is flushed first so that
 * already-buffered normal output appears before the diagnostic.
 *
 * format: printf-style format string; the remaining arguments are its
 *         values.
 * Does not return.
 */
void usageErr(const char *format, ...)
{
    va_list args;

    fflush(stdout);

    fputs("Usage: ", stderr);
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    fflush(stderr);
    exit(EXIT_FAILURE);
}

/*
 * Report a fatal error and terminate.
 *
 * Writes "Error: " followed by the printf-style formatted message to
 * stderr. If errno was non-zero when the function was entered, the matching
 * strerror() text is appended after ": ". The line is always terminated
 * with a newline, so callers can pass a bare tag such as "open" or "read".
 * stdout is flushed first so buffered normal output appears before the
 * diagnostic. Exits with EXIT_FAILURE.
 *
 * format: printf-style format string; the remaining arguments are its
 *         values.
 * Does not return.
 */
void errExit(const char *format, ...)
{
    /* Capture errno first: the stdio calls below are allowed to change it */
    int savedErrno = errno;
    va_list argList;

    fflush(stdout);
    fprintf(stderr, "Error: ");
    va_start(argList, format);
    vfprintf(stderr, format, argList);
    va_end(argList);

    if (savedErrno != 0)
    {
        fprintf(stderr, ": %s", strerror(savedErrno));
    }
    fputc('\n', stderr);

    fflush(stderr);
    exit(EXIT_FAILURE);
}

/*
 * Parse a decimal unsigned integer from the start of str.
 *
 * Leading whitespace is skipped and any characters after the digits are
 * ignored. Uses strtoul() rather than sscanf("%lu"): sscanf has undefined
 * behaviour when the value does not fit, whereas strtoul() clamps to
 * ULONG_MAX.
 *
 * str: NUL-terminated string to parse; it is not modified.
 * Returns the parsed value converted to size_t, or 0 when str does not
 * begin with a number.
 */
size_t getLong(const char *str)
{
    char *end;
    unsigned long num = strtoul(str, &end, 10);

    /* end == str means no digits were consumed at all */
    if (end == str)
    {
        return 0;
    }

    return (size_t)num;
}

/*
 * Run a sequence of seek/read/write operations on one file.
 *
 * argv[1] names the file, which is opened read-write and created if
 * missing. Each later argument is one operation, selected by its first
 * character:
 *   s<offset>  seek to <offset> bytes from the start of the file
 *   r<length>  read up to <length> bytes at the current offset, shown as text
 *              (non-printable bytes are shown as '?')
 *   R<length>  read up to <length> bytes at the current offset, shown as hex
 *   w<str>     write <str> at the current offset
 * An argument starting with any other character is reported on stderr and
 * skipped.
 *
 * Exits with EXIT_SUCCESS after all operations have run; bad usage or any
 * failing open/malloc/read/write/lseek/close terminates the program through
 * usageErr()/errExit().
 */
int main(int argc, char *argv[])
{
    size_t len;
    off_t offset;
    int fd, ap;
    ssize_t j; /* same type as numRead, so the comparison cannot truncate */
    unsigned char *buf; /* raw bytes: unsigned avoids sign extension in the hex dump */
    ssize_t numRead, numWritten;

    if (argc < 3 || strcmp(argv[1], "--help") == 0)
    {
        usageErr("%s file {r<length>|R<length>|w<string>|s<offset>}...\n", argv[0]);
    }

    fd = open(argv[1], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); /* rw-rw-rw- */
    if (fd == -1)
    {
        errExit("open");
    }

    for (ap = 2; ap < argc; ap++)
    {
        switch (argv[ap][0])
        {
        case 'r': /* Display bytes at current offset, as text */
        case 'R': /* Display bytes at current offset, as hex */
            len = getLong(&argv[ap][1]);

            /*
             * malloc(0) is allowed to return NULL, which would be reported
             * as an allocation failure; always request at least one byte.
             */
            buf = malloc(len > 0 ? len : 1);
            if (buf == NULL)
            {
                errExit("malloc");
            }

            numRead = read(fd, buf, len);
            if (numRead == -1)
            {
                errExit("read");
            }

            if (numRead == 0)
            {
                printf("%s: end-of-file\n", argv[ap]);
            }
            else
            {
                printf("%s :", argv[ap]);
                for (j = 0; j < numRead; j++)
                {
                    if (argv[ap][0] == 'r')
                    {
                        printf("%c", isprint(buf[j]) ? buf[j] : '?');
                    }
                    else
                    {
                        /* buf is unsigned, so bytes >= 0x80 print as two hex digits */
                        printf("%02x ", (unsigned int)buf[j]);
                    }
                }
                printf("\n");
            }

            free(buf);
            break;

        case 'w': /* write string at current offset */
            numWritten = write(fd, &argv[ap][1], strlen(&argv[ap][1]));
            if (numWritten == -1)
            {
                errExit("write");
            }
            printf("%s : wrote %ld bytes\n", argv[ap], (long) numWritten);
            break;

        case 's': /* Change file offset */
            offset = (off_t)getLong(&argv[ap][1]);
            if (lseek(fd, offset, SEEK_SET) == -1)
            {
                errExit("lseek");
            }
            printf("%s: seek succeeded\n", argv[ap]);
            break;

        default:
            fprintf(stderr, "Argument must start with [rRws]: %s\n", argv[ap]);
            break;
        }
    }

    /* close() can report deferred write errors, so do not ignore it */
    if (close(fd) == -1)
    {
        errExit("close");
    }

    exit(EXIT_SUCCESS);
}

以下shell指令过程演示了上面示例程序的运行效果以及文件空洞现象。

water@LAPTOP-Q3TGG09O:~/lseek_io$ touch file
water@LAPTOP-Q3TGG09O:~/lseek_io$ ./lseek_io file s100000 wabc
s100000: seek succeeded
wabc : wrote 3 bytes
water@LAPTOP-Q3TGG09O:~/lseek_io$ ls -l file
-rw-r--r-- 1 water water 100003 Mar 20 22:48 file
water@LAPTOP-Q3TGG09O:~/lseek_io$ ./lseek_io file s10000 R5
s10000: seek succeeded
R5 :00 00 00 00 00

ioctl()

int ioctl(int fd, unsigned long request, ...);
// 用于执行通用I/O模型之外的文件操作，比如底层设备需要进行一些通过标准I/O函数无法进行的特殊的配置，
// 则可以通过这个接口进行
// fd为文件描述符，request为特定的操作请求，后面会跟一个或多个操作相关的参数，也可能没有