printk -go on-CFANZ编程社区

printk简介

printk是在内核中运行的向控制台输出显示的函数，Linux内核先分配一个静态的临时缓冲区log_buf，然后调用vscnprintf格式化显示字符串，并返回格式化之后字符串的长度，最后调用tty驱动初始化注册绑定好的console.write()实现向串口输出。

目前新版内核解决一些lock-less问题：见https://lkml.org/lkml/2020/7/9/551

pdf 文本附件：https://www.linuxplumbersconf.org/event/4/contributions/290/attachments/276/463/lpc2019_jogness_printk.pdf

其核心函数如下：

asmlinkage int vprintk_emit(int facility, int level,
                const char *dict, size_t dictlen,
                const char *fmt, va_list args)
{
    static bool recursion_bug;
    static char textbuf[LOG_LINE_MAX];
    char *text = textbuf;
    size_t text_len = 0;
    enum log_flags lflags = 0;
    unsigned long flags;
    int this_cpu;
    int printed_len = 0;
    int nmi_message_lost;
    bool in_sched = false;
    /* cpu currently holding logbuf_lock in this function */
    static unsigned int logbuf_cpu = UINT_MAX;

    if (level == LOGLEVEL_SCHED) {
        level = LOGLEVEL_DEFAULT;
        in_sched = true;
    }

    boot_delay_msec(level);
    printk_delay();

    local_irq_save(flags);
    this_cpu = smp_processor_id();

    /*
     * Ouch, printk recursed into itself!
     处理printk中再次调用printk的情况
     */
    if (unlikely(logbuf_cpu == this_cpu)) {
        /*
         * If a crash is occurring during printk() on this CPU,
         * then try to get the crash message out but make sure
         * we can't deadlock. Otherwise just return to avoid the
         * recursion and return - but flag the recursion so that
         * it can be printed at the next appropriate moment:
         */
        if (!oops_in_progress && !lockdep_recursing(current)) {
            recursion_bug = true;
            local_irq_restore(flags);
            return 0;
        }
        zap_locks();

zap_locks()函数主要作用就是在prink运行期间，如果printk local CPU发生了crash，确保不能死锁，并且每隔30s初始化锁logbuf_lock和console_sem,留时间给控制台打印完全的oops信息
/*
 * Zap console related locks when oopsing.
 * To leave time for slow consoles to print a full oops,
 * only zap at most once every 30 seconds.
 */
static void zap_locks(void)
{
    static unsigned long oops_timestamp;

    if (time_after_eq(jiffies, oops_timestamp) &&
        !time_after(jiffies, oops_timestamp + 30 * HZ))
        return;

    oops_timestamp = jiffies;

    debug_locks_off();
    /* If a crash is occurring, make sure we can't deadlock */
    raw_spin_lock_init(&logbuf_lock);
    /* And make sure that we print immediately */
    sema_init(&console_sem, 1);
}

}

    lockdep_off();
    /* This stops the holder of console_sem just where we want him */
    raw_spin_lock(&logbuf_lock);
    logbuf_cpu = this_cpu;

    if (unlikely(recursion_bug)) {
        static const char recursion_msg[] =
            "BUG: recent printk recursion!";

        recursion_bug = false;
        /* emit KERN_CRIT message */
        printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
                     NULL, 0, recursion_msg,
                     strlen(recursion_msg));
    }

    nmi_message_lost = get_nmi_message_lost();
    if (unlikely(nmi_message_lost)) {
        text_len = scnprintf(textbuf, sizeof(textbuf),
                     "BAD LUCK: lost %d message(s) from NMI context!",
                     nmi_message_lost);
        printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
                     NULL, 0, textbuf, text_len);
    }

    /*
     * The printf needs to come first; we need the syslog
     * prefix which might be passed-in as a parameter.
     */
    text_len = vscnprintf(text, sizeof(textbuf), fmt, args);

    /* mark and strip a trailing newline */
    if (text_len && text[text_len-1] == '\n') {
        text_len--;
        lflags |= LOG_NEWLINE;
    }

    /* strip kernel syslog prefix and extract log level or control flags */
    if (facility == 0) {
        int kern_level = printk_get_level(text);

        if (kern_level) {
            const char *end_of_header = printk_skip_level(text);
            switch (kern_level) {
            case '0' ... '7':
                if (level == LOGLEVEL_DEFAULT)
                    level = kern_level - '0';
                /* fallthrough */
            case 'd':    /* KERN_DEFAULT */
                lflags |= LOG_PREFIX;
            }
            /*
             * No need to check length here because vscnprintf
             * put '\0' at the end of the string. Only valid and
             * newly printed level is detected.
             */
            text_len -= end_of_header - text;
            text = (char *)end_of_header;
        }
    }

    if (level == LOGLEVEL_DEFAULT)
        level = default_message_loglevel;

    if (dict)
        lflags |= LOG_PREFIX|LOG_NEWLINE;

    if (!(lflags & LOG_NEWLINE)) {
        /*
         * Flush the conflicting buffer. An earlier newline was missing,
         * or another task also prints continuation lines.
         */
        if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
            cont_flush(LOG_NEWLINE);

        /* buffer line if possible, otherwise store it right away */
        if (cont_add(facility, level, text, text_len))
            printed_len += text_len;
        else
            printed_len += log_store(facility, level,
                         lflags | LOG_CONT, 0,
                         dict, dictlen, text, text_len);
    } else {
        bool stored = false;

        /*
         * If an earlier newline was missing and it was the same task,
         * either merge it with the current buffer and flush, or if
         * there was a race with interrupts (prefix == true) then just
         * flush it out and store this line separately.
         * If the preceding printk was from a different task and missed
         * a newline, flush and append the newline.
         */
        if (cont.len) {
            if (cont.owner == current && !(lflags & LOG_PREFIX))
                stored = cont_add(facility, level, text,
                          text_len);
            cont_flush(LOG_NEWLINE);
        }

        if (stored)
            printed_len += text_len;
        else
            printed_len += log_store(facility, level, lflags, 0,
                         dict, dictlen, text, text_len);
    }

    logbuf_cpu = UINT_MAX;
    raw_spin_unlock(&logbuf_lock);
    lockdep_on();
    local_irq_restore(flags);

    /* If called from the scheduler, we can not call up(). */
    if (!in_sched) {
        lockdep_off();
        /*
         * Try to acquire and then immediately release the console
         * semaphore.  The release will print out buffers and wake up
         * /dev/kmsg and syslog() users.
         */
        if (console_trylock())
            console_unlock();
        lockdep_on();
    }

    return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);

　　asmlinkage表示将函数参数存放在局部栈中，va_start和va_end是获取可变参数的方法，该方法读取可变参数的过程其实就是在堆栈中,使用指针,遍历堆栈段中的参数列表,从低地址到高地址一个一个地把参数内容读出来的过程.

　　vprintk_emit函数调用了console_unlock将log通过串口进行输出的

printk -go on_数据

所以为了CPU重启后，内核dmesg不丢失，一般固定内核静态缓冲区log_buf地址！！！也就是log_buf 地址一直是某一个物理地址，只要是非断电重启，其数据会一直在

由于目前使用了 lock；对性能有些影响，安装多核编程法则，此时使用无锁队列-lock-free 比较好

　　prink第一个阶段是将log放到buff, 第二阶段是将buff数据送到uart。Dump log到uart console是一个同步的操作过程。在此期间，IRQ是被disabled，CPU也是在wait，一直到dump完成。由于此阶段的速度会很慢, 并且会把local的irq disable. 所以会导致local cpu sched, 如果在此cpu 的thread都会被阻塞，严重的可能导致系统阻塞

console_unlock函数调用流程如下

printk -go on_sed_02

/**
 * console_trylock - try to lock the console system for exclusive use.
 *
 * Try to acquire a lock which guarantees that the caller has exclusive
 * access to the console system and the console_drivers list.
 *
 * returns 1 on success, and 0 on failure to acquire the lock.
 */
int console_trylock(void)
{
    if (down_trylock_console_sem())
        return 0;
    if (console_suspended) {
        up_console_sem();
        return 0;
    }
    console_locked = 1;
    /*
     * When PREEMPT_COUNT disabled we can't reliably detect if it's
     * safe to schedule (e.g. calling printk while holding a spin_lock),
     * because preempt_disable()/preempt_enable() are just barriers there
     * and preempt_count() is always 0.
     *
     * RCU read sections have a separate preemption counter when
     * PREEMPT_RCU enabled thus we must take extra care and check
     * rcu_preempt_depth(), otherwise RCU read sections modify
     * preempt_count().
     */
    console_may_schedule = !oops_in_progress &&
            preemptible() &&
            !rcu_preempt_depth();
    return 1;
}

logbuf_lock保护日志缓冲区，而console_sem保护对控制台驱动程序列表和实际控制台本身的访问。

/**
 * console_unlock - unlock the console system
 *
 * Releases the console_lock which the caller holds on the console system
 * and the console driver list.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk().  If this is the case, console_unlock(); emits
 * the output prior to releasing the lock.
 *
 * If there is output waiting, we wake /dev/kmsg and syslog() users.
 *
 * console_unlock(); may be called from any context.
 */
void console_unlock(void)
{
    static char ext_text[CONSOLE_EXT_LOG_MAX];
    static char text[LOG_LINE_MAX + PREFIX_MAX];
    static u64 seen_seq;
    unsigned long flags;
    bool wake_klogd = false;
    bool do_cond_resched, retry;

    if (console_suspended) {
        up_console_sem();
        return;
    }

    /*
     * Console drivers are called under logbuf_lock, so
     * @console_may_schedule should be cleared before; however, we may
     * end up dumping a lot of lines, for example, if called from
     * console registration path, and should invoke cond_resched()
     * between lines if allowable.  Not doing so can cause a very long
     * scheduling stall on a slow console leading to RCU stall and
     * softlockup warnings which exacerbate the issue with more
     * messages practically incapacitating the system.
     */
    do_cond_resched = console_may_schedule;
    console_may_schedule = 0;

again:
    /*
     * We released the console_sem lock, so we need to recheck if
     * cpu is online and (if not) is there at least one CON_ANYTIME
     * console.
     */
    if (!can_use_console()) {
        console_locked = 0;
        up_console_sem();
        return;
    }

    /* flush buffered message fragment immediately to console */
    console_cont_flush(text, sizeof(text));

    for (;;) {
        struct printk_log *msg;
        size_t ext_len = 0;
        size_t len;
        int level;

        raw_spin_lock_irqsave(&logbuf_lock, flags);
        if (seen_seq != log_next_seq) {
            wake_klogd = true; // 控制判断是否唤醒syslogd的标志位
            seen_seq = log_next_seq;
        }

        if (console_seq < log_first_seq) {
            len = sprintf(text, "** %u printk messages dropped ** ",
                      (unsigned)(log_first_seq - console_seq));

            /* messages are gone, move to first one */
            console_seq = log_first_seq;
            console_idx = log_first_idx;
            console_prev = 0;
        } else {
            len = 0;
        }
skip:
        if (console_seq == log_next_seq)
            break;

        msg = log_from_idx(console_idx);
        if (msg->flags & LOG_NOCONS) {
            /*
             * Skip record we have buffered and already printed
             * directly to the console when we received it.
             */
            console_idx = log_next(console_idx);
            console_seq++;
            /*
             * We will get here again when we register a new
             * CON_PRINTBUFFER console. Clear the flag so we
             * will properly dump everything later.
             */
            msg->flags &= ~LOG_NOCONS;
            console_prev = msg->flags;
            goto skip;
        }

        level = msg->level;
        //计算本次log_buf中打印输出索引的范围
        len += msg_print_text(msg, console_prev, false,
                      text + len, sizeof(text) - len);
        if (nr_ext_console_drivers) {
            ext_len = msg_print_ext_header(ext_text,
                        sizeof(ext_text),
                        msg, console_seq, console_prev);
            ext_len += msg_print_ext_body(ext_text + ext_len,
                        sizeof(ext_text) - ext_len,
                        log_dict(msg), msg->dict_len,
                        log_text(msg), msg->text_len);
        }
        console_idx = log_next(console_idx);
        console_seq++;
        console_prev = msg->flags;
        raw_spin_unlock(&logbuf_lock);

        stop_critical_timings();    /* don't trace print latency */
        //con->write()
        call_console_drivers(level, ext_text, ext_len, text, len);
        start_critical_timings();
        local_irq_restore(flags);

        if (do_cond_resched)
            cond_resched();
    }
    console_locked = 0;

    /* Release the exclusive_console once it is used */
    if (unlikely(exclusive_console))
        exclusive_console = NULL;

    raw_spin_unlock(&logbuf_lock);

    up_console_sem();

    /*
     * Someone could have filled up the buffer again, so re-check if there's
     * something to flush. In case we cannot trylock the console_sem again,
     * there's a new owner and the console_unlock() from them will do the
     * flush, no worries.
     */
    raw_spin_lock(&logbuf_lock);
    retry = console_seq != log_next_seq;
    raw_spin_unlock_irqrestore(&logbuf_lock, flags);

    if (retry && console_trylock())
        goto again;

    if (wake_klogd)
        wake_up_klogd();// 唤醒sys_logd进程
}
EXPORT_SYMBOL(console_unlock);

　　将消息放入日志缓冲区后，printk()尝试获取console_sem。它甚至可以在中断上下文中执行此操作，因为down_trylock()不会 hibernate 。如果获取了信号量，则可以继续将日志缓冲区内容发送到控制台。如果不获取信号量，则是控制台信号量持有者的责任，即通过console_unlock()将日志缓冲区内容发送到控制台。
当console_unlock()发送到控制台时，可能会有其他CPU调用printk()。因此console_unlock()循环，直到没有其他要发送到控制台的内容为止。在每个循环中，它都会在logbuf_lock下获取指向日志缓冲区一部分的指针以发送到控制台，然后不再在logbuf_lock下发送输出到控制台。在将内容发送到控制台时，由于未采用logbuf_lock，因此其他CPU可以继续将内容添加到日志缓冲区。

　　释放logbuf_lock之后但在up()加上console_sem之前，缓冲区可能已被填满。且logbuf_lock在up()和console_sem之前被释放，因为up()会引起唤醒，需要唤醒运行队列，这可能会对使用运行队列锁定调用的printk()产生优先级反转问题；可以使用补丁来更改此锁定方案:Jan Kara [PATCH 0/8 v4] printk: Cleanups and softlockup avoidance，除其他事项外，尝试避免CPU在console_unlock()上无限期循环(在大型系统上已通过慢速串行控制台记录了大量启动事件)，将工作移交给其他CPU；并尝试最小化在printk()上禁用中断的时间。local_irq_save()可防止在本地CPU上处理中断，这可能会在从ISR返回时最终调用schedule()。 schedule()也可以在其他地方调用，但是由于应该可以在中断上下文中使用printk()，因此它调用的任何内容最终都不会调用schedule()(例如，出于这个原因，使用down_trylock()代替了down())。

Jan Kara的[PATCH 3/8] printk: Enable interrupts before calling console_trylock_for_printk()尝试最小化禁用中断的时间。在该补丁程序之前，至少出于以下原因，禁用了中断:
因此，上述补丁在后两种情况下不再禁用中断，而是将它们包装在preempt_disable()/preempt_enable()中，这允许中断，但不能抢占。

保护不受logbuf_lock保护的数据。这种情况通常通过自旋锁的IRQ变体来解决(例如raw_spin_lock_irqsave())，但是在禁用中断的情况下，还有更多事情要运行。
防止当前CPU突然更改。从can_use_console()调用的console_trylock_for_printk()询问:我们现在可以在此cpu上实际使用控制台吗？请注意:控制台驱动程序可能假定已分配了每cpu资源。转移到另一个CPU可能会造成困惑。
虽然console_sem与down_trylock()一起使用，所以中断也可以尝试printk()也不是问题，在按住console_sem()的同时被抢占会阻止其他任何人将其打印到控制台。

您可以共享对该线程的引用吗？您可能会注意到在3.10中有一个cont缓冲区，该缓冲区保存最后一个换行符中的所有字符。当换行到达时，或在其他任务printk()时，它将刷新到“真实”日志缓冲区。
在该线程上，除了以下摘录之外，我没有发现任何有关日志条目顺序的有效担忧:

Well, I believe someone got DDetetccctted ed 113223 HHzz CPUCPU

AFAIK完全是假的日志缓冲区的顺序由logbuf_lock保证，
写入vprintk_emit()中的日志缓冲区时已禁用的中断会导致此事件，并使用lockt变体来执行此操作，当从raw_spin_lock_irqsave()上读取中断时会禁用该中断(console_unlock())。因此，访问日志缓冲区不受其他CPU干扰或中断的影响是安全的。
　　在较新的内核中，仍然存在将日志行拆分为几个printk()调用的情况，这种情况被cont缓冲区覆盖，该缓冲区保存部分行，并在另一个CPU/中断干扰时刷新它们，因此可能会拆分日志行在几行中并且它们之间有不相关的日志行，但是任何日志行都不应有混合输出。
导致损坏的另一个可能原因是，由于日志缓冲区是环形缓冲区，因此从理论上讲它可能会溢出，这意味着将覆盖先前的消息。
　　该线程中的一个有效问题是日志缓冲区的及时输出。这是通过尝试在每个console_unlock()调用中调用vprintk_emit()(调用控制台驱动程序)来实现的。如果无法获取控制台信号，则该消息已经在日志缓冲区中，并且当前信号所有者将其输出到控制台。
　　该线程中提到的一个有趣的事情是，在commit a0f1ccfd8d: "lockdep: do not recurse in printk"之前(printk.c before和after)，中断在调用release_console_sem()(这是console_unlock()的前身)之前被重新启用。显然，启用lockdep(锁定验证程序，能够检测到可能的死锁和其他锁定问题，并启用 print 诊断)时，尝试从printk()进行printk时可能导致锁定。因此，对spin_{,un}lock_irq{save,restore}()的调用分为禁用/启用中断和获取/释放锁，在它们之间添加了lockdep_on/off()调用，并且禁用lockdep和中断已扩展为涵盖整个功能。
回到:

I see printk acquires logbuf_lock and writes to the log buffer and then tries to grab the console semaphore and releases logbuf_lock. Then inside console_unlock inside a loop it acquires logbuf_lock and disables interrupts, then releases logbuf_lock and calls the console drivers, and then restores interrupts. What's the purpose of this locking/disabling interrupt sequence?

不仅从console_unlock()调用vprintk_emit()，还在注册新控制台时，在恢复控制台时，在将CPU挂起且将未决输出热插入控制台时也调用了console_unlock()……这些地方通常启用了中断。因此，logbuf_lock必须考虑到这一点。
　　您似乎已经注意到，当在console_unlock()上使用raw_spin_lock_irqsave()时(它调用raw_spin_unlock())禁用了中断，但在释放锁(local_irq_console())时并没有(潜在地)重新启用了它们，但只有在call_console_drivers()之后才可能重新启用了它们(call_console_drivers())。我看到在禁用中断的情况下调用console_unlock()的唯一原因是为了避免CPU在我们下面进行更改(控制台驱动程序可以访问每个CPU变量)。
一个有趣的数据点是-rt(实时)补丁集在call_console_drivers()中调用migrate_disable()/migrate_enable()之前，以及在console_unlock()(在-rt中受printk()保护，这不允许CPU迁移)之前重新启用中断。这样做是为了最小化PREEMPT_RT_FULL期间的中断等待时间。 ojit_code支持低中断延迟。您可以在printk.c in the linux-stable-rt tree at git.kernel.org上看到它，相关的修补程序是printk-rt-aware。

关于linux - printk中断禁用和锁定，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/25873754/

转载 copy from https://www.coder.work/article/6676391

后续对此的改造：参考 https://linuxreviews.org/Linux_5.10_Will_Have_A_Fully_Lock-Less_Ring_Buffer

console 驱动：

　　芯片平台一般有好几个串口，具体选择哪个串口作为printk输出串口是可以配置的。我是通过cmdline配置console=ttyS0,115200，然后传递给内核，内核在启动时会解析cmdline，根据cmdline的解析结果来选择使用哪个串口，并进行串口注册。

printk -go on_数据_03 printk -go on_数据_04

　　首先从设备树里加载UART的硬件资源，然后在uart模块init的时候，进行uart_register_driver()或者是platform_driver_register()。同时会调用xxx_console_setup()初始化串口模式，主要是初始化struct console,serial21285_console_write()函数在con->write()的时候调用，就完成了往串口数据的打印。

　　register_console()首先遍历全局的console_drivers(**决定哪个driver输出printk log)*，然后根据早期硬件串口驱动注册时的struct console*的成员flags是否为CON_BOOT(引导控制台)。接下来获取preferred_console，从preferred_console = selected_console可以获取到串口号。然后接下来使用for循环匹配cmdline里的console name与注册时的newcon->name字段，最后把串口号放到console_drivers列表里，截止就完成了tty端口的指定了。

/ * The console driver calls this routine during kernel initialization
 * to register the console printing procedure with printk() and to
 * print any messages that were printed by the kernel before the
 * console driver was initialized.
 *
 * This can happen pretty early during the boot process (because of
 * early_printk) - sometimes before setup_arch() completes - be careful
 * of what kernel features are used - they may not be initialised yet.
 *
 * There are two types of consoles - bootconsoles (early_printk) and
 * "real" consoles (everything which is not a bootconsole) which are
 * handled differently.
 *  - Any number of bootconsoles can be registered at any time.
 *  - As soon as a "real" console is registered, all bootconsoles
 *    will be unregistered automatically.
 *  - Once a "real" console is registered, any attempt to register a
 *    bootconsoles will be rejected
 */
void register_console(struct console *newcon)
{
    int i;
    -----------------------------------
    if (console_drivers && console_drivers->flags & CON_BOOT)
        bcon = console_drivers;

    if (preferred_console < 0 || bcon || !console_drivers)
        preferred_console = selected_console;

    -------------------
    /*
     *    See if this console matches one we selected on
     *    the command line.
     */
    for (i = 0, c = console_cmdline;
         i < MAX_CMDLINECONSOLES && c->name[0];
         i++, c++) {
        if (!newcon->match ||
            newcon->match(newcon, c->name, c->index, c->options) != 0) {
            ----------------------
        newcon->flags |= CON_ENABLED;
        if (i == selected_console) {
            newcon->flags |= CON_CONSDEV;
            preferred_console = selected_console;
        }
        break;
    }
    ---------------------------------
        /*
         * We're about to replay the log buffer.  Only do this to the
         * just-registered console to avoid excessive message spam to
         * the already-registered consoles.
         */
        exclusive_console = newcon;
    }
    console_unlock();
    console_sysfs_notify();
    
}
EXPORT_SYMBOL(register_console);

/*
 * Call the console drivers, asking them to write out
 * log_buf[start] to log_buf[end - 1].
 * The console_lock must be held.
 */
static void call_console_drivers(int level,
                 const char *ext_text, size_t ext_len,
                 const char *text, size_t len)
{
    struct console *con;

    trace_console(text, len);

    if (level >= console_loglevel && !ignore_loglevel)
        return;
    if (!console_drivers)
        return;

    for_each_console(con) {
        if (exclusive_console && con != exclusive_console)
            continue;
        if (!(con->flags & CON_ENABLED))
            continue;
        if (!con->write)
            continue;
        if (!cpu_online(smp_processor_id()) &&
            !(con->flags & CON_ANYTIME))
            continue;
        if (con->flags & CON_EXTENDED)
            con->write(con, ext_text, ext_len);
        else
            con->write(con, text, len);
    }
}