diff --git a/MAINTAINERS b/MAINTAINERS index 7757bef14951..ca6f5f7a4752 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9997,6 +9997,14 @@ S: Supported F: Documentation/preempt-locking.txt F: include/linux/preempt.h +PRINTK +M: Petr Mladek +M: Sergey Senozhatsky +R: Steven Rostedt +S: Maintained +F: kernel/printk/ +F: include/linux/printk.h + PRISM54 WIRELESS DRIVER M: "Luis R. Rodriguez" L: linux-wireless@vger.kernel.org diff --git a/include/linux/printk.h b/include/linux/printk.h index 3472cc6b7a60..571257e0f53d 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_nmi_init(void); extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); -extern void printk_nmi_flush(void); -extern void printk_nmi_flush_on_panic(void); #else -static inline void printk_nmi_init(void) { } static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } -static inline void printk_nmi_flush(void) { } -static inline void printk_nmi_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK @@ -209,6 +203,9 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern void printk_safe_init(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl) static inline void show_regs_print_info(const char *log_lvl) { } + +static inline void printk_safe_init(void) +{ +} + +static inline void printk_safe_flush(void) +{ +} + +static inline void printk_safe_flush_on_panic(void) +{ +} #endif extern asmlinkage void dump_stack(void) __cold; diff --git a/init/Kconfig b/init/Kconfig index 55bb6fbc294e..483ad679aa37 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -861,17 +861,19 @@ config LOG_CPU_MAX_BUF_SHIFT 13 => 8 KB for each CPU 12 => 4 KB for each CPU -config NMI_LOG_BUF_SHIFT - int "Temporary per-CPU NMI log buffer size (12 => 4KB, 13 => 8KB)" +config PRINTK_SAFE_LOG_BUF_SHIFT + int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" range 10 21 default 13 - depends on PRINTK_NMI + depends on PRINTK help - Select the size of a per-CPU buffer where NMI messages are temporary - stored. They are copied to the main log buffer in a safe context - to avoid a deadlock. The value defines the size as a power of 2. + Select the size of an alternate printk per-CPU buffer where messages + printed from usafe contexts are temporary stored. One example would + be NMI messages, another one - printk recursion. The messages are + copied to the main log buffer in a safe context to avoid a deadlock. + The value defines the size as a power of 2. - NMI messages are rare and limited. The largest one is when + Those messages are rare and limited. The largest one is when a backtrace is printed. It usually fits into 4KB. Select 8KB if you want to be on the safe side. diff --git a/init/main.c b/init/main.c index c8a00f0f10ff..24ea48745061 100644 --- a/init/main.c +++ b/init/main.c @@ -581,7 +581,7 @@ asmlinkage __visible void __init start_kernel(void) timekeeping_init(); time_init(); sched_clock_postinit(); - printk_nmi_init(); + printk_safe_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index a01974e1bf6b..bfe62d5b3872 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -916,7 +916,7 @@ void crash_kexec(struct pt_regs *regs) old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); if (old_cpu == PANIC_CPU_INVALID) { /* This is the 1st CPU which comes here, so go ahead. */ - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); __crash_kexec(regs); /* diff --git a/kernel/panic.c b/kernel/panic.c index 08aa88dde7de..b95959733ce0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -188,7 +188,7 @@ void panic(const char *fmt, ...) * Bypass the panic_cpu check and call __crash_kexec directly. */ if (!_crash_kexec_post_notifiers) { - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); __crash_kexec(NULL); /* @@ -213,7 +213,7 @@ void panic(const char *fmt, ...) atomic_notifier_call_chain(&panic_notifier_list, 0, buf); /* Call flush even twice. It tries harder with a single online CPU */ - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); kmsg_dump(KMSG_DUMP_PANIC); /* diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index abb0042a427b..4a2ffc39eb95 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += nmi.o +obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..1db044f808b7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,42 +16,55 @@ */ #include -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +#ifdef CONFIG_PRINTK -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); - -#ifdef CONFIG_PRINTK_NMI +#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +#define PRINTK_NMI_CONTEXT_MASK 0x80000000 extern raw_spinlock_t logbuf_lock; +__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + +#define printk_safe_enter_irqsave(flags) \ + do { \ + local_irq_save(flags); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit_irqrestore(flags) \ + do { \ + __printk_safe_exit(); \ + local_irq_restore(flags); \ + } while (0) + +#define printk_safe_enter_irq() \ + do { \ + local_irq_disable(); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit_irq() \ + do { \ + __printk_safe_exit(); \ + local_irq_enable(); \ + } while (0) + +#else + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } + /* - * printk() could not take logbuf_lock in NMI context. Instead, - * it temporary stores the strings into a per-CPU buffer. - * The alternative implementation is chosen transparently - * via per-CPU variable. + * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem + * semaphore and some of console functions (console_unlock()/etc.), so + * printk-safe must preserve the existing local IRQ guarantees. */ -DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return this_cpu_read(printk_func)(fmt, args); -} +#define printk_safe_enter_irqsave(flags) local_irq_save(flags) +#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) -extern atomic_t nmi_message_lost; -static inline int get_nmi_message_lost(void) -{ - return atomic_xchg(&nmi_message_lost, 0); -} +#define printk_safe_enter_irq() local_irq_disable() +#define printk_safe_exit_irq() local_irq_enable() -#else /* CONFIG_PRINTK_NMI */ - -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return vprintk_default(fmt, args); -} - -static inline int get_nmi_message_lost(void) -{ - return 0; -} - -#endif /* CONFIG_PRINTK_NMI */ +#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4ba3d34938c0..34da86e73d00 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -213,17 +213,36 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { - if (down_trylock(&console_sem)) + int lock_failed; + unsigned long flags; + + /* + * Here and in __up_console_sem() we need to be in safe mode, + * because spindump/WARN/etc from under console ->lock will + * deadlock in printk()->down_trylock_console_sem() otherwise. + */ + printk_safe_enter_irqsave(flags); + lock_failed = down_trylock(&console_sem); + printk_safe_exit_irqrestore(flags); + + if (lock_failed) return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; } #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) -#define up_console_sem() do { \ - mutex_release(&console_lock_dep_map, 1, _RET_IP_);\ - up(&console_sem);\ -} while (0) +static void __up_console_sem(unsigned long ip) +{ + unsigned long flags; + + mutex_release(&console_lock_dep_map, 1, ip); + + printk_safe_enter_irqsave(flags); + up(&console_sem); + printk_safe_exit_irqrestore(flags); +} +#define up_console_sem() __up_console_sem(_RET_IP_) /* * This is used for debugging the mess that is the VT code by @@ -351,6 +370,34 @@ __packed __aligned(4) */ DEFINE_RAW_SPINLOCK(logbuf_lock); +/* + * Helper macros to lock/unlock logbuf_lock and switch between + * printk-safe/unsafe modes. + */ +#define logbuf_lock_irq() \ + do { \ + printk_safe_enter_irq(); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irq() \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irq(); \ + } while (0) + +#define logbuf_lock_irqsave(flags) \ + do { \ + printk_safe_enter_irqsave(flags); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irqrestore(flags) \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irqrestore(flags); \ + } while (0) + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -782,20 +829,21 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; - raw_spin_lock_irq(&logbuf_lock); + + logbuf_lock_irq(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); goto out; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); ret = wait_event_interruptible(log_wait, user->seq != log_next_seq); if (ret) goto out; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); } if (user->seq < log_first_seq) { @@ -803,7 +851,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, user->idx = log_first_idx; user->seq = log_first_seq; ret = -EPIPE; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); goto out; } @@ -816,7 +864,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, user->idx = log_next(user->idx); user->seq++; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (len > count) { ret = -EINVAL; @@ -843,7 +891,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) if (offset) return -ESPIPE; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); switch (whence) { case SEEK_SET: /* the first record */ @@ -867,7 +915,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) default: ret = -EINVAL; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); return ret; } @@ -881,7 +929,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -889,7 +937,7 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) else ret = POLLIN|POLLRDNORM; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); return ret; } @@ -919,10 +967,10 @@ static int devkmsg_open(struct inode *inode, struct file *file) mutex_init(&user->lock); - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); user->idx = log_first_idx; user->seq = log_first_seq; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); file->private_data = user; return 0; @@ -1064,13 +1112,13 @@ void __init setup_log_buf(int early) return; } - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; free = __LOG_BUF_LEN - log_next_idx; memcpy(log_buf, __log_buf, __LOG_BUF_LEN); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); pr_info("log_buf_len: %d bytes\n", log_buf_len); pr_info("early log buf free: %d(%d%%)\n", @@ -1248,7 +1296,7 @@ static int syslog_print(char __user *buf, int size) size_t n; size_t skip; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1256,7 +1304,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial = 0; } if (syslog_seq == log_next_seq) { - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); break; } @@ -1275,7 +1323,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (!n) break; @@ -1304,7 +1352,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) if (!text) return -ENOMEM; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (buf) { u64 next_seq; u64 seq; @@ -1352,12 +1400,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = log_next(idx); seq++; - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (seq < log_first_seq) { /* messages are gone, move to next one */ @@ -1371,7 +1419,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) clear_seq = log_next_seq; clear_idx = log_next_idx; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); kfree(text); return len; @@ -1458,7 +1506,7 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - raw_spin_lock_irq(&logbuf_lock); + logbuf_lock_irq(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1486,7 +1534,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } - raw_spin_unlock_irq(&logbuf_lock); + logbuf_unlock_irq(); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -1510,8 +1558,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(int level, - const char *ext_text, size_t ext_len, +static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) { struct console *con; @@ -1538,28 +1585,6 @@ static void call_console_drivers(int level, } } -/* - * Zap console related locks when oopsing. - * To leave time for slow consoles to print a full oops, - * only zap at most once every 30 seconds. - */ -static void zap_locks(void) -{ - static unsigned long oops_timestamp; - - if (time_after_eq(jiffies, oops_timestamp) && - !time_after(jiffies, oops_timestamp + 30 * HZ)) - return; - - oops_timestamp = jiffies; - - debug_locks_off(); - /* If a crash is occurring, make sure we can't deadlock */ - raw_spin_lock_init(&logbuf_lock); - /* And make sure that we print immediately */ - sema_init(&console_sem, 1); -} - int printk_delay_msec __read_mostly; static inline void printk_delay(void) @@ -1669,18 +1694,13 @@ asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) { - static bool recursion_bug; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len = 0; enum log_flags lflags = 0; unsigned long flags; - int this_cpu; int printed_len = 0; - int nmi_message_lost; bool in_sched = false; - /* cpu currently holding logbuf_lock in this function */ - static unsigned int logbuf_cpu = UINT_MAX; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1690,53 +1710,8 @@ asmlinkage int vprintk_emit(int facility, int level, boot_delay_msec(level); printk_delay(); - local_irq_save(flags); - this_cpu = smp_processor_id(); - - /* - * Ouch, printk recursed into itself! - */ - if (unlikely(logbuf_cpu == this_cpu)) { - /* - * If a crash is occurring during printk() on this CPU, - * then try to get the crash message out but make sure - * we can't deadlock. Otherwise just return to avoid the - * recursion and return - but flag the recursion so that - * it can be printed at the next appropriate moment: - */ - if (!oops_in_progress && !lockdep_recursing(current)) { - recursion_bug = true; - local_irq_restore(flags); - return 0; - } - zap_locks(); - } - - lockdep_off(); /* This stops the holder of console_sem just where we want him */ - raw_spin_lock(&logbuf_lock); - logbuf_cpu = this_cpu; - - if (unlikely(recursion_bug)) { - static const char recursion_msg[] = - "BUG: recent printk recursion!"; - - recursion_bug = false; - /* emit KERN_CRIT message */ - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, recursion_msg, - strlen(recursion_msg)); - } - - nmi_message_lost = get_nmi_message_lost(); - if (unlikely(nmi_message_lost)) { - text_len = scnprintf(textbuf, sizeof(textbuf), - "BAD LUCK: lost %d message(s) from NMI context!", - nmi_message_lost); - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, textbuf, text_len); - } - + logbuf_lock_irqsave(flags); /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. @@ -1779,14 +1754,10 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); - logbuf_cpu = UINT_MAX; - raw_spin_unlock(&logbuf_lock); - lockdep_on(); - local_irq_restore(flags); + logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ if (!in_sched) { - lockdep_off(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1794,7 +1765,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); - lockdep_on(); } return printed_len; @@ -1803,7 +1773,7 @@ EXPORT_SYMBOL(vprintk_emit); asmlinkage int vprintk(const char *fmt, va_list args) { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + return vprintk_func(fmt, args); } EXPORT_SYMBOL(vprintk); @@ -1895,16 +1865,12 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } -static void call_console_drivers(int level, - const char *ext_text, size_t ext_len, +static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -/* Still needs to be defined for users */ -DEFINE_PER_CPU(printk_func_t, printk_func); - #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK @@ -2220,9 +2186,9 @@ again: struct printk_log *msg; size_t ext_len = 0; size_t len; - int level; - raw_spin_lock_irqsave(&logbuf_lock, flags); + printk_safe_enter_irqsave(flags); + raw_spin_lock(&logbuf_lock); if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; @@ -2243,8 +2209,7 @@ skip: break; msg = log_from_idx(console_idx); - level = msg->level; - if (suppress_message_printing(level)) { + if (suppress_message_printing(msg->level)) { /* * Skip record we have buffered and already printed * directly to the console when we received it, and @@ -2270,9 +2235,9 @@ skip: raw_spin_unlock(&logbuf_lock); stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(level, ext_text, ext_len, text, len); + call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); - local_irq_restore(flags); + printk_safe_exit_irqrestore(flags); if (do_cond_resched) cond_resched(); @@ -2295,7 +2260,8 @@ skip: */ raw_spin_lock(&logbuf_lock); retry = console_seq != log_next_seq; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + raw_spin_unlock(&logbuf_lock); + printk_safe_exit_irqrestore(flags); if (retry && console_trylock()) goto again; @@ -2558,10 +2524,10 @@ void register_console(struct console *newcon) * console_unlock(); will print out the buffered messages * for us. */ - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); /* * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to @@ -2860,12 +2826,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) /* initialize iterator with data about the stored records */ dumper->active = true; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; dumper->next_idx = log_next_idx; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); @@ -2950,9 +2916,9 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, unsigned long flags; bool ret; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); return ret; } @@ -2991,7 +2957,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, if (!dumper->active) goto out; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; @@ -3000,7 +2966,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, /* last entry */ if (dumper->cur_seq >= dumper->next_seq) { - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); goto out; } @@ -3042,7 +3008,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, dumper->next_seq = next_seq; dumper->next_idx = next_idx; ret = true; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); out: if (len) *len = l; @@ -3080,9 +3046,9 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) { unsigned long flags; - raw_spin_lock_irqsave(&logbuf_lock, flags); + logbuf_lock_irqsave(flags); kmsg_dump_rewind_nolock(dumper); - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + logbuf_unlock_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/printk/nmi.c b/kernel/printk/printk_safe.c similarity index 53% rename from kernel/printk/nmi.c rename to kernel/printk/printk_safe.c index f011aaef583c..033e50a7d706 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * nmi.c - Safe printk in NMI context + * printk_safe.c - Safe printk for printk-deadlock-prone contexts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -32,36 +32,58 @@ * is later flushed into the main ring buffer via IRQ work. * * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. + * by examinig current printk() context mask stored in @printk_context + * per-CPU variable. * * The implementation allows to flush the strings also from another CPU. * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; -static int printk_nmi_irq_ready; -atomic_t nmi_message_lost; +static int printk_safe_irq_ready; -#define NMI_LOG_BUF_LEN ((1 << CONFIG_NMI_LOG_BUF_SHIFT) - \ - sizeof(atomic_t) - sizeof(struct irq_work)) +#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ + sizeof(atomic_t) - \ + sizeof(atomic_t) - \ + sizeof(struct irq_work)) -struct nmi_seq_buf { +struct printk_safe_seq_buf { atomic_t len; /* length of written data */ + atomic_t message_lost; struct irq_work work; /* IRQ work that flushes the buffer */ - unsigned char buffer[NMI_LOG_BUF_LEN]; + unsigned char buffer[SAFE_LOG_BUF_LEN]; }; -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); +static DEFINE_PER_CPU(int, printk_context); + +#ifdef CONFIG_PRINTK_NMI +static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +#endif + +/* Get flushed in a more safe context. */ +static void queue_flush_work(struct printk_safe_seq_buf *s) +{ + if (printk_safe_irq_ready) { + /* Make sure that IRQ work is really initialized. */ + smp_rmb(); + irq_work_queue(&s->work); + } +} /* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. + * Add a message to per-CPU context-dependent buffer. NMI and printk-safe + * have dedicated buffers, because otherwise printk-safe preempted by + * NMI-printk would have overwritten the NMI messages. + * + * The messages are fushed from irq work (or from panic()), possibly, + * from other CPU, concurrently with printk_safe_log_store(). Should this + * happen, printk_safe_log_store() will notice the buffer->len mismatch + * and repeat the write. */ -static int vprintk_nmi(const char *fmt, va_list args) +static int printk_safe_log_store(struct printk_safe_seq_buf *s, + const char *fmt, va_list args) { - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; + int add; size_t len; again: @@ -69,18 +91,21 @@ again: /* The trailing '\0' is not counted into len. */ if (len >= sizeof(s->buffer) - 1) { - atomic_inc(&nmi_message_lost); + atomic_inc(&s->message_lost); + queue_flush_work(s); return 0; } /* - * Make sure that all old data have been read before the buffer was - * reseted. This is not needed when we just append data. + * Make sure that all old data have been read before the buffer + * was reset. This is not needed when we just append data. */ if (!len) smp_rmb(); add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + if (!add) + return 0; /* * Do it once again if the buffer has been flushed in the meantime. @@ -90,32 +115,23 @@ again: if (atomic_cmpxchg(&s->len, len, len + add) != len) goto again; - /* Get flushed in a more safe context. */ - if (add && printk_nmi_irq_ready) { - /* Make sure that IRQ work is really initialized. */ - smp_rmb(); - irq_work_queue(&s->work); - } - + queue_flush_work(s); return add; } -static void printk_nmi_flush_line(const char *text, int len) +static inline void printk_safe_flush_line(const char *text, int len) { /* - * The buffers are flushed in NMI only on panic. The messages must - * go only into the ring buffer at this stage. Consoles will get - * explicitly called later when a crashdump is not generated. + * Avoid any console drivers calls from here, because we may be + * in NMI or printk_safe context (when in panic). The messages + * must go only into the ring buffer at this stage. Consoles will + * get explicitly called later when a crashdump is not generated. */ - if (in_nmi()) - printk_deferred("%.*s", len, text); - else - printk("%.*s", len, text); - + printk_deferred("%.*s", len, text); } /* printk part of the temporary buffer line by line */ -static int printk_nmi_flush_buffer(const char *start, size_t len) +static int printk_safe_flush_buffer(const char *start, size_t len) { const char *c, *end; bool header; @@ -127,7 +143,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) /* Print line by line. */ while (c < end) { if (*c == '\n') { - printk_nmi_flush_line(start, c - start + 1); + printk_safe_flush_line(start, c - start + 1); start = ++c; header = true; continue; @@ -140,7 +156,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) continue; } - printk_nmi_flush_line(start, c - start); + printk_safe_flush_line(start, c - start); start = c++; header = true; continue; @@ -154,22 +170,31 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) if (start < end && !header) { static const char newline[] = KERN_CONT "\n"; - printk_nmi_flush_line(start, end - start); - printk_nmi_flush_line(newline, strlen(newline)); + printk_safe_flush_line(start, end - start); + printk_safe_flush_line(newline, strlen(newline)); } return len; } +static void report_message_lost(struct printk_safe_seq_buf *s) +{ + int lost = atomic_xchg(&s->message_lost, 0); + + if (lost) + printk_deferred("Lost %d message(s)!\n", lost); +} + /* - * Flush data from the associated per_CPU buffer. The function + * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. */ -static void __printk_nmi_flush(struct irq_work *work) +static void __printk_safe_flush(struct irq_work *work) { static raw_spinlock_t read_lock = __RAW_SPIN_LOCK_INITIALIZER(read_lock); - struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); + struct printk_safe_seq_buf *s = + container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; size_t len; int i; @@ -194,9 +219,9 @@ more: * buffer size. */ if ((i && i >= len) || len > sizeof(s->buffer)) { - const char *msg = "printk_nmi_flush: internal error\n"; + const char *msg = "printk_safe_flush: internal error\n"; - printk_nmi_flush_line(msg, strlen(msg)); + printk_safe_flush_line(msg, strlen(msg)); len = 0; } @@ -205,7 +230,7 @@ more: /* Make sure that data has been written up to the @len */ smp_rmb(); - i += printk_nmi_flush_buffer(s->buffer + i, len - i); + i += printk_safe_flush_buffer(s->buffer + i, len - i); /* * Check that nothing has got added in the meantime and truncate @@ -217,35 +242,40 @@ more: goto more; out: + report_message_lost(s); raw_spin_unlock_irqrestore(&read_lock, flags); } /** - * printk_nmi_flush - flush all per-cpu nmi buffers. + * printk_safe_flush - flush all per-cpu nmi buffers. * * The buffers are flushed automatically via IRQ work. This function * is useful only when someone wants to be sure that all buffers have * been flushed at some point. */ -void printk_nmi_flush(void) +void printk_safe_flush(void) { int cpu; - for_each_possible_cpu(cpu) - __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); + for_each_possible_cpu(cpu) { +#ifdef CONFIG_PRINTK_NMI + __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +#endif + __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); + } } /** - * printk_nmi_flush_on_panic - flush all per-cpu nmi buffers when the system + * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system * goes down. * - * Similar to printk_nmi_flush() but it can be called even in NMI context when + * Similar to printk_safe_flush() but it can be called even in NMI context when * the system goes down. It does the best effort to get NMI messages into * the main ring buffer. * * Note that it could try harder when there is only one CPU online. */ -void printk_nmi_flush_on_panic(void) +void printk_safe_flush_on_panic(void) { /* * Make sure that we could access the main ring buffer. @@ -259,33 +289,97 @@ void printk_nmi_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } - printk_nmi_flush(); + printk_safe_flush(); } -void __init printk_nmi_init(void) +#ifdef CONFIG_PRINTK_NMI +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_nmi(const char *fmt, va_list args) { - int cpu; + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - for_each_possible_cpu(cpu) { - struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); - - init_irq_work(&s->work, __printk_nmi_flush); - } - - /* Make sure that IRQ works are initialized before enabling. */ - smp_wmb(); - printk_nmi_irq_ready = 1; - - /* Flush pending messages that did not have scheduled IRQ works. */ - printk_nmi_flush(); + return printk_safe_log_store(s, fmt, args); } void printk_nmi_enter(void) { - this_cpu_write(printk_func, vprintk_nmi); + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); } void printk_nmi_exit(void) { - this_cpu_write(printk_func, vprintk_default); + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +#else + +static int vprintk_nmi(const char *fmt, va_list args) +{ + return 0; +} + +#endif /* CONFIG_PRINTK_NMI */ + +/* + * Lock-less printk(), to avoid deadlocks should the printk() recurse + * into itself. It uses a per-CPU buffer to store the message, just like + * NMI. + */ +static int vprintk_safe(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +/* Can be preempted by NMI. */ +void __printk_safe_enter(void) +{ + this_cpu_inc(printk_context); +} + +/* Can be preempted by NMI. */ +void __printk_safe_exit(void) +{ + this_cpu_dec(printk_context); +} + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + + return vprintk_default(fmt, args); +} + +void __init printk_safe_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct printk_safe_seq_buf *s; + + s = &per_cpu(safe_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); + +#ifdef CONFIG_PRINTK_NMI + s = &per_cpu(nmi_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); +#endif + } + + /* Make sure that IRQ works are initialized before enabling. */ + smp_wmb(); + printk_safe_irq_ready = 1; + + /* Flush pending messages that did not have scheduled IRQ works. */ + printk_safe_flush(); } diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 75554754eadf..5f7999eacad5 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -77,7 +77,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, * Force flush any remote buffers that might be stuck in IRQ context * and therefore could not run their irq_work. */ - printk_nmi_flush(); + printk_safe_flush(); clear_bit_unlock(0, &backtrace_flag); put_cpu();