diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/Kconfig | 51 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 65 | ||||
-rw-r--r-- | kernel/time/ntp.c | 6 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 61 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 2 |
5 files changed, 133 insertions, 52 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 70f27e89012b..2b62fe86f9ec 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -105,7 +105,6 @@ config NO_HZ_FULL select RCU_USER_QS select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN - select CONTEXT_TRACKING_FORCE select IRQ_WORK help Adaptively try to shutdown the tick whenever possible, even when @@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL Note the boot CPU will still be kept outside the range to handle the timekeeping duty. +config NO_HZ_FULL_SYSIDLE + bool "Detect full-system idle state for full dynticks system" + depends on NO_HZ_FULL + default n + help + At least one CPU must keep the scheduling-clock tick running for + timekeeping purposes whenever there is a non-idle CPU, where + "non-idle" also includes dynticks CPUs as long as they are + running non-idle tasks. Because the underlying adaptive-tick + support cannot distinguish between all CPUs being idle and + all CPUs each running a single task in dynticks mode, the + underlying support simply ensures that there is always a CPU + handling the scheduling-clock tick, whether or not all CPUs + are idle. This Kconfig option enables scalable detection of + the all-CPUs-idle state, thus allowing the scheduling-clock + tick to be disabled when all CPUs are idle. Note that scalable + detection of the all-CPUs-idle state means that larger systems + will be slower to declare the all-CPUs-idle state. + + Say Y if you would like to help debug all-CPUs-idle detection. + + Say N if you are unsure. + +config NO_HZ_FULL_SYSIDLE_SMALL + int "Number of CPUs above which large-system approach is used" + depends on NO_HZ_FULL_SYSIDLE + range 1 NR_CPUS + default 8 + help + The full-system idle detection mechanism takes a lazy approach + on large systems, as is required to attain decent scalability. + However, on smaller systems, scalability is not anywhere near as + large a concern as is energy efficiency. The sysidle subsystem + therefore uses a fast but non-scalable algorithm for small + systems and a lazier but scalable algorithm for large systems. + This Kconfig parameter defines the number of CPUs in the largest + system that will be considered to be "small". + + The default value will be fine in most cases. Battery-powered + systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger + numbers of CPUs, and (3) are suffering from battery-lifetime + problems due to long sysidle latencies might wish to experiment + with larger values for this Kconfig parameter. On the other + hand, they might be even better served by disabling NO_HZ_FULL + entirely, given that NO_HZ_FULL is intended for HPC and + real-time workloads that at present do not tend to be run on + battery-powered systems. + + Take the default if you are unsure. + config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 38959c866789..662c5798a685 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -33,29 +33,64 @@ struct ce_unbind { int res; }; -/** - * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds - * @latch: value to convert - * @evt: pointer to clock event device descriptor - * - * Math helper, returns latch value converted to nanoseconds (bound checked) - */ -u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, + bool ismax) { u64 clc = (u64) latch << evt->shift; + u64 rnd; if (unlikely(!evt->mult)) { evt->mult = 1; WARN_ON(1); } + rnd = (u64) evt->mult - 1; + + /* + * Upper bound sanity check. If the backwards conversion is + * not equal latch, we know that the above shift overflowed. + */ + if ((clc >> evt->shift) != (u64)latch) + clc = ~0ULL; + + /* + * Scaled math oddities: + * + * For mult <= (1 << shift) we can safely add mult - 1 to + * prevent integer rounding loss. So the backwards conversion + * from nsec to device ticks will be correct. + * + * For mult > (1 << shift), i.e. device frequency is > 1GHz we + * need to be careful. Adding mult - 1 will result in a value + * which when converted back to device ticks can be larger + * than latch by up to (mult - 1) >> shift. For the min_delta + * calculation we still want to apply this in order to stay + * above the minimum device ticks limit. For the upper limit + * we would end up with a latch value larger than the upper + * limit of the device, so we omit the add to stay below the + * device upper boundary. + * + * Also omit the add if it would overflow the u64 boundary. + */ + if ((~0ULL - clc > rnd) && + (!ismax || evt->mult <= (1U << evt->shift))) + clc += rnd; do_div(clc, evt->mult); - if (clc < 1000) - clc = 1000; - if (clc > KTIME_MAX) - clc = KTIME_MAX; - return clc; + /* Deltas less than 1usec are pointless noise */ + return clc > 1000 ? clc : 1000; +} + +/** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds + * @latch: value to convert + * @evt: pointer to clock event device descriptor + * + * Math helper, returns latch value converted to nanoseconds (bound checked) + */ +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +{ + return cev_delta2ns(latch, evt, false); } EXPORT_SYMBOL_GPL(clockevent_delta2ns); @@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq) sec = 600; clockevents_calc_mult_shift(dev, freq, sec); - dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); - dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); + dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); + dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); } /** diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8f5b3b98577b..bb2215174f05 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work) schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); } -static void notify_cmos_timer(void) +void ntp_notify_cmos_timer(void) { schedule_delayed_work(&sync_cmos_work, 0); } #else -static inline void notify_cmos_timer(void) { } +void ntp_notify_cmos_timer(void) { } #endif @@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) if (!(time_status & STA_NANO)) txc->time.tv_usec /= NSEC_PER_USEC; - notify_cmos_timer(); - return result; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e8a1516cc0a3..3612fc77f834 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -23,6 +23,7 @@ #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/perf_event.h> +#include <linux/context_tracking.h> #include <asm/irq_regs.h> @@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) } #ifdef CONFIG_NO_HZ_FULL -static cpumask_var_t nohz_full_mask; -bool have_nohz_full_mask; +cpumask_var_t tick_nohz_full_mask; +bool tick_nohz_full_running; static bool can_stop_full_tick(void) { @@ -182,7 +183,7 @@ static bool can_stop_full_tick(void) * Don't allow the user to think they can get * full NO_HZ with this machine. */ - WARN_ONCE(have_nohz_full_mask, + WARN_ONCE(tick_nohz_full_running, "NO_HZ FULL will not work with unstable sched clock"); return false; } @@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); * Re-evaluate the need for the tick on the current CPU * and restart it if necessary. */ -void tick_nohz_full_check(void) +void __tick_nohz_full_check(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); @@ -211,7 +212,7 @@ void tick_nohz_full_check(void) static void nohz_full_kick_work_func(struct irq_work *work) { - tick_nohz_full_check(); + __tick_nohz_full_check(); } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { @@ -230,7 +231,7 @@ void tick_nohz_full_kick(void) static void nohz_full_kick_ipi(void *info) { - tick_nohz_full_check(); + __tick_nohz_full_check(); } /* @@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info) */ void tick_nohz_full_kick_all(void) { - if (!have_nohz_full_mask) + if (!tick_nohz_full_running) return; preempt_disable(); - smp_call_function_many(nohz_full_mask, + smp_call_function_many(tick_nohz_full_mask, nohz_full_kick_ipi, NULL, false); + tick_nohz_full_kick(); preempt_enable(); } @@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void) * It might need the tick due to per task/process properties: * perf events, posix cpu timers, ... */ -void tick_nohz_task_switch(struct task_struct *tsk) +void __tick_nohz_task_switch(struct task_struct *tsk) { unsigned long flags; @@ -269,31 +271,23 @@ out: local_irq_restore(flags); } -int tick_nohz_full_cpu(int cpu) -{ - if (!have_nohz_full_mask) - return 0; - - return cpumask_test_cpu(cpu, nohz_full_mask); -} - /* Parse the boot-time nohz CPU list from the kernel parameters. */ static int __init tick_nohz_full_setup(char *str) { int cpu; - alloc_bootmem_cpumask_var(&nohz_full_mask); - if (cpulist_parse(str, nohz_full_mask) < 0) { + alloc_bootmem_cpumask_var(&tick_nohz_full_mask); + if (cpulist_parse(str, tick_nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); return 1; } cpu = smp_processor_id(); - if (cpumask_test_cpu(cpu, nohz_full_mask)) { + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); + cpumask_clear_cpu(cpu, tick_nohz_full_mask); } - have_nohz_full_mask = true; + tick_nohz_full_running = true; return 1; } @@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, * If we handle the timekeeping duty for full dynticks CPUs, * we can't safely shutdown that CPU. */ - if (have_nohz_full_mask && tick_do_timer_cpu == cpu) + if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; break; } @@ -330,31 +324,34 @@ static int tick_nohz_init_all(void) int err = -1; #ifdef CONFIG_NO_HZ_FULL_ALL - if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); return err; } err = 0; - cpumask_setall(nohz_full_mask); - cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); - have_nohz_full_mask = true; + cpumask_setall(tick_nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); + tick_nohz_full_running = true; #endif return err; } void __init tick_nohz_init(void) { - if (!have_nohz_full_mask) { + int cpu; + + if (!tick_nohz_full_running) { if (tick_nohz_init_all() < 0) return; } + for_each_cpu(cpu, tick_nohz_full_mask) + context_tracking_cpu_set(cpu); + cpu_notifier(tick_nohz_cpu_down_callback, 0); - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); } -#else -#define have_nohz_full_mask (0) #endif /* @@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (have_nohz_full_mask) { + if (tick_nohz_full_enabled()) { /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 48b9fffabdc2..947ba25a95a0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc) write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + ntp_notify_cmos_timer(); + return ret; } |