summaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Kconfig51
-rw-r--r--kernel/time/clockevents.c65
-rw-r--r--kernel/time/ntp.c6
-rw-r--r--kernel/time/tick-sched.c61
-rw-r--r--kernel/time/timekeeping.c2
5 files changed, 133 insertions, 52 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 70f27e89012b..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
select RCU_USER_QS
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
- select CONTEXT_TRACKING_FORCE
select IRQ_WORK
help
Adaptively try to shutdown the tick whenever possible, even when
@@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
+config NO_HZ_FULL_SYSIDLE
+ bool "Detect full-system idle state for full dynticks system"
+ depends on NO_HZ_FULL
+ default n
+ help
+ At least one CPU must keep the scheduling-clock tick running for
+ timekeeping purposes whenever there is a non-idle CPU, where
+ "non-idle" also includes dynticks CPUs as long as they are
+ running non-idle tasks. Because the underlying adaptive-tick
+ support cannot distinguish between all CPUs being idle and
+ all CPUs each running a single task in dynticks mode, the
+ underlying support simply ensures that there is always a CPU
+ handling the scheduling-clock tick, whether or not all CPUs
+ are idle. This Kconfig option enables scalable detection of
+ the all-CPUs-idle state, thus allowing the scheduling-clock
+ tick to be disabled when all CPUs are idle. Note that scalable
+ detection of the all-CPUs-idle state means that larger systems
+ will be slower to declare the all-CPUs-idle state.
+
+ Say Y if you would like to help debug all-CPUs-idle detection.
+
+ Say N if you are unsure.
+
+config NO_HZ_FULL_SYSIDLE_SMALL
+ int "Number of CPUs above which large-system approach is used"
+ depends on NO_HZ_FULL_SYSIDLE
+ range 1 NR_CPUS
+ default 8
+ help
+ The full-system idle detection mechanism takes a lazy approach
+ on large systems, as is required to attain decent scalability.
+ However, on smaller systems, scalability is not anywhere near as
+ large a concern as is energy efficiency. The sysidle subsystem
+ therefore uses a fast but non-scalable algorithm for small
+ systems and a lazier but scalable algorithm for large systems.
+ This Kconfig parameter defines the number of CPUs in the largest
+ system that will be considered to be "small".
+
+ The default value will be fine in most cases. Battery-powered
+ systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
+ numbers of CPUs, and (3) are suffering from battery-lifetime
+ problems due to long sysidle latencies might wish to experiment
+ with larger values for this Kconfig parameter. On the other
+ hand, they might be even better served by disabling NO_HZ_FULL
+ entirely, given that NO_HZ_FULL is intended for HPC and
+ real-time workloads that at present do not tend to be run on
+ battery-powered systems.
+
+ Take the default if you are unsure.
+
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 38959c866789..662c5798a685 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -33,29 +33,64 @@ struct ce_unbind {
int res;
};
-/**
- * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
- * @latch: value to convert
- * @evt: pointer to clock event device descriptor
- *
- * Math helper, returns latch value converted to nanoseconds (bound checked)
- */
-u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
+static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt,
+ bool ismax)
{
u64 clc = (u64) latch << evt->shift;
+ u64 rnd;
if (unlikely(!evt->mult)) {
evt->mult = 1;
WARN_ON(1);
}
+ rnd = (u64) evt->mult - 1;
+
+ /*
+ * Upper bound sanity check. If the backwards conversion is
+ * not equal latch, we know that the above shift overflowed.
+ */
+ if ((clc >> evt->shift) != (u64)latch)
+ clc = ~0ULL;
+
+ /*
+ * Scaled math oddities:
+ *
+ * For mult <= (1 << shift) we can safely add mult - 1 to
+ * prevent integer rounding loss. So the backwards conversion
+ * from nsec to device ticks will be correct.
+ *
+ * For mult > (1 << shift), i.e. device frequency is > 1GHz we
+ * need to be careful. Adding mult - 1 will result in a value
+ * which when converted back to device ticks can be larger
+ * than latch by up to (mult - 1) >> shift. For the min_delta
+ * calculation we still want to apply this in order to stay
+ * above the minimum device ticks limit. For the upper limit
+ * we would end up with a latch value larger than the upper
+ * limit of the device, so we omit the add to stay below the
+ * device upper boundary.
+ *
+ * Also omit the add if it would overflow the u64 boundary.
+ */
+ if ((~0ULL - clc > rnd) &&
+ (!ismax || evt->mult <= (1U << evt->shift)))
+ clc += rnd;
do_div(clc, evt->mult);
- if (clc < 1000)
- clc = 1000;
- if (clc > KTIME_MAX)
- clc = KTIME_MAX;
- return clc;
+ /* Deltas less than 1usec are pointless noise */
+ return clc > 1000 ? clc : 1000;
+}
+
+/**
+ * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
+ * @latch: value to convert
+ * @evt: pointer to clock event device descriptor
+ *
+ * Math helper, returns latch value converted to nanoseconds (bound checked)
+ */
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
+{
+ return cev_delta2ns(latch, evt, false);
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
@@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq)
sec = 600;
clockevents_calc_mult_shift(dev, freq, sec);
- dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev);
- dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
+ dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false);
+ dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);
}
/**
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8f5b3b98577b..bb2215174f05 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work)
schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
}
-static void notify_cmos_timer(void)
+void ntp_notify_cmos_timer(void)
{
schedule_delayed_work(&sync_cmos_work, 0);
}
#else
-static inline void notify_cmos_timer(void) { }
+void ntp_notify_cmos_timer(void) { }
#endif
@@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
if (!(time_status & STA_NANO))
txc->time.tv_usec /= NSEC_PER_USEC;
- notify_cmos_timer();
-
return result;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/perf_event.h>
+#include <linux/context_tracking.h>
#include <asm/irq_regs.h>
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
}
#ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
static bool can_stop_full_tick(void)
{
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
- WARN_ONCE(have_nohz_full_mask,
+ WARN_ONCE(tick_nohz_full_running,
"NO_HZ FULL will not work with unstable sched clock");
return false;
}
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
* Re-evaluate the need for the tick on the current CPU
* and restart it if necessary.
*/
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
static void nohz_full_kick_work_func(struct irq_work *work)
{
- tick_nohz_full_check();
+ __tick_nohz_full_check();
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
static void nohz_full_kick_ipi(void *info)
{
- tick_nohz_full_check();
+ __tick_nohz_full_check();
}
/*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
*/
void tick_nohz_full_kick_all(void)
{
- if (!have_nohz_full_mask)
+ if (!tick_nohz_full_running)
return;
preempt_disable();
- smp_call_function_many(nohz_full_mask,
+ smp_call_function_many(tick_nohz_full_mask,
nohz_full_kick_ipi, NULL, false);
+ tick_nohz_full_kick();
preempt_enable();
}
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
* It might need the tick due to per task/process properties:
* perf events, posix cpu timers, ...
*/
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
{
unsigned long flags;
@@ -269,31 +271,23 @@ out:
local_irq_restore(flags);
}
-int tick_nohz_full_cpu(int cpu)
-{
- if (!have_nohz_full_mask)
- return 0;
-
- return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
/* Parse the boot-time nohz CPU list from the kernel parameters. */
static int __init tick_nohz_full_setup(char *str)
{
int cpu;
- alloc_bootmem_cpumask_var(&nohz_full_mask);
- if (cpulist_parse(str, nohz_full_mask) < 0) {
+ alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+ if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
return 1;
}
cpu = smp_processor_id();
- if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+ if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
- cpumask_clear_cpu(cpu, nohz_full_mask);
+ cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
- have_nohz_full_mask = true;
+ tick_nohz_full_running = true;
return 1;
}
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
* If we handle the timekeeping duty for full dynticks CPUs,
* we can't safely shutdown that CPU.
*/
- if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return NOTIFY_BAD;
break;
}
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
int err = -1;
#ifdef CONFIG_NO_HZ_FULL_ALL
- if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+ if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
return err;
}
err = 0;
- cpumask_setall(nohz_full_mask);
- cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
- have_nohz_full_mask = true;
+ cpumask_setall(tick_nohz_full_mask);
+ cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+ tick_nohz_full_running = true;
#endif
return err;
}
void __init tick_nohz_init(void)
{
- if (!have_nohz_full_mask) {
+ int cpu;
+
+ if (!tick_nohz_full_running) {
if (tick_nohz_init_all() < 0)
return;
}
+ for_each_cpu(cpu, tick_nohz_full_mask)
+ context_tracking_cpu_set(cpu);
+
cpu_notifier(tick_nohz_cpu_down_callback, 0);
- cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+ cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
}
-#else
-#define have_nohz_full_mask (0)
#endif
/*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}
- if (have_nohz_full_mask) {
+ if (tick_nohz_full_enabled()) {
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 48b9fffabdc2..947ba25a95a0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc)
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ ntp_notify_cmos_timer();
+
return ret;
}