+++ /dev/null
-
-Monotonic accurate time
-
-The goal of this design is to provide a monotonic time :
-
-Readable from userspace without a system call
-Readable from NMI handler
-Readable without disabling interrupts
-Readable without disabling preemption
-Only one clock source (most precise available : tsc)
-Support architectures with variable TSC frequency.
-
-Main difference with wall time currently implemented in the Linux kernel : the
-time update is done atomically instead of using a write seqlock. It permits
-reading time from NMI handler and from userspace.
-
-struct time_info {
- u64 tsc;
- u64 freq;
- u64 walltime;
-}
-
-static struct time_struct {
- struct time_info time_sel[2];
- long update_count;
-}
-
-DECLARE_PERCPU(struct time_struct, cpu_time);
-
-/* Number of times the scheduler is called on each CPU */
-DECLARE_PERCPU(unsigned long, sched_nr);
-
-/* On frequency change event */
-/* In irq context */
-void freq_change_cb(unsigned int new_freq)
-{
- struct time_struct this_cpu_time =
- per_cpu(cpu_time, smp_processor_id());
- struct time_info *write_time, *current_time;
- write_time =
- this_cpu_time->time_sel[(this_cpu_time->update_count+1)&1];
- current_time =
- this_cpu_time->time_sel[(this_cpu_time->update_count)&1];
- write_time->tsc = get_cycles();
- write_time->freq = new_freq;
- /* We cumulate the division imprecision. This is the downside of using
- * the TSC with variable frequency as a time base. */
- write_time->walltime =
- current_time->walltime +
- (write_time->tsc - current_time->tsc) /
- current_time->freq;
- wmb();
- this_cpu_time->update_count++;
-}
-
-
-/* Init cpu freq */
-init_cpu_freq()
-{
- struct time_struct this_cpu_time =
- per_cpu(cpu_time, smp_processor_id());
- struct time_info *current_time;
- memset(this_cpu_time, 0, sizeof(this_cpu_time));
- current_time = this_cpu_time->time_sel[this_cpu_time->update_count&1];
- /* Init current time */
- /* Get frequency */
- /* Reset cpus to 0 ns, 0 tsc, start their tsc. */
-}
-
-
-/* After a CPU comes back from hlt */
-/* The trick is to sync all the other CPUs on the first CPU up when they come
- * up. If all CPUs are down, then there is no need to increment the walltime :
- * let's simply define the useful walltime on a machine as the time elapsed
- * while there is a CPU running. If we want, when no cpu is active, we can use
- * a lower resolution clock to somehow keep track of walltime. */
-
-wake_from_hlt()
-{
- /* TODO */
-}
-
-
-
-/* Read time from anywhere in the kernel. Return time in walltime. (ns) */
-/* If the update_count changes while we read the context, it may be invalid.
- * This would happen if we are scheduled out for a period of time long enough to
- * permit 2 frequency changes. We simply start the loop again if it happens.
- * We detect it by comparing the update_count running counter.
- * We detect preemption by incrementing a counter sched_nr within schedule().
- * This counter is readable by user space through the vsyscall page. */
- */
-u64 read_time(void)
-{
- u64 walltime;
- long update_count;
- struct time_struct this_cpu_time;
- struct time_info *current_time;
- unsigned int cpu;
- long prev_sched_nr;
- do {
- cpu = _smp_processor_id();
- prev_sched_nr = per_cpu(sched_nr, cpu);
- if(cpu != _smp_processor_id())
- continue; /* changed CPU between CPUID and getting
- sched_nr */
- this_cpu_time = per_cpu(cpu_time, cpu);
- update_count = this_cpu_time->update_count;
- current_time = this_cpu_time->time_sel[update_count&1];
- walltime = current_time->walltime +
- (get_cycles() - current_time->tsc) /
- current_time->freq;
- if(per_cpu(sched_nr, cpu) != prev_sched_nr)
- continue; /* been preempted */
- } while(this_cpu_time->update_count != update_count);
- return walltime;
-}
-
-/* Userspace */
-/* Export all this data to user space through the vsyscall page. Use a function
- * like read_time to read the walltime. This function can be implemented as-is
- * because it doesn't need to disable preemption. */
-
-
-