Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
 "Add support for vDSO acceleration of the "Hyper-V TSC page", to speed
  up clock reading on Hyper-V guests"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
  x86/hyperv: Move TSC reading method to asm/mshyperv.h
  x86/hyperv: Implement hv_get_tsc_page()
This commit is contained in:
@@ -17,6 +17,7 @@
|
|||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
#include <asm/msr.h>
|
#include <asm/msr.h>
|
||||||
#include <asm/pvclock.h>
|
#include <asm/pvclock.h>
|
||||||
|
#include <asm/mshyperv.h>
|
||||||
#include <linux/math64.h>
|
#include <linux/math64.h>
|
||||||
#include <linux/time.h>
|
#include <linux/time.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
@@ -32,6 +33,11 @@ extern u8 pvclock_page
|
|||||||
__attribute__((visibility("hidden")));
|
__attribute__((visibility("hidden")));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
|
extern u8 hvclock_page
|
||||||
|
__attribute__((visibility("hidden")));
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef BUILD_VDSO32
|
#ifndef BUILD_VDSO32
|
||||||
|
|
||||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||||
@@ -141,6 +147,20 @@ static notrace u64 vread_pvclock(int *mode)
|
|||||||
return last;
|
return last;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
|
static notrace u64 vread_hvclock(int *mode)
|
||||||
|
{
|
||||||
|
const struct ms_hyperv_tsc_page *tsc_pg =
|
||||||
|
(const struct ms_hyperv_tsc_page *)&hvclock_page;
|
||||||
|
u64 current_tick = hv_read_tsc_page(tsc_pg);
|
||||||
|
|
||||||
|
if (current_tick != U64_MAX)
|
||||||
|
return current_tick;
|
||||||
|
|
||||||
|
*mode = VCLOCK_NONE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
notrace static u64 vread_tsc(void)
|
notrace static u64 vread_tsc(void)
|
||||||
{
|
{
|
||||||
@@ -172,6 +192,10 @@ notrace static inline u64 vgetsns(int *mode)
|
|||||||
#ifdef CONFIG_PARAVIRT_CLOCK
|
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||||
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
|
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
|
||||||
cycles = vread_pvclock(mode);
|
cycles = vread_pvclock(mode);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
|
else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
|
||||||
|
cycles = vread_hvclock(mode);
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ SECTIONS
|
|||||||
* segment.
|
* segment.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
vvar_start = . - 2 * PAGE_SIZE;
|
vvar_start = . - 3 * PAGE_SIZE;
|
||||||
vvar_page = vvar_start;
|
vvar_page = vvar_start;
|
||||||
|
|
||||||
/* Place all vvars at the offsets in asm/vvar.h. */
|
/* Place all vvars at the offsets in asm/vvar.h. */
|
||||||
@@ -36,6 +36,7 @@ SECTIONS
|
|||||||
#undef EMIT_VVAR
|
#undef EMIT_VVAR
|
||||||
|
|
||||||
pvclock_page = vvar_start + PAGE_SIZE;
|
pvclock_page = vvar_start + PAGE_SIZE;
|
||||||
|
hvclock_page = vvar_start + 2 * PAGE_SIZE;
|
||||||
|
|
||||||
. = SIZEOF_HEADERS;
|
. = SIZEOF_HEADERS;
|
||||||
|
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ enum {
|
|||||||
sym_vvar_page,
|
sym_vvar_page,
|
||||||
sym_hpet_page,
|
sym_hpet_page,
|
||||||
sym_pvclock_page,
|
sym_pvclock_page,
|
||||||
|
sym_hvclock_page,
|
||||||
sym_VDSO_FAKE_SECTION_TABLE_START,
|
sym_VDSO_FAKE_SECTION_TABLE_START,
|
||||||
sym_VDSO_FAKE_SECTION_TABLE_END,
|
sym_VDSO_FAKE_SECTION_TABLE_END,
|
||||||
};
|
};
|
||||||
@@ -82,6 +83,7 @@ const int special_pages[] = {
|
|||||||
sym_vvar_page,
|
sym_vvar_page,
|
||||||
sym_hpet_page,
|
sym_hpet_page,
|
||||||
sym_pvclock_page,
|
sym_pvclock_page,
|
||||||
|
sym_hvclock_page,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vdso_sym {
|
struct vdso_sym {
|
||||||
@@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = {
|
|||||||
[sym_vvar_page] = {"vvar_page", true},
|
[sym_vvar_page] = {"vvar_page", true},
|
||||||
[sym_hpet_page] = {"hpet_page", true},
|
[sym_hpet_page] = {"hpet_page", true},
|
||||||
[sym_pvclock_page] = {"pvclock_page", true},
|
[sym_pvclock_page] = {"pvclock_page", true},
|
||||||
|
[sym_hvclock_page] = {"hvclock_page", true},
|
||||||
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
|
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
|
||||||
"VDSO_FAKE_SECTION_TABLE_START", false
|
"VDSO_FAKE_SECTION_TABLE_START", false
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/desc.h>
|
#include <asm/desc.h>
|
||||||
#include <asm/cpufeature.h>
|
#include <asm/cpufeature.h>
|
||||||
|
#include <asm/mshyperv.h>
|
||||||
|
|
||||||
#if defined(CONFIG_X86_64)
|
#if defined(CONFIG_X86_64)
|
||||||
unsigned int __read_mostly vdso64_enabled = 1;
|
unsigned int __read_mostly vdso64_enabled = 1;
|
||||||
@@ -121,6 +122,12 @@ static int vvar_fault(const struct vm_special_mapping *sm,
|
|||||||
vmf->address,
|
vmf->address,
|
||||||
__pa(pvti) >> PAGE_SHIFT);
|
__pa(pvti) >> PAGE_SHIFT);
|
||||||
}
|
}
|
||||||
|
} else if (sym_offset == image->sym_hvclock_page) {
|
||||||
|
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
|
||||||
|
|
||||||
|
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
|
||||||
|
ret = vm_insert_pfn(vma, vmf->address,
|
||||||
|
vmalloc_to_pfn(tsc_pg));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == 0 || ret == -EBUSY)
|
if (ret == 0 || ret == -EBUSY)
|
||||||
|
|||||||
@@ -27,45 +27,22 @@
|
|||||||
#include <linux/clockchips.h>
|
#include <linux/clockchips.h>
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
|
|
||||||
static struct ms_hyperv_tsc_page *tsc_pg;
|
static struct ms_hyperv_tsc_page *tsc_pg;
|
||||||
|
|
||||||
|
struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
|
||||||
|
{
|
||||||
|
return tsc_pg;
|
||||||
|
}
|
||||||
|
|
||||||
static u64 read_hv_clock_tsc(struct clocksource *arg)
|
static u64 read_hv_clock_tsc(struct clocksource *arg)
|
||||||
{
|
{
|
||||||
u64 current_tick;
|
u64 current_tick = hv_read_tsc_page(tsc_pg);
|
||||||
|
|
||||||
if (tsc_pg->tsc_sequence != 0) {
|
if (current_tick == U64_MAX)
|
||||||
/*
|
|
||||||
* Use the tsc page to compute the value.
|
|
||||||
*/
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
u64 tmp;
|
|
||||||
u32 sequence = tsc_pg->tsc_sequence;
|
|
||||||
u64 cur_tsc;
|
|
||||||
u64 scale = tsc_pg->tsc_scale;
|
|
||||||
s64 offset = tsc_pg->tsc_offset;
|
|
||||||
|
|
||||||
rdtscll(cur_tsc);
|
|
||||||
/* current_tick = ((cur_tsc *scale) >> 64) + offset */
|
|
||||||
asm("mulq %3"
|
|
||||||
: "=d" (current_tick), "=a" (tmp)
|
|
||||||
: "a" (cur_tsc), "r" (scale));
|
|
||||||
|
|
||||||
current_tick += offset;
|
|
||||||
if (tsc_pg->tsc_sequence == sequence)
|
|
||||||
return current_tick;
|
|
||||||
|
|
||||||
if (tsc_pg->tsc_sequence != 0)
|
|
||||||
continue;
|
|
||||||
/*
|
|
||||||
* Fallback using MSR method.
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
|
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
|
||||||
|
|
||||||
return current_tick;
|
return current_tick;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,7 +116,7 @@ void hyperv_init(void)
|
|||||||
/*
|
/*
|
||||||
* Register Hyper-V specific clocksource.
|
* Register Hyper-V specific clocksource.
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
|
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
|
||||||
union hv_x64_msr_hypercall_contents tsc_msr;
|
union hv_x64_msr_hypercall_contents tsc_msr;
|
||||||
|
|
||||||
@@ -155,6 +132,9 @@ void hyperv_init(void)
|
|||||||
tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
|
tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
|
||||||
|
|
||||||
wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
|
wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
|
||||||
|
|
||||||
|
hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
|
||||||
|
|
||||||
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
|
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,8 @@
|
|||||||
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
#define VCLOCK_NONE 0 /* No vDSO clock available. */
|
||||||
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||||
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
|
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
|
||||||
#define VCLOCK_MAX 2
|
#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
|
||||||
|
#define VCLOCK_MAX 3
|
||||||
|
|
||||||
struct arch_clocksource_data {
|
struct arch_clocksource_data {
|
||||||
int vclock_mode;
|
int vclock_mode;
|
||||||
|
|||||||
@@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs);
|
|||||||
bool hv_is_hypercall_page_setup(void);
|
bool hv_is_hypercall_page_setup(void);
|
||||||
void hyperv_cleanup(void);
|
void hyperv_cleanup(void);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_HYPERV_TSCPAGE
|
||||||
|
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
|
||||||
|
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
|
||||||
|
{
|
||||||
|
u64 scale, offset, cur_tsc;
|
||||||
|
u32 sequence;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
|
||||||
|
* Top-Level Functional Specification ver. 3.0 and above. To get the
|
||||||
|
* reference time we must do the following:
|
||||||
|
* - READ ReferenceTscSequence
|
||||||
|
* A special '0' value indicates the time source is unreliable and we
|
||||||
|
* need to use something else. The currently published specification
|
||||||
|
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
|
||||||
|
* instead of '0' as the special value, see commit c35b82ef0294.
|
||||||
|
* - ReferenceTime =
|
||||||
|
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
|
||||||
|
* - READ ReferenceTscSequence again. In case its value has changed
|
||||||
|
* since our first reading we need to discard ReferenceTime and repeat
|
||||||
|
* the whole sequence as the hypervisor was updating the page in
|
||||||
|
* between.
|
||||||
|
*/
|
||||||
|
do {
|
||||||
|
sequence = READ_ONCE(tsc_pg->tsc_sequence);
|
||||||
|
if (!sequence)
|
||||||
|
return U64_MAX;
|
||||||
|
/*
|
||||||
|
* Make sure we read sequence before we read other values from
|
||||||
|
* TSC page.
|
||||||
|
*/
|
||||||
|
smp_rmb();
|
||||||
|
|
||||||
|
scale = READ_ONCE(tsc_pg->tsc_scale);
|
||||||
|
offset = READ_ONCE(tsc_pg->tsc_offset);
|
||||||
|
cur_tsc = rdtsc_ordered();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure we read sequence after we read all other values
|
||||||
|
* from TSC page.
|
||||||
|
*/
|
||||||
|
smp_rmb();
|
||||||
|
|
||||||
|
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
|
||||||
|
|
||||||
|
return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ struct vdso_image {
|
|||||||
long sym_vvar_page;
|
long sym_vvar_page;
|
||||||
long sym_hpet_page;
|
long sym_hpet_page;
|
||||||
long sym_pvclock_page;
|
long sym_pvclock_page;
|
||||||
|
long sym_hvclock_page;
|
||||||
long sym_VDSO32_NOTE_MASK;
|
long sym_VDSO32_NOTE_MASK;
|
||||||
long sym___kernel_sigreturn;
|
long sym___kernel_sigreturn;
|
||||||
long sym___kernel_rt_sigreturn;
|
long sym___kernel_rt_sigreturn;
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ config HYPERV
|
|||||||
Select this option to run Linux as a Hyper-V client operating
|
Select this option to run Linux as a Hyper-V client operating
|
||||||
system.
|
system.
|
||||||
|
|
||||||
|
config HYPERV_TSCPAGE
|
||||||
|
def_bool HYPERV && X86_64
|
||||||
|
|
||||||
config HYPERV_UTILS
|
config HYPERV_UTILS
|
||||||
tristate "Microsoft Hyper-V Utilities driver"
|
tristate "Microsoft Hyper-V Utilities driver"
|
||||||
depends on HYPERV && CONNECTOR && NLS
|
depends on HYPERV && CONNECTOR && NLS
|
||||||
|
|||||||
Reference in New Issue
Block a user