#define pr_fmt(fmt)	"Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
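/*
 * E.g. with 4K pages a single gva_list entry can thus describe up to
 * 4096 * 4096 bytes == 16MB of contiguous virtual address space.
 */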

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info);

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT) {
			gva_list[gva_n] |= ~PAGE_MASK;
			cur += HV_TLB_FLUSH_UNIT;
		} else if (diff) {
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
			cur = end;
		}

		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}
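/*
 * Illustrative example (assuming 4K pages): for start = 0x7f0000000000 and
 * end = 0x7f0000005000 (five pages), fill_gva_list() emits the single entry
 * 0x7f0000000000 | 0x4, i.e. the base page plus four additional pages.
 */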

static bool cpu_is_lazy(int cpu)
{
	return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}
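/*
 * Background note: CPUs in lazy TLB mode will flush on their next context
 * switch, so they can normally be skipped; the exception is when page
 * tables were freed (info->freed_tables), where skipping is not safe and
 * every CPU in the set must be flushed.
 */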

static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
				   const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush *flush;
	u64 status;
	unsigned long flags;
	bool do_lazy = !info->freed_tables;

	trace_hyperv_mmu_flush_tlb_multi(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	local_irq_save(flags);

	flush = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can get
		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers are usually in ascending order
		 * and match Linux CPU ids, here is an optimization: we check
		 * the VP number for the highest bit in the supplied set first
		 * so we can quickly find out if using *_EX hypercalls is a
		 * must. We will also check all VP numbers when walking the
		 * supplied CPU set to remain correct in all cases.
		 */
		cpu = cpumask_last(cpus);

		if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			if (do_lazy && cpu_is_lazy(cpu))
				continue;
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}

		/* nothing to flush if 'processor_mask' ends up being empty */
		if (!flush->processor_mask) {
			local_irq_restore(flags);
			return;
		}
	}
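	/*
	 * At this point, for example, a guest whose CPUs map to VP numbers
	 * 0-7 has processor_mask == 0xff (minus any lazy CPUs skipped above).
	 */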

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
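	/*
	 * With 4K pages, the three u64 header fields (address_space, flags,
	 * processor_mask) leave (4096 - 24) / 8 = 509 gva_list slots.
	 */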

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);

	if (hv_result_success(status))
		return;
do_native:
	native_flush_tlb_multi(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex *flush;
	u64 status;

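	/*
	 * The *_EX hypercall variants pass a sparse VP set instead of a flat
	 * 64-bit mask; only use them when the hypervisor advertises extended
	 * processor masks via this feature hint.
	 */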
	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return HV_STATUS_INVALID_PARAMETER;

	flush = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
					info->freed_tables ? NULL : cpu_is_lazy);
	if (nr_bank < 0)
		return HV_STATUS_INVALID_PARAMETER;

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
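	/*
	 * Each populated VP-set bank consumes one u64 of the input page and
	 * so costs one gva_list slot: e.g. nr_bank == 2 leaves
	 * (4096 - 32 - 16) / 8 = 506 slots, assuming 4K pages and a 32-byte
	 * fixed header.
	 */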

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				gva_n, nr_bank, flush, NULL);
	}

	return status;
}

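/*
 * Expected to be called once during Hyper-V platform initialization;
 * overriding pv_ops.mmu.flush_tlb_multi here routes remote TLB flushes
 * through the hypercall paths above when the hypervisor recommends it.
 */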
void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}