1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
|
/*
* Adapted from arm64 version.
*
* Copyright (C) 2012 ARM Limited
* Copyright (C) 2015 Mentor Graphics Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/cache.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <asm/arch_timer.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
#include <clocksource/arm_arch_timer.h>
#define MAX_SYMNAME 64
static struct page **vdso_text_pagelist;
extern char vdso_start[], vdso_end[];
/* Total number of pages needed for the data and text portions of the VDSO. */
unsigned int vdso_total_pages __ro_after_init;
/*
* The VDSO data page.
*/
static union vdso_data_store vdso_data_store __page_aligned_data;
static struct vdso_data *vdso_data = &vdso_data_store.data;
static struct page *vdso_data_page __ro_after_init;
static const struct vm_special_mapping vdso_data_mapping = {
.name = "[vvar]",
.pages = &vdso_data_page,
};
static struct vm_special_mapping vdso_text_mapping __ro_after_init = {
.name = "[vdso]",
};
struct elfinfo {
Elf32_Ehdr *hdr; /* ptr to ELF */
Elf32_Sym *dynsym; /* ptr to .dynsym section */
unsigned long dynsymsize; /* size of .dynsym section */
char *dynstr; /* ptr to .dynstr section */
};
/* Cached result of boot-time check for whether the arch timer exists,
* and if so, whether the virtual counter is useable.
*/
static bool cntvct_ok __ro_after_init;
static bool __init cntvct_functional(void)
{
struct device_node *np;
bool ret = false;
if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
goto out;
/* The arm_arch_timer core should export
* arch_timer_use_virtual or similar so we don't have to do
* this.
*/
np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
if (!np)
goto out_put;
if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
goto out_put;
ret = true;
out_put:
of_node_put(np);
out:
return ret;
}
static void * __init find_section(Elf32_Ehdr *ehdr, const char *name,
unsigned long *size)
{
Elf32_Shdr *sechdrs;
unsigned int i;
char *secnames;
/* Grab section headers and strings so we can tell who is who */
sechdrs = (void *)ehdr + ehdr->e_shoff;
secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
/* Find the section they want */
for (i = 1; i < ehdr->e_shnum; i++) {
if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) {
if (size)
*size = sechdrs[i].sh_size;
return (void *)ehdr + sechdrs[i].sh_offset;
}
}
if (size)
*size = 0;
return NULL;
}
static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
{
unsigned int i;
for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
char name[MAX_SYMNAME], *c;
if (lib->dynsym[i].st_name == 0)
continue;
strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
*c = 0;
if (strcmp(symname, name) == 0)
return &lib->dynsym[i];
}
return NULL;
}
static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname)
{
Elf32_Sym *sym;
sym = find_symbol(lib, symname);
if (!sym)
return;
sym->st_name = 0;
}
static void __init patch_vdso(void *ehdr)
{
struct elfinfo einfo;
einfo = (struct elfinfo) {
.hdr = ehdr,
};
einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize);
einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL);
/* If the virtual counter is absent or non-functional we don't
* want programs to incur the slight additional overhead of
* dispatching through the VDSO only to fall back to syscalls.
*/
if (!cntvct_ok) {
vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
}
}
static int __init vdso_init(void)
{
unsigned int text_pages;
int i;
if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("VDSO is not a valid ELF object!\n");
return -ENOEXEC;
}
text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_debug("vdso: %i text pages at base %pK\n", text_pages, vdso_start);
/* Allocate the VDSO text pagelist */
vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
GFP_KERNEL);
if (vdso_text_pagelist == NULL)
return -ENOMEM;
/* Grab the VDSO data page. */
vdso_data_page = virt_to_page(vdso_data);
/* Grab the VDSO text pages. */
for (i = 0; i < text_pages; i++) {
struct page *page;
page = virt_to_page(vdso_start + i * PAGE_SIZE);
vdso_text_pagelist[i] = page;
}
vdso_text_mapping.pages = vdso_text_pagelist;
vdso_total_pages = 1; /* for the data/vvar page */
vdso_total_pages += text_pages;
cntvct_ok = cntvct_functional();
patch_vdso(vdso_start);
return 0;
}
arch_initcall(vdso_init);
static int install_vvar(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma;
vma = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_MAYREAD,
&vdso_data_mapping);
return PTR_ERR_OR_ZERO(vma);
}
/* assumes mmap_sem is write-locked */
void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma;
unsigned long len;
mm->context.vdso = 0;
if (vdso_text_pagelist == NULL)
return;
if (install_vvar(mm, addr))
return;
/* Account for vvar page. */
addr += PAGE_SIZE;
len = (vdso_total_pages - 1) << PAGE_SHIFT;
vma = _install_special_mapping(mm, addr, len,
VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
&vdso_text_mapping);
if (!IS_ERR(vma))
mm->context.vdso = addr;
}
static void vdso_write_begin(struct vdso_data *vdata)
{
++vdso_data->seq_count;
smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
}
static void vdso_write_end(struct vdso_data *vdata)
{
smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
++vdso_data->seq_count;
}
static bool tk_is_cntvct(const struct timekeeper *tk)
{
if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
return false;
if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
return false;
return true;
}
/**
* update_vsyscall - update the vdso data page
*
* Increment the sequence counter, making it odd, indicating to
* userspace that an update is in progress. Update the fields used
* for coarse clocks and, if the architected system timer is in use,
* the fields used for high precision clocks. Increment the sequence
* counter again, making it even, indicating to userspace that the
* update is finished.
*
* Userspace is expected to sample seq_count before reading any other
* fields from the data page. If seq_count is odd, userspace is
* expected to wait until it becomes even. After copying data from
* the page, userspace must sample seq_count again; if it has changed
* from its previous value, userspace must retry the whole sequence.
*
* Calls to update_vsyscall are serialized by the timekeeping core.
*/
void update_vsyscall(struct timekeeper *tk)
{
struct timespec64 *wtm = &tk->wall_to_monotonic;
if (!cntvct_ok) {
/* The entry points have been zeroed, so there is no
* point in updating the data page.
*/
return;
}
vdso_write_begin(vdso_data);
vdso_data->tk_is_cntvct = tk_is_cntvct(tk);
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec = (u32)(tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift);
vdso_data->wtm_clock_sec = wtm->tv_sec;
vdso_data->wtm_clock_nsec = wtm->tv_nsec;
if (vdso_data->tk_is_cntvct) {
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mult = tk->tkr_mono.mult;
vdso_data->cs_shift = tk->tkr_mono.shift;
vdso_data->cs_mask = tk->tkr_mono.mask;
}
vdso_write_end(vdso_data);
flush_dcache_page(virt_to_page(vdso_data));
}
void update_vsyscall_tz(void)
{
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
flush_dcache_page(virt_to_page(vdso_data));
}
|