Commit | Line | Data |
---|---|---|
b0b55251 LJ |
1 | /* |
2 | * rculfhash-mm-mmap.c | |
3 | * | |
4 | * mmap/reservation based memory management for Lock-Free RCU Hash Table | |
5 | * | |
6 | * Copyright 2011 - Lai Jiangshan <laijs@cn.fujitsu.com> | |
7 | * | |
8 | * This library is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * This library is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with this library; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include <unistd.h> | |
6ef90903 MD |
24 | #include <stdio.h> |
25 | #include <errno.h> | |
26 | #include <stdlib.h> | |
b0b55251 LJ |
27 | #include <sys/mman.h> |
28 | #include "rculfhash-internal.h" | |
29 | ||
0d0cf93f MD |
/* Fall back to MAP_ANON when the headers do not define MAP_ANONYMOUS. */
#if !defined(MAP_ANONYMOUS)
# define MAP_ANONYMOUS MAP_ANON
#endif
33 | ||
142af0ff MJ |
34 | /* |
35 | * The allocation scheme used by the mmap based RCU hash table is to make a | |
36 | * large inaccessible mapping to reserve memory without allocating it. | |
37 | * Then smaller chunks are allocated by overlapping read/write mappings which | |
38 | * do allocate memory. Deallocation is done by an overlapping inaccessible | |
39 | * mapping. | |
40 | * | |
41 | * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the | |
42 | * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't | |
43 | * support overlapping mappings. | |
44 | * | |
45 | * An alternative to the overlapping mappings is to use mprotect to change the | |
46 | * protection on chunks of the large mapping, read/write to allocate and none | |
47 | * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a | |
48 | * call to madvise is also required to deallocate and it just doesn't work on | |
49 | * macOS. | |
50 | * | |
51 | * For this reason, we keep the original scheme on all platforms except Cygwin. | |
52 | */ | |
53 | ||
54 | ||
55 | /* Reserve inaccessible memory space without allocating it */ | |
56 | static | |
57 | void *memory_map(size_t length) | |
b0b55251 | 58 | { |
6ef90903 | 59 | void *ret; |
b0b55251 | 60 | |
6ef90903 MD |
61 | ret = mmap(NULL, length, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
62 | if (ret == MAP_FAILED) { | |
63 | perror("mmap"); | |
64 | abort(); | |
65 | } | |
b0b55251 LJ |
66 | return ret; |
67 | } | |
68 | ||
142af0ff MJ |
/*
 * Unmap the `length` bytes starting at `ptr`.
 *
 * If munmap reports an error, it is printed with perror() and the process
 * is aborted.
 */
static
void memory_unmap(void *ptr, size_t length)
{
	int err = munmap(ptr, length);

	if (err == 0)
		return;

	perror("munmap");
	abort();
}
77 | ||
142af0ff MJ |
78 | #ifdef __CYGWIN__ |
/*
 * Cygwin variant: allocate a memory chunk by switching the protection of
 * [ptr, ptr + length) to read/write. Aborts the process if mprotect fails.
 */
static
void memory_populate(void *ptr, size_t length)
{
	const int prot = PROT_READ | PROT_WRITE;

	if (mprotect(ptr, length, prot) != 0) {
		perror("mprotect");
		abort();
	}
}
88 | ||
/*
 * Cygwin variant: deallocate a memory chunk by switching the protection of
 * [ptr, ptr + length) back to none. Aborts the process if mprotect fails.
 */
static
void memory_discard(void *ptr, size_t length)
{
	int err = mprotect(ptr, length, PROT_NONE);

	if (err == 0)
		return;

	perror("mprotect");
	abort();
}
98 | ||
99 | #else /* __CYGWIN__ */ | |
100 | ||
101 | static | |
102 | void memory_populate(void *ptr, size_t length) | |
b0b55251 | 103 | { |
6ef90903 MD |
104 | if (mmap(ptr, length, PROT_READ | PROT_WRITE, |
105 | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, | |
106 | -1, 0) != ptr) { | |
107 | perror("mmap"); | |
108 | abort(); | |
109 | } | |
b0b55251 LJ |
110 | } |
111 | ||
112 | /* | |
113 | * Discard garbage memory and avoid system save it when try to swap it out. | |
114 | * Make it still reserved, inaccessible. | |
115 | */ | |
142af0ff MJ |
116 | static |
117 | void memory_discard(void *ptr, size_t length) | |
b0b55251 | 118 | { |
6ef90903 MD |
119 | if (mmap(ptr, length, PROT_NONE, |
120 | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, | |
121 | -1, 0) != ptr) { | |
122 | perror("mmap"); | |
123 | abort(); | |
124 | } | |
b0b55251 | 125 | } |
142af0ff | 126 | #endif /* __CYGWIN__ */ |
b0b55251 LJ |
127 | |
128 | static | |
129 | void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) | |
130 | { | |
131 | if (order == 0) { | |
132 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
133 | /* small table */ | |
134 | ht->tbl_mmap = calloc(ht->max_nr_buckets, | |
135 | sizeof(*ht->tbl_mmap)); | |
136 | assert(ht->tbl_mmap); | |
137 | return; | |
138 | } | |
139 | /* large table */ | |
140 | ht->tbl_mmap = memory_map(ht->max_nr_buckets | |
141 | * sizeof(*ht->tbl_mmap)); | |
142 | memory_populate(ht->tbl_mmap, | |
143 | ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); | |
144 | } else if (order > ht->min_alloc_buckets_order) { | |
145 | /* large table */ | |
146 | unsigned long len = 1UL << (order - 1); | |
147 | ||
148 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
149 | memory_populate(ht->tbl_mmap + len, | |
150 | len * sizeof(*ht->tbl_mmap)); | |
151 | } | |
152 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
153 | } | |
154 | ||
155 | /* | |
156 | * cds_lfht_free_bucket_table() should be called with decreasing order. | |
157 | * When cds_lfht_free_bucket_table(0) is called, it means the whole | |
158 | * lfht is destroyed. | |
159 | */ | |
160 | static | |
161 | void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) | |
162 | { | |
163 | if (order == 0) { | |
164 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
165 | /* small table */ | |
166 | poison_free(ht->tbl_mmap); | |
167 | return; | |
168 | } | |
169 | /* large table */ | |
170 | memory_unmap(ht->tbl_mmap, | |
171 | ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); | |
172 | } else if (order > ht->min_alloc_buckets_order) { | |
173 | /* large table */ | |
174 | unsigned long len = 1UL << (order - 1); | |
175 | ||
176 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
177 | memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); | |
178 | } | |
179 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
180 | } | |
181 | ||
182 | static | |
183 | struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) | |
184 | { | |
185 | return &ht->tbl_mmap[index]; | |
186 | } | |
187 | ||
188 | static | |
189 | struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, | |
190 | unsigned long max_nr_buckets) | |
191 | { | |
1228af1c | 192 | unsigned long page_bucket_size; |
b0b55251 | 193 | |
1228af1c | 194 | page_bucket_size = getpagesize() / sizeof(struct cds_lfht_node); |
b0b55251 LJ |
195 | if (max_nr_buckets <= page_bucket_size) { |
196 | /* small table */ | |
197 | min_nr_alloc_buckets = max_nr_buckets; | |
198 | } else { | |
199 | /* large table */ | |
200 | min_nr_alloc_buckets = max(min_nr_alloc_buckets, | |
201 | page_bucket_size); | |
202 | } | |
203 | ||
1228af1c LJ |
204 | return __default_alloc_cds_lfht( |
205 | &cds_lfht_mm_mmap, sizeof(struct cds_lfht), | |
206 | min_nr_alloc_buckets, max_nr_buckets); | |
b0b55251 LJ |
207 | } |
208 | ||
209 | const struct cds_lfht_mm_type cds_lfht_mm_mmap = { | |
210 | .alloc_cds_lfht = alloc_cds_lfht, | |
211 | .alloc_bucket_table = cds_lfht_alloc_bucket_table, | |
212 | .free_bucket_table = cds_lfht_free_bucket_table, | |
213 | .bucket_at = bucket_at, | |
214 | }; |