/home/andy/git/oilshell/oil/mycpp/cheney_heap.h
Line | Count | Source (jump to first uncovered line) |
1 | | // mycpp/gc_heap.h |
2 | | // |
3 | | // A garbage collected heap that looks like statically typed Python: Str, |
4 | | // List<T>, Dict<K, V>. |
5 | | |
6 | | #ifndef CHENEY_HEAP_H |
7 | | #define CHENEY_HEAP_H |
8 | | |
9 | | #include <cassert> // assert() |
10 | | #include <cstdlib> // malloc |
11 | | #include <cstring> // memcpy |
12 | | #include <initializer_list> |
13 | | #include <new> // placement new |
14 | | #include <utility> // std::forward |
15 | | |
16 | | #include "mycpp/common.h" |
17 | | |
18 | | // Design Notes: |
19 | | |
20 | | // It's a semi-space collector using the Cheney algorithm. (Later we may add a |
21 | | // "large object space", managed by mark-and-sweep after each copy step.) |
22 | | |
23 | | // Influences / Prior art: |
24 | | // |
25 | | // - OCaml / ZINC machine - statically typed, everything is a pointer (no value |
26 | | // types). |
27 | | // - femtolisp - Cheney collector; used to bootstrap Juliia. |
28 | | // - Other: ES shell, Lua, Python and especially Python dicts. |
29 | | |
30 | | // Design: |
31 | | // |
32 | | // - Graph of application types: |
33 | | // - Str* |
34 | | // - List<T>* |
35 | | // - Dict<K, V>* |
36 | | // - User-defined classes, which may have a vtable pointer. |
37 | | // - Graph of GC nodes: everything is an Obj* with an 8 byte header |
38 | | // - 1 byte heap tag, for Cheney |
39 | | // - 1 byte type tag for Zephyr ASDL unions |
40 | | // - 2 byte / 16-bit field bit mask for following pointers on user-defined |
41 | | // classes and List / Dict "headers" |
42 | | // - 4 bytes length for Cheney to determine how much to copy. |
43 | | // |
44 | | // Operations that resize: |
45 | | // |
46 | | // - List::append() and extend() can realloc |
47 | | // - Dict::set() can realloc and rehash (TODO) |
48 | | // |
49 | | // Important Types: |
50 | | // |
51 | | // - Slab<T> to trace the object graph. Slab<int> is opaque, but Slab<T*> |
52 | | // requires tracing. |
53 | | // - A List<T> is a fixed-size structure, with int fields and a pointer |
54 | | // to a single Slab<T> (the items). |
55 | | // - A Dict<K, V> is also fixed-size, with pointers to 3 slabs: the index |
56 | | // Slab<int>, the keys Slab<K>, and the index Slab<V>. |
57 | | // |
58 | | // "Ghost" layout types: |
59 | | // |
60 | | // - LayoutFixed - for walking up to 16 fields of a user-defined type. |
61 | | // - LayoutForwarded - for storing the forwarding pointer that the Cheney |
62 | | // algorithm uses. |
63 | | // |
64 | | // Stack rooting API: |
65 | | // |
66 | | // StackRoots _roots({&mystr, &mydict, &mylist}); |
67 | | // |
68 | | // This pushes local variables onto the global data structure managed by the |
69 | | // GC. |
70 | | |
71 | | // TODO: Dicts should actually use hashing! Test computational complexity. |
72 | | |
73 | | // Memory allocation APIs: |
74 | | // |
75 | | // - Alloc<Foo>(x) |
76 | | // The typed public API. An alternative to new Foo(x). mycpp/ASDL should |
77 | | // generate these calls. |
78 | | // - AllocStr(length), StrFromC(), NewList, NewDict: Alloc() doesn't |
79 | | // work |
80 | | // for these types for various reasons |
81 | | // - Heap::Allocate() |
82 | | // The untyped internal API. For AllocStr() and NewSlab(). |
83 | | // - malloc() -- for say yajl to use. Manually deallocated. |
84 | | // - new/delete -- shouldn't be in Oil? |
85 | | |
86 | | // Slab Sizing with 8-byte slab header |
87 | | // |
88 | | // 16 - 8 = 8 = 1 eight-byte or 2 four-byte elements |
89 | | // 32 - 8 = 24 = 3 eight-byte or 6 four-byte elements |
90 | | // 64 - 8 = 56 = 7 eight-byte or 14 four-byte elements |
91 | | |
92 | | // #defines for degbugging: |
93 | | // |
94 | | // GC_EVERY_ALLOC: Collect() on every Allocate(). Exposes many bugs! |
95 | | // GC_VERBOSE: Log when we collect |
96 | | // GC_STATS: Collect more stats. TODO: Rename this? |
97 | | |
98 | | // Silly definition for passing types like GlobalList<T, N> and initializer |
99 | | // lists like {1, 2, 3} to macros |
100 | | |
101 | | template <class T> |
102 | | class List; |
103 | | |
104 | | class Obj; |
105 | | |
106 | | class Space { |
107 | | public: |
108 | 0 | Space() { |
109 | 0 | } |
110 | | void Init(int); |
111 | | |
112 | | void Free(); |
113 | | |
114 | | #if GC_STATS |
115 | | void AssertValid(void* p) { |
116 | | if (begin_ <= p && p < begin_ + size_) { |
117 | | return; |
118 | | } |
119 | | log("p = %p isn't between %p and %p", begin_, begin_ + size_); |
120 | | InvalidCodePath(); |
121 | | } |
122 | | #endif |
123 | | |
124 | | char* begin_; |
125 | | int size_; // number of bytes |
126 | | }; |
127 | | |
128 | | class CheneyHeap { |
129 | | public: |
130 | 0 | CheneyHeap() { // default constructor does nothing -- relies on zero |
131 | 0 | // initialization |
132 | 0 | } |
133 | | |
134 | | // Real initialization with the initial heap size. The heap grows with |
135 | | // allocations. |
136 | 0 | void Init(int space_size) { |
137 | 0 | from_space_.Init(space_size); |
138 | 0 |
|
139 | 0 | free_ = from_space_.begin_; // where we allocate from |
140 | 0 | limit_ = free_ + space_size; |
141 | 0 |
|
142 | 0 | roots_top_ = 0; |
143 | 0 |
|
144 | 0 | is_initialized_ = true; |
145 | 0 |
|
146 | 0 | #if GC_STATS |
147 | 0 | num_collections_ = 0; |
148 | 0 | num_heap_growths_ = 0; |
149 | 0 | num_forced_growths_ = 0; |
150 | 0 | num_live_objs_ = 0; |
151 | 0 | #endif |
152 | 0 | } |
153 | | |
154 | 0 | void* Bump(int n) { |
155 | 0 | char* p = free_; |
156 | 0 | free_ += n; |
157 | | #if GC_STATS |
158 | | num_live_objs_++; |
159 | | #endif |
160 | 0 | return p; |
161 | 0 | } |
162 | | |
163 | | void* Allocate(int); |
164 | | |
165 | 0 | void Swap() { |
166 | | // Swap spaces for next collection. |
167 | 0 | char* tmp = from_space_.begin_; |
168 | 0 | from_space_.begin_ = to_space_.begin_; |
169 | 0 | to_space_.begin_ = tmp; |
170 | |
|
171 | 0 | int tmp2 = from_space_.size_; |
172 | 0 | from_space_.size_ = to_space_.size_; |
173 | 0 | to_space_.size_ = tmp2; |
174 | 0 | } |
175 | | |
176 | 0 | void PushRoot(Obj** p) { |
177 | 0 | // log("PushRoot %d", roots_top_); |
178 | 0 | roots_[roots_top_++] = p; |
179 | 0 | // TODO: This should be like a malloc() failure? |
180 | 0 | assert(roots_top_ < kMaxRoots); |
181 | 0 | } |
182 | | |
183 | 0 | void PopRoot() { |
184 | 0 | roots_top_--; |
185 | 0 | // log("PopRoot %d", roots_top_); |
186 | 0 | } |
187 | | |
188 | | Obj* Relocate(Obj* obj, Obj* header); |
189 | | |
190 | | // mutates free_ and other variables |
191 | | void Collect(int to_space_size = 0); |
192 | | |
193 | | #if GC_STATS |
194 | | void Report() { |
195 | | log("-----"); |
196 | | log("num collections = %d", num_collections_); |
197 | | log("num heap growths = %d", num_heap_growths_); |
198 | | log("num forced heap growths = %d", num_forced_growths_); |
199 | | log("num live objects = %d", num_live_objs_); |
200 | | |
201 | | log("from_space_ %p", from_space_.begin_); |
202 | | log("to_space %p", to_space_.begin_); |
203 | | log("-----"); |
204 | | } |
205 | | #endif |
206 | | |
207 | | Space from_space_; // space we allocate from |
208 | | Space to_space_; // space that the collector copies to |
209 | | |
210 | | char* free_; // next place to allocate, from_space_ <= free_ < limit_ |
211 | | char* limit_; // end of space we're allocating from |
212 | | |
213 | | // Stack roots. The obvious data structure is a linked list, but an array |
214 | | // has better locality. |
215 | | // |
216 | | // femtolisp uses a global pointer to dynamically-allocated growable array, |
217 | | // with initial N_STACK = 262144! Kind of arbitrary. |
218 | | |
219 | | int roots_top_; |
220 | | Obj** roots_[kMaxRoots]; // These are pointers to Obj* pointers |
221 | | |
222 | | bool is_initialized_ = false; |
223 | | |
224 | | #if GC_STATS |
225 | | int num_collections_; |
226 | | int num_heap_growths_; |
227 | | int num_forced_growths_; // when a single allocation is too big |
228 | | int num_live_objs_; |
229 | | #endif |
230 | | }; |
231 | | |
232 | | #if GC_STATS |
233 | | void ShowFixedChildren(Obj* obj); |
234 | | #endif |
235 | | |
236 | | // LayoutForwarded and LayoutFixed aren't real types. You can cast arbitrary |
237 | | // objs to them to access a HOMOGENEOUS REPRESENTATION useful for garbage |
238 | | // collection. |
239 | | |
240 | | class LayoutForwarded : public Obj { |
241 | | public: |
242 | | Obj* new_location; // valid if and only if heap_tag_ == Tag::Forwarded |
243 | | }; |
244 | | |
245 | | #endif // GC_HEAP_H |