cpp

Coverage Report

Created: 2022-09-21 22:22

/home/andy/git/oilshell/oil/mycpp/cheney_heap.h
Line
Count
Source (jump to first uncovered line)
1
// mycpp/gc_heap.h
2
//
3
// A garbage collected heap that looks like statically typed Python: Str,
4
// List<T>, Dict<K, V>.
5
6
#ifndef CHENEY_HEAP_H
7
#define CHENEY_HEAP_H
8
9
#include <cassert>  // assert()
10
#include <cstdlib>  // malloc
11
#include <cstring>  // memcpy
12
#include <initializer_list>
13
#include <new>      // placement new
14
#include <utility>  // std::forward
15
16
#include "mycpp/common.h"
17
18
// Design Notes:
19
20
// It's a semi-space collector using the Cheney algorithm.  (Later we may add a
21
// "large object space", managed by mark-and-sweep after each copy step.)
22
23
// Influences / Prior art:
24
//
25
// - OCaml / ZINC machine - statically typed, everything is a pointer (no value
26
//   types).
27
// - femtolisp - Cheney collector; used to bootstrap Juliia.
28
// - Other: ES shell, Lua, Python and especially Python dicts.
29
30
// Design:
31
//
32
// - Graph of application types:
33
//   - Str*
34
//   - List<T>*
35
//   - Dict<K, V>*
36
//   - User-defined classes, which may have a vtable pointer.
37
// - Graph of GC nodes: everything is an Obj* with an 8 byte header
38
//   - 1 byte heap tag, for Cheney
39
//   - 1 byte type tag for Zephyr ASDL unions
40
//   - 2 byte / 16-bit field bit mask for following pointers on user-defined
41
//     classes and List / Dict "headers"
42
//   - 4 bytes length for Cheney to determine how much to copy.
43
//
44
// Operations that resize:
45
//
46
// - List::append() and extend() can realloc
47
// - Dict::set() can realloc and rehash (TODO)
48
//
49
// Important Types:
50
//
51
// - Slab<T> to trace the object graph.  Slab<int> is opaque, but Slab<T*>
52
//   requires tracing.
53
//   - A List<T> is a fixed-size structure, with int fields and a pointer
54
//     to a single Slab<T> (the items).
55
//   - A Dict<K, V> is also fixed-size, with pointers to 3 slabs: the index
56
//     Slab<int>, the keys Slab<K>, and the index Slab<V>.
57
//
58
// "Ghost" layout types:
59
//
60
// - LayoutFixed - for walking up to 16 fields of a user-defined type.
61
// - LayoutForwarded - for storing the forwarding pointer that the Cheney
62
//   algorithm uses.
63
//
64
// Stack rooting API:
65
//
66
//   StackRoots _roots({&mystr, &mydict, &mylist});
67
//
68
// This pushes local variables onto the global data structure managed by the
69
// GC.
70
71
// TODO: Dicts should actually use hashing!  Test computational complexity.
72
73
// Memory allocation APIs:
74
//
75
// - Alloc<Foo>(x)
76
//   The typed public API.  An alternative to new Foo(x).  mycpp/ASDL should
77
//   generate these calls.
78
// - AllocStr(length), StrFromC(), NewList, NewDict: Alloc() doesn't
79
// work
80
//   for these types for various reasons
81
// - Heap::Allocate()
82
//   The untyped internal API.  For AllocStr() and NewSlab().
83
// - malloc() -- for say yajl to use.  Manually deallocated.
84
// - new/delete -- shouldn't be in Oil?
85
86
// Slab Sizing with 8-byte slab header
87
//
88
//   16 - 8 =  8 = 1 eight-byte or  2 four-byte elements
89
//   32 - 8 = 24 = 3 eight-byte or  6 four-byte elements
90
//   64 - 8 = 56 = 7 eight-byte or 14 four-byte elements
91
92
// #defines for degbugging:
93
//
94
// GC_EVERY_ALLOC: Collect() on every Allocate().  Exposes many bugs!
95
// GC_VERBOSE: Log when we collect
96
// GC_STATS: Collect more stats.  TODO: Rename this?
97
98
// Silly definition for passing types like GlobalList<T, N> and initializer
99
// lists like {1, 2, 3} to macros
100
101
template <class T>
102
class List;
103
104
class Obj;
105
106
class Space {
107
 public:
108
0
  Space() {
109
0
  }
110
  void Init(int);
111
112
  void Free();
113
114
#if GC_STATS
115
  void AssertValid(void* p) {
116
    if (begin_ <= p && p < begin_ + size_) {
117
      return;
118
    }
119
    log("p = %p isn't between %p and %p", begin_, begin_ + size_);
120
    InvalidCodePath();
121
  }
122
#endif
123
124
  char* begin_;
125
  int size_;  // number of bytes
126
};
127
128
class CheneyHeap {
129
 public:
130
0
  CheneyHeap() {  // default constructor does nothing -- relies on zero
131
0
                  // initialization
132
0
  }
133
134
  // Real initialization with the initial heap size.  The heap grows with
135
  // allocations.
136
0
  void Init(int space_size) {
137
0
    from_space_.Init(space_size);
138
0
139
0
    free_ = from_space_.begin_;  // where we allocate from
140
0
    limit_ = free_ + space_size;
141
0
142
0
    roots_top_ = 0;
143
0
144
0
    is_initialized_ = true;
145
0
146
0
#if GC_STATS
147
0
    num_collections_ = 0;
148
0
    num_heap_growths_ = 0;
149
0
    num_forced_growths_ = 0;
150
0
    num_live_objs_ = 0;
151
0
#endif
152
0
  }
153
154
0
  void* Bump(int n) {
155
0
    char* p = free_;
156
0
    free_ += n;
157
#if GC_STATS
158
    num_live_objs_++;
159
#endif
160
0
    return p;
161
0
  }
162
163
  void* Allocate(int);
164
165
0
  void Swap() {
166
    // Swap spaces for next collection.
167
0
    char* tmp = from_space_.begin_;
168
0
    from_space_.begin_ = to_space_.begin_;
169
0
    to_space_.begin_ = tmp;
170
171
0
    int tmp2 = from_space_.size_;
172
0
    from_space_.size_ = to_space_.size_;
173
0
    to_space_.size_ = tmp2;
174
0
  }
175
176
0
  void PushRoot(Obj** p) {
177
0
    // log("PushRoot %d", roots_top_);
178
0
    roots_[roots_top_++] = p;
179
0
    // TODO: This should be like a malloc() failure?
180
0
    assert(roots_top_ < kMaxRoots);
181
0
  }
182
183
0
  void PopRoot() {
184
0
    roots_top_--;
185
0
    // log("PopRoot %d", roots_top_);
186
0
  }
187
188
  Obj* Relocate(Obj* obj, Obj* header);
189
190
  // mutates free_ and other variables
191
  void Collect(int to_space_size = 0);
192
193
#if GC_STATS
194
  void Report() {
195
    log("-----");
196
    log("num collections = %d", num_collections_);
197
    log("num heap growths = %d", num_heap_growths_);
198
    log("num forced heap growths = %d", num_forced_growths_);
199
    log("num live objects = %d", num_live_objs_);
200
201
    log("from_space_ %p", from_space_.begin_);
202
    log("to_space %p", to_space_.begin_);
203
    log("-----");
204
  }
205
#endif
206
207
  Space from_space_;  // space we allocate from
208
  Space to_space_;    // space that the collector copies to
209
210
  char* free_;   // next place to allocate, from_space_ <= free_ < limit_
211
  char* limit_;  // end of space we're allocating from
212
213
  // Stack roots.  The obvious data structure is a linked list, but an array
214
  // has better locality.
215
  //
216
  // femtolisp uses a global pointer to dynamically-allocated growable array,
217
  // with initial N_STACK = 262144!  Kind of arbitrary.
218
219
  int roots_top_;
220
  Obj** roots_[kMaxRoots];  // These are pointers to Obj* pointers
221
222
  bool is_initialized_ = false;
223
224
#if GC_STATS
225
  int num_collections_;
226
  int num_heap_growths_;
227
  int num_forced_growths_;  // when a single allocation is too big
228
  int num_live_objs_;
229
#endif
230
};
231
232
#if GC_STATS
233
void ShowFixedChildren(Obj* obj);
234
#endif
235
236
// LayoutForwarded and LayoutFixed aren't real types.  You can cast arbitrary
237
// objs to them to access a HOMOGENEOUS REPRESENTATION useful for garbage
238
// collection.
239
240
class LayoutForwarded : public Obj {
241
 public:
242
  Obj* new_location;  // valid if and only if heap_tag_ == Tag::Forwarded
243
};
244
245
#endif  // GC_HEAP_H