cpp

Coverage Report

Created: 2023-03-07 20:24

/home/andy/git/oilshell/oil/mycpp/mark_sweep_heap.cc
Line
Count
Source (jump to first uncovered line)
1
#include "mycpp/mark_sweep_heap.h"
2
3
#include <inttypes.h>  // PRId64
4
#include <stdlib.h>    // getenv()
5
#include <string.h>    // strlen()
6
#include <sys/time.h>  // gettimeofday()
7
#include <time.h>      // clock_gettime(), CLOCK_PROCESS_CPUTIME_ID
8
#include <unistd.h>    // STDERR_FILENO
9
10
#include "_build/detected-cpp-config.h"  // for GC_TIMING
11
#include "mycpp/gc_builtins.h"           // StringToInteger()
12
#include "mycpp/gc_slab.h"
13
14
// TODO: Remove this guard when we have separate binaries
15
#if MARK_SWEEP
16
17
68
void MarkSweepHeap::Init() {
18
68
  Init(1000);  // collect at 1000 objects in tests
19
68
}
20
21
70
void MarkSweepHeap::Init(int gc_threshold) {
22
70
  gc_threshold_ = gc_threshold;
23
24
70
  char* e;
25
70
  e = getenv("OIL_GC_THRESHOLD");
26
70
  if (e) {
27
0
    int result;
28
0
    if (StringToInteger(e, strlen(e), 10, &result)) {
29
      // Override collection threshold
30
0
      gc_threshold_ = result;
31
0
    }
32
0
  }
33
34
  // only for developers
35
70
  e = getenv("_OIL_GC_VERBOSE");
36
70
  if (e && strcmp(e, "1") == 0) {
37
0
    gc_verbose_ = true;
38
0
  }
39
40
70
  live_objs_.reserve(KiB(10));
41
70
  roots_.reserve(KiB(1));  // prevent resizing in common case
42
70
}
43
44
16
int MarkSweepHeap::MaybeCollect() {
45
  // Maybe collect BEFORE allocation, because the new object won't be rooted
46
  #if GC_ALWAYS
47
  int result = Collect();
48
  #else
49
16
  int result = -1;
50
16
  if (num_live_ > gc_threshold_) {
51
0
    result = Collect();
52
0
  }
53
16
  #endif
54
55
16
  num_gc_points_++;  // this is a manual collection point
56
16
  return result;
57
16
}
58
59
// Allocate and update stats
60
8.15k
void* MarkSweepHeap::Allocate(size_t num_bytes) {
61
  // log("Allocate %d", num_bytes);
62
63
8.15k
  if (to_free_.empty()) {
64
    // Use higher object IDs
65
8.10k
    obj_id_after_allocate_ = greatest_obj_id_;
66
8.10k
    greatest_obj_id_++;
67
68
    // This check is ON in release mode
69
8.10k
    CHECK(greatest_obj_id_ <= kMaxObjId);
70
71
8.10k
  } else {
72
50
    RawObject* dead = to_free_.back();
73
50
    to_free_.pop_back();
74
75
50
    ObjHeader* header = FindObjHeader(dead);
76
50
    obj_id_after_allocate_ = header->obj_id;  // reuse the dead object's ID
77
78
50
    free(dead);
79
50
  }
80
81
0
  void* result = calloc(num_bytes, 1);
82
8.15k
  DCHECK(result != nullptr);
83
84
0
  live_objs_.push_back(reinterpret_cast<RawObject*>(result));
85
86
8.15k
  num_live_++;
87
8.15k
  num_allocated_++;
88
8.15k
  bytes_allocated_ += num_bytes;
89
90
8.15k
  return result;
91
8.15k
}
92
  // Disabled: realloc() may move the object while the GC still tracks the
  // old address in live_objs_, which leads to a double free at sweep time
  // (see comment below).
  #if 0
void* MarkSweepHeap::Reallocate(void* p, size_t num_bytes) {
  FAIL(kNotImplemented);
  // This causes a double-free in the GC!
  // return realloc(p, num_bytes);
}
  #endif
100
101
// "Leaf" for marking / TraceChildren
102
//
103
// - Abort if nullptr
104
// - Find the header (get rid of this when remove ObjHeader member)
105
// - Tag::{Opaque,FixedSized,Scanned} have their mark bits set
106
// - Tag::{FixedSize,Scanned} are also pushed on the gray stack
107
108
141
void MarkSweepHeap::MaybeMarkAndPush(RawObject* obj) {
109
141
  ObjHeader* header = FindObjHeader(obj);
110
141
  if (header->heap_tag == HeapTag::Global) {  // don't mark or push
111
8
    return;
112
8
  }
113
114
133
  int obj_id = header->obj_id;
115
133
  if (mark_set_.IsMarked(obj_id)) {
116
29
    return;
117
29
  }
118
119
104
  switch (header->heap_tag) {
120
33
  case HeapTag::Opaque:  // e.g. strings have no children
121
33
    mark_set_.Mark(obj_id);
122
33
    break;
123
124
8
  case HeapTag::Scanned:  // these 2 types have children
125
71
  case HeapTag::FixedSize:
126
71
    mark_set_.Mark(obj_id);
127
71
    gray_stack_.push_back(header);  // Push the header, not the object!
128
71
    break;
129
130
0
  default:
131
0
    FAIL(kShouldNotGetHere);
132
104
  }
133
104
}
134
135
112
void MarkSweepHeap::TraceChildren() {
136
183
  while (!gray_stack_.empty()) {
137
71
    ObjHeader* header = gray_stack_.back();
138
71
    gray_stack_.pop_back();
139
140
71
    switch (header->heap_tag) {
141
63
    case HeapTag::FixedSize: {
142
63
      auto fixed = reinterpret_cast<LayoutFixed*>(header);
143
63
      int mask = FIELD_MASK(fixed->header_);
144
145
1.07k
      for (int i = 0; i < kFieldMaskBits; ++i) {
146
1.00k
        if (mask & (1 << i)) {
147
45
          RawObject* child = fixed->children_[i];
148
45
          if (child) {
149
39
            MaybeMarkAndPush(child);
150
39
          }
151
45
        }
152
1.00k
      }
153
63
      break;
154
0
    }
155
156
8
    case HeapTag::Scanned: {
157
      // no vtable
158
      // assert(reinterpret_cast<void*>(header) ==
159
      // reinterpret_cast<void*>(obj));
160
161
8
      auto slab = reinterpret_cast<Slab<RawObject*>*>(header);
162
163
8
      int n = NUM_POINTERS(slab->header_);
164
32
      for (int i = 0; i < n; ++i) {
165
24
        RawObject* child = slab->items_[i];
166
24
        if (child) {
167
10
          MaybeMarkAndPush(child);
168
10
        }
169
24
      }
170
8
      break;
171
0
    }
172
0
    default:
173
      // Only FixedSize and Scanned are pushed
174
0
      FAIL(kShouldNotGetHere);
175
71
    }
176
71
  }
177
112
}
178
179
112
void MarkSweepHeap::Sweep() {
180
112
  int last_live_index = 0;
181
112
  int num_objs = live_objs_.size();
182
8.36k
  for (int i = 0; i < num_objs; ++i) {
183
8.25k
    RawObject* obj = live_objs_[i];
184
8.25k
    assert(obj);  // malloc() shouldn't have returned nullptr
185
186
0
    ObjHeader* header = FindObjHeader(obj);
187
8.25k
    bool is_live = mark_set_.IsMarked(header->obj_id);
188
189
    // Compact live_objs_ and populate to_free_.  Note: doing the reverse could
190
    // be more efficient when many objects are dead.
191
8.25k
    if (is_live) {
192
104
      live_objs_[last_live_index++] = obj;
193
8.14k
    } else {
194
8.14k
      to_free_.push_back(obj);
195
      // free(obj);
196
8.14k
      num_live_--;
197
8.14k
    }
198
8.25k
  }
199
112
  live_objs_.resize(last_live_index);  // remove dangling objects
200
201
112
  num_collections_++;
202
112
  max_survived_ = std::max(max_survived_, num_live_);
203
112
}
204
205
112
int MarkSweepHeap::Collect() {
206
112
  #ifdef GC_TIMING
207
112
  struct timespec start, end;
208
112
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start) < 0) {
209
0
    assert(0);
210
0
  }
211
0
  #endif
212
213
0
  int num_roots = roots_.size();
214
112
  int num_globals = global_roots_.size();
215
216
112
  if (gc_verbose_) {
217
0
    log("");
218
0
    log("%2d. GC with %d roots (%d global) and %d live objects",
219
0
        num_collections_, num_roots + num_globals, num_globals, num_live_);
220
0
  }
221
222
  // Resize it
223
112
  mark_set_.ReInit(greatest_obj_id_);
224
225
  // Mark roots.
226
  // Note: It might be nice to get rid of double pointers
227
246
  for (int i = 0; i < num_roots; ++i) {
228
134
    RawObject* root = *(roots_[i]);
229
134
    if (root) {
230
84
      MaybeMarkAndPush(root);
231
84
    }
232
134
  }
233
234
120
  for (int i = 0; i < num_globals; ++i) {
235
8
    RawObject* root = global_roots_[i];
236
8
    if (root) {
237
8
      MaybeMarkAndPush(root);
238
8
    }
239
8
  }
240
241
  // Traverse object graph.
242
112
  TraceChildren();
243
244
112
  Sweep();
245
246
112
  if (gc_verbose_) {
247
0
    log("    %d live after sweep", num_live_);
248
0
  }
249
250
  // We know how many are live.  If the number of objects is close to the
251
  // threshold (above 75%), then set the threshold to 2 times the number of
252
  // live objects.  This is an ad hoc policy that removes observed "thrashing"
253
  // -- being at 99% of the threshold and doing FUTILE mark and sweep.
254
255
112
  int water_mark = (gc_threshold_ * 3) / 4;
256
112
  if (num_live_ > water_mark) {
257
0
    gc_threshold_ = num_live_ * 2;
258
0
    num_growths_++;
259
0
    if (gc_verbose_) {
260
0
      log("    exceeded %d live objects; gc_threshold set to %d", water_mark,
261
0
          gc_threshold_);
262
0
    }
263
0
  }
264
265
112
  #ifdef GC_TIMING
266
112
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end) < 0) {
267
0
    assert(0);
268
0
  }
269
270
0
  double start_secs = start.tv_sec + start.tv_nsec / 1e9;
271
112
  double end_secs = end.tv_sec + end.tv_nsec / 1e9;
272
112
  double gc_millis = (end_secs - start_secs) * 1000.0;
273
274
112
  if (gc_verbose_) {
275
0
    log("    %.1f ms GC", gc_millis);
276
0
  }
277
278
112
  total_gc_millis_ += gc_millis;
279
112
  if (gc_millis > max_gc_millis_) {
280
56
    max_gc_millis_ = gc_millis;
281
56
  }
282
112
  #endif
283
284
112
  return num_live_;  // for unit tests only
285
112
}
286
287
// Write human-readable allocator and GC statistics to file descriptor 'fd'.
// Called from DoProcessExit() when OIL_GC_STATS or OIL_GC_STATS_FD is set.
void MarkSweepHeap::PrintStats(int fd) {
  dprintf(fd, "  num live        = %10d\n", num_live_);
  // max_survived_ can be less than num_live_, because it leaves off the last
  // GC (it's only updated inside Sweep()).
  dprintf(fd, "  max survived    = %10d\n", max_survived_);
  dprintf(fd, "\n");
  dprintf(fd, "  num allocated   = %10d\n", num_allocated_);
  dprintf(fd, "bytes allocated   = %10" PRId64 "\n", bytes_allocated_);
  dprintf(fd, "\n");
  dprintf(fd, "  num gc points   = %10d\n", num_gc_points_);
  dprintf(fd, "  num collections = %10d\n", num_collections_);
  dprintf(fd, "\n");
  dprintf(fd, "   gc threshold   = %10d\n", gc_threshold_);
  dprintf(fd, "  num growths     = %10d\n", num_growths_);
  dprintf(fd, "\n");
  dprintf(fd, "  max gc millis   = %10.1f\n", max_gc_millis_);
  dprintf(fd, "total gc millis   = %10.1f\n", total_gc_millis_);
  dprintf(fd, "\n");
  // Capacity stats show whether the reserve() calls in Init() were enough.
  dprintf(fd, "roots capacity    = %10d\n",
          static_cast<int>(roots_.capacity()));
  dprintf(fd, " objs capacity    = %10d\n",
          static_cast<int>(live_objs_.capacity()));
}
309
310
54
void MarkSweepHeap::EagerFree() {
311
8.09k
  for (auto obj : to_free_) {
312
8.09k
    free(obj);
313
8.09k
  }
314
54
}
315
316
// Cleanup at the end of main() to remain ASAN-safe
317
56
void MarkSweepHeap::DoProcessExit(bool fast_exit) {
318
56
  char* e = getenv("OIL_GC_ON_EXIT");
319
320
56
  if (fast_exit) {
321
    // don't collect by default; OIL_GC_ON_EXIT=1 overrides
322
2
    if (e && strcmp(e, "1") == 0) {
323
0
      Collect();
324
0
      EagerFree();
325
0
    }
326
54
  } else {
327
    // collect by default; OIL_GC_ON_EXIT=0 overrides
328
54
    if (e && strcmp(e, "0") == 0) {
329
0
      ;
330
54
    } else {
331
54
      Collect();
332
54
      EagerFree();
333
54
    }
334
54
  }
335
336
56
  int stats_fd = -1;
337
56
  e = getenv("OIL_GC_STATS");
338
56
  if (e && strlen(e)) {  // env var set and non-empty
339
0
    stats_fd = STDERR_FILENO;
340
56
  } else {
341
    // A raw file descriptor lets benchmarks extract stats even if the script
342
    // writes to stdout and stderr.  Shells can't use open() without potential
343
    // conflicts.
344
345
56
    e = getenv("OIL_GC_STATS_FD");
346
56
    if (e && strlen(e)) {
347
      // Try setting 'stats_fd'.  If there's an error, it will be unchanged, and
348
      // we don't PrintStats();
349
0
      StringToInteger(e, strlen(e), 10, &stats_fd);
350
0
    }
351
56
  }
352
353
56
  if (stats_fd != -1) {
354
0
    PrintStats(stats_fd);
355
0
  }
356
56
}
357
358
54
void MarkSweepHeap::CleanProcessExit() {
359
54
  DoProcessExit(false);  // not fast_exit
360
54
}
361
362
// for the main binary
363
2
void MarkSweepHeap::FastProcessExit() {
364
2
  DoProcessExit(true);
365
2
}
366
367
// The process-wide heap instance that GC'd allocations go through.
MarkSweepHeap gHeap;

#endif  // MARK_SWEEP