Coverage Report

Created: 2024-03-13 14:13

/home/andy/git/oilshell/oil/mycpp/gc_mylib.h

```
Line | Count | Source
   1 |       | // gc_mylib.h - corresponds to mycpp/mylib.py
   2 |       |
   3 |       | #ifndef MYCPP_GC_MYLIB_H
   4 |       | #define MYCPP_GC_MYLIB_H
   5 |       |
   6 |       | #include <limits.h>  // CHAR_BIT
   7 |       |
   8 |       | #include "mycpp/gc_alloc.h"  // gHeap
   9 |       | #include "mycpp/gc_dict.h"   // for dict_erase()
  10 |       | #include "mycpp/gc_tuple.h"
  11 |       |
  12 |       | template <class K, class V>
  13 |       | class Dict;
  14 |       |
  15 |       | // https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
  16 |       | // Notes:
  17 |       | // - Python 2.7's intobject.c has an erroneous +6
  18 |       | // - This is 13, but len('-2147483648') is 11, which means we only need 12?
  19 |       | // - This formula is valid for octal(), because 2^(3 bits) = 8
  20 |       |
  21 |       | const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
```
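A quick sanity check of that formula, assuming a 32-bit int (a hypothetical standalone snippet, not part of this header): `CHAR_BIT * sizeof(int) / 3 + 3` evaluates to 32 / 3 + 3 = 13, while the worst cases are 11 characters for decimal INT_MIN and 11 octal digits for an all-ones word, so 13 bytes holds either plus a sign and the NUL terminator.

```cpp
#include <assert.h>
#include <limits.h>  // CHAR_BIT, INT_MIN, UINT_MAX
#include <stdio.h>   // snprintf

const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;  // 13 when int is 32 bits

int main() {
  char buf[kIntBufSize];

  // Decimal worst case: "-2147483648" is 11 chars + NUL.
  int n = snprintf(buf, sizeof(buf), "%d", INT_MIN);
  assert(n == 11);

  // Octal worst case: "37777777777" is 11 digits (one per 3 bits) + NUL.
  n = snprintf(buf, sizeof(buf), "%o", UINT_MAX);
  assert(n == 11);
  return 0;
}
```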
```
  22 |       |
  23 |       | namespace mylib {
  24 |       |
  25 |       | void InitCppOnly();
  26 |       |
  27 |       | // Wrappers around our C++ APIs
  28 |       |
  29 |    14 | inline void MaybeCollect() {
  30 |    14 |   gHeap.MaybeCollect();
  31 |    14 | }
  32 |       |
  33 |       | void print_stderr(BigStr* s);
  34 |       |
  35 |   286 | inline int ByteAt(BigStr* s, int i) {
  36 |   286 |   DCHECK(0 <= i);
  37 |   286 |   DCHECK(i <= len(s));
  38 |       |
  39 |     0 |   return static_cast<unsigned char>(s->data_[i]);
  40 |   286 | }
  41 |       |
  42 | 1.02k | inline int ByteEquals(int byte, BigStr* ch) {
  43 | 1.02k |   DCHECK(0 <= byte);
  44 | 1.02k |   DCHECK(byte < 256);
  45 |       |
  46 | 1.02k |   DCHECK(len(ch) == 1);
  47 |       |
  48 |     0 |   return byte == static_cast<unsigned char>(ch->data_[0]);
  49 | 1.02k | }
  50 |       |
  51 |   256 | inline int ByteInSet(int byte, BigStr* byte_set) {
  52 |   256 |   DCHECK(0 <= byte);
  53 |   256 |   DCHECK(byte < 256);
  54 |       |
  55 |     0 |   int n = len(byte_set);
  56 | 1.77k |   for (int i = 0; i < n; ++i) {
  57 | 1.52k |     int b = static_cast<unsigned char>(byte_set->data_[i]);
  58 | 1.52k |     if (byte == b) {
  59 |     6 |       return true;
  60 |     6 |     }
  61 | 1.52k |   }
  62 |   250 |   return false;
  63 |   256 | }
```
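For context, a hypothetical usage sketch of these byte helpers (assuming the mycpp runtime is linked in): they let generated code test single bytes without allocating one-character strings.

```cpp
// Hypothetical usage sketch; BigStr and StrFromC come from the mycpp runtime.
BigStr* s = StrFromC("hello");
int b = mylib::ByteAt(s, 0);                         // 104, i.e. 'h'
int eq = mylib::ByteEquals(b, StrFromC("h"));        // true
int vowel = mylib::ByteInSet(b, StrFromC("aeiou"));  // false: 'h' is not a vowel
```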
```
  64 |       |
  65 |       | BigStr* JoinBytes(List<int>* byte_list);
  66 |       |
  67 |       | // const int kStdout = 1;
  68 |       | // const int kStderr = 2;
  69 |       |
  70 |       | // void writeln(BigStr* s, int fd = kStdout);
  71 |       |
  72 |       | Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
  73 |       |
  74 |       | template <typename K, typename V>
  75 |   280 | void dict_erase(Dict<K, V>* haystack, K needle) {
  76 |   280 |   DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
  77 |       |
  78 |     0 |   int pos = haystack->hash_and_probe(needle);
  79 |   280 |   if (pos == kTooSmall) {
  80 |     0 |     return;
  81 |     0 |   }
  82 |   280 |   DCHECK(pos >= 0);
  83 |     0 |   int kv_index = haystack->index_->items_[pos];
  84 |   280 |   if (kv_index < 0) {
  85 |     2 |     return;
  86 |     2 |   }
  87 |       |
  88 |   278 |   int last_kv_index = haystack->len_ - 1;
  89 |   278 |   DCHECK(kv_index <= last_kv_index);
  90 |       |
  91 |       |   // Swap the target entry with the most recently inserted one before removing
  92 |       |   // it. This has two benefits.
  93 |       |   //   (1) It keeps the entry arrays compact. All valid entries occupy a
  94 |       |   //       contiguous region in memory.
  95 |       |   //   (2) It prevents holes in the entry arrays. This makes iterating over
  96 |       |   //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
  97 |       |   //       any extra validity state (like a bitset of unusable slots). This is
  98 |       |   //       important because keys and values won't always be pointers, so we
  99 |       |   //       can't rely on NULL checks for validity. We also can't wrap the slab
 100 |       |   //       entry types in some other type without modifying the garbage
 101 |       |   //       collector to trace through unmanaged types (or paying the extra
 102 |       |   //       allocations for the outer type).
 103 |   278 |   if (kv_index != last_kv_index) {
 104 |   142 |     K last_key = haystack->keys_->items_[last_kv_index];
 105 |   142 |     V last_val = haystack->values_->items_[last_kv_index];
 106 |   142 |     int last_pos = haystack->hash_and_probe(last_key);
 107 |   142 |     DCHECK(last_pos != kNotFound);
 108 |     0 |     haystack->keys_->items_[kv_index] = last_key;
 109 |   142 |     haystack->values_->items_[kv_index] = last_val;
 110 |   142 |     haystack->index_->items_[last_pos] = kv_index;
 111 |   142 |   }
 112 |       |
 113 |       |   // Zero out for GC.  These could be nullptr or 0
 114 |     0 |   haystack->keys_->items_[last_kv_index] = 0;
 115 |   278 |   haystack->values_->items_[last_kv_index] = 0;
 116 |   278 |   haystack->index_->items_[pos] = kDeletedEntry;
 117 |   278 |   haystack->len_--;
 118 |   278 |   DCHECK(haystack->len_ < haystack->capacity_);
 119 |   278 | }
```
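The comment above describes the classic "swap with last" deletion idiom. A minimal sketch of the same idea on a plain std::vector (illustration only; the real dict_erase additionally re-points the moved entry's slot in index_ and marks the vacated slot kDeletedEntry):

```cpp
#include <assert.h>
#include <vector>

// Swap-with-last removal: overwrite the dead entry with the most recently
// inserted one, then shrink. Entries stay contiguous, so iteration needs
// no per-slot validity state.
template <typename T>
void SwapRemove(std::vector<T>* v, int i) {
  int last = static_cast<int>(v->size()) - 1;
  if (i != last) {
    (*v)[i] = (*v)[last];
  }
  v->pop_back();
}

int main() {
  std::vector<int> v = {10, 20, 30, 40};
  SwapRemove(&v, 1);     // remove 20
  assert(v.size() == 3);
  assert(v[1] == 40);    // the last element moved into the hole
  return 0;
}
```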
Per-instantiation coverage for dict_erase (the tool repeats the same source lines once per template instantiation; the four duplicated listings are summarized here):

```
Instantiation (mangled name)                         | Template args      | Calls | Path taken
_ZN5mylib10dict_eraseIP6BigStriEEvP4DictIT_T0_ES4_   | <BigStr*, int>     |     2 | erased entry was last; no swap
_ZN5mylib10dict_eraseIP6BigStrS2_EEvP4DictIT_T0_ES4_ | <BigStr*, BigStr*> |     2 | erased entry was last; no swap
_ZN5mylib10dict_eraseIiiEEvP4DictIT_T0_ES2_          | <int, int>         |   274 | swap branch taken 142 times
_ZN5mylib10dict_eraseIiP6BigStrEEvP4DictIT_T0_ES4_   | <int, BigStr*>     |     2 | key not found; early return both times
```
```
 120 |       |
 121 |       | // NOTE: Can use OverAllocatedStr for all of these, rather than copying
 122 |       |
 123 |     4 | inline BigStr* hex_lower(int i) {
 124 |     4 |   char buf[kIntBufSize];
 125 |     4 |   int len = snprintf(buf, kIntBufSize, "%x", i);
 126 |     4 |   return ::StrFromC(buf, len);
 127 |     4 | }
 128 |       |
 129 |     4 | inline BigStr* hex_upper(int i) {
 130 |     4 |   char buf[kIntBufSize];
 131 |     4 |   int len = snprintf(buf, kIntBufSize, "%X", i);
 132 |     4 |   return ::StrFromC(buf, len);
 133 |     4 | }
 134 |       |
 135 |     4 | inline BigStr* octal(int i) {
 136 |     4 |   char buf[kIntBufSize];
 137 |     4 |   int len = snprintf(buf, kIntBufSize, "%o", i);
 138 |     4 |   return ::StrFromC(buf, len);
 139 |     4 | }
```
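These wrappers mirror Python's `'%x' % i`, `'%X' % i`, and `'%o' % i`. For illustration, hypothetical checks (assuming the runtime's str_equals comparison; this snippet is not part of the header):

```cpp
// Hypothetical checks of the formatting wrappers.
assert(str_equals(mylib::hex_lower(255), StrFromC("ff")));
assert(str_equals(mylib::hex_upper(255), StrFromC("FF")));
assert(str_equals(mylib::octal(8), StrFromC("10")));
```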
```
 140 |       |
 141 |       | // Abstract type: Union of LineReader and Writer
 142 |       | class File {
 143 |       |  public:
 144 |   146 |   File() {
 145 |   146 |   }
 146 |       |   // Writer
 147 |       |   virtual void write(BigStr* s) = 0;
 148 |       |   virtual void flush() = 0;
 149 |       |
 150 |       |   // Reader
 151 |       |   virtual BigStr* readline() = 0;
 152 |       |
 153 |       |   // Both
 154 |       |   virtual bool isatty() = 0;
 155 |       |   virtual void close() = 0;
 156 |       |
 157 |     0 |   static constexpr ObjHeader obj_header() {
 158 |     0 |     return ObjHeader::ClassFixed(field_mask(), sizeof(File));
 159 |     0 |   }
 160 |       |
 161 |    17 |   static constexpr uint32_t field_mask() {
 162 |    17 |     return kZeroMask;
 163 |    17 |   }
 164 |       | };
 165 |       |
 166 |       | // Wrap a FILE* for read and write
 167 |       | class CFile : public File {
 168 |       |  public:
 169 |    17 |   explicit CFile(FILE* f) : File(), f_(f) {
 170 |    17 |   }
 171 |       |   // Writer
 172 |       |   void write(BigStr* s) override;
 173 |       |   void flush() override;
 174 |       |
 175 |       |   // Reader
 176 |       |   BigStr* readline() override;
 177 |       |
 178 |       |   // Both
 179 |       |   bool isatty() override;
 180 |       |   void close() override;
 181 |       |
 182 |    17 |   static constexpr ObjHeader obj_header() {
 183 |    17 |     return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
 184 |    17 |   }
 185 |       |
 186 |    17 |   static constexpr uint32_t field_mask() {
 187 |       |     // not mutating field_mask because FILE* isn't a GC object
 188 |    17 |     return File::field_mask();
 189 |    17 |   }
 190 |       |
 191 |       |  private:
 192 |       |   FILE* f_;
 193 |       |
 194 |       |   DISALLOW_COPY_AND_ASSIGN(CFile)
 195 |       | };
 196 |       |
 197 |       | // Abstract File we can only read from.
 198 |       | // TODO: can we get rid of DCHECK() and reinterpret_cast?
 199 |       | class LineReader : public File {
 200 |       |  public:
 201 |     6 |   LineReader() : File() {
 202 |     6 |   }
 203 |     0 |   void write(BigStr* s) override {
 204 |     0 |     CHECK(false);  // should not happen
 205 |     0 |   }
 206 |     0 |   void flush() override {
 207 |     0 |     CHECK(false);  // should not happen
 208 |     0 |   }
 209 |       |
 210 |     0 |   static constexpr ObjHeader obj_header() {
 211 |     0 |     return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
 212 |     0 |   }
 213 |       |
 214 |     6 |   static constexpr uint32_t field_mask() {
 215 |     6 |     return kZeroMask;
 216 |     6 |   }
 217 |       | };
 218 |       |
 219 |       | class BufLineReader : public LineReader {
 220 |       |  public:
 221 |     6 |   explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
 222 |     6 |   }
 223 |       |   virtual BigStr* readline();
 224 |     2 |   virtual bool isatty() {
 225 |     2 |     return false;
 226 |     2 |   }
 227 |     2 |   virtual void close() {
 228 |     2 |   }
 229 |       |
 230 |       |   BigStr* s_;
 231 |       |   int pos_;
 232 |       |
 233 |     6 |   static constexpr ObjHeader obj_header() {
 234 |     6 |     return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
 235 |     6 |   }
 236 |       |
 237 |     6 |   static constexpr uint32_t field_mask() {
 238 |     6 |     return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
 239 |     6 |   }
 240 |       |
 241 |       |   DISALLOW_COPY_AND_ASSIGN(BufLineReader)
 242 |       | };
 243 |       |
 244 |       | extern LineReader* gStdin;
 245 |       |
 246 |     2 | inline LineReader* Stdin() {
 247 |     2 |   if (gStdin == nullptr) {
 248 |     2 |     gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
 249 |     2 |   }
 250 |     2 |   return gStdin;
 251 |     2 | }
```
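A hypothetical usage sketch of the reader side (assuming the mycpp runtime): BufLineReader serves lines from an in-memory string through the same interface that Stdin() exposes for the process's standard input.

```cpp
// Hypothetical usage sketch; Alloc<> and StrFromC come from the mycpp runtime.
BigStr* src = StrFromC("one\ntwo\n");
mylib::LineReader* r = Alloc<mylib::BufLineReader>(src);
BigStr* line = r->readline();  // "one\n"
line = r->readline();          // "two\n"
line = r->readline();          // "" - like Python, EOF yields an empty string
r->close();
```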
```
 252 |       |
 253 |       | LineReader* open(BigStr* path);
 254 |       |
 255 |       | // Abstract File we can only write to.
 256 |       | // TODO: can we get rid of DCHECK() and reinterpret_cast?
 257 |       | class Writer : public File {
 258 |       |  public:
 259 |   123 |   Writer() : File() {
 260 |   123 |   }
 261 |     0 |   BigStr* readline() override {
 262 |     0 |     CHECK(false);  // should not happen
 263 |     0 |   }
 264 |       |
 265 |     0 |   static constexpr ObjHeader obj_header() {
 266 |     0 |     return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
 267 |     0 |   }
 268 |       |
 269 |   123 |   static constexpr uint32_t field_mask() {
 270 |   123 |     return kZeroMask;
 271 |   123 |   }
 272 |       | };
 273 |       |
 274 |       | class MutableStr;
 275 |       |
 276 |       | class BufWriter : public Writer {
 277 |       |  public:
 278 |   123 |   BufWriter() : Writer(), str_(nullptr), len_(0) {
 279 |   123 |   }
 280 |       |   void write(BigStr* s) override;
 281 |       |   void write_spaces(int n);
 282 |     2 |   void clear() {  // Reuse this instance
 283 |     2 |     str_ = nullptr;
 284 |     2 |     len_ = 0;
 285 |     2 |     is_valid_ = true;
 286 |     2 |   }
 287 |     0 |   void close() override {
 288 |     0 |   }
 289 |     2 |   void flush() override {
 290 |     2 |   }
 291 |     2 |   bool isatty() override {
 292 |     2 |     return false;
 293 |     2 |   }
 294 |       |   BigStr* getvalue();  // part of cStringIO API
 295 |       |
 296 |       |   //
 297 |       |   // Low Level API for C++ usage only
 298 |       |   //
 299 |       |
 300 |       |   // Convenient API that avoids BigStr*
 301 |       |   void WriteConst(const char* c_string);
 302 |       |
 303 |       |   // Potentially resizes the buffer.
 304 |       |   void EnsureMoreSpace(int n);
 305 |       |   // After EnsureMoreSpace(42), you can write 42 more bytes safely.
 306 |       |   //
 307 |       |   // Note that if you call EnsureMoreSpace(42), write 5 bytes, and then
 308 |       |   // EnsureMoreSpace(42) again, the amount of additional space reserved is 47.
 309 |       |
 310 |       |   // (Similar to vector::reserve(n), but it takes an integer to ADD to the
 311 |       |   // capacity.)
 312 |       |
 313 |       |   uint8_t* LengthPointer();    // start + length
 314 |       |   uint8_t* CapacityPointer();  // start + capacity
 315 |       |   void SetLengthFrom(uint8_t* length_ptr);
 316 |       |
 317 |    79 |   int Length() {
 318 |    79 |     return len_;
 319 |    79 |   }
 320 |       |
 321 |       |   // Rewind to an earlier position; future writes start there
 322 |       |   void Truncate(int length);
 323 |       |
 324 |   123 |   static constexpr ObjHeader obj_header() {
 325 |   123 |     return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
 326 |   123 |   }
 327 |       |
 328 |   123 |   static constexpr unsigned field_mask() {
 329 |       |     // maskbit_v() because BufWriter has virtual methods
 330 |   123 |     return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
 331 |   123 |   }
 332 |       |
 333 |       |  private:
 334 |       |   void WriteRaw(char* s, int n);
 335 |       |
 336 |       |   MutableStr* str_;  // getvalue() turns this directly into Str*, no copying
 337 |       |   int len_;          // how many bytes have been written so far
 338 |       |   bool is_valid_ = true;  // It becomes invalid after getvalue() is called
 339 |       | };
```
340
341
extern Writer* gStdout;
342
343
13
inline Writer* Stdout() {
344
13
  if (gStdout == nullptr) {
345
5
    gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
346
5
    gHeap.RootGlobalVar(gStdout);
347
5
  }
348
13
  return gStdout;
349
13
}
350
351
extern Writer* gStderr;
352
353
2
inline Writer* Stderr() {
354
2
  if (gStderr == nullptr) {
355
2
    gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
356
2
    gHeap.RootGlobalVar(gStderr);
357
2
  }
358
2
  return gStderr;
359
2
}
360
361
class UniqueObjects {
362
  // Can't be expressed in typed Python because we don't have uint64_t for
363
  // addresses
364
365
 public:
366
0
  UniqueObjects() {
367
0
  }
368
0
  void Add(void* obj) {
369
0
  }
370
0
  int Get(void* obj) {
371
0
    return -1;
372
0
  }
373
374
0
  static constexpr ObjHeader obj_header() {
375
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
376
0
  }
377
378
  // SPECIAL CASE? We should never have a unique reference to an object?  So
379
  // don't bother tracing
380
0
  static constexpr uint32_t field_mask() {
381
0
    return kZeroMask;
382
0
  }
383
384
 private:
385
  // address -> small integer ID
386
  Dict<void*, int> addresses_;
387
};
388
389
}  // namespace mylib
390
391
#endif  // MYCPP_GC_MYLIB_H