cpp

Coverage Report

Created: 2022-11-10 11:34

/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef MYCPP_GC_STR_H
2
#define MYCPP_GC_STR_H
3
4
template <typename T>
5
class List;
6
7
class Str : public Obj {
8
 public:
9
  // Don't call this directly.  Call NewStr() instead, which calls this.
10
2.93k
  Str() : Obj(Tag::Opaque, kZeroMask, 0) {
11
2.93k
  }
12
13
415
  char* data() {
14
415
    return data_;
15
415
  }
16
17
  void SetObjLenFromStrLen(int str_len);
18
19
  // Useful for getcwd() + PATH_MAX, gethostname() + HOSTNAME_MAX, etc.
20
3
  void SetObjLenFromC() {
21
3
    SetObjLenFromStrLen(strlen(data_));
22
3
  }
23
24
  Str* index_(int i);
25
26
  int find(Str* needle, int pos = 0);
27
  int rfind(Str* needle);
28
29
  Str* slice(int begin);
30
  Str* slice(int begin, int end);
31
32
  Str* strip();
33
  // Used for CommandSub in osh/cmd_exec.py
34
  Str* rstrip(Str* chars);
35
  Str* rstrip();
36
37
  Str* lstrip(Str* chars);
38
  Str* lstrip();
39
40
  Str* ljust(int width, Str* fillchar);
41
  Str* rjust(int width, Str* fillchar);
42
43
  bool startswith(Str* s);
44
  bool endswith(Str* s);
45
46
  Str* replace(Str* old, Str* new_str);
47
  Str* join(List<Str*>* items);
48
49
  List<Str*>* split(Str* sep);
50
  List<Str*>* splitlines(bool keep);
51
52
  bool isdigit();
53
  bool isalpha();
54
  bool isupper();
55
56
  Str* upper();
57
  Str* lower();
58
59
  // Other options for fast comparison / hashing / string interning:
60
  // - unique_id_: an index into intern table.  I don't think this works unless
61
  //   you want to deal with rehashing all strings when the set grows.
62
  //   - although note that the JVM has -XX:StringTableSize=FIXED, which means
63
  //   - it can degrade into linked list performance
64
  // - Hashed strings become GLOBAL_STR().  Never deallocated.
65
  // - Hashed strings become part of the "large object space", which might be
66
  //   managed by mark and sweep.  This requires linked list overhead.
67
  //   (doubly-linked?)
68
  // - Intern strings at GARBAGE COLLECTION TIME, with
69
  //   LayoutForwarded::new_location_?  Is this possible?  Does it introduce
70
  //   too much coupling between strings, hash tables, and GC?
71
  int hash_value_;
72
  char data_[1];  // flexible array
73
74
 private:
75
  int _strip_left_pos();
76
  int _strip_right_pos();
77
78
  DISALLOW_COPY_AND_ASSIGN(Str)
79
};
80
81
// Byte offset of data_ within Str, i.e. the size of everything that precedes
// the character buffer.  Object sizes in this file are computed as
// kStrHeaderSize + string length + 1 (the extra byte is the NUL terminator).
constexpr int kStrHeaderSize = offsetof(Str, data_);
82
83
36
inline void Str::SetObjLenFromStrLen(int str_len) {
84
36
  obj_len_ = kStrHeaderSize + str_len + 1;
85
36
}
86
87
8.29k
inline int len(const Str* s) {
88
8.29k
  assert(s->obj_len_ >= kStrHeaderSize - 1);
89
0
  return s->obj_len_ - kStrHeaderSize - 1;
90
8.29k
}
91
92
// Notes:
93
// - sizeof("foo") == 4, for the NUL terminator.
94
// - gc_heap_test.cc has a static_assert that GlobalStr matches Str.  We don't
95
// put it here because it triggers -Winvalid-offsetof
96
97
//
98
// String "Constructors".  We need these because of the "flexible array"
99
// pattern.  I don't think "new Str()" can do that, and placement new would
100
// require mycpp to generate 2 statements everywhere.
101
//
102
103
2.89k
// Allocates a Str with room for len bytes plus a NUL terminator and records
// the resulting object size.  The bytes themselves are not written here.
inline Str* NewStr(int len) {
  // RootingScope omitted for PASS THROUGH: the only allocation made here is
  // returned unconditionally.
  const int total_bytes = kStrHeaderSize + len + 1;  // header + bytes + NUL

  Str* str = new (gHeap.Allocate(total_bytes)) Str();
  str->SetObjLen(total_bytes);
  return str;
}
114
115
// Like NewStr, but allocate more than you need, e.g. for snprintf() to write
116
// into.  CALLER IS RESPONSIBLE for calling s->SetObjLenFromStrLen() afterward!
117
36
inline Str* OverAllocatedStr(int len) {
  // Capacity for len bytes plus the NUL terminator.
  const int capacity = kStrHeaderSize + len + 1;

  // obj_len_ is intentionally left as the constructor set it; the caller
  // fixes it up once the real string length is known.
  return new (gHeap.Allocate(capacity)) Str();
}
123
124
1.33k
// Builds a new Str holding a copy of the first len bytes at data.
inline Str* StrFromC(const char* data, int len) {
  // RootingScope omitted for PASS THROUGH
  Str* copy = NewStr(len);
  char* dest = copy->data_;
  memcpy(dest, data, len);

  // No terminator is written explicitly: the byte after the copied data is
  // expected to be NUL already because the heap memory was zeroed.
  assert(dest[len] == '\0');

  return copy;
}
132
133
668
// Builds a new Str from a NUL-terminated C string, measuring it with strlen().
inline Str* StrFromC(const char* data) {
  // RootingScope omitted for PASS THROUGH
  const auto byte_count = strlen(data);
  return StrFromC(data, byte_count);
}
137
138
// NOTE: This iterates over bytes.
139
class StrIter {
140
 public:
141
121
  explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) {
142
    // We need this because StrIter is directly on the stack, and s_ could be
143
    // moved during iteration.
144
121
    gHeap.PushRoot(reinterpret_cast<Obj**>(&s_));
145
121
  }
146
121
  ~StrIter() {
147
121
    gHeap.PopRoot();
148
121
  }
149
150
  void Next() {
150
150
    i_++;
151
150
  }
152
271
  bool Done() {
153
271
    return i_ >= len_;
154
271
  }
155
151
  Str* Value() {  // similar to index_()
156
    // TODO: create 256 GLOBAL_STR() and return those instead!
157
151
    Str* result = NewStr(1);
158
151
    result->data_[0] = s_->data_[i_];
159
    // assert(result->data_[1] == '\0');
160
151
    return result;
161
151
  }
162
163
 private:
164
  Str* s_;
165
  int i_;
166
  int len_;
167
168
  DISALLOW_COPY_AND_ASSIGN(StrIter)
169
};
170
171
// Declaration only; the definition is not in this header.
// NOTE(review): presumably an equality test, and the "maybe_" prefix suggests
// nullptr arguments are tolerated -- confirm against the definition.
bool maybe_str_equals(Str* left, Str* right);
172
173
// Declaration only; the object is defined outside this header.
// NOTE(review): presumably a shared zero-length Str -- confirm.
extern Str* kEmptyString;
174
175
template <int N>
176
class GlobalStr {
177
  // A template type with the same layout as Str with length N-1 (which needs a
178
  // buffer of size N).  For initializing global constant instances.
179
 public:
180
  OBJ_HEADER()
181
182
  int hash_value_;
183
  const char data_[N];
184
185
  DISALLOW_COPY_AND_ASSIGN(GlobalStr)
186
};
187
188
// This macro is a workaround for the fact that it's impossible to have a
189
// constexpr initializer for char[N].  The "String Literals as Non-Type
190
// Template Parameters" feature of C++ 20 would have done it, but it's not
191
// there.
192
//
193
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
194
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
195
196
// GLOBAL_STR(name, val) defines two things:
//   _name : a GlobalStr sized by sizeof(val), aggregate-initialized from the
//           string literal val
//   name  : a Str* that points at _name
//
// The size passed in the initializer is kStrHeaderSize + sizeof(val); since
// sizeof of a string literal counts the NUL terminator, this is the same
// "header + length + 1" formula used for heap-allocated strings.
// NOTE(review): the leading initializers fill the fields declared by
// OBJ_HEADER(), which is not visible here; -1 and val presumably land in
// hash_value_ and data_ -- confirm against OBJ_HEADER().
#define GLOBAL_STR(name, val)                     \
  GlobalStr<sizeof(val)> _##name = {              \
      Tag::Global, 0, kZeroMask,                  \
      kStrHeaderSize + sizeof(val), -1, val};     \
  Str* name = reinterpret_cast<Str*>(&_##name);
200
201
#endif  // MYCPP_GC_STR_H