cpp

Coverage Report

Created: 2022-09-21 22:22

/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef STR_TYPES_H
2
#define STR_TYPES_H
3
4
template <typename T>
5
class List;
6
7
class Str : public Obj {
8
 public:
9
  // Don't call this directly.  Call AllocStr() instead, which calls this.
10
2.93k
  Str() : Obj(Tag::Opaque, kZeroMask, 0) {
11
    // log("GC Str()");
12
2.93k
  }
13
14
462
  char* data() {
15
462
    return data_;
16
462
  }
17
18
  void SetObjLenFromStrLen(int str_len);
19
20
  Str* index_(int i);
21
22
  int find(Str* needle, int pos = 0);
23
  int rfind(Str* needle);
24
25
  Str* slice(int begin);
26
  Str* slice(int begin, int end);
27
28
  Str* strip();
29
  // Used for CommandSub in osh/cmd_exec.py
30
  Str* rstrip(Str* chars);
31
  Str* rstrip();
32
33
  Str* lstrip(Str* chars);
34
  Str* lstrip();
35
36
  Str* ljust(int width, Str* fillchar);
37
  Str* rjust(int width, Str* fillchar);
38
39
  bool startswith(Str* s);
40
  bool endswith(Str* s);
41
42
  Str* replace(Str* old, Str* new_str);
43
  Str* join(List<Str*>* items);
44
45
  List<Str*>* split(Str* sep);
46
  List<Str*>* splitlines(bool keep);
47
48
  bool isdigit();
49
  bool isalpha();
50
  bool isupper();
51
52
  Str* upper();
53
  Str* lower();
54
55
  // Other options for fast comparison / hashing / string interning:
56
  // - unique_id_: an index into intern table.  I don't think this works unless
57
  //   you want to deal with rehashing all strings when the set grows.
58
  //   - although note that the JVM has -XX:StringTableSize=FIXED, which means
59
  //   - it can degrade into linked list performance
60
  // - Hashed strings become GLOBAL_STR().  Never deallocated.
61
  // - Hashed strings become part of the "large object space", which might be
62
  //   managed by mark and sweep.  This requires linked list overhead.
63
  //   (doubly-linked?)
64
  // - Intern strings at GARBAGE COLLECTION TIME, with
65
  //   LayoutForwarded::new_location_?  Is this possible?  Does it introduce
66
  //   too much coupling between strings, hash tables, and GC?
67
  int hash_value_;
68
  char data_[1];  // flexible array
69
70
 private:
71
  int _strip_left_pos();
72
  int _strip_right_pos();
73
74
  DISALLOW_COPY_AND_ASSIGN(Str)
75
};
76
77
// Byte offset of Str::data_ inside a Str, i.e. the number of bytes that come
// before the inline character data.
constexpr int kStrHeaderSize = offsetof(Str, data_);
78
79
7.77k
inline int len(const Str* s) {
80
  // NOTE(Jesse): Not sure if 0-length strings should be allowed, but we
81
  // currently don't hit this assertion, so I would think not?
82
7.77k
  assert(s->obj_len_ >= kStrHeaderSize - 1);
83
84
0
  return s->obj_len_ - kStrHeaderSize - 1;
85
7.77k
}
86
87
31
inline void Str::SetObjLenFromStrLen(int str_len) {
88
31
  obj_len_ = kStrHeaderSize + str_len + 1;
89
  /* assert(len(this) == str_len); */
90
31
}
91
92
// Notes:
93
// - sizeof("foo") == 4, for the NUL terminator.
94
// - gc_heap_test.cc has a static_assert that GlobalStr matches Str.  We don't
95
// put it here because it triggers -Winvalid-offsetof
96
97
//
98
// String "Constructors".  We need these because of the "flexible array"
99
// pattern.  I don't think "new Str()" can do that, and placement new would
100
// require mycpp to generate 2 statements everywhere.
101
//
102
103
2.90k
// Allocates a Str with room for `len` bytes plus a NUL terminator and records
// the resulting object length on it.  The payload is not written here.
inline Str* AllocStr(int len) {
  const int total_bytes = kStrHeaderSize + len + 1;
  void* memory = gHeap.Allocate(total_bytes);

  // Construct the header in place; the character data follows it.
  Str* str = new (memory) Str();
  str->SetObjLen(total_bytes);
  return str;
}
110
111
// Like AllocStr, but allocate more than you need, e.g. for snprintf() to write
112
// into.  CALLER IS RESPONSIBLE for calling s->SetObjLenFromStrLen() afterward!
113
31
inline Str* OverAllocatedStr(int len) {
114
31
  int obj_len = kStrHeaderSize + len + 1;  // NUL terminator
115
31
  void* place = gHeap.Allocate(obj_len);
116
31
  auto s = new (place) Str();
117
31
  return s;
118
31
}
119
120
1.36k
// Creates a new Str holding a copy of the first `len` bytes at `data`.
inline Str* StrFromC(const char* data, int len) {
  Str* result = AllocStr(len);
  char* dest = result->data_;
  memcpy(dest, data, len);

  // No terminator is written explicitly: the byte after the payload should
  // already be NUL because the heap was zeroed.
  assert(dest[len] == '\0');

  return result;
}
127
128
685
// Creates a new Str from a NUL-terminated C string; the length is measured
// with strlen().
inline Str* StrFromC(const char* data) {
  const int num_bytes = strlen(data);  // size_t narrowed to int, as before
  return StrFromC(data, num_bytes);
}
131
132
2
// Copies a NUL-terminated buffer into a freshly allocated Str.
inline Str* CopyBufferIntoNewStr(char* buf) {
  return StrFromC(buf);
}
136
137
18
// Copies the first buf_len bytes of buf into a freshly allocated Str.
inline Str* CopyBufferIntoNewStr(char* buf, unsigned int buf_len) {
  return StrFromC(buf, buf_len);
}
141
142
// NOTE: This iterates over bytes.
143
class StrIter {
144
 public:
145
121
  explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) {
146
    // We need this because StrIter is directly on the stack, and s_ could be
147
    // moved during iteration.
148
121
    gHeap.PushRoot(reinterpret_cast<Obj**>(&s_));
149
121
  }
150
121
  ~StrIter() {
151
121
    gHeap.PopRoot();
152
121
  }
153
150
  void Next() {
154
150
    i_++;
155
150
  }
156
271
  bool Done() {
157
271
    return i_ >= len_;
158
271
  }
159
151
  Str* Value() {  // similar to index_()
160
    // TODO: create 256 GLOBAL_STR() and return those instead!
161
151
    Str* result = AllocStr(1);
162
151
    result->data_[0] = s_->data_[i_];
163
    // assert(result->data_[1] == '\0');
164
151
    return result;
165
151
  }
166
167
 private:
168
  Str* s_;
169
  int i_;
170
  int len_;
171
172
  DISALLOW_COPY_AND_ASSIGN(StrIter)
173
};
174
175
bool maybe_str_equals(Str* left, Str* right);
176
177
// TODO(Jesse): Where should this go?  Certainly not here..
178
extern Str* kEmptyString;
179
180
template <int N>
181
class GlobalStr {
182
  // A template type with the same layout as Str with length N-1 (which needs a
183
  // buffer of size N).  For initializing global constant instances.
184
 public:
185
  OBJ_HEADER()
186
187
  int hash_value_;
188
  const char data_[N];
189
190
  DISALLOW_COPY_AND_ASSIGN(GlobalStr)
191
};
192
193
// This macro is a workaround for the fact that it's impossible to have a
194
// constexpr initializer for char[N].  The "String Literals as Non-Type
195
// Template Parameters" feature of C++ 20 would have done it, but it's not
196
// there.
197
//
198
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
199
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
200
201
// GLOBAL_STR(name, val) defines two globals: a GlobalStr<sizeof(val)> called
// _<name> that holds the literal, and a Str* called <name> that points at it.
// sizeof(val) counts the literal's NUL terminator, so the length expression
// below follows the same header + length + 1 formula as AllocStr().
#define GLOBAL_STR(name, val)             \
  GlobalStr<sizeof(val)> _##name = {      \
      Tag::Global,                        \
      0,                                  \
      kZeroMask,                          \
      kStrHeaderSize + sizeof(val),       \
      -1,                                 \
      val};                               \
  Str* name = reinterpret_cast<Str*>(&_##name);
205
206
#endif  // STR_TYPES_H