/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef STR_TYPES_H |
2 | | #define STR_TYPES_H |
3 | | |
4 | | template <typename T> |
5 | | class List; |
6 | | |
7 | | class Str : public Obj { |
8 | | public: |
9 | | // Don't call this directly. Call AllocStr() instead, which calls this. |
10 | 2.93k | Str() : Obj(Tag::Opaque, kZeroMask, 0) { |
11 | | // log("GC Str()"); |
12 | 2.93k | } |
13 | | |
14 | 462 | char* data() { |
15 | 462 | return data_; |
16 | 462 | } |
17 | | |
18 | | void SetObjLenFromStrLen(int str_len); |
19 | | |
20 | | Str* index_(int i); |
21 | | |
22 | | int find(Str* needle, int pos = 0); |
23 | | int rfind(Str* needle); |
24 | | |
25 | | Str* slice(int begin); |
26 | | Str* slice(int begin, int end); |
27 | | |
28 | | Str* strip(); |
29 | | // Used for CommandSub in osh/cmd_exec.py |
30 | | Str* rstrip(Str* chars); |
31 | | Str* rstrip(); |
32 | | |
33 | | Str* lstrip(Str* chars); |
34 | | Str* lstrip(); |
35 | | |
36 | | Str* ljust(int width, Str* fillchar); |
37 | | Str* rjust(int width, Str* fillchar); |
38 | | |
39 | | bool startswith(Str* s); |
40 | | bool endswith(Str* s); |
41 | | |
42 | | Str* replace(Str* old, Str* new_str); |
43 | | Str* join(List<Str*>* items); |
44 | | |
45 | | List<Str*>* split(Str* sep); |
46 | | List<Str*>* splitlines(bool keep); |
47 | | |
48 | | bool isdigit(); |
49 | | bool isalpha(); |
50 | | bool isupper(); |
51 | | |
52 | | Str* upper(); |
53 | | Str* lower(); |
54 | | |
55 | | // Other options for fast comparison / hashing / string interning: |
56 | | // - unique_id_: an index into intern table. I don't think this works unless |
57 | | // you want to deal with rehashing all strings when the set grows. |
58 | | // - although note that the JVM has -XX:StringTableSize=FIXED, which means |
59 | | // - it can degrade into linked list performance |
60 | | // - Hashed strings become GLOBAL_STR(). Never deallocated. |
61 | | // - Hashed strings become part of the "large object space", which might be |
62 | | // managed by mark and sweep. This requires linked list overhead. |
63 | | // (doubly-linked?) |
64 | | // - Intern strings at GARBAGE COLLECTION TIME, with |
65 | | // LayoutForwarded::new_location_? Is this possible? Does it introduce |
66 | | // too much coupling between strings, hash tables, and GC? |
67 | | int hash_value_; |
68 | | char data_[1]; // flexible array |
69 | | |
70 | | private: |
71 | | int _strip_left_pos(); |
72 | | int _strip_right_pos(); |
73 | | |
74 | | DISALLOW_COPY_AND_ASSIGN(Str) |
75 | | }; |
76 | | |
77 | | constexpr int kStrHeaderSize = offsetof(Str, data_); |
78 | | |
79 | 7.77k | inline int len(const Str* s) { |
80 | | // NOTE(Jesse): Not sure if 0-length strings should be allowed, but we |
81 | | // currently don't hit this assertion, so I would think not? |
82 | 7.77k | assert(s->obj_len_ >= kStrHeaderSize - 1); |
83 | | |
84 | 0 | return s->obj_len_ - kStrHeaderSize - 1; |
85 | 7.77k | } |
86 | | |
87 | 31 | inline void Str::SetObjLenFromStrLen(int str_len) { |
88 | 31 | obj_len_ = kStrHeaderSize + str_len + 1; |
89 | | /* assert(len(this) == str_len); */ |
90 | 31 | } |
91 | | |
92 | | // Notes: |
93 | | // - sizeof("foo") == 4, for the NUL terminator. |
94 | | // - gc_heap_test.cc has a static_assert that GlobalStr matches Str. We don't |
95 | | // put it here because it triggers -Winvalid-offsetof |
96 | | |
97 | | // |
98 | | // String "Constructors". We need these because of the "flexible array" |
99 | | // pattern. I don't think "new Str()" can do that, and placement new would |
100 | | // require mycpp to generate 2 statements everywhere. |
101 | | // |
102 | | |
103 | 2.90k | inline Str* AllocStr(int len) { |
104 | 2.90k | int obj_len = kStrHeaderSize + len + 1; |
105 | 2.90k | void* place = gHeap.Allocate(obj_len); |
106 | 2.90k | auto s = new (place) Str(); |
107 | 2.90k | s->SetObjLen(obj_len); |
108 | 2.90k | return s; |
109 | 2.90k | } |
110 | | |
111 | | // Like AllocStr, but allocate more than you need, e.g. for snprintf() to write |
112 | | // into. CALLER IS RESPONSIBLE for calling s->SetObjLenFromStrLen() afterward! |
113 | 31 | inline Str* OverAllocatedStr(int len) { |
114 | 31 | int obj_len = kStrHeaderSize + len + 1; // NUL terminator |
115 | 31 | void* place = gHeap.Allocate(obj_len); |
116 | 31 | auto s = new (place) Str(); |
117 | 31 | return s; |
118 | 31 | } |
119 | | |
120 | 1.36k | inline Str* StrFromC(const char* data, int len) { |
121 | 1.36k | Str* s = AllocStr(len); |
122 | 1.36k | memcpy(s->data_, data, len); |
123 | 1.36k | assert(s->data_[len] == '\0'); // should be true because Heap was zeroed |
124 | | |
125 | 0 | return s; |
126 | 1.36k | } |
127 | | |
128 | 685 | inline Str* StrFromC(const char* data) { |
129 | 685 | return StrFromC(data, strlen(data)); |
130 | 685 | } |
131 | | |
132 | 2 | inline Str* CopyBufferIntoNewStr(char* buf) { |
133 | 2 | Str* s = StrFromC(buf); |
134 | 2 | return s; |
135 | 2 | } |
136 | | |
137 | 18 | inline Str* CopyBufferIntoNewStr(char* buf, unsigned int buf_len) { |
138 | 18 | Str* s = StrFromC(buf, buf_len); |
139 | 18 | return s; |
140 | 18 | } |
141 | | |
142 | | // NOTE: This iterates over bytes. |
143 | | class StrIter { |
144 | | public: |
145 | 121 | explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) { |
146 | | // We need this because StrIter is directly on the stack, and s_ could be |
147 | | // moved during iteration. |
148 | 121 | gHeap.PushRoot(reinterpret_cast<Obj**>(&s_)); |
149 | 121 | } |
150 | 121 | ~StrIter() { |
151 | 121 | gHeap.PopRoot(); |
152 | 121 | } |
153 | 150 | void Next() { |
154 | 150 | i_++; |
155 | 150 | } |
156 | 271 | bool Done() { |
157 | 271 | return i_ >= len_; |
158 | 271 | } |
159 | 151 | Str* Value() { // similar to index_() |
160 | | // TODO: create 256 GLOBAL_STR() and return those instead! |
161 | 151 | Str* result = AllocStr(1); |
162 | 151 | result->data_[0] = s_->data_[i_]; |
163 | | // assert(result->data_[1] == '\0'); |
164 | 151 | return result; |
165 | 151 | } |
166 | | |
167 | | private: |
168 | | Str* s_; |
169 | | int i_; |
170 | | int len_; |
171 | | |
172 | | DISALLOW_COPY_AND_ASSIGN(StrIter) |
173 | | }; |
174 | | |
175 | | bool maybe_str_equals(Str* left, Str* right); |
176 | | |
177 | | // TODO(Jesse): Where should this go? Certainly not here.. |
178 | | extern Str* kEmptyString; |
179 | | |
180 | | template <int N> |
181 | | class GlobalStr { |
182 | | // A template type with the same layout as Str with length N-1 (which needs a |
183 | | // buffer of size N). For initializing global constant instances. |
184 | | public: |
185 | | OBJ_HEADER() |
186 | | |
187 | | int hash_value_; |
188 | | const char data_[N]; |
189 | | |
190 | | DISALLOW_COPY_AND_ASSIGN(GlobalStr) |
191 | | }; |
192 | | |
// Defines a global string constant: a statically initialized GlobalStr named
// _<name> that holds `val`, plus a Str* named <name> that points at it.
//
// A macro is used as a workaround for the fact that it's impossible to have a
// constexpr initializer for char[N]. The "String Literals as Non-Type
// Template Parameters" feature of C++ 20 would have done it, but it's not
// there.
//
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
//
// sizeof(val) counts the NUL terminator, so the object length written here is
// kStrHeaderSize + string length + 1. The leading initializers presumably fill
// the fields declared by OBJ_HEADER() (confirm against its definition); the
// final two initialize hash_value_ and data_.

#define GLOBAL_STR(name, val)                                  \
  GlobalStr<sizeof(val)> _##name = {                           \
      Tag::Global, 0, kZeroMask, kStrHeaderSize + sizeof(val), \
      -1,          val};                                       \
  Str* name = reinterpret_cast<Str*>(&_##name);
205 | | |
206 | | #endif // STR_TYPES_H |