/home/andy/git/oilshell/oil/mycpp/leaky_containers.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <ctype.h> // isalpha(), isdigit() |
2 | | |
3 | | #include "mycpp/runtime.h" |
4 | | |
5 | | GLOBAL_STR(kEmptyString, ""); |
6 | | |
7 | 3 | int Str::find(Str* needle, int pos) { |
8 | 3 | int len_ = len(this); |
9 | 3 | assert(len(needle) == 1); // Oil's usage |
10 | 0 | char c = needle->data_[0]; |
11 | 12 | for (int i = pos; i < len_; ++i) { |
12 | 11 | if (data_[i] == c) { |
13 | 2 | return i; |
14 | 2 | } |
15 | 11 | } |
16 | 1 | return -1; |
17 | 3 | } |
18 | | |
19 | 3 | int Str::rfind(Str* needle) { |
20 | 3 | int len_ = len(this); |
21 | 3 | assert(len(needle) == 1); // Oil's usage |
22 | 0 | char c = needle->data_[0]; |
23 | 12 | for (int i = len_ - 1; i >= 0; --i) { |
24 | 11 | if (data_[i] == c) { |
25 | 2 | return i; |
26 | 2 | } |
27 | 11 | } |
28 | 1 | return -1; |
29 | 3 | } |
30 | | |
31 | 49 | bool Str::isdigit() { |
32 | 49 | int n = len(this); |
33 | 49 | if (n == 0) { |
34 | 1 | return false; // special case |
35 | 1 | } |
36 | 63 | for (int i = 0; i < n; ++i) { |
37 | 48 | if (!::isdigit(data_[i])) { |
38 | 33 | return false; |
39 | 33 | } |
40 | 48 | } |
41 | 15 | return true; |
42 | 48 | } |
43 | | |
44 | 34 | bool Str::isalpha() { |
45 | 34 | int n = len(this); |
46 | 34 | if (n == 0) { |
47 | 0 | return false; // special case |
48 | 0 | } |
49 | 49 | for (int i = 0; i < n; ++i) { |
50 | 36 | if (!::isalpha(data_[i])) { |
51 | 21 | return false; |
52 | 21 | } |
53 | 36 | } |
54 | 13 | return true; |
55 | 34 | } |
56 | | |
57 | | // e.g. for osh/braces.py |
58 | 4 | bool Str::isupper() { |
59 | 4 | int n = len(this); |
60 | 4 | if (n == 0) { |
61 | 1 | return false; // special case |
62 | 1 | } |
63 | 6 | for (int i = 0; i < n; ++i) { |
64 | 4 | if (!::isupper(data_[i])) { |
65 | 1 | return false; |
66 | 1 | } |
67 | 4 | } |
68 | 2 | return true; |
69 | 3 | } |
70 | | |
71 | 10 | bool Str::startswith(Str* s) { |
72 | 10 | int n = len(s); |
73 | 10 | if (n > len(this)) { |
74 | 0 | return false; |
75 | 0 | } |
76 | 10 | return memcmp(data_, s->data_, n) == 0; |
77 | 10 | } |
78 | | |
79 | 4 | bool Str::endswith(Str* s) { |
80 | 4 | int len_s = len(s); |
81 | 4 | int len_this = len(this); |
82 | 4 | if (len_s > len_this) { |
83 | 0 | return false; |
84 | 0 | } |
85 | 4 | const char* start = data_ + len_this - len_s; |
86 | 4 | return memcmp(start, s->data_, len_s) == 0; |
87 | 4 | } |
88 | | |
89 | | // Get a string with one character |
90 | 52 | Str* Str::index_(int i) { |
91 | 52 | int len_ = len(this); |
92 | 52 | if (i < 0) { |
93 | 1 | i = len_ + i; |
94 | 1 | } |
95 | 52 | assert(i >= 0); |
96 | 0 | assert(i < len_); // had a problem here! |
97 | | |
98 | 0 | Str* result = AllocStr(1); |
99 | 52 | result->data_[0] = data_[i]; |
100 | 52 | return result; |
101 | 52 | } |
102 | | |
103 | | // s[begin:end] |
104 | 317 | Str* Str::slice(int begin, int end) { |
105 | 317 | int len_ = len(this); |
106 | 317 | begin = std::min(begin, len_); |
107 | 317 | end = std::min(end, len_); |
108 | | |
109 | 317 | assert(begin <= len_); |
110 | 0 | assert(end <= len_); |
111 | | |
112 | 317 | if (begin < 0) { |
113 | 142 | begin = len_ + begin; |
114 | 142 | } |
115 | | |
116 | 317 | if (end < 0) { |
117 | 144 | end = len_ + end; |
118 | 144 | } |
119 | | |
120 | 317 | begin = std::min(begin, len_); |
121 | 317 | end = std::min(end, len_); |
122 | | |
123 | 317 | begin = std::max(begin, 0); |
124 | 317 | end = std::max(end, 0); |
125 | | |
126 | 317 | assert(begin >= 0); |
127 | 0 | assert(begin <= len_); |
128 | | |
129 | 0 | assert(end >= 0); |
130 | 0 | assert(end <= len_); |
131 | | |
132 | 0 | int new_len = end - begin; |
133 | | |
134 | | // Tried to use std::clamp() here but we're not compiling against cxx-17 |
135 | 317 | new_len = std::max(new_len, 0); |
136 | 317 | new_len = std::min(new_len, len_); |
137 | | |
138 | | /* printf("len(%d) [%d, %d] newlen(%d)\n", len_, begin, end, new_len); */ |
139 | | |
140 | 317 | assert(new_len >= 0); |
141 | 0 | assert(new_len <= len_); |
142 | | |
143 | 0 | Str* result = AllocStr(new_len); |
144 | 317 | memcpy(result->data_, data_+begin, new_len); |
145 | | |
146 | 317 | return result; |
147 | 317 | } |
148 | | |
149 | | // s[begin:] |
150 | 3 | Str* Str::slice(int begin) { |
151 | 3 | int len_ = len(this); |
152 | 3 | if (begin == 0) { |
153 | 0 | return this; // s[i:] where i == 0 is common in here docs |
154 | 0 | } |
155 | 3 | if (begin < 0) { |
156 | 1 | begin = len_ + begin; |
157 | 1 | } |
158 | 3 | return slice(begin, len_); |
159 | 3 | } |
160 | | |
161 | | // Used by 'help' builtin and --help, neither of which translate yet. |
162 | | |
163 | 0 | List<Str*>* Str::splitlines(bool keep) { |
164 | 0 | assert(keep == true); |
165 | 0 | NotImplemented(); |
166 | 0 | } |
167 | | |
168 | 3 | Str* Str::upper() { |
169 | 3 | int len_ = len(this); |
170 | 3 | Str* result = AllocStr(len_); |
171 | 3 | char* buffer = result->data(); |
172 | 19 | for (int char_index = 0; char_index < len_; ++char_index) { |
173 | 16 | buffer[char_index] = toupper(data_[char_index]); |
174 | 16 | } |
175 | 3 | return result; |
176 | 3 | } |
177 | | |
178 | 3 | Str* Str::lower() { |
179 | 3 | int len_ = len(this); |
180 | 3 | Str* result = AllocStr(len_); |
181 | 3 | char* buffer = result->data(); |
182 | 19 | for (int char_index = 0; char_index < len_; ++char_index) { |
183 | 16 | buffer[char_index] = tolower(data_[char_index]); |
184 | 16 | } |
185 | 3 | return result; |
186 | 3 | } |
187 | | |
188 | 15 | Str* Str::ljust(int width, Str* fillchar) { |
189 | 15 | assert(len(fillchar) == 1); |
190 | | |
191 | 0 | int len_ = len(this); |
192 | 15 | int num_fill = width - len_; |
193 | 15 | if (num_fill < 0) { |
194 | 5 | return this; |
195 | 10 | } else { |
196 | 10 | Str *result = AllocStr(width); |
197 | 10 | char c = fillchar->data_[0]; |
198 | 10 | memcpy(result->data_, data_, len_); |
199 | 21 | for (int i = len_; i < width; ++i) { |
200 | 11 | result->data_[i] = c; |
201 | 11 | } |
202 | 10 | return result; |
203 | 10 | } |
204 | 15 | } |
205 | | |
206 | 15 | Str* Str::rjust(int width, Str* fillchar) { |
207 | 15 | assert(len(fillchar) == 1); |
208 | | |
209 | 0 | int len_ = len(this); |
210 | 15 | int num_fill = width - len_; |
211 | 15 | if (num_fill < 0) { |
212 | 5 | return this; |
213 | 10 | } else { |
214 | 10 | Str *result = AllocStr(width); |
215 | 10 | char c = fillchar->data_[0]; |
216 | 21 | for (int i = 0; i < num_fill; ++i) { |
217 | 11 | result->data_[i] = c; |
218 | 11 | } |
219 | 10 | memcpy(result->data_ + num_fill, data_, len_); |
220 | 10 | return result; |
221 | 10 | } |
222 | 15 | } |
223 | | |
224 | 367 | Str* Str::replace(Str* old, Str* new_str) { |
225 | 367 | StackRoots _roots0({&old, &new_str}); |
226 | | |
227 | | // log("replacing %s with %s", old_data, new_str->data_); |
228 | 367 | const char* old_data = old->data_; |
229 | | |
230 | 367 | int this_len = len(this); |
231 | 367 | int old_len = len(old); |
232 | | |
233 | 367 | const char* last_possible = data_ + this_len - old_len; |
234 | | |
235 | 367 | const char* p_this = data_; // advances through 'this' |
236 | | |
237 | | // First pass: Calculate number of replacements, and hence new length |
238 | 367 | int replace_count = 0; |
239 | 46.7k | while (p_this <= last_possible) { |
240 | 46.3k | if (memcmp(p_this, old_data, old_len) == 0) { // equal |
241 | 381 | replace_count++; |
242 | 381 | p_this += old_len; |
243 | 45.9k | } else { |
244 | 45.9k | p_this++; |
245 | 45.9k | } |
246 | 46.3k | } |
247 | | |
248 | | // log("replacements %d", replace_count); |
249 | | |
250 | 367 | if (replace_count == 0) { |
251 | 3 | return this; // Reuse the string if there were no replacements |
252 | 3 | } |
253 | | |
254 | 364 | int new_str_len = len(new_str); |
255 | 364 | int result_len = |
256 | 364 | this_len - (replace_count * old_len) + (replace_count * new_str_len); |
257 | | |
258 | 364 | Str* result = AllocStr(result_len); |
259 | 364 | StackRoots _roots1({&result}); |
260 | | |
261 | 364 | const char* new_data = new_str->data_; |
262 | 364 | const size_t new_len = new_str_len; |
263 | | |
264 | | // Second pass: Copy pieces into 'result' |
265 | 364 | p_this = data_; // back to beginning |
266 | 364 | char* p_result = result->data_; // advances through 'result' |
267 | | |
268 | 46.6k | while (p_this <= last_possible) { |
269 | | // Note: would be more efficient if we remembered the match positions |
270 | 46.3k | if (memcmp(p_this, old_data, old_len) == 0) { // equal |
271 | 381 | memcpy(p_result, new_data, new_len); // Copy from new_str |
272 | 381 | p_result += new_len; |
273 | 381 | p_this += old_len; |
274 | 45.9k | } else { // copy 1 byte |
275 | 45.9k | *p_result = *p_this; |
276 | 45.9k | p_result++; |
277 | 45.9k | p_this++; |
278 | 45.9k | } |
279 | 46.3k | } |
280 | 364 | memcpy(p_result, p_this, data_ + this_len - p_this); // last part of string |
281 | 364 | return result; |
282 | 367 | } |
283 | | |
284 | | enum class StripWhere { |
285 | | Left, |
286 | | Right, |
287 | | Both, |
288 | | }; |
289 | | |
290 | | const int kWhitespace = -1; |
291 | | |
292 | 84 | bool OmitChar(uint8_t ch, int what) { |
293 | 84 | if (what == kWhitespace) { |
294 | 64 | return isspace(ch); |
295 | 64 | } else { |
296 | 20 | return what == ch; |
297 | 20 | } |
298 | 84 | } |
299 | | |
300 | | // StripAny is modeled after CPython's do_strip() in stringobject.c, and can |
301 | | // implement 6 functions: |
302 | | // |
303 | | // strip / lstrip / rstrip |
304 | | // strip(char) / lstrip(char) / rstrip(char) |
305 | | // |
306 | | // Args: |
307 | | // where: which ends to strip from |
308 | | // what: kWhitespace, or an ASCII code 0-255 |
309 | | |
310 | 32 | Str* StripAny(Str* s, StripWhere where, int what) { |
311 | 32 | StackRoots _roots({&s}); |
312 | | |
313 | 32 | int length = len(s); |
314 | 32 | const char* char_data = s->data(); |
315 | | |
316 | 32 | int i = 0; |
317 | 32 | if (where != StripWhere::Right) { |
318 | 46 | while (i < length && OmitChar(char_data[i], what)) { |
319 | 26 | i++; |
320 | 26 | } |
321 | 20 | } |
322 | | |
323 | 32 | int j = length; |
324 | 32 | if (where != StripWhere::Left) { |
325 | 51 | do { |
326 | 51 | j--; |
327 | 51 | } while (j >= i && OmitChar(char_data[j], what)); |
328 | 24 | j++; |
329 | 24 | } |
330 | | |
331 | 32 | if (i == j) { // Optimization to reuse existing object |
332 | 9 | return kEmptyString; |
333 | 9 | } |
334 | | |
335 | 23 | if (i == 0 && j == length) { // nothing stripped |
336 | 4 | return s; |
337 | 4 | } |
338 | | |
339 | | // Note: makes a copy in leaky version, and will in GC version too |
340 | 19 | int new_len = j - i; |
341 | 19 | Str* result = AllocStr(new_len); |
342 | 19 | memcpy(result->data(), s->data() + i, new_len); |
343 | 19 | return result; |
344 | 23 | } |
345 | | |
346 | 12 | Str* Str::strip() { |
347 | 12 | return StripAny(this, StripWhere::Both, kWhitespace); |
348 | 12 | } |
349 | | |
350 | | // Used for CommandSub in osh/cmd_exec.py |
351 | 4 | Str* Str::rstrip(Str* chars) { |
352 | 4 | assert(len(chars) == 1); |
353 | 0 | int c = chars->data_[0]; |
354 | 4 | return StripAny(this, StripWhere::Right, c); |
355 | 4 | } |
356 | | |
357 | 8 | Str* Str::rstrip() { |
358 | 8 | return StripAny(this, StripWhere::Right, kWhitespace); |
359 | 8 | } |
360 | | |
361 | 4 | Str* Str::lstrip(Str* chars) { |
362 | 4 | assert(len(chars) == 1); |
363 | 0 | int c = chars->data_[0]; |
364 | 4 | return StripAny(this, StripWhere::Left, c); |
365 | 4 | } |
366 | | |
367 | 4 | Str* Str::lstrip() { |
368 | 4 | return StripAny(this, StripWhere::Left, kWhitespace); |
369 | 4 | } |
370 | | |
371 | 43 | Str* Str::join(List<Str*>* items) { |
372 | 43 | auto self = this; |
373 | 43 | StackRoots _roots({&self, &items}); |
374 | | |
375 | 43 | int length = 0; |
376 | | |
377 | 43 | int num_parts = len(items); |
378 | 43 | if (num_parts == 0) { // " ".join([]) == "" |
379 | 3 | return kEmptyString; |
380 | 3 | } |
381 | 176 | for (int i = 0; i < num_parts; ++i) { |
382 | 136 | length += len(items->index_(i)); |
383 | 136 | } |
384 | | // add length of all the separators |
385 | 40 | int len_ = len(self); |
386 | 40 | length += len_ * (num_parts - 1); |
387 | | |
388 | 40 | Str* result = AllocStr(length); |
389 | 40 | char* p_result = result->data_; // advances through |
390 | | |
391 | 176 | for (int i = 0; i < num_parts; ++i) { |
392 | | // log("i %d", i); |
393 | 136 | if (i != 0 && len_) { // optimize common case of ''.join() |
394 | 8 | memcpy(p_result, data_, len_); // copy the separator |
395 | 8 | p_result += len_; |
396 | | // log("len_ %d", len_); |
397 | 8 | } |
398 | | |
399 | 136 | int n = len(items->index_(i)); |
400 | | // log("n: %d", n); |
401 | 136 | memcpy(p_result, items->index_(i)->data_, n); // copy the list item |
402 | 136 | p_result += n; |
403 | 136 | } |
404 | | |
405 | 40 | return result; |
406 | 43 | } |
407 | | |
408 | | int find_next(const char* haystack, int starting_index, int end_index, |
409 | 27 | const char needle) { |
410 | 27 | int result = end_index; |
411 | 63 | for (int i = starting_index; i < end_index; ++i) { |
412 | 59 | if (haystack[i] == needle) { |
413 | 23 | result = i; |
414 | 23 | break; |
415 | 23 | } |
416 | 59 | } |
417 | 27 | return result; |
418 | 27 | } |
419 | | |
420 | 27 | Str* NewStrFromHeapStr(Str* src, int new_len, int start_index = 0) { |
421 | 27 | StackRoots _roots({&src}); |
422 | | |
423 | 27 | Str* result = AllocStr(new_len); |
424 | 27 | assert((start_index + new_len) <= len(src)); |
425 | 0 | memcpy(result->data_, src->data_ + start_index, new_len); |
426 | | |
427 | 27 | return result; |
428 | 27 | } |
429 | | |
430 | 14 | List<Str*>* Str::split(Str* sep) { |
431 | 14 | assert(len(sep) == 1); // we can only split one char |
432 | 0 | char sep_char = sep->data_[0]; |
433 | | |
434 | 14 | auto self = this; |
435 | 14 | List<Str*>* result = nullptr; |
436 | | |
437 | 14 | StackRoots _roots({&self, &result}); |
438 | | |
439 | 14 | if (len(self) == 0) { |
440 | | // weird case consistent with Python: ''.split(':') == [''] |
441 | 2 | return NewList<Str*>({kEmptyString}); |
442 | 2 | } |
443 | | |
444 | 12 | result = NewList<Str*>({}); |
445 | | |
446 | 12 | int n = len(self); |
447 | 12 | int pos = 0; |
448 | 12 | int end = n; |
449 | | |
450 | 27 | while (true) { |
451 | | // NOTE(Jesse): Perfect use case for cursor |
452 | 27 | int new_pos = find_next(self->data_, pos, end, sep_char); |
453 | 27 | assert(new_pos >= pos); |
454 | 0 | assert(new_pos <= end); |
455 | | |
456 | 27 | if (new_pos == end) { |
457 | 4 | Str* to_push = NewStrFromHeapStr(self, end - pos, pos); |
458 | 4 | result->append(to_push); // StrFromC(self->data_+pos, end - pos)); // |
459 | | // rest of the string |
460 | 4 | break; |
461 | 4 | } |
462 | | |
463 | 23 | int new_len = new_pos - pos; |
464 | 23 | Str* to_push = NewStrFromHeapStr(self, new_len, pos); |
465 | 23 | result->append(to_push); |
466 | | |
467 | 23 | pos = new_pos + 1; |
468 | 23 | if (pos >= end) { // separator was at end of string |
469 | 8 | result->append(kEmptyString); |
470 | 8 | break; |
471 | 8 | } |
472 | 23 | } |
473 | | |
474 | 12 | return result; |
475 | 14 | } |