cpp

Coverage Report

Created: 2022-11-10 11:34

/home/andy/git/oilshell/oil/mycpp/leaky_containers.cc
Line
Count
Source (jump to first uncovered line)
1
#include <ctype.h>  // isalpha(), isdigit()
2
3
#include "mycpp/runtime.h"
4
5
// Shared empty-string object.  StripAny(), Str::join() and Str::split() below
// return it instead of allocating a fresh zero-length Str.
GLOBAL_STR(kEmptyString, "");
6
7
3
// Returns the index of the first occurrence of `needle` at or after `pos`, or
// -1 if there is none.
//
// Only single-character needles are supported (Oil's usage).  A negative
// `pos` counts from the end of the string, as in Python, and is clamped to 0
// so the scan never reads before the start of data_.
int Str::find(Str* needle, int pos) {
  int len_ = len(this);
  assert(len(needle) == 1);  // Oil's usage
  char c = needle->data_[0];

  if (pos < 0) {
    pos += len_;
    if (pos < 0) {
      pos = 0;
    }
  }

  for (int i = pos; i < len_; ++i) {
    if (data_[i] == c) {
      return i;
    }
  }
  return -1;
}
18
19
3
// Returns the index of the last occurrence of the single-character `needle`,
// or -1 when it does not occur.
int Str::rfind(Str* needle) {
  assert(len(needle) == 1);  // Oil's usage
  const char target = needle->data_[0];

  // Walk backwards from the final byte.
  for (int i = len(this); i-- > 0;) {
    if (data_[i] == target) {
      return i;
    }
  }
  return -1;
}
30
31
49
bool Str::isdigit() {
32
49
  int n = len(this);
33
49
  if (n == 0) {
34
1
    return false;  // special case
35
1
  }
36
63
  for (int i = 0; i < n; ++i) {
37
48
    if (!::isdigit(data_[i])) {
38
33
      return false;
39
33
    }
40
48
  }
41
15
  return true;
42
48
}
43
44
34
bool Str::isalpha() {
45
34
  int n = len(this);
46
34
  if (n == 0) {
47
0
    return false;  // special case
48
0
  }
49
49
  for (int i = 0; i < n; ++i) {
50
36
    if (!::isalpha(data_[i])) {
51
21
      return false;
52
21
    }
53
36
  }
54
13
  return true;
55
34
}
56
57
// e.g. for osh/braces.py
58
4
bool Str::isupper() {
59
4
  int n = len(this);
60
4
  if (n == 0) {
61
1
    return false;  // special case
62
1
  }
63
6
  for (int i = 0; i < n; ++i) {
64
4
    if (!::isupper(data_[i])) {
65
1
      return false;
66
1
    }
67
4
  }
68
2
  return true;
69
3
}
70
71
10
// Returns true if this string begins with the bytes of `s`.  An `s` longer
// than this string can never match.
bool Str::startswith(Str* s) {
  const int prefix_len = len(s);
  return prefix_len <= len(this) && memcmp(data_, s->data_, prefix_len) == 0;
}
78
79
4
// Returns true if this string ends with the bytes of `s`.
bool Str::endswith(Str* s) {
  const int suffix_len = len(s);
  const int offset = len(this) - suffix_len;  // where the suffix would start
  if (offset < 0) {
    return false;  // suffix is longer than the string
  }
  return memcmp(data_ + offset, s->data_, suffix_len) == 0;
}
88
89
// Get a string with one character
90
52
Str* Str::index_(int i) {
91
52
  int len_ = len(this);
92
52
  if (i < 0) {
93
1
    i = len_ + i;
94
1
  }
95
52
  assert(i >= 0);
96
0
  assert(i < len_);  // had a problem here!
97
98
0
  Str* result = NewStr(1);
99
52
  result->data_[0] = data_[i];
100
52
  return result;
101
52
}
102
103
// s[begin:end]
104
317
Str* Str::slice(int begin, int end) {
105
317
  RootingScope _r;
106
107
317
  int len_ = len(this);
108
317
  begin = std::min(begin, len_);
109
317
  end = std::min(end, len_);
110
111
317
  assert(begin <= len_);
112
0
  assert(end <= len_);
113
114
317
  if (begin < 0) {
115
142
    begin = len_ + begin;
116
142
  }
117
118
317
  if (end < 0) {
119
144
    end = len_ + end;
120
144
  }
121
122
317
  begin = std::min(begin, len_);
123
317
  end = std::min(end, len_);
124
125
317
  begin = std::max(begin, 0);
126
317
  end = std::max(end, 0);
127
128
317
  assert(begin >= 0);
129
0
  assert(begin <= len_);
130
131
0
  assert(end >= 0);
132
0
  assert(end <= len_);
133
134
0
  int new_len = end - begin;
135
136
  // Tried to use std::clamp() here but we're not compiling against cxx-17
137
317
  new_len = std::max(new_len, 0);
138
317
  new_len = std::min(new_len, len_);
139
140
  /* printf("len(%d) [%d, %d] newlen(%d)\n",  len_, begin, end, new_len); */
141
142
317
  assert(new_len >= 0);
143
0
  assert(new_len <= len_);
144
145
0
  Str* result = NewStr(new_len);
146
317
  memcpy(result->data_, data_ + begin, new_len);
147
148
317
  gHeap.RootOnReturn(result);  // return value rooting
149
317
  return result;
150
317
}
151
152
// s[begin:]
153
3
Str* Str::slice(int begin) {
154
  // RootingScope omitted because PASS THROUGH
155
  // log("slice(begin) -> %d frames", gHeap.root_set_.NumFrames());
156
157
3
  int len_ = len(this);
158
3
  if (begin == 0) {
159
0
    return this;  // s[i:] where i == 0 is common in here docs
160
0
  }
161
3
  if (begin < 0) {
162
1
    begin = len_ + begin;
163
1
  }
164
3
  return slice(begin, len_);
165
3
}
166
167
// Used by 'help' builtin and --help, neither of which translate yet.
//
// Stub: asserts that `keep` is true and then calls NotImplemented(); no list
// is built or returned here.
168
169
0
List<Str*>* Str::splitlines(bool keep) {
170
0
  assert(keep == true);
171
0
  NotImplemented();
172
0
}
173
174
3
// Returns a newly allocated copy of this string with every byte passed
// through toupper().
Str* Str::upper() {
  int len_ = len(this);
  Str* result = NewStr(len_);
  char* buffer = result->data();
  for (int char_index = 0; char_index < len_; ++char_index) {
    // toupper() requires a value representable as unsigned char; a negative
    // plain char (bytes >= 0x80) is undefined behavior.
    buffer[char_index] = toupper(static_cast<unsigned char>(data_[char_index]));
  }
  return result;
}
183
184
3
// Returns a newly allocated copy of this string with every byte passed
// through tolower().
Str* Str::lower() {
  int len_ = len(this);
  Str* result = NewStr(len_);
  char* buffer = result->data();
  for (int char_index = 0; char_index < len_; ++char_index) {
    // tolower() requires a value representable as unsigned char; a negative
    // plain char (bytes >= 0x80) is undefined behavior.
    buffer[char_index] = tolower(static_cast<unsigned char>(data_[char_index]));
  }
  return result;
}
193
194
15
// Left-justifies this string in a field of `width` bytes, padding on the
// right with the single character in `fillchar`.  Returns `this` when the
// string is already longer than `width`.
Str* Str::ljust(int width, Str* fillchar) {
  assert(len(fillchar) == 1);

  const int text_len = len(this);
  if (width < text_len) {
    return this;
  }

  Str* padded = NewStr(width);
  const char fill = fillchar->data_[0];
  memcpy(padded->data_, data_, text_len);                  // original text
  memset(padded->data_ + text_len, fill, width - text_len);  // padding
  return padded;
}
211
212
15
// Right-justifies this string in a field of `width` bytes, padding on the
// left with the single character in `fillchar`.  Returns `this` when the
// string is already longer than `width`.
Str* Str::rjust(int width, Str* fillchar) {
  assert(len(fillchar) == 1);

  const int text_len = len(this);
  if (width < text_len) {
    return this;
  }

  const int pad_len = width - text_len;
  Str* padded = NewStr(width);
  const char fill = fillchar->data_[0];
  memset(padded->data_, fill, pad_len);             // padding
  memcpy(padded->data_ + pad_len, data_, text_len);  // original text
  return padded;
}
229
230
367
// Returns a copy of this string with every non-overlapping occurrence of
// `old` replaced by `new_str`, scanning left to right.
//
// - If nothing is replaced, `this` is returned instead of a copy.
// - An empty `old` follows Python: 'abc'.replace('', '-') == '-a-b-c-'.
//   (Previously this case never terminated, because the scan advanced by
//   len(old) == 0 bytes after every "match".)
//
// Positions are tracked as integer offsets rather than pointers so that no
// pointer before data_ is formed when `old` is longer than this string.
Str* Str::replace(Str* old, Str* new_str) {
  StackRoots _roots0({&old, &new_str});

  const char* old_data = old->data_;

  int this_len = len(this);
  int old_len = len(old);
  int new_str_len = len(new_str);

  if (old_len == 0) {
    if (new_str_len == 0) {
      return this;  // replacing '' with '' changes nothing
    }
    // new_str goes before every byte and once more at the end
    Str* result = NewStr(this_len + (this_len + 1) * new_str_len);
    StackRoots _roots1({&result});

    const char* new_data = new_str->data_;
    char* p_result = result->data_;
    for (int i = 0; i < this_len; ++i) {
      memcpy(p_result, new_data, new_str_len);
      p_result += new_str_len;
      *p_result++ = data_[i];
    }
    memcpy(p_result, new_data, new_str_len);
    return result;
  }

  // Last offset at which a full match could start; negative if `old` is
  // longer than this string, in which case both loops below are skipped.
  int last_possible = this_len - old_len;

  // First pass: Calculate number of replacements, and hence new length
  int replace_count = 0;
  int pos = 0;  // advances through 'this'
  while (pos <= last_possible) {
    if (memcmp(data_ + pos, old_data, old_len) == 0) {  // equal
      replace_count++;
      pos += old_len;
    } else {
      pos++;
    }
  }

  if (replace_count == 0) {
    return this;  // Reuse the string if there were no replacements
  }

  int result_len =
      this_len - (replace_count * old_len) + (replace_count * new_str_len);

  Str* result = NewStr(result_len);
  StackRoots _roots1({&result});

  const char* new_data = new_str->data_;

  // Second pass: Copy pieces into 'result'
  // Note: would be more efficient if we remembered the match positions
  char* p_result = result->data_;  // advances through 'result'
  pos = 0;                         // back to beginning
  while (pos <= last_possible) {
    if (memcmp(data_ + pos, old_data, old_len) == 0) {  // equal
      memcpy(p_result, new_data, new_str_len);          // Copy from new_str
      p_result += new_str_len;
      pos += old_len;
    } else {  // copy 1 byte
      *p_result++ = data_[pos++];
    }
  }
  memcpy(p_result, data_ + pos, this_len - pos);  // last part of string
  return result;
}
289
290
// Which end(s) of a string StripAny() trims.
enum class StripWhere {
  Left,
  Right,
  Both,
};

// Sentinel for the `what` argument meaning "strip any whitespace"; it lies
// outside the 0-255 range of byte values.
const int kWhitespace = -1;

// Returns true if byte `ch` should be stripped: any isspace() byte when
// `what` is kWhitespace, otherwise only the byte equal to `what`.
bool OmitChar(uint8_t ch, int what) {
  return (what == kWhitespace) ? (isspace(ch) != 0) : (what == ch);
}
305
306
// StripAny is modeled after CPython's do_strip() in stringobject.c, and can
307
// implement 6 functions:
308
//
309
//   strip / lstrip / rstrip
310
//   strip(char) / lstrip(char) / rstrip(char)
311
//
312
// Args:
313
//   where: which ends to strip from
314
//   what: kWhitespace, or an ASCII code 0-255
315
316
32
Str* StripAny(Str* s, StripWhere where, int what) {
317
32
  StackRoots _roots({&s});
318
319
32
  int length = len(s);
320
32
  const char* char_data = s->data();
321
322
32
  int i = 0;
323
32
  if (where != StripWhere::Right) {
324
46
    while (i < length && OmitChar(char_data[i], what)) {
325
26
      i++;
326
26
    }
327
20
  }
328
329
32
  int j = length;
330
32
  if (where != StripWhere::Left) {
331
51
    do {
332
51
      j--;
333
51
    } while (j >= i && OmitChar(char_data[j], what));
334
24
    j++;
335
24
  }
336
337
32
  if (i == j) {  // Optimization to reuse existing object
338
9
    return kEmptyString;
339
9
  }
340
341
23
  if (i == 0 && j == length) {  // nothing stripped
342
4
    return s;
343
4
  }
344
345
  // Note: makes a copy in leaky version, and will in GC version too
346
19
  int new_len = j - i;
347
19
  Str* result = NewStr(new_len);
348
19
  memcpy(result->data(), s->data() + i, new_len);
349
19
  return result;
350
23
}
351
352
12
// s.strip(): removes leading and trailing whitespace via StripAny().
Str* Str::strip() {
353
12
  return StripAny(this, StripWhere::Both, kWhitespace);
354
12
}
355
356
// Used for CommandSub in osh/cmd_exec.py
357
4
Str* Str::rstrip(Str* chars) {
358
4
  assert(len(chars) == 1);
359
0
  int c = chars->data_[0];
360
4
  return StripAny(this, StripWhere::Right, c);
361
4
}
362
363
8
// s.rstrip(): removes trailing whitespace via StripAny().
Str* Str::rstrip() {
364
8
  return StripAny(this, StripWhere::Right, kWhitespace);
365
8
}
366
367
4
// s.lstrip(chars): strips leading occurrences of the single character in
// `chars`.
Str* Str::lstrip(Str* chars) {
  assert(len(chars) == 1);
  // Widen through uint8_t so the value lands in 0-255.  A sign-extended
  // plain char would turn byte 0xFF into -1 (== kWhitespace) and would make
  // every other byte >= 0x80 negative, so it could never equal the uint8_t
  // that OmitChar() compares against.
  int c = static_cast<uint8_t>(chars->data_[0]);
  return StripAny(this, StripWhere::Left, c);
}
372
373
4
// s.lstrip(): removes leading whitespace via StripAny().
Str* Str::lstrip() {
374
4
  return StripAny(this, StripWhere::Left, kWhitespace);
375
4
}
376
377
43
// sep.join(items): concatenates `items`, inserting this string between
// consecutive elements.  An empty list yields kEmptyString.
Str* Str::join(List<Str*>* items) {
  auto self = this;
  StackRoots _roots({&self, &items});

  const int num_parts = len(items);
  if (num_parts == 0) {  // " ".join([]) == ""
    return kEmptyString;
  }

  // Total size = all the separators + all the items.
  const int sep_len = len(self);
  int total = sep_len * (num_parts - 1);
  for (int i = 0; i < num_parts; ++i) {
    total += len(items->index_(i));
  }

  Str* result = NewStr(total);
  char* out = result->data_;  // write cursor into 'result'

  for (int i = 0; i < num_parts; ++i) {
    if (i > 0 && sep_len > 0) {  // optimize common case of ''.join()
      memcpy(out, data_, sep_len);  // copy the separator
      out += sep_len;
    }

    Str* part = items->index_(i);
    const int part_len = len(part);
    memcpy(out, part->data_, part_len);  // copy the list item
    out += part_len;
  }

  return result;
}
413
414
// Returns the index of the first `needle` byte in
// haystack[starting_index, end_index), or `end_index` if it does not occur
// there.  An empty or inverted range also yields `end_index`.
int find_next(const char* haystack, int starting_index, int end_index,
              const char needle) {
  if (starting_index >= end_index) {
    return end_index;  // nothing to scan; also keeps memchr's length >= 0
  }
  const void* hit =
      memchr(haystack + starting_index, needle, end_index - starting_index);
  if (hit == nullptr) {
    return end_index;
  }
  return static_cast<int>(static_cast<const char*>(hit) - haystack);
}
425
426
27
// Allocates a new Str of `new_len` bytes and fills it with
// src[start_index, start_index + new_len).  `src` is rooted across the
// allocation; the requested range is only checked with assert().
Str* NewStrFromHeapStr(Str* src, int new_len, int start_index = 0) {
  StackRoots _roots({&src});

  Str* copy = NewStr(new_len);
  assert((start_index + new_len) <= len(src));

  const char* from = src->data_ + start_index;
  memcpy(copy->data_, from, new_len);
  return copy;
}
435
436
14
// s.split(sep) for a single-character separator.
//
// Every field between separators becomes a new Str, so adjacent separators
// produce empty fields, and a trailing separator produces a final
// kEmptyString entry.  An empty input yields [''], as in Python.
List<Str*>* Str::split(Str* sep) {
  assert(len(sep) == 1);  // we can only split one char
  char sep_char = sep->data_[0];

  auto self = this;
  List<Str*>* result = nullptr;

  StackRoots _roots({&self, &result});

  const int end = len(self);
  if (end == 0) {
    // weird case consistent with Python: ''.split(':') == ['']
    return NewList<Str*>({kEmptyString});
  }

  result = NewList<Str*>({});

  int pos = 0;  // start of the field currently being scanned
  for (;;) {
    // NOTE(Jesse): Perfect use case for BoundedBuffer
    int new_pos = find_next(self->data_, pos, end, sep_char);
    assert(new_pos >= pos);
    assert(new_pos <= end);

    // [pos, new_pos) is either a complete field or, when no separator was
    // found, the rest of the string.
    result->append(NewStrFromHeapStr(self, new_pos - pos, pos));
    if (new_pos == end) {
      break;
    }

    pos = new_pos + 1;  // skip over the separator
    if (pos >= end) {   // separator was at end of string
      result->append(kEmptyString);
      break;
    }
  }

  return result;
}