cpp

Coverage Report

Created: 2023-09-13 01:07

/home/andy/git/oilshell/oil/mycpp/gc_builtins.cc
Line
Count
Source (jump to first uncovered line)
1
#include <ctype.h>  // isspace()
2
#include <errno.h>  // errno
3
#include <stdio.h>  // required for readline/readline.h (man readline)
4
5
#include "_build/detected-cpp-config.h"
6
7
#ifdef HAVE_READLINE
8
  #include <readline/readline.h>
9
#endif
10
11
#include "mycpp/runtime.h"
12
13
// forward decl
14
namespace py_readline {
15
Str* readline(Str*);
16
}
17
18
// Translation of Python's print().
19
129
void print(Str* s) {
20
129
  fputs(s->data_, stdout);  // print until first NUL
21
129
  fputc('\n', stdout);
22
129
}
23
24
23
// Decimal string form of an int, formatted with "%d".
//
// The result is allocated with room for kIntBufSize bytes and then shrunk to
// the number of characters snprintf() reports.
Str* str(int i) {
  Str* result = OverAllocatedStr(kIntBufSize);
  const int num_chars = snprintf(result->data(), kIntBufSize, "%d", i);
  result->MaybeShrink(num_chars);
  return result;
}
30
31
// TODO:
32
// - This could use a fancy exact algorithm, not libc
33
// - Does libc depend on locale?
34
4
Str* str(double d) {
35
4
  char buf[64];  // overestimate, but we use snprintf() to be safe
36
37
  // Problem:
38
  // %f prints 3.0000000 and 3.500000
39
  // %g prints 3 and 3.5
40
  //
41
  // We want literal syntax to indicate float, so add '.'
42
43
4
  int n = sizeof(buf) - 2;  // in case we add '.0'
44
4
  int length = snprintf(buf, n, "%g", d);
45
46
4
  if (!strchr(buf, '.')) {  // 12345 -> 12345.0
47
2
    buf[length] = '.';
48
2
    buf[length + 1] = '0';
49
2
    buf[length + 2] = '\0';
50
2
  }
51
52
4
  return StrFromC(buf);
53
4
}
54
55
// Do we need this API?  Or is mylib.InternedStr(Str* s, int start, int end)
56
// better for getting values out of Token.line without allocating?
57
//
58
// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
59
//
60
// Also for SmallStr, we don't care about interning.  Only for HeapStr.
61
62
2
Str* intern(Str* s) {
63
  // TODO: put in table gHeap.interned_
64
2
  return s;
65
2
}
66
67
// Print quoted string.  TODO: use C-style strings (YSTR)
68
56
Str* repr(Str* s) {
69
  // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
70
56
  int n = len(s);
71
56
  int upper_bound = n * 4 + 2;
72
73
56
  Str* result = OverAllocatedStr(upper_bound);
74
75
  // Single quote by default.
76
56
  char quote = '\'';
77
56
  if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
78
10
    quote = '"';
79
10
  }
80
56
  char* p = result->data_;
81
82
  // From PyString_Repr()
83
56
  *p++ = quote;
84
474
  for (int i = 0; i < n; ++i) {
85
418
    char c = s->data_[i];
86
418
    if (c == quote || c == '\\') {
87
0
      *p++ = '\\';
88
0
      *p++ = c;
89
418
    } else if (c == '\t') {
90
7
      *p++ = '\\';
91
7
      *p++ = 't';
92
411
    } else if (c == '\n') {
93
14
      *p++ = '\\';
94
14
      *p++ = 'n';
95
397
    } else if (c == '\r') {
96
7
      *p++ = '\\';
97
7
      *p++ = 'r';
98
390
    } else if (isprint(c)) {
99
372
      *p++ = c;
100
372
    } else {  // Unprintable is \xff
101
18
      sprintf(p, "\\x%02x", c & 0xff);
102
18
      p += 4;
103
18
    }
104
418
  }
105
56
  *p++ = quote;
106
56
  *p = '\0';
107
108
56
  int length = p - result->data_;
109
56
  result->MaybeShrink(length);
110
56
  return result;
111
56
}
112
113
// Helper for str_to_int() that doesn't use exceptions.
//
// Parses the NUL-terminated buffer `s`, whose length is `length` bytes, as an
// integer in the given `base` using strtol().  Leading whitespace and an
// optional sign are handled by strtol(); trailing whitespace is accepted.
//
// Returns true and stores the value in *result on success.  Returns false and
// leaves *result untouched when:
// - the string is empty, or strtol() could not convert anything
//   (e.g. whitespace only, or an invalid base)
// - the value does not fit in an int
// - anything other than whitespace follows the number
bool StringToInteger(const char* s, int length, int base, int* result) {
  if (length == 0) {
    return false;  // empty string isn't a valid integer
  }

  // Empirically sizeof(int), sizeof(long), sizeof(long long) is 4 4 8 on
  // 32-bit and 4 8 8 on 64-bit.  mycpp deals with C++ int, so the value is
  // parsed as a long and then range-checked against int below.

  // Start at `s` so that "strtol() didn't touch pos" reads as "nothing
  // consumed" rather than as an uninitialized pointer.
  char* pos = const_cast<char*>(s);

  errno = 0;  // strtol() only sets errno on failure; it never clears it
  long v = strtol(s, &pos, base);

  if (pos == s) {
    return false;  // no digits were consumed
  }
  if (errno != 0) {
    return false;  // overflow or underflow of long (ERANGE), or bad base
  }
  // It must ALSO fit in an int, not just a long (which may be 64 bits).
  if (v < INT_MIN || v > INT_MAX) {
    return false;
  }

  // Anything strtol() did not consume must be whitespace.
  const char* end = s + length;
  while (pos < end) {
    // Cast: isspace() on a negative plain char is undefined behavior.
    if (!isspace(static_cast<unsigned char>(*pos))) {
      return false;  // Trailing non-space
    }
    pos++;
  }

  *result = static_cast<int>(v);
  return true;
}
162
163
8
// Convert a string to an int in the given base.
// Throws ValueError when StringToInteger() rejects the input.
int to_int(Str* s, int base) {
  int value;
  const bool ok = StringToInteger(s->data_, len(s), base, &value);
  if (!ok) {
    throw Alloc<ValueError>();
  }
  return value;
}
171
172
31
// Convert a base-10 string to an int.
// Throws ValueError when StringToInteger() rejects the input.
int to_int(Str* s) {
  int value;
  const bool ok = StringToInteger(s->data_, len(s), 10, &value);
  if (!ok) {
    throw Alloc<ValueError>();
  }
  return value;
}
180
181
832
// Build a one-byte string whose only byte is `i`.
Str* chr(int i) {
  // NOTE: i should be less than 256, in which we could return an object from
  // GLOBAL_STR() pool, like StrIter
  Str* one_byte = NewStr(1);
  one_byte->data_[0] = i;
  return one_byte;
}
188
189
836
// Value of the single byte in a one-byte string, in the range 0..255.
int ord(Str* s) {
  assert(len(s) == 1);
  // Go through uint8_t so a byte with the high bit set doesn't come out as a
  // negative number like -127.
  return static_cast<uint8_t>(s->data_[0]);
}
195
196
4
// Truthiness of a string: false only when its length is zero.
bool to_bool(Str* s) {
  const int n = len(s);
  return n != 0;
}
199
200
8
// Convert an int to a double.
double to_float(int i) {
  const double as_double = i;  // implicit int -> double conversion
  return as_double;
}
203
204
22
// Parse a string as a double using strtod().
//
// Leading whitespace is skipped by strtod(); trailing whitespace is accepted,
// the same way StringToInteger() treats it.
//
// Throws ValueError when:
// - strtod() reports overflow or underflow (ERANGE)
// - no characters could be converted
// - anything other than whitespace follows the number (e.g. "1.5abc")
double to_float(Str* s) {
  char* begin = s->data_;
  const char* str_end = begin + len(s);
  char* parsed_end = begin;  // mutated by strtod

  errno = 0;
  double result = strtod(begin, &parsed_end);

  if (errno == ERANGE) {  // error: overflow or underflow
    throw Alloc<ValueError>();
  }
  if (parsed_end == begin) {  // error: not a floating point number
    throw Alloc<ValueError>();
  }

  // Previously any suffix was silently ignored, so "1.5abc" parsed as 1.5.
  for (const char* p = parsed_end; p < str_end; ++p) {
    // Cast: isspace() on a negative plain char is undefined behavior.
    if (!isspace(static_cast<unsigned char>(*p))) {
      throw Alloc<ValueError>();  // error: trailing non-space
    }
  }

  return result;
}
222
223
// e.g. ('a' in 'abc')
224
84
bool str_contains(Str* haystack, Str* needle) {
225
  // Common case
226
84
  if (len(needle) == 1) {
227
72
    return memchr(haystack->data_, needle->data_[0], len(haystack));
228
72
  }
229
230
12
  if (len(needle) > len(haystack)) {
231
2
    return false;
232
2
  }
233
234
  // General case. TODO: We could use a smarter substring algorithm.
235
236
10
  const char* end = haystack->data_ + len(haystack);
237
10
  const char* last_possible = end - len(needle);
238
10
  const char* p = haystack->data_;
239
240
22
  while (p <= last_possible) {
241
20
    if (memcmp(p, needle->data_, len(needle)) == 0) {
242
8
      return true;
243
8
    }
244
12
    p++;
245
12
  }
246
2
  return false;
247
10
}
248
249
52
// Concatenate `times` copies of `s` into a new string.
// A count of zero or less yields kEmptyString.
Str* str_repeat(Str* s, int times) {
  // Python allows -1 too, and Oil used that
  if (times <= 0) {
    return kEmptyString;
  }

  const int piece_len = len(s);
  Str* result = NewStr(piece_len * times);

  char* out = result->data_;
  for (int remaining = times; remaining > 0; --remaining) {
    memcpy(out, s->data_, piece_len);
    out += piece_len;
  }
  return result;
}
265
266
// for os_path.join()
267
// NOTE(Jesse): Perfect candidate for BoundedBuffer
268
20
Str* str_concat3(Str* a, Str* b, Str* c) {
269
20
  int a_len = len(a);
270
20
  int b_len = len(b);
271
20
  int c_len = len(c);
272
273
20
  int new_len = a_len + b_len + c_len;
274
20
  Str* result = NewStr(new_len);
275
20
  char* pos = result->data_;
276
277
20
  memcpy(pos, a->data_, a_len);
278
20
  pos += a_len;
279
280
20
  memcpy(pos, b->data_, b_len);
281
20
  pos += b_len;
282
283
20
  memcpy(pos, c->data_, c_len);
284
285
20
  assert(pos + c_len == result->data_ + new_len);
286
287
0
  return result;
288
20
}
289
290
71
// Returns a new string holding the bytes of a followed by the bytes of b.
Str* str_concat(Str* a, Str* b) {
  const int a_len = len(a);
  const int b_len = len(b);

  Str* result = NewStr(a_len + b_len);

  char* out = result->data_;
  memcpy(out, a->data_, a_len);
  out += a_len;
  memcpy(out, b->data_, b_len);

  return result;
}
302
303
//
304
// Comparators
305
//
306
307
3.32k
// Byte-wise string equality.  A null pointer only equals another null
// pointer.
bool str_equals(Str* left, Str* right) {
  // Fast path for identical strings.  String deduplication during GC could
  // make this more likely.  String interning could guarantee it, allowing us
  // to remove memcmp().
  if (left == right) {
    return true;
  }

  if (left == nullptr || right == nullptr) {
    return false;
  }

  // Different lengths can never be equal, so skip the memcmp().
  if (left->len_ != right->len_) {
    return false;
  }

  return memcmp(left->data_, right->data_, left->len_) == 0;
}
328
329
10
// Equality for strings that may be None (nullptr).
bool maybe_str_equals(Str* left, Str* right) {
  if (left == nullptr || right == nullptr) {
    // None == None is true; None vs. a Str* is false.
    return left == nullptr && right == nullptr;
  }

  return str_equals(left, right);
}
340
341
// TODO(Jesse): Make an inline version of this
342
2.75k
bool are_equal(Str* left, Str* right) {
343
2.75k
  return str_equals(left, right);
344
2.75k
}
345
346
// TODO(Jesse): Make an inline version of this
//
// Integer overload of are_equal().
bool are_equal(int left, int right) {
  const bool same = (left == right);
  return same;
}
350
351
// TODO(Jesse): Make an inline version of this
//
// Key comparison for integer keys.
bool keys_equal(int left, int right) {
  const bool same = (left == right);
  return same;
}
355
356
// TODO(Jesse): Make an inline version of this
357
2.65k
bool keys_equal(Str* left, Str* right) {
358
2.65k
  return are_equal(left, right);
359
2.65k
}
360
361
12
// Element-wise equality for (Str*, int) tuples.
bool are_equal(Tuple2<Str*, int>* t1, Tuple2<Str*, int>* t2) {
  // Compare the strings first; the ints are only looked at if they match.
  if (!are_equal(t1->at0(), t2->at0())) {
    return false;
  }
  return t1->at1() == t2->at1();
}
366
367
8
// Element-wise equality for (int, int) tuples.
bool are_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
  if (t1->at0() != t2->at0()) {
    return false;
  }
  return t1->at1() == t2->at1();
}
370
371
8
// Key comparison for (int, int) tuple keys; forwards to are_equal().
bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
  const bool same = are_equal(t1, t2);
  return same;
}
374
375
8
// Key comparison for (Str*, int) tuple keys; forwards to are_equal().
bool keys_equal(Tuple2<Str*, int>* t1, Tuple2<Str*, int>* t2) {
  const bool same = are_equal(t1, t2);
  return same;
}
378
379
249
bool str_equals0(const char* c_string, Str* s) {
380
249
  int n = strlen(c_string);
381
249
  if (len(s) == n) {
382
152
    return memcmp(s->data_, c_string, n) == 0;
383
152
  } else {
384
97
    return false;
385
97
  }
386
249
}
387
388
4
// 32-bit FNV-1 hash of the string's bytes.
//
// FNV-1 from http://www.isthe.com/chongo/tech/comp/fnv/#FNV-1
//
// For each byte: multiply the state by the FNV prime, then XOR in the byte.
// Doing it the other way around (multiply by the byte, XOR the prime) is not
// FNV-1: a zero byte would reset the state to the prime, and every even byte
// would discard low-order bits.
//
// The arithmetic is done in uint32_t so that wraparound is well defined; the
// result is converted to int at the end to keep the existing return type.
int hash(Str* s) {
  uint32_t h = 2166136261u;               // 32-bit FNV-1 offset basis
  constexpr uint32_t kPrime = 16777619u;  // 32-bit FNV-1 prime

  const int n = len(s);
  for (int i = 0; i < n; i++) {
    h *= kPrime;
    h ^= static_cast<uint8_t>(s->data()[i]);
  }
  return static_cast<int>(h);
}
398
399
8
// Larger of two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
402
403
2
// Largest element of a list of ints.
// Throws ValueError when the list is empty.
int max(List<int>* elems) {
  const int n = len(elems);
  if (n < 1) {
    throw Alloc<ValueError>();
  }

  // Seed with the first element, then scan the rest.
  int best = elems->index_(0);
  int i = 1;
  while (i < n) {
    const int candidate = elems->index_(i);
    if (candidate > best) {
      best = candidate;
    }
    ++i;
  }

  return best;
}
419
420
0
// Read a line of input, passing `prompt` to py_readline::readline().
//
// Throws EOFError when readline() returns nullptr.
//
// Only implemented when built with HAVE_READLINE; otherwise calling this
// terminates the process.
Str* raw_input(Str* prompt) {
#ifdef HAVE_READLINE
  Str* ret = py_readline::readline(prompt);
  if (ret == nullptr) {
    throw Alloc<EOFError>();
  }
  return ret;
#else
  assert(0);  // not implemented
  // assert() compiles to nothing under NDEBUG.  Without this, control would
  // fall off the end of a non-void function, which is undefined behavior.
  abort();
#endif
}