cpp

Coverage Report

Created: 2024-03-13 14:13

/home/andy/git/oilshell/oil/cpp/data_lang.cc
Line
Count
Source (jump to first uncovered line)
1
// data_lang.cc
2
3
#include "cpp/data_lang.h"
4
5
#include "data_lang/j8.h"
6
#include "data_lang/utf8_impls/bjoern_dfa.h"
7
8
// TODO: remove duplication
9
79
#define LOSSY_JSON (1 << 3)
10
11
namespace {
12
13
12
void WriteBString(BigStr* s, mylib::BufWriter* buf, int capacity) {
14
12
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
15
12
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
16
17
12
  buf->WriteConst("b'");
18
19
  // Set up pointers after writing opening quote
20
12
  uint8_t* out = buf->LengthPointer();  // mutated
21
12
  uint8_t* out_end = buf->CapacityPointer();
22
23
20
  while (true) {
24
20
    J8EncodeChunk(&in, in_end, &out, out_end, true);  // Fill as much as we can
25
20
    buf->SetLengthFrom(out);
26
27
20
    if (in >= in_end) {
28
12
      break;
29
12
    }
30
31
    // Same growth policy as below
32
8
    capacity = capacity * 3 / 2;
33
    // printf("[2] new capacity %d\n", capacity);
34
8
    buf->EnsureMoreSpace(capacity);
35
36
    // Recompute pointers
37
8
    out = buf->LengthPointer();
38
8
    out_end = buf->CapacityPointer();
39
8
  }
40
41
12
  buf->WriteConst("'");
42
12
}
43
44
0
void WriteBashDollarString(BigStr* s, mylib::BufWriter* buf, int capacity) {
45
0
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
46
0
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
47
48
0
  buf->WriteConst("$'");
49
50
  // Set up pointers after writing opening quote
51
0
  uint8_t* out = buf->LengthPointer();  // mutated
52
0
  uint8_t* out_end = buf->CapacityPointer();
53
54
0
  while (true) {
55
0
    BashDollarEncodeChunk(&in, in_end, &out,
56
0
                          out_end);  // Fill as much as we can
57
0
    buf->SetLengthFrom(out);
58
59
0
    if (in >= in_end) {
60
0
      break;
61
0
    }
62
63
    // Same growth policy as below
64
0
    capacity = capacity * 3 / 2;
65
    // printf("[2] new capacity %d\n", capacity);
66
0
    buf->EnsureMoreSpace(capacity);
67
68
    // Recompute pointers
69
0
    out = buf->LengthPointer();
70
0
    out_end = buf->CapacityPointer();
71
0
  }
72
73
0
  buf->WriteConst("'");
74
0
}
75
76
// Style is COPIED from pyj8::WriteString()
77
// Functionality is like j8_libc.c ShellEncodeString, that is:
78
//
79
// call BourneShellEncodeChunk()
80
// then either
81
//   WriteBString()
82
//   WriteBashDollarString()
83
84
0
void ShellEncodeString(BigStr* s, int ysh_fallback, mylib::BufWriter* buf) {
85
0
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
86
0
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
87
88
  // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
89
0
  int capacity = len(s) + 3 + 2;     // 3 for quotes, 2 potential \" \n
90
0
  if (capacity < J8_MIN_CAPACITY) {  // account for J8_MAX_BYTES_PER_INPUT_BYTE
91
0
    capacity = J8_MIN_CAPACITY;
92
0
  }
93
  // printf("[1] capacity %d\n", capacity);
94
95
0
  buf->EnsureMoreSpace(capacity);
96
97
0
  int begin = buf->Length();  // maybe Truncate to this position
98
0
  buf->WriteConst("'");
99
100
  // Set up pointers after writing opening quote
101
0
  uint8_t* out = buf->LengthPointer();  // mutated
102
0
  uint8_t* out_end = buf->CapacityPointer();
103
104
0
  while (true) {
105
    // Fill in as much as we can
106
0
    int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end);
107
0
    if (cannot_encode) {
108
0
      buf->Truncate(begin);
109
0
      if (ysh_fallback) {
110
0
        WriteBString(s, buf, capacity);  // fall back to b''
111
0
      } else {
112
0
        WriteBashDollarString(s, buf, capacity);  // fall back to $''
113
0
      }
114
0
      return;
115
0
    }
116
0
    buf->SetLengthFrom(out);
117
118
    // printf("[1] len %d\n", out_buf->len);
119
120
0
    if (in >= in_end) {
121
0
      break;
122
0
    }
123
124
    // Growth policy: every time through the loop, increase 1.5x
125
    //
126
    // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
127
    // This seems like a reasonable tradeoff between over-allocating and too
128
    // many realloc().
129
0
    capacity = capacity * 3 / 2;
130
    // printf("[1] new capacity %d\n", capacity);
131
0
    buf->EnsureMoreSpace(capacity);
132
133
    // Recompute pointers
134
0
    out = buf->LengthPointer();  // mutated
135
0
    out_end = buf->CapacityPointer();
136
    // printf("[1] out %p out_end %p\n", out, out_end);
137
0
  }
138
139
0
  buf->WriteConst("'");
140
0
}
141
142
}  // namespace
143
144
namespace fastfunc {
145
146
82
bool CanOmitQuotes(BigStr* s) {
147
82
  return ::CanOmitQuotes(reinterpret_cast<unsigned char*>(s->data_), len(s));
148
82
}
149
150
35
BigStr* J8EncodeString(BigStr* s, int j8_fallback) {
151
35
  auto buf = Alloc<mylib::BufWriter>();
152
35
  int options = j8_fallback ? 0 : LOSSY_JSON;
153
35
  pyj8::WriteString(s, options, buf);
154
35
  return buf->getvalue();
155
35
}
156
157
0
BigStr* ShellEncodeString(BigStr* s, int ysh_fallback) {
158
0
  auto buf = Alloc<mylib::BufWriter>();
159
0
  ::ShellEncodeString(s, ysh_fallback, buf);
160
0
  return buf->getvalue();
161
0
}
162
163
}  // namespace fastfunc
164
165
namespace pyj8 {
166
167
5
bool PartIsUtf8(BigStr* s, int start, int end) {
168
5
  uint32_t codepoint;
169
5
  uint32_t state = UTF8_ACCEPT;
170
171
9
  for (int i = start; i < end; ++i) {
172
    // This var or a static_cast<> is necessary.  Should really change BigStr*
173
    // to use unsigned type
174
6
    uint8_t c = s->data_[i];
175
6
    decode(&state, &codepoint, c);
176
6
    if (state == UTF8_REJECT) {
177
2
      return false;
178
2
    }
179
6
  }
180
181
3
  return state == UTF8_ACCEPT;
182
5
}
183
184
79
void WriteString(BigStr* s, int options, mylib::BufWriter* buf) {
185
79
  bool j8_fallback = !(options & LOSSY_JSON);
186
187
79
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
188
79
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
189
190
  // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
191
79
  int capacity = len(s) + 3 + 2;     // 3 for quotes, 2 potential \" \n
192
79
  if (capacity < J8_MIN_CAPACITY) {  // account for J8_MAX_BYTES_PER_INPUT_BYTE
193
55
    capacity = J8_MIN_CAPACITY;
194
55
  }
195
  // printf("[1] capacity %d\n", capacity);
196
197
79
  buf->EnsureMoreSpace(capacity);
198
199
79
  int begin = buf->Length();  // maybe Truncate to this position
200
79
  buf->WriteConst("\"");
201
202
  // Set up pointers after writing opening quote
203
79
  uint8_t* out = buf->LengthPointer();  // mutated
204
79
  uint8_t* out_end = buf->CapacityPointer();
205
206
111
  while (true) {
207
    // Fill in as much as we can
208
111
    int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false);
209
111
    if (invalid_utf8 && j8_fallback) {
210
12
      buf->Truncate(begin);
211
12
      WriteBString(s, buf, capacity);  // fall back to b''
212
12
      return;
213
12
    }
214
99
    buf->SetLengthFrom(out);
215
216
    // printf("[1] len %d\n", out_buf->len);
217
218
99
    if (in >= in_end) {
219
67
      break;
220
67
    }
221
222
    // Growth policy: every time through the loop, increase 1.5x
223
    //
224
    // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
225
    // This seems like a reasonable tradeoff between over-allocating and too
226
    // many realloc().
227
32
    capacity = capacity * 3 / 2;
228
    // printf("[1] new capacity %d\n", capacity);
229
32
    buf->EnsureMoreSpace(capacity);
230
231
    // Recompute pointers
232
32
    out = buf->LengthPointer();  // mutated
233
32
    out_end = buf->CapacityPointer();
234
    // printf("[1] out %p out_end %p\n", out, out_end);
235
32
  }
236
237
67
  buf->WriteConst("\"");
238
67
}
239
240
}  // namespace pyj8
241
242
namespace j8 {
243
244
2
int HeapValueId(value_asdl::value_t* val) {
245
2
#ifndef OPTIMIZED
246
  // ASDL generates headers with HeapTag::Scanned, but HeapTag::FixedSize would
247
  // also be valid.
248
2
  ObjHeader* h = ObjHeader::FromObject(val);
249
2
  DCHECK(h->heap_tag == HeapTag::Scanned || h->heap_tag == HeapTag::FixedSize);
250
0
#endif
251
252
0
  return ObjectId(val);
253
2
}
254
255
}  // namespace j8