/home/andy/git/oilshell/oil/data_lang/j8_libc.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "data_lang/j8_libc.h" |
2 | | |
3 | | #include <stdbool.h> // false |
4 | | #include <stdlib.h> // realloc |
5 | | |
6 | | #include "data_lang/j8.h" // EncodeRuneOrByte |
7 | | |
8 | 6 | void EncodeBString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) { |
9 | | // Compute pointers for the inner loop |
10 | 6 | unsigned char* in = (unsigned char*)in_buf.data; |
11 | 6 | unsigned char* in_end = in + in_buf.len; |
12 | | |
13 | 6 | unsigned char* out = out_buf->data; // mutated |
14 | 6 | unsigned char* out_end = out_buf->data + capacity; |
15 | 6 | unsigned char** p_out = &out; |
16 | | |
17 | 6 | J8_OUT('b'); // Left quote b'' |
18 | 6 | J8_OUT('\''); |
19 | | |
20 | 12 | while (true) { |
21 | | // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data, |
22 | | // out, out_end); |
23 | 12 | J8EncodeChunk(&in, in_end, &out, out_end, true); // Fill as much as we can |
24 | 12 | out_buf->len = out - out_buf->data; // recompute length |
25 | | |
26 | 12 | if (in >= in_end) { |
27 | 6 | break; |
28 | 6 | } |
29 | | |
30 | | // Same growth policy as below |
31 | 6 | capacity = capacity * 3 / 2; |
32 | | // printf("[2] new capacity %d\n", capacity); |
33 | 6 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity); |
34 | | |
35 | | // Recompute pointers |
36 | 6 | out = out_buf->data + out_buf->len; |
37 | 6 | out_end = out_buf->data + capacity; |
38 | 6 | p_out = &out; |
39 | 6 | } |
40 | | |
41 | 6 | J8_OUT('\''); |
42 | 6 | out_buf->len = out - out_buf->data; |
43 | | |
44 | 6 | J8_OUT('\0'); // NUL terminate for printf |
45 | 6 | } |
46 | | |
47 | | // $'' escaping |
48 | | // This function is a COPY of EncodeBString() above |
49 | 0 | void EncodeBashDollarString(j8_buf_t in_buf, j8_buf_t* out_buf, int capacity) { |
50 | | // Compute pointers for the inner loop |
51 | 0 | unsigned char* in = (unsigned char*)in_buf.data; |
52 | 0 | unsigned char* in_end = in + in_buf.len; |
53 | |
|
54 | 0 | unsigned char* out = out_buf->data; // mutated |
55 | 0 | unsigned char* out_end = out_buf->data + capacity; |
56 | 0 | unsigned char** p_out = &out; |
57 | |
|
58 | 0 | J8_OUT('$'); // Left quote b'' |
59 | 0 | J8_OUT('\''); |
60 | |
|
61 | 0 | while (true) { |
62 | | // printf("B iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data, |
63 | | // out, out_end); |
64 | | // Fill as much as we can |
65 | 0 | BashDollarEncodeChunk(&in, in_end, &out, out_end); |
66 | 0 | out_buf->len = out - out_buf->data; // recompute length |
67 | |
|
68 | 0 | if (in >= in_end) { |
69 | 0 | break; |
70 | 0 | } |
71 | | |
72 | | // Same growth policy as below |
73 | 0 | capacity = capacity * 3 / 2; |
74 | | // printf("[2] new capacity %d\n", capacity); |
75 | 0 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity); |
76 | | |
77 | | // Recompute pointers |
78 | 0 | out = out_buf->data + out_buf->len; |
79 | 0 | out_end = out_buf->data + capacity; |
80 | 0 | p_out = &out; |
81 | 0 | } |
82 | |
|
83 | 0 | J8_OUT('\''); |
84 | 0 | out_buf->len = out - out_buf->data; |
85 | |
|
86 | 0 | J8_OUT('\0'); // NUL terminate for printf |
87 | 0 | } |
88 | | |
89 | 22 | void J8EncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int j8_fallback) { |
90 | 22 | unsigned char* in = (unsigned char*)in_buf.data; |
91 | 22 | unsigned char* in_end = in + in_buf.len; |
92 | | |
93 | | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY) |
94 | 22 | int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n |
95 | 22 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE |
96 | 10 | capacity = J8_MIN_CAPACITY; |
97 | 10 | } |
98 | | // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback); |
99 | | |
100 | 22 | out_buf->data = (unsigned char*)malloc(capacity); |
101 | 22 | out_buf->len = 0; // starts out empty |
102 | | |
103 | 22 | unsigned char* out = out_buf->data; // mutated |
104 | 22 | unsigned char* out_end = out_buf->data + capacity; |
105 | 22 | unsigned char** p_out = &out; |
106 | | |
107 | 22 | J8_OUT('"'); |
108 | | |
109 | 44 | while (true) { |
110 | | // Fill in as much as we can |
111 | | // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data, |
112 | | // out, out_end); |
113 | 44 | int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false); |
114 | 44 | if (invalid_utf8 && j8_fallback) { |
115 | 6 | out_buf->len = 0; // rewind to begining |
116 | | // printf("out %p out_end %p capacity %d\n", out, out_end, capacity); |
117 | 6 | EncodeBString(in_buf, out_buf, capacity); // fall back to b'' |
118 | | // printf("len %d\n", out_buf->len); |
119 | 6 | return; |
120 | 6 | } |
121 | 38 | out_buf->len = out - out_buf->data; // recompute length |
122 | | // printf("[1] len %d\n", out_buf->len); |
123 | | |
124 | 38 | if (in >= in_end) { |
125 | 16 | break; |
126 | 16 | } |
127 | | |
128 | | // Growth policy: every time through the loop, increase 1.5x |
129 | | // |
130 | | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs. |
131 | | // This seems like a reasonable tradeoff between over-allocating and too |
132 | | // many realloc(). |
133 | 22 | capacity = capacity * 3 / 2; |
134 | | // printf("[1] new capacity %d\n", capacity); |
135 | 22 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity); |
136 | | |
137 | | // Recompute pointers |
138 | 22 | out = out_buf->data + out_buf->len; |
139 | 22 | out_end = out_buf->data + capacity; |
140 | 22 | p_out = &out; |
141 | | // printf("[1] out %p out_end %p\n", out, out_end); |
142 | 22 | } |
143 | | |
144 | 16 | J8_OUT('"'); |
145 | 16 | out_buf->len = out - out_buf->data; |
146 | | |
147 | 16 | J8_OUT('\0'); // NUL terminate for printf |
148 | 16 | } |
149 | | |
150 | | // Start with '', but fall back on $'' for ASCII control and \' |
151 | | // |
152 | | // Depending on options, fall back to |
153 | | // |
154 | | // EncodeBashDollarString() -- $'\xff' |
155 | | // EncodeBString() -- b'\yff' |
156 | | |
157 | | // Mostly a COPY of the above |
158 | 0 | void ShellEncodeString(j8_buf_t in_buf, j8_buf_t* out_buf, int ysh_fallback) { |
159 | 0 | unsigned char* in = (unsigned char*)in_buf.data; |
160 | 0 | unsigned char* in_end = in + in_buf.len; |
161 | | |
162 | | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY) |
163 | 0 | int capacity = in_buf.len + 3 + 2; // 3 for quotes, 2 potential \" \n |
164 | 0 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE |
165 | 0 | capacity = J8_MIN_CAPACITY; |
166 | 0 | } |
167 | | // printf("[1] capacity %d j8_fallback %d\n", capacity, j8_fallback); |
168 | |
|
169 | 0 | out_buf->data = (unsigned char*)malloc(capacity); |
170 | 0 | out_buf->len = 0; // starts out empty |
171 | |
|
172 | 0 | unsigned char* out = out_buf->data; // mutated |
173 | 0 | unsigned char* out_end = out_buf->data + capacity; |
174 | 0 | unsigned char** p_out = &out; |
175 | |
|
176 | 0 | J8_OUT('\''); |
177 | |
|
178 | 0 | while (true) { |
179 | | // Fill in as much as we can |
180 | | // printf("J8 iter %p < %p and %p < %p < %p\n", in, in_end, out_buf->data, |
181 | | // out, out_end); |
182 | 0 | int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end); |
183 | 0 | if (cannot_encode) { |
184 | 0 | out_buf->len = 0; // rewind to begining |
185 | | // printf("out %p out_end %p capacity %d\n", out, out_end, capacity); |
186 | 0 | if (ysh_fallback) { |
187 | 0 | EncodeBString(in_buf, out_buf, capacity); // fall back to b'' |
188 | 0 | } else { |
189 | 0 | EncodeBashDollarString(in_buf, out_buf, capacity); // fall back to $'' |
190 | 0 | } |
191 | | // printf("len %d\n", out_buf->len); |
192 | 0 | return; |
193 | 0 | } |
194 | 0 | out_buf->len = out - out_buf->data; // recompute length |
195 | | // printf("[1] len %d\n", out_buf->len); |
196 | |
|
197 | 0 | if (in >= in_end) { |
198 | 0 | break; |
199 | 0 | } |
200 | | |
201 | | // Growth policy: every time through the loop, increase 1.5x |
202 | | // |
203 | | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs. |
204 | | // This seems like a reasonable tradeoff between over-allocating and too |
205 | | // many realloc(). |
206 | 0 | capacity = capacity * 3 / 2; |
207 | | // printf("[1] new capacity %d\n", capacity); |
208 | 0 | out_buf->data = (unsigned char*)realloc(out_buf->data, capacity); |
209 | | |
210 | | // Recompute pointers |
211 | 0 | out = out_buf->data + out_buf->len; |
212 | 0 | out_end = out_buf->data + capacity; |
213 | 0 | p_out = &out; |
214 | | // printf("[1] out %p out_end %p\n", out, out_end); |
215 | 0 | } |
216 | | |
217 | 0 | J8_OUT('\''); |
218 | 0 | out_buf->len = out - out_buf->data; |
219 | |
|
220 | 0 | J8_OUT('\0'); // NUL terminate for printf |
221 | 0 | } |