1// CODYlib -*- mode:c++ -*-
2// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
3// License: Apache v2.0
4
5// Cody
6#include "internal.hh"
7// C++
8#include <algorithm>
9// C
10#include <cstring>
11// OS
12#include <unistd.h>
13#include <cerrno>
14
15// MessageBuffer code
16
// Lines consist of words and end with a NEWLINE (0xa) char.
// Whitespace characters are TAB (0x9) and SPACE (0x20).
// Words consist of non-whitespace chars separated by whitespace.
// Multiple lines in one transaction are indicated by ending each
// non-final line with a SEMICOLON (0x3b) word immediately before the
// NEWLINE, i.e. a continued line ends with " ;\n".
// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
// A word containing anything outside of [-+_/%.a-zA-Z0-9] is quoted
// with '...'.
// Inside quotes, control chars (< SPACE), DEL, \' and \\ need escaping.
// Escapes are \\, \', \n, \t, \_ (SPACE, accepted on input only),
// everything else as \<hex><hex>? using lower-case hex digits.
// Spaces separate words, UTF8 encoding for non-ascii chars.
29
30namespace Cody {
31namespace Detail {
32
33static const char CONTINUE = S2C(s: u8";");
34
35void MessageBuffer::BeginLine ()
36{
37 if (!buffer.empty ())
38 {
39 // Terminate the previous line with a continuation
40 buffer.reserve (n: buffer.size () + 3);
41 buffer.push_back (x: S2C(s: u8" "));
42 buffer.push_back (x: CONTINUE);
43 buffer.push_back (x: S2C(s: u8"\n"));
44 }
45 lastBol = buffer.size ();
46}
47
48// QUOTE means 'maybe quote', we search it for quote-needing chars
49
50void MessageBuffer::Append (char const *str, bool quote, size_t len)
51{
52 if (len == ~size_t (0))
53 len = strlen (s: str);
54
55 if (!len && !quote)
56 return;
57
58 // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything
59 // that could remotely be shell-active. UTF8 encoding for non-ascii.
60 if (quote && len)
61 {
62 quote = false;
63 // Scan looking for quote-needing characters. We could just
64 // append until we find one, but that's probably confusing
65 for (size_t ix = len; ix--;)
66 {
67 unsigned char c = (unsigned char)str[ix];
68 if (!((c >= S2C(s: u8"a") && c <= S2C(s: u8"z"))
69 || (c >= S2C(s: u8"A") && c <= S2C(s: u8"Z"))
70 || (c >= S2C(s: u8"0") && c <= S2C(s: u8"9"))
71 || c == S2C(s: u8"-") || c == S2C(s: u8"+") || c == S2C(s: u8"_")
72 || c == S2C(s: u8"/") || c == S2C(s: u8"%") || c == S2C(s: u8".")))
73 {
74 quote = true;
75 break;
76 }
77 }
78 }
79
80 // Maximal length of appended string
81 buffer.reserve (n: buffer.size () + len * (quote ? 3 : 1) + 2);
82
83 if (quote)
84 buffer.push_back (x: S2C(s: u8"'"));
85
86 for (auto *end = str + len; str != end;)
87 {
88 auto *e = end;
89
90 if (quote)
91 // Look for next escape-needing char. More relaxed than
92 // the earlier needs-quoting check.
93 for (e = str; e != end; ++e)
94 {
95 unsigned char c = (unsigned char)*e;
96 if (c < S2C(s: u8" ") || c == 0x7f
97 || c == S2C(s: u8"\\") || c == S2C(s: u8"'"))
98 break;
99 }
100 buffer.insert (position: buffer.end (), first: str, last: e);
101 str = e;
102
103 if (str == end)
104 break;
105
106 buffer.push_back (x: S2C(s: u8"\\"));
107 switch (unsigned char c = (unsigned char)*str++)
108 {
109 case S2C(s: u8"\t"):
110 c = S2C(s: u8"t");
111 goto append;
112
113 case S2C(s: u8"\n"):
114 c = S2C(s: u8"n");
115 goto append;
116
117 case S2C(s: u8"'"):
118 case S2C(s: u8"\\"):
119 append:
120 buffer.push_back (x: c);
121 break;
122
123 default:
124 // Full-on escape. Use 2 lower-case hex chars
125 for (unsigned shift = 8; shift;)
126 {
127 shift -= 4;
128
129 char nibble = (c >> shift) & 0xf;
130 nibble += S2C(s: u8"0");
131 if (nibble > S2C(s: u8"9"))
132 nibble += S2C(s: u8"a") - (S2C(s: u8"9") + 1);
133 buffer.push_back (x: nibble);
134 }
135 }
136 }
137
138 if (quote)
139 buffer.push_back (x: S2C(s: u8"'"));
140}
141
142void MessageBuffer::Append (char c)
143{
144 buffer.push_back (x: c);
145}
146
147void MessageBuffer::AppendInteger (unsigned u)
148{
149 // Sigh, even though std::to_string is C++11, we support building on
150 // gcc 4.8, which is a C++11 compiler lacking std::to_string. so
151 // have something horrible.
152 std::string v (20, 0);
153 size_t len = snprintf (s: const_cast<char *> (v.data ()), maxlen: v.size (), format: "%u", u);
154 v.erase (pos: len);
155
156 AppendWord (str: v);
157}
158
159int MessageBuffer::Write (int fd) noexcept
160{
161 size_t limit = buffer.size () - lastBol;
162 ssize_t count = write (fd: fd, buf: &buffer.data ()[lastBol], n: limit);
163
164 int err = 0;
165 if (count < 0)
166 err = errno;
167 else
168 {
169 lastBol += count;
170 if (size_t (count) != limit)
171 err = EAGAIN;
172 }
173
174 if (err != EAGAIN && err != EINTR)
175 {
176 // Reset for next message
177 buffer.clear ();
178 lastBol = 0;
179 }
180
181 return err;
182}
183
184int MessageBuffer::Read (int fd) noexcept
185{
186 constexpr size_t blockSize = 200;
187
188 size_t lwm = buffer.size ();
189 size_t hwm = buffer.capacity ();
190 if (hwm - lwm < blockSize / 2)
191 hwm += blockSize;
192 buffer.resize (new_size: hwm);
193
194 auto iter = buffer.begin () + lwm;
195 ssize_t count = read (fd: fd, buf: &*iter, nbytes: hwm - lwm);
196 buffer.resize (new_size: lwm + (count >= 0 ? count : 0));
197
198 if (count < 0)
199 return errno;
200
201 if (!count)
202 // End of file
203 return -1;
204
205 bool more = true;
206 for (;;)
207 {
208 auto newline = std::find (first: iter, last: buffer.end (), val: S2C(s: u8"\n"));
209 if (newline == buffer.end ())
210 break;
211 more = newline != buffer.begin () && newline[-1] == CONTINUE;
212 iter = newline + 1;
213
214 if (iter == buffer.end ())
215 break;
216
217 if (!more)
218 {
219 // There is no continuation, but there are chars after the
220 // newline. Truncate the buffer and return an error
221 buffer.resize (new_size: iter - buffer.begin ());
222 return EINVAL;
223 }
224 }
225
226 return more ? EAGAIN : 0;
227}
228
229int MessageBuffer::Lex (std::vector<std::string> &result)
230{
231 result.clear ();
232
233 if (IsAtEnd ())
234 return ENOENT;
235
236 Assert (buffer.back () == S2C(u8"\n"));
237
238 auto iter = buffer.begin () + lastBol;
239
240 for (std::string *word = nullptr;;)
241 {
242 char c = *iter;
243
244 ++iter;
245 if (c == S2C(s: u8" ") || c == S2C(s: u8"\t"))
246 {
247 word = nullptr;
248 continue;
249 }
250
251 if (c == S2C(s: u8"\n"))
252 break;
253
254 if (c == CONTINUE)
255 {
256 // Line continuation
257 if (word || *iter != S2C(s: u8"\n"))
258 goto malformed;
259 ++iter;
260 break;
261 }
262
263 if (c <= S2C(s: u8" ") || c >= 0x7f)
264 goto malformed;
265
266 if (!word)
267 {
268 result.emplace_back ();
269 word = &result.back ();
270 }
271
272 if (c == S2C(s: u8"'"))
273 {
274 // Quoted word
275 for (;;)
276 {
277 c = *iter;
278
279 if (c == S2C(s: u8"\n"))
280 {
281 malformed:;
282 result.clear ();
283 iter = std::find (first: iter, last: buffer.end (), val: S2C(s: u8"\n"));
284 auto back = iter;
285 if (back[-1] == CONTINUE && back[-2] == S2C(s: u8" "))
286 // Smells like a line continuation
287 back -= 2;
288 result.emplace_back (args: &buffer[lastBol],
289 args: back - buffer.begin () - lastBol);
290 ++iter;
291 lastBol = iter - buffer.begin ();
292 return EINVAL;
293 }
294
295 if (c < S2C(s: u8" ") || c >= 0x7f)
296 goto malformed;
297
298 ++iter;
299 if (c == S2C(s: u8"'"))
300 break;
301
302 if (c == S2C(s: u8"\\"))
303 // escape
304 switch (c = *iter)
305 {
306 case S2C(s: u8"\\"):
307 case S2C(s: u8"'"):
308 ++iter;
309 break;
310
311 case S2C(s: u8"n"):
312 c = S2C(s: u8"\n");
313 ++iter;
314 break;
315
316 case S2C(s: u8"_"):
317 // We used to escape SPACE as \_, so accept that
318 c = S2C(s: u8" ");
319 ++iter;
320 break;
321
322 case S2C(s: u8"t"):
323 c = S2C(s: u8"\t");
324 ++iter;
325 break;
326
327 default:
328 {
329 unsigned v = 0;
330 for (unsigned nibble = 0; nibble != 2; nibble++)
331 {
332 c = *iter;
333 if (c < S2C(s: u8"0"))
334 {
335 if (!nibble)
336 goto malformed;
337 break;
338 }
339 else if (c <= S2C(s: u8"9"))
340 c -= S2C(s: u8"0");
341 else if (c < S2C(s: u8"a"))
342 {
343 if (!nibble)
344 goto malformed;
345 break;
346 }
347 else if (c <= S2C(s: u8"f"))
348 c -= S2C(s: u8"a") - 10;
349 else
350 {
351 if (!nibble)
352 goto malformed;
353 break;
354 }
355 ++iter;
356 v = (v << 4) | c;
357 }
358 c = v;
359 }
360 }
361 word->push_back (c: c);
362 }
363 }
364 else
365 // Unquoted character
366 word->push_back (c: c);
367 }
368 lastBol = iter - buffer.begin ();
369 if (result.empty ())
370 return ENOENT;
371
372 return 0;
373}
374
375void MessageBuffer::LexedLine (std::string &str)
376{
377 if (lastBol)
378 {
379 size_t pos = lastBol - 1;
380 for (; pos; pos--)
381 if (buffer[pos-1] == S2C(s: u8"\n"))
382 break;
383
384 size_t end = lastBol - 1;
385 if (buffer[end-1] == CONTINUE && buffer[end-2] == S2C(s: u8" "))
386 // Strip line continuation
387 end -= 2;
388 str.append (s: &buffer[pos], n: end - pos);
389 }
390}
391} // Detail
392} // Cody
393

source code of libcody/buffer.cc