cpp

Coverage Report

Created: 2023-09-13 01:07

/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line
Count
Source
1
#ifndef MYCPP_GC_STR_H
2
#define MYCPP_GC_STR_H
3
4
#include "mycpp/common.h"  // DISALLOW_COPY_AND_ASSIGN
5
#include "mycpp/gc_obj.h"  // GC_OBJ
6
7
template <typename T>
8
class List;
9
10
class Str {
11
 public:
12
  // Don't call this directly.  Call NewStr() instead, which calls this.
13
5.28k
  Str() {
14
5.28k
  }
15
16
553
  char* data() {
17
553
    return data_;
18
553
  }
19
20
  // Call this after writing into the buffer created by OverAllocatedStr()
21
  void MaybeShrink(int str_len);
22
23
  Str* index_(int i);
24
25
  int find(Str* needle, int pos = 0);
26
  int rfind(Str* needle);
27
28
  Str* slice(int begin);
29
  Str* slice(int begin, int end);
30
  Str* slice(int begin, int end, int step);
31
32
  Str* strip();
33
  // Used for CommandSub in osh/cmd_exec.py
34
  Str* rstrip(Str* chars);
35
  Str* rstrip();
36
37
  Str* lstrip(Str* chars);
38
  Str* lstrip();
39
40
  Str* ljust(int width, Str* fillchar);
41
  Str* rjust(int width, Str* fillchar);
42
43
  bool startswith(Str* s);
44
  bool endswith(Str* s);
45
46
  Str* replace(Str* old, Str* new_str);
47
  Str* join(List<Str*>* items);
48
49
  List<Str*>* split(Str* sep);
50
  List<Str*>* split(Str* sep, int max_split);
51
  List<Str*>* splitlines(bool keep);
52
53
  bool isdigit();
54
  bool isalpha();
55
  bool isupper();
56
57
  Str* upper();
58
  Str* lower();
59
60
  // Other options for fast comparison / hashing / string interning:
61
  // - unique_id_: an index into intern table.  I don't think this works unless
62
  //   you want to deal with rehashing all strings when the set grows.
63
  //   - although note that the JVM has -XX:StringTableSize=FIXED, which means
64
  //     it can degrade into linked list performance
65
  // - Hashed strings become GLOBAL_STR().  Never deallocated.
66
  // - Hashed strings become part of the "large object space", which might be
67
  //   managed by mark and sweep.  This requires linked list overhead.
68
  //   (doubly-linked?)
69
  // - Intern strings at GARBAGE COLLECTION TIME, with
70
  //   LayoutForwarded::new_location_?  Is this possible?  Does it introduce
71
  //   too much coupling between strings, hash tables, and GC?
72
73
5.28k
  static constexpr ObjHeader obj_header() {
74
5.28k
    return ObjHeader::Str();
75
5.28k
  }
76
77
  int len_;
78
  int hash_value_;
79
  char data_[1];  // flexible array
80
81
 private:
82
  int _strip_left_pos();
83
  int _strip_right_pos();
84
85
  DISALLOW_COPY_AND_ASSIGN(Str)
86
};
87
88
constexpr int kStrHeaderSize = offsetof(Str, data_);  // byte offset of data_ inside Str, i.e. the bytes that precede the character data
89
90
// Note: for SmallStr, we might copy into the VALUE
91
156
inline void Str::MaybeShrink(int str_len) {  // Sets the final length in place; nothing is reallocated here.
92
156
  len_ = str_len;  // record the caller-supplied length
93
156
  data_[len_] = '\0';  // NUL terminate at the new end; assumes the buffer holds at least str_len + 1 bytes -- TODO confirm
94
156
}
95
96
13.4k
inline int len(const Str* s) {  // Returns the stored length field; s is dereferenced without a null check.
97
13.4k
  return s->len_;
98
13.4k
}
99
100
Str* StrFormat(const char* fmt, ...);
101
Str* StrFormat(Str* fmt, ...);
102
103
// NOTE: This iterates over bytes.
104
class StrIter {
105
 public:
106
126
  explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) {  // starts at index 0; the length is read once, here
107
    // Cheney only: s_ could be moved during iteration.
108
    // gHeap.PushRoot(reinterpret_cast<RawObject**>(&s_));
109
126
  }
110
126
  ~StrIter() {
111
    // gHeap.PopRoot();
112
126
  }
113
158
  void Next() {  // advance by one position; no bounds check, callers test Done()
114
158
    i_++;
115
158
  }
116
284
  bool Done() {  // true once the index has reached the length captured at construction
117
284
    return i_ >= len_;
118
284
  }
119
  Str* Value();  // similar to index_()
120
121
 private:
122
  Str* s_;  // string being iterated
123
  int i_;  // current position, incremented by Next()
124
  int len_;  // len(s) as captured by the constructor
125
126
  DISALLOW_COPY_AND_ASSIGN(StrIter)
127
};
128
129
bool maybe_str_equals(Str* left, Str* right);
130
131
extern Str* kEmptyString;
132
133
// GlobalStr notes:
134
// - sizeof("foo") == 4, for the NUL terminator.
135
// - gc_heap_test.cc has a static_assert that GlobalStr matches Str.  We don't
136
//   put it here because it triggers -Winvalid-offsetof
137
138
template <int N>
139
class GlobalStr {
140
  // A template type with the same layout as Str with length N-1 (which needs a
141
  // buffer of size N).  For initializing global constant instances.
142
 public:
143
  int len_;
144
  int hash_value_;
145
  const char data_[N];
146
147
  DISALLOW_COPY_AND_ASSIGN(GlobalStr)
148
};
149
150
// This macro is a workaround for the fact that it's impossible to have a
151
// constexpr initializer for char[N].  The "String Literals as Non-Type
152
// Template Parameters" feature of C++ 20 would have done it, but it's not
153
// there.
154
//
155
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
156
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
157
//
158
// TODO: Can we hash values at compile time so they can be in the intern table?
159
160
#define GLOBAL_STR(name, val)                                     \
161
  GcGlobal<GlobalStr<sizeof(val)>> _##name = {                    \
162
      ObjHeader::Global(TypeTag::Str),                            \
163
      {.len_ = sizeof(val) - 1, .hash_value_ = 0, .data_ = val}}; \
164
  Str* name = reinterpret_cast<Str*>(&_##name.obj);
165
166
#endif  // MYCPP_GC_STR_H