cpp

Coverage Report

Created: 2023-03-07 20:24

/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line
Count
Source
1
#ifndef MYCPP_GC_STR_H
2
#define MYCPP_GC_STR_H
3
4
#include "mycpp/common.h"  // DISALLOW_COPY_AND_ASSIGN
5
#include "mycpp/gc_obj.h"  // GC_OBJ
6
7
template <typename T>
8
class List;
9
10
// Str: string object made of an object header (header_), an int length
// (len_), and the character bytes stored inline starting at data_.  Most
// methods are only declared here; their definitions are not in this header.
class Str {
11
 public:
12
  // Don't call this directly.  Call NewStr() instead, which calls this.
13
5.37k
  Str() : header_(obj_header()) {
14
5.37k
  }
15
16
574
  // Returns a mutable pointer to the first byte of the inline buffer data_.
  char* data() {
17
574
    return data_;
18
574
  }
19
20
  // Call this after writing into buffer created by OverAllocatedStr()
21
  void MaybeShrink(int str_len);
22
23
  Str* index_(int i);
24
25
  int find(Str* needle, int pos = 0);
26
  int rfind(Str* needle);
27
28
  Str* slice(int begin);
29
  Str* slice(int begin, int end);
30
31
  Str* strip();
32
  // Used for CommandSub in osh/cmd_exec.py
33
  Str* rstrip(Str* chars);
34
  Str* rstrip();
35
36
  Str* lstrip(Str* chars);
37
  Str* lstrip();
38
39
  Str* ljust(int width, Str* fillchar);
40
  Str* rjust(int width, Str* fillchar);
41
42
  bool startswith(Str* s);
43
  bool endswith(Str* s);
44
45
  Str* replace(Str* old, Str* new_str);
46
  Str* join(List<Str*>* items);
47
48
  List<Str*>* split(Str* sep);
49
  List<Str*>* split(Str* sep, int max_split);
50
  List<Str*>* splitlines(bool keep);
51
52
  bool isdigit();
53
  bool isalpha();
54
  bool isupper();
55
56
  Str* upper();
57
  Str* lower();
58
59
  // Other options for fast comparison / hashing / string interning:
60
  // - unique_id_: an index into intern table.  I don't think this works unless
61
  //   you want to deal with rehashing all strings when the set grows.
62
  //   - although note that the JVM has -XX:StringTableSize=FIXED, which means
63
  //   - it can degrade into linked list performance
64
  // - Hashed strings become GLOBAL_STR().  Never deallocated.
65
  // - Hashed strings become part of the "large object space", which might be
66
  //   managed by mark and sweep.  This requires linked list overhead.
67
  //   (doubly-linked?)
68
  // - Intern strings at GARBAGE COLLECTION TIME, with
69
  //   LayoutForwarded::new_location_?  Is this possible?  Does it introduce
70
  //   too much coupling between strings, hash tables, and GC?
71
72
5.37k
  // The header value that the constructor stores into header_.
  static constexpr ObjHeader obj_header() {
73
5.37k
    return ObjHeader::Str();
74
5.37k
  }
75
76
  // NOTE(review): GC_OBJ is defined in mycpp/gc_obj.h; presumably it declares
  // the header_ member that the constructor initializes -- confirm there.
  GC_OBJ(header_);
77
  // Length returned by len(); overwritten by MaybeShrink().
  int len_;
78
  // Declared with one element; kStrHeaderSize is the offset of this member.
  // NOTE(review): presumably the allocator (NewStr / OverAllocatedStr) reserves
  // the real buffer past the end of the object -- confirm at the call sites.
  char data_[1];  // flexible array
79
80
 private:
81
  int _strip_left_pos();
82
  int _strip_right_pos();
83
84
  DISALLOW_COPY_AND_ASSIGN(Str)
85
};
86
87
// Byte offset of data_ within Str, i.e. the size of everything stored before
// the character bytes.
constexpr int kStrHeaderSize = offsetof(Str, data_);
88
89
// Note: for SmallStr, we might copy into the VALUE
90
131
// Records str_len as this string's length.  Only len_ is written: the buffer
// itself is not resized or otherwise touched by this function.
// NOTE(review): no NUL is written at data_[str_len] here; presumably the
// caller or the allocator guarantees termination -- confirm.
inline void Str::MaybeShrink(int str_len) {
91
131
  len_ = str_len;
92
131
}
93
94
13.3k
// Returns the length stored in s->len_.  s is dereferenced unconditionally,
// so it must not be null.
inline int len(const Str* s) {
95
13.3k
  return s->len_;
96
97
  // For Cheney, it's possible we could use this strategy of computing it from
98
  // the object length.
99
  // The branch below is compiled out (#if 0) and also sits after the return.
  // NOTE(review): the DCHECK bound (kStrHeaderSize - 1) is weaker than what the
  // subtraction needs for a non-negative result (obj_len >= kStrHeaderSize + 1)
  // -- confirm the intended bound before enabling this.
#if 0
100
  DCHECK(s->header_.obj_len >= kStrHeaderSize - 1);
101
  return s->header_.obj_len - kStrHeaderSize - 1;
102
#endif
103
13.3k
}
104
105
Str* StrFormat(const char* fmt, ...);
106
Str* StrFormat(Str* fmt, ...);
107
108
// NOTE: This iterates over bytes.
109
// Cursor over the positions of a Str.  It starts at index 0, Next() advances
// by one, and Done() becomes true once the index reaches the length that was
// read at construction time.
class StrIter {
110
 public:
111
126
  // Stores s and reads len(s) once; the length is not re-read afterwards.
  explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) {
112
    // Cheney only: s_ could be moved during iteration.
113
    // gHeap.PushRoot(reinterpret_cast<RawObject**>(&s_));
114
126
  }
115
126
  // Currently does nothing; the rooting call that would pair with the
  // constructor's PushRoot is commented out as well.
  ~StrIter() {
116
    // gHeap.PopRoot();
117
126
  }
118
158
  // Moves to the next position.  Does not check Done() itself.
  void Next() {
119
158
    i_++;
120
158
  }
121
284
  // True when the current index is at or past the captured length.
  bool Done() {
122
284
    return i_ >= len_;
123
284
  }
124
  Str* Value();  // similar to index_()
125
126
 private:
127
  Str* s_;   // string being iterated
128
  int i_;    // current index, starts at 0
129
  int len_;  // len(s_) as of construction
130
131
  DISALLOW_COPY_AND_ASSIGN(StrIter)
132
};
133
134
bool maybe_str_equals(Str* left, Str* right);
135
136
extern Str* kEmptyString;
137
138
// GlobalStr notes:
139
// - sizeof("foo") == 4, for the NUL terminator.
140
// - gc_heap_test.cc has a static_assert that GlobalStr matches Str.  We don't
141
// put it here because it triggers -Winvalid-offsetof
142
143
// N is the buffer size in bytes; GLOBAL_STR() instantiates this with
// sizeof(val), which includes the NUL terminator.
template <int N>
144
class GlobalStr {
145
  // A template type with the same layout as Str with length N-1 (which needs a
146
  // buffer of size N).  For initializing global constant instances.
147
 public:
148
  ObjHeader header_;
149
  // NOTE(review): GLOBAL_STR() initializes this member with sizeof(val) - 1,
  // i.e. the string length, and it is the member declared between the header
  // and data_, where Str declares len_.  The name hash_value_ therefore looks
  // stale -- confirm before renaming, since other files may reference it.
  int hash_value_;
150
  const char data_[N];
151
152
  DISALLOW_COPY_AND_ASSIGN(GlobalStr)
153
};
154
155
// This macro is a workaround for the fact that it's impossible to have a
156
// constexpr initializer for char[N].  The "String Literals as Non-Type
157
// Template Parameters" feature of C++ 20 would have done it, but it's not
158
// there.
159
//
160
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
161
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
162
163
// GLOBAL_STR(name, val) defines two variables:
//   _<name>: a GlobalStr<sizeof(val)> aggregate-initialized with a header,
//            sizeof(val) - 1 (the length without the NUL), and the bytes of val
//   <name>:  a Str* obtained by reinterpret_cast from the address of _<name>
// val must be usable with sizeof() and as a char array initializer.
#define GLOBAL_STR(name, val)                                           \
164
  GlobalStr<sizeof(val)> _##name = {                                    \
165
      {kIsHeader, TypeTag::Str, kZeroMask, HeapTag::Global, kIsGlobal}, \
166
      sizeof(val) - 1,                                                  \
167
      val};                                                             \
168
  Str* name = reinterpret_cast<Str*>(&_##name);
169
170
#endif  // MYCPP_GC_STR_H