/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line | Count | Source |
1 | | #ifndef MYCPP_GC_STR_H |
2 | | #define MYCPP_GC_STR_H |
3 | | |
4 | | #include "mycpp/common.h" // DISALLOW_COPY_AND_ASSIGN |
5 | | #include "mycpp/gc_obj.h" // GC_OBJ |
6 | | |
7 | | template <typename T> |
8 | | class List; |
9 | | |
10 | | class Str { |
11 | | public: |
12 | | // Don't call this directly; call NewStr() instead, which calls this. Only header_ is initialized here (len_ and data_ are not set by this constructor). |
13 | 5.37k | Str() : header_(obj_header()) { |
14 | 5.37k | } |
15 | | |
16 | 574 | char* data() { |
17 | 574 | return data_; |
18 | 574 | } |
19 | | |
20 | | // Call this after writing into the buffer created by OverAllocatedStr(); str_len becomes the recorded length. |
21 | | void MaybeShrink(int str_len); |
22 | | |
23 | | Str* index_(int i); |
24 | | |
25 | | int find(Str* needle, int pos = 0); |
26 | | int rfind(Str* needle); |
27 | | |
28 | | Str* slice(int begin); |
29 | | Str* slice(int begin, int end); |
30 | | |
31 | | Str* strip(); |
32 | | // Used for CommandSub in osh/cmd_exec.py |
33 | | Str* rstrip(Str* chars); |
34 | | Str* rstrip(); |
35 | | |
36 | | Str* lstrip(Str* chars); |
37 | | Str* lstrip(); |
38 | | |
39 | | Str* ljust(int width, Str* fillchar); |
40 | | Str* rjust(int width, Str* fillchar); |
41 | | |
42 | | bool startswith(Str* s); |
43 | | bool endswith(Str* s); |
44 | | |
45 | | Str* replace(Str* old, Str* new_str); |
46 | | Str* join(List<Str*>* items); |
47 | | |
48 | | List<Str*>* split(Str* sep); |
49 | | List<Str*>* split(Str* sep, int max_split); |
50 | | List<Str*>* splitlines(bool keep); |
51 | | |
52 | | bool isdigit(); |
53 | | bool isalpha(); |
54 | | bool isupper(); |
55 | | |
56 | | Str* upper(); |
57 | | Str* lower(); |
58 | | |
59 | | // Other options for fast comparison / hashing / string interning: |
60 | | // - unique_id_: an index into an intern table. I don't think this works unless |
61 | | // you want to deal with rehashing all strings when the set grows. |
62 | | // - although note that the JVM has -XX:StringTableSize=FIXED, which means |
63 | | // it can degrade into linked list performance. |
64 | | // - Hashed strings become GLOBAL_STR(). Never deallocated. |
65 | | // - Hashed strings become part of the "large object space", which might be |
66 | | // managed by mark and sweep. This requires linked list overhead. |
67 | | // (doubly-linked?) |
68 | | // - Intern strings at GARBAGE COLLECTION TIME, with |
69 | | // LayoutForwarded::new_location_? Is this possible? Does it introduce |
70 | | // too much coupling between strings, hash tables, and GC? |
71 | | |
72 | 5.37k | static constexpr ObjHeader obj_header() { |
73 | 5.37k | return ObjHeader::Str(); |
74 | 5.37k | } |
75 | | |
76 | | GC_OBJ(header_); |
77 | | int len_; |
78 | | char data_[1]; // flexible array (declared with one element); its offset is what kStrHeaderSize records |
79 | | |
80 | | private: |
81 | | int _strip_left_pos(); |
82 | | int _strip_right_pos(); |
83 | | |
84 | | DISALLOW_COPY_AND_ASSIGN(Str) |
85 | | }; |
86 | | |
87 | | constexpr int kStrHeaderSize = offsetof(Str, data_); |
88 | | |
89 | | // Sets len_ to str_len; nothing else is touched in this implementation. Note: for SmallStr, we might copy into the VALUE |
90 | 131 | inline void Str::MaybeShrink(int str_len) { |
91 | 131 | len_ = str_len; |
92 | 131 | } |
93 | | |
94 | 13.3k | inline int len(const Str* s) { |
95 | 13.3k | return s->len_; |
96 | | |
97 | | // For Cheney, it's possible we could use this strategy of computing it from |
98 | | // the object length. NOTE(review): the disabled DCHECK below only requires obj_len >= kStrHeaderSize - 1, but the subtraction after it needs obj_len >= kStrHeaderSize + 1 to stay non-negative; confirm before enabling. |
99 | | #if 0 |
100 | | DCHECK(s->header_.obj_len >= kStrHeaderSize - 1); |
101 | | return s->header_.obj_len - kStrHeaderSize - 1; |
102 | | #endif |
103 | 13.3k | } |
104 | | |
105 | | Str* StrFormat(const char* fmt, ...); |
106 | | Str* StrFormat(Str* fmt, ...); |
107 | | |
108 | | // NOTE: This iterates over bytes. The length is read once in the constructor; Done() compares the index against that cached value. |
109 | | class StrIter { |
110 | | public: |
111 | 126 | explicit StrIter(Str* s) : s_(s), i_(0), len_(len(s)) { |
112 | | // Cheney only: s_ could be moved during iteration, so it would be registered as a root (currently disabled): |
113 | | // gHeap.PushRoot(reinterpret_cast<RawObject**>(&s_)); |
114 | 126 | } |
115 | 126 | ~StrIter() { |
116 | | // gHeap.PopRoot(); (disabled; counterpart of the PushRoot in the constructor) |
117 | 126 | } |
118 | 158 | void Next() { |
119 | 158 | i_++; |
120 | 158 | } |
121 | 284 | bool Done() { |
122 | 284 | return i_ >= len_; |
123 | 284 | } |
124 | | Str* Value(); // similar to index_(); defined outside this header |
125 | | |
126 | | private: |
127 | | Str* s_; |
128 | | int i_; |
129 | | int len_; |
130 | | |
131 | | DISALLOW_COPY_AND_ASSIGN(StrIter) |
132 | | }; |
133 | | |
134 | | bool maybe_str_equals(Str* left, Str* right); |
135 | | |
136 | | extern Str* kEmptyString; |
137 | | |
138 | | // GlobalStr notes: |
139 | | // - sizeof("foo") == 4, for the NUL terminator. |
140 | | // - gc_heap_test.cc has a static_assert that GlobalStr matches Str. We don't |
141 | | // put it here because it triggers -Winvalid-offsetof. |
142 | | |
143 | | template <int N> |
144 | | class GlobalStr { |
145 | | // A template type intended to have the same layout as Str with length N-1 (which needs a |
146 | | // buffer of size N). For initializing global constant instances. NOTE(review): hash_value_ occupies the slot Str declares as len_, and GLOBAL_STR stores sizeof(val) - 1 (the length) in it, so the field name looks stale -- confirm. |
147 | | public: |
148 | | ObjHeader header_; |
149 | | int hash_value_; |
150 | | const char data_[N]; |
151 | | |
152 | | DISALLOW_COPY_AND_ASSIGN(GlobalStr) |
153 | | }; |
154 | | |
155 | | // This macro is a workaround for the fact that it's impossible to have a |
156 | | // constexpr initializer for char[N]. The "String Literals as Non-Type |
157 | | // Template Parameters" feature of C++ 20 would have done it, but it's not |
158 | | // there. GLOBAL_STR(name, val) defines a GlobalStr<sizeof(val)> object called _<name> (header fields, then sizeof(val) - 1, then val) and a Str* called <name> that points at it via reinterpret_cast. |
159 | | // |
160 | | // https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/ |
161 | | // https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor |
162 | | |
163 | | #define GLOBAL_STR(name, val) \ |
164 | | GlobalStr<sizeof(val)> _##name = { \ |
165 | | {kIsHeader, TypeTag::Str, kZeroMask, HeapTag::Global, kIsGlobal}, \ |
166 | | sizeof(val) - 1, \ |
167 | | val}; \ |
168 | | Str* name = reinterpret_cast<Str*>(&_##name); |
169 | | |
170 | | #endif // MYCPP_GC_STR_H |