regex -> Reverse Polish Expression -> NFA -> DFA

 1 class Solution {
  2 public:
  3
  4 /*
  5 * Use DFA to solve leetcode 10. Regular expression problems
  6*
  7* Steps:
  8 * 1. Treat the regular string as a post-order traversal expression, and use the stack and operator priority to convert it to
  9 * Inorder traversal expression, that is, reverse Polish expression.
 10 * 2. Use the inorder string to construct NFA through Thompson algorithm.
 11 * 3. Convert NFA to DFA using subset construction algorithm.
 12 * 4. Input the given string s into DFA to simulate all states, and return true if s has been read and is in the end node.
 13*
 14 *Note:
 15 * For simplicity, all new are not deleted.
 16*
 17 * Reference:
 18 * For the first step, please refer to Lessons 7.2 - 7.3 of "Data Structure" course by Han Yongkai, Tsinghua University, Taiwan
 19 * For steps 2-4, please refer to Lessons 2.3.1 - 3.2.4 of the course "Compilation Principles" of the University of Science and Technology of China Hua Healthcare
 20 */
 twenty one 
 22 // Node used by NFA
 23 struct Node;
 twenty four 
 25 // Path used by NFA
 26 struct Path
 27 {
 28 char c;
 29 Node * dest;
 30 Path(char in_c, Node* in_dest) :c(in_c), dest(in_dest) {}
 31 };
 32
 33 struct Node
 34 {
 35 int index;
 36 vector<Path> paths;
 37 Node()
 38 {
 39 static int i = 0;
 40 index = i++;
 41}
 42 Node(vector<Path> in_pathes) :pathes(in_pathes) {}
 43 void AddPath(char c, Node* dest)
 44 {
 45 paths. emplace_back(c, dest);
 46}
 47 };
 48
 49 // Convert post-order traversal regular string to in-order traversal
 50 // Use the method of creating ExpressionTree
 51 // For character concatenation, add an & amp; as an operator
 52 queue<char> ToInOrder(string s)
 53 {
 54 int len = s. length();
 55 queue<char> ans;
 56 stack<char> symbols; // symbol stack
 57 bool prevIsChar = false;
 58
 59 // Pop all low priority operators to ans
 60 auto PopLowPriority = [ &]()
 61 {
 62 while (!symbols. empty())
 63 {
 64 ans. push(symbols. top());
 65 symbols. pop();
 66}
 67
 68 };
 69
 70 for (int i = 0; i < len; + + i)
 71 {
 72 char c = s[i];
 73 if (c == '*')
 74 {
 75 if (!symbols.empty() & amp; & amp; symbols.top() == ' & amp;')
 76 {
 77 symbols. push(c);
 78}
 79 else
 80{
 81 PopLowPriority();
 82 symbols. push(c);
 83}
 84 prevIsChar = true;
 85 continue;
 86}
 87
 88 if (prevIsChar)
 89{
 90 PopLowPriority();
 91 symbols. push(' & amp;');
 92}
 93
 94 ans. push(c);
 95
 96 //
 97 if (i + 1 < len & amp; & amp; s[i + 1] != '*')
 98 prevIsChar = true;
 99 else
100 prevIsChar = false;
101}
102
103 PopLowPriority();
104
105 return ans;
106}
107
108 struct Tree
109 {
110 Node* start,* end;
111 };
112
113 // Use the Thompson algorithm to construct the inorder string into NFA
114 Tree CalcInOrder(queue<char> inorder)
115 {
116 stack<Tree> temp;
117
118 while (!inorder. empty())
119 {
120 char c = inorder. front();
121 inorder. pop();
122
123 if (c == '*')
124 {
125 auto & tree = temp. top();
126
127 Node* oldStart = tree.start;
128 Node* oldEnd = tree.end;
129
130 Node* newStart = new Node;
131 Node* newEnd = new Node;
132 newStart->AddPath(0, oldStart);
133 newStart->AddPath(0, newEnd);
134 oldEnd->AddPath(0, oldStart);
135 oldEnd->AddPath(0, newEnd);
136
137 tree.start = newStart;
138 tree.end = newEnd;
139 continue;
140}
141
142 if (c == ' &')
143 {
144 auto e2 = temp.top();temp.pop();
145 auto e1 = temp. top(); temp. pop();
146
147 Tree tree;
148 tree.start = e1.start;
149 tree.end = e2.end;
150 e1.end->AddPath(0, e2.start);
151
152 temp. push(tree);
153 continue;
154}
155
156 Node* node1 = new Node;
157 Node* node2 = new Node;
158 node1->AddPath(c, node2);
159 temp. push({ node1, node2 });
160}
161 return temp. top();
162}
163
164 // closure function
165 // Return: the closure of a node, namely
166 // A collection of all nodes that a node can reach through the ε path (including itself)
167 set<Node*> e_closure(Node* start)
168 {
169 set<Node*> ans;
170 queue<Node*> q;
171 q. push(start);
172 while (!q. empty())
173 {
174 auto node = q. front();
175 q. pop();
176
177 ans. insert(node);
178
179 for (auto & path : node->pathes)
180 if (path.c == 0 & & ans.find(path.dest) == ans.end())
181 q.push(path.dest);
182}
183 return ans;
184}
185
186 // Node structure used by DFA
187 struct FinalNode;
188 struct FinalPath
189 {
190 char c;
191 FinalNode* dest;
192 FinalPath(char c, FinalNode* dest) :c(c), dest(dest) {}
193 };
194
195 struct FinalNode
196 {
197 int index;
198 bool isEnd;
199 vector<FinalPath> paths;
200 FinalNode(bool isEnd) :isEnd(isEnd)
201 {
202 static int i = 0;
203 index = i++;
204}
205 void AddPath(char c, FinalNode *dest)
206 {
207 paths. emplace_back(c, dest);
208}
209 };
210
211 using FinalTree = FinalNode*;
212
213 // Subset Construction Algorithm Convert NFA to DFA
214 FinalTree BuildSubSet(const Tree tree)
215 {
216 // Return whether a node is an end node
217 auto IsEnd = [ & amp;](Node* node)->bool
218 {
219 return node == tree. end;
220 };
221
222 // Return whether there is an end node in a collection
223 auto SetIsEnd = [ & amp;](const set<Node*> & amp; nodeSet)->bool
224 {
225 return nodeSet.find(tree.end) != nodeSet.end();
226 };
227
228 set<Node*> q0;
229 q0 = e_closure(tree.start);
230
231 FinalNode* n0 = new FinalNode(SetIsEnd(q0));
232 map<set<Node*>, FinalNode*> newTreeDict;
233 newTreeDict[q0] = n0;
234
235 FinalTree newTree;
236 newTree = n0;
237
238 set<set<Node*>> Q;
239 Q.insert(q0);
240
241 queue<set<Node*>> worklist;
242 worklist. push(q0);
243 while (!worklist. empty())
244 {
245 auto q = worklist. front();
246 worklist. pop();
247
248 //
249 map<char, set<Node*>> dict;
250 for (auto node : q)
251 {
252 for (auto & path : node->pathes)
253 {
254 if (path.c != 0)
255 {
256 dict[path.c].insert(path.dest);
257}
258}
259}
260
261 for (auto & pr : dict)
262 {
263 for (auto node : pr.second)
264 {
265 auto t = e_closure(node);
266 FinalNode* node_t = nullptr;
267 if (newTreeDict. find(t) != newTreeDict. end())
268 node_t = newTreeDict[t];
269 else
270 {
271 node_t=new FinalNode(SetIsEnd(t));
272 newTreeDict[t] = node_t;
273}
274
275 //
276 FinalNode* node_q = newTreeDict[q];
277 node_q->AddPath(pr.first, node_t);
278
279 if (Q.find(t) == Q.end())
280 {
281 Q. insert(t);
282 worklist. push(t);
283}
284}
285}
286}
287 return newTree;
288}
289
290 // Traverse the DFA to determine whether the string matches
291 bool IsMatch(string s, FinalTree dfa)
292 {
293 struct State
294 {
295 int spos; // current string number
296 FinalNode* node; // The current node
297 };
298
299 int len = s. length();
300 State state0 = { -1,dfa };
301 queue<State> q;
302 q.push(state0);
303
304 while (!q. empty())
305 {
306 auto state = q. front();
307 q. pop();
308
309 // If the string has been read to the head and the current node is the end, return true
310 if (state.spos == len-1)
311 {
312 if (state. node->isEnd)
313 return true;
314
315 // Just read the string to the head, but the node is not the tail node
316 continue;
317}
318
319 for (auto & path : state.node->pathes)
320 {
321 // If the next character matches the path, or the path matches a '.'
322 if (path.c == s[state.spos + 1] || path.c=='.')
323 {
324 // Join the queue
325 q.push({ state.spos + 1,path.dest });
326}
327}
328}
329 return false;
330}
331
332 bool isMatch(string s, string p)
333 {
334 if (p.empty()) return s.empty();
335 auto q = ToInOrder(p);
336 auto tree = CalcInOrder(q);
337 auto DFA = BuildSubSet(tree);
338
339 return IsMatch(s, DFA);
340}
341 };