Directory
One, regexp_count
Two, regexp_extract
Three, regexp_extract_index
4. Test
1. Test data
2. Test results
When learning MySQL regular expressions, you may find that some common functions are not provided. The two most typical are: return the number of matches; return all matches at once. But we can create a custom function where we use a recursive query to do it.
1. regexp_count
drop function if exists regexp_count; delimiter // create function regexp_count(x text,r text, match_type varchar(5)) returns bigint reads sql data deterministic begin set @ret=0; with recursive cte as (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t union all select n + 1, regexp_substr(x,r,1, n + 1,match_type) from cte where b is not null) select count(*) into @ret from cte where b is not null; return @ret; end; // delimiter;
2. regexp_extract
drop function if exists regexp_extract; delimiter // create function regexp_extract(x text,r text, match_type varchar(5)) returns text charset utf8mb4 reads sql data deterministic begin set @ret=''; with recursive cte as (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t union all select n + 1, regexp_substr(x,r,1, n + 1,match_type) from cte where b is not null) select * into @ret from (select convert(group_concat(b) using utf8mb4) a from cte) t where a is not null; return @ret; end; // delimiter;
3. regexp_extract_index
drop function if exists regexp_extract_index; delimiter // create function regexp_extract_index(x text,r text, return_option int, match_type varchar(5)) returns text charset utf8mb4 reads sql data deterministic begin set @ret=''; with recursive cte as (select n, regexp_instr(x,r,1,t.n,return_option,match_type) b from (select 1 n) t union all select n + 1, regexp_instr(x,r,1, n + 1, return_option, match_type) from cte where b > 0) select * into @ret from (select convert(group_concat(b) using utf8mb4) a from cte where b >0) t; return @ret; end; // delimiter;
4. Test
1. Test data
drop table if exists t_regexp; create table t_regexp(a text); insert into t_regexp values ( 'THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS. ARGUMENT. How a Ship having passed the Line was driven by Storms to the cold Country towards the South Pole; and how from thence she made her course to the tropical Latitude of the Great Pacific Ocean; and of the strange things that befell; And in what manner the Ancyent Marine came back to his own Country. I. 1 It is an ancyent Marinere, 2 And he stoppeth one of three: 3 "By thy long gray beard and thy glittering eye 4 "Now wherefore stoppest me?'); insert into t_regexp values ('THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.'), ('ARGUMENT.'), ('How a Ship having passed the Line was driven by Storms to the cold Country'), ('towards the South Pole; and how from thence she made her course to the tropical'), ('Latitude of the Great Pacific Ocean; and of the strange things that befell;'), ('and in what manner the Ancyent Marinere came back to his own Country.'), ('I.'), ('1 It is an ancyent Marinere,'), ('2 And he stoppeth one of three:'), ('3 "By thy long gray beard and thy glittering eye'), ('4 "Now wherefore stoppest me?');
2. Test result
mysql> -- Match the word the, count the number of occurrences mysql> select regexp_extract(a,'\bthe\b','') a,regexp_count(a,'\bthe\b',' ') c from t_regexp; + ------------------------------------- + ------ + | a | c | + ------------------------------------- + ------ + | THE, THE, the, the, the, the, the, the, the | 9 | | THE,THE | 2 | | | 0 | | the, the | 2 | | the, the | 2 | | the, the | 2 | | the | 1 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | + ------------------------------------- + ------ + 12 rows in set (0.00 sec) mysql> -- Match the lowercase word the, and count the number of occurrences mysql> select regexp_extract(a,'\bthe\b','c') a,regexp_count(a,'\bthe\b',\ 'c') c from t_regexp; + ----------------------------- + ------ + | a | c | + ----------------------------- + ------ + | the, the, the, the, the, the, the | 7 | | | 0 | | | 0 | | the, the | 2 | | the, the | 2 | | the, the | 2 | | the | 1 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | + ----------------------------- + ------ + 12 rows in set (0.00 sec) mysql> -- All words matched by multi-line pattern, count the number of words mysql> select regexp_extract(a,'\w + ','') a, regexp_count(a,'\w + ','m') c from t_regexp; + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------+------ + | a | c | + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------+------ + | THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS,ARGUMENT,How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country ,towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical,Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange ,things, that, befell, and, in, what, manner, the, Ancyent, Marinere, came, back, to, his, own, Country, I, 1, It, is, an, ancyent, Marinere, 2, And ,he,stoppeth,one,of,three,3,By,thy,long,grey,beard,and,thy,glittering,eye,4,Now,wherefore,stoppest,me | 95 | | THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS | 9 | | ARGUMENT | 1 | | How, a, Ship, having, passed, the, Line, was, driven, by, Storms, to, the, cold, Country | 15 | | towards, the, South, Pole, and, how, from, thence, she, made, her, course, to, the, tropical | 15 | | Latitude, of, the, Great, Pacific, Ocean, and, of, the, strange, things, that, befell | 13 | | and, in, what, manner, the, Ancyent, Marinere, came, back, to, his, own, Country | 13 | | I | 1 | | 1,It,is,an,ancyent,Marinere | 6 | | 2, And, he, stoppeth, one, of, three | 7 | | 3,By,thy,long,grey,beard,and,thy,glittering,eye | 10 | | 4, Now, wherefore, stoppest, me | 5 | + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------+------ + 12 rows in set (0.01 sec) mysql> -- Case-sensitive, multi-line, dotall pattern matching 999, count the number of occurrences mysql> select regexp_extract(a,'999','') a,regexp_count(a,'999','cmn') c from t_regexp; + ------ + ------+ | a | c | + ------ + ------+ | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | | | 0 | + ------ + ------+ 12 rows in set (0.00 sec) mysql> -- returns the occurrence of the word the mysql> select regexp_extract_index(a,'\bthe\b',0,'') a from t_regexp; + ------------------------------------+ | a | + ------------------------------------+ | 1,13,86,119,144,203,228,260,311 | | 1,13 | | NULL | | 26,59 | | 9,68 | | 13,45 | | 20 | | NULL | | NULL | | NULL | | NULL | | NULL | + ------------------------------------+ 12 rows in set (0.00 sec) mysql> -- returns the position where the lowercase word the appears (after) mysql> select regexp_extract_index(a,'\bthe\b',1,'c') a from t_regexp; + ----------------------------+ | a | + ----------------------------+ | 89,122,147,206,231,263,314 | | NULL | | NULL | | 29,62 | | 12,71 | | 16,48 | | 23 | | NULL | | NULL | | NULL | | NULL | | NULL | + ----------------------------+ 12 rows in set (0.00 sec) mysql> -- Multi-line pattern matching, returns the position of all words mysql> select regexp_extract_index(a,'\w + ',0,'m') a from t_regexp; + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- ----------- + | a | + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- ----------- + 1,5,10,13,17,25,35,38,44,51,61,65,67,72,79,86,90,95,99,106,109,116,119,123,128,136,144,148,154,160,164,168,173,180,184,1 89,193,200,203,207,216,225,228,232,238,246,253,257,260,264,272,279,284,292,296,299,304,311,315,323,332,337,342,345,349,353, 362,365,373,376,379,382,390,400,411,415,418,427,431,434,441,450,453,457,462,467,473,477,481,492,496,508,512,522,531 | |1,5,10,13,17,25,35,38,44| | 1 | |1,5,7,12,19,26,30,35,39,46,49,56,59,63,68| |1,9,13,19,25,29,33,38,45,49,54,58,65,68,72| |1,10,13,17,23,31,38,42,45,49,57,64,69| |1,5,8,13,20,24,32,41,46,51,54,58,62| | 1 | |1,9,12,15,18,26| |1,12,16,19,28,32,35| |1,10,13,17,22,27,33,37,41,52| |1,13,17,27,36| + ------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- ----------- + 12 rows in set (0.00 sec) mysql> -- Case-sensitive, multi-line, dotall pattern matching, returns the position where 999 appears mysql> select regexp_extract_index(a,'999',1,'cmn') a from t_regexp; + ------+ | a | + ------+ | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | | NULL | + ------+ 12 rows in set (0.00 sec)