Preface
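This article describes how to filter dirty (forbidden) words with a 256-way trie, i.e. a tree in which every node has one child slot per possible byte value.
Two versions are provided: C++ and Go.
The original code was written long ago and is available at https://github.com/progtesttes
This article adds a few small optimizations.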
1: c++ code
dirtywords.h
#if !defined DIRTY_WORDS_H_
#define DIRTY_WORDS_H_

//#include <stdio.h>
#include <string.h>

// CFilterDirtyWords is a singleton that detects and masks "dirty" words.
//
// Words are stored in a 256-way trie: each node has one child slot per
// possible byte value, so lookups work on raw bytes regardless of the text
// encoding. Obtain the instance with GetInstance(), load a word list with
// LoadDirtyFile(), and destroy the instance with ReleaseByOwner().
class CFilterDirtyWords
{
private:
    CFilterDirtyWords();
    ~CFilterDirtyWords();

    // The instance owns the trie through a raw pointer; copying it would
    // lead to a double free, so copying is disabled.
    CFilterDirtyWords(const CFilterDirtyWords&) = delete;
    CFilterDirtyWords& operator=(const CFilterDirtyWords&) = delete;

public:
    // Returns the shared instance, creating it on first use.
    static CFilterDirtyWords* GetInstance();

    // Destroys the instance. The cached singleton pointer is cleared first so
    // that a later GetInstance() creates a fresh object instead of returning
    // a dangling pointer.
    void ReleaseByOwner()
    {
        if (pFilterDirtyWords == this)
        {
            pFilterDirtyWords = nullptr;
        }
        delete this;
    }

private:
    // One trie node.
    typedef struct _dirtytree
    {
        bool bend;                          // true when a word ends at this node
        struct _dirtytree * subtree[256];   // children, indexed by byte value
        _dirtytree()
        {
            bend = false;
            memset(subtree, 0, sizeof(_dirtytree*) * 256);
        }
    } DIRTYTREE, *PDIRTYTREE;

    PDIRTYTREE m_phead;                             // trie root, NULL until a file is loaded
    static CFilterDirtyWords* pFilterDirtyWords;    // the singleton instance

private:
    bool loaddirtywords(const char* filepath);
    bool hasdirtywords(const PDIRTYTREE pHead, const char * pstring);
    void filterdirtywords(const PDIRTYTREE pHead, char * pstring);
    void insertdirtywords(PDIRTYTREE & pHead, const char * pstring);
    // Name matches the definition and call sites in dirtywords.cpp.
    void releasedirtytree(PDIRTYTREE pHead);

public:
    // Loads the word list (one word per line). A null path selects the
    // default file name defined in dirtywords.cpp.
    bool LoadDirtyFile(const char* filepath=nullptr);
    // Returns true when lpstr contains at least one loaded word.
    bool HasDirtyWords(const char* lpstr);
    // Overwrites every occurrence of a loaded word in pstring with '*'.
    void FilterDirtyWords(char * pstring);
};

#endif
dirtywords.cpp
#include <stdio.h> #include <ctype.h> #include "dirtywords.h" #define CONFIG_DIRTY_WORDS "dirtywords.txt" CFilterDirtyWords* CFilterDirtyWords::pFilterDirtyWords = NULL; CFilterDirtyWords::CFilterDirtyWords() {<!-- --> m_phead = NULL; } CFilterDirtyWords::~CFilterDirtyWords() {<!-- --> releasedirtytree(m_phead); } CFilterDirtyWords* CFilterDirtyWords::GetInstance() {<!-- --> if(pFilterDirtyWords == NULL) {<!-- --> pFilterDirtyWords = new CFilterDirtyWords(); } return pFilterDirtyWords; } bool CFilterDirtyWords::LoadDirtyFile(const char* filepath) {<!-- --> return loaddirtywords(filepath); } bool CFilterDirtyWords::loaddirtywords(const char* filepath) {<!-- --> FILE * f = fopen(filepath== nullptr? CONFIG_DIRTY_WORDS : filepath, "r"); if (NULL == f) {<!-- --> return false; } char szbuf[256]; PDIRTYTREE phead = NULL; while (NULL != fgets(szbuf, 256, f)) {<!-- --> insertdirtywords(phead, szbuf); } fclose(f); m_phead = phead; if (NULL == m_phead) {<!-- --> printf("CFilterDirtyWords::loaddirtywords is NULL"); return false; } return true; // return m_phead?true:false; } void CFilterDirtyWords::filterdirtywords(const PDIRTYTREE pHead, char * pstring) {<!-- --> if (!pHead) return; PDIRTYTREE pTree = pHead; unsigned char ch = '\0'; int pos = 0; char * pTemp = pstring; bool bBegin = false; while (*pTemp != '\0') {<!-- --> ch = isupper(*pTemp) ? 
_tolower(*pTemp) : *pTemp; if (pTree->subtree[ch]) {<!-- --> if (!bBegin) {<!-- --> bBegin = true; pos = pTemp - pstring; } pTree = pTree->subtree[ch]; if (pTree->bend) {<!-- --> while (pos <= pTemp - pstring) *(pstring + pos + + ) = '*'; } } else if (bBegin & amp; & amp; pHead->subtree[ch]) {<!-- --> pos = pTemp - pstring; pTree = pHead->subtree[ch]; if (pTree->bend) {<!-- --> while (pos <= pTemp - pstring) *(pstring + pos + + ) = '*'; } } else {<!-- --> pTree = pHead; bBegin = false; } + + pTemp; } } void CFilterDirtyWords::insertdirtywords(PDIRTYTREE & pHead, const char * pstring) {<!-- --> if (!pstring) return; if (!pHead) pHead = new DIRTYTREE; const char * pTemp = (char*)pstring; PDIRTYTREE pTree = pHead; unsigned char ch = '\0'; while (*pTemp != '\0' & amp; & amp; *pTemp != '\r' & amp; & amp; *pTemp != '\\ ') {< !-- --> ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp; if (!pTree->subtree[ch]) pTree->subtree[ch] = new DIRTYTREE; pTree = pTree->subtree[ch]; + + pTemp; } pTree->bend = true; } void CFilterDirtyWords::releasedirtytree(PDIRTYTREE pHead) {<!-- --> if (!pHead) return; for (unsigned int i = 0; i< 256; i ++ ) {<!-- --> releasedirtytree(pHead->subtree[i]); } delete pHead; } bool CFilterDirtyWords::hasdirtywords(const PDIRTYTREE pHead, const char * pstring) {<!-- --> if (!pHead) return false; PDIRTYTREE pTree = pHead; unsigned char ch = '\0'; char * pTemp = (char*)pstring; while (*pTemp != '\0') {<!-- --> ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp; if (pTree->subtree[ch]) {<!-- --> pTree = pTree->subtree[ch]; if (pTree->bend) {<!-- --> return true; } } else {<!-- --> pTree = pHead; } + + pTemp; } return false; } bool CFilterDirtyWords::HasDirtyWords(const char * pstring) {<!-- --> return hasdirtywords(m_phead, pstring); } void CFilterDirtyWords::FilterDirtyWords(char * pstring) {<!-- --> filterdirtywords(m_phead, pstring); }
main.cpp
#include "dirtywords.h"
#include <stdio.h>

// Demo: loads the default word list and prints whether two sample strings
// contain a dirty word. Returns 1 when the word list cannot be loaded.
int main()
{
    CFilterDirtyWords* filter = CFilterDirtyWords::GetInstance();
    int rc = 0;

    if (filter->LoadDirtyFile())
    {
        printf("%d\n", filter->HasDirtyWords("123")); //1
        printf("%d\n", filter->HasDirtyWords("12"));  //0
    }
    else
    {
        // Report the failure instead of exiting silently with success.
        fprintf(stderr, "failed to load the dirty word list\n");
        rc = 1;
    }

    filter->ReleaseByOwner();
    return rc;
}

/*
The content of dirtywords.txt is as follows
132
123
121
1221
1121
*/
Run result
2: golang code
dirtyword.go
package dityword import ( "bufio" "io" "log" "os" "strings" ) //256 trees type dirtytree struct {<!-- --> bend bool subtree [256]*dirtytree } var ( dirtyhead *dirtytree = nil ) func loaddirtywords(filename string) bool {<!-- --> fi, err := os. Open(filename) if err != nil {<!-- --> log.Printf("filename=%v Error: %s\\ ", filename, err) return false } defer fi. Close() phead := new(dirtytree) br := bufio. NewReader(fi) for {<!-- --> a, _, c := br. ReadLine() if c == io.EOF {<!-- --> break } // log.Printf("a=%v \\ ",string(a)) l := len(a) if l < 1 {<!-- --> continue } if l > 256 {<!-- --> a = a[:256] } //fmt.Println(string(a)) insert dirty words (phead, a) } dirtyhead = phead return true } func hasdirtywords(phead *dirtytree, str string) bool {<!-- --> if phead == nil {<!-- --> return false } var pTree *dirtytree = phead //log.Printf("cmp string=%#v \\ ",str) strlower := []byte(strings. ToLower(string(str))) l := len([]byte(strlower)) if l < 1 {<!-- --> return false } //log.Printf("cmp ToLower string=%#v \\ ",string(strlower)) for i := 0; i < l; i + + {<!-- --> ch := byte(strlower[i]) if pTree.subtree[ch] != nil {<!-- --> pTree = pTree. subtree[ch] if pTree.bend {<!-- --> return true } } else {<!-- --> pTree = phead } } return false } //func filterdirtywords(phead *dirtytree, str string) {<!-- --> // //} func insertdirtywords(phead *dirtytree, str []byte) {<!-- --> // all lowercase // log.Printf("org string=%#v \\ ",str) strlower := []byte(strings. ToLower(string(str))) l := len([]byte(strlower)) if l < 1 {<!-- --> return } // log.Printf("org ToLower string=%#v \\ ",string(strlower)) // log.Printf("org ToLower string=%#v \\ ",strlower) if phead == nil {<!-- --> phead = new(dirtytree) } pTree := phead for i := 0; i < l; i + + {<!-- --> ch := byte(strlower[i]) if pTree.subtree[ch] == nil {<!-- --> pTree.subtree[ch] = new(dirtytree) pTree = pTree. 
subtree[ch] } } pTree.bend = true } //func releaseddirtytree(phead *dirtytree) {<!-- --> //} //api func LoadDirtyWordsFile(filename string) bool {<!-- --> return loaddirtywords(filename) } func HasDirtyWords(chstr string) bool {<!-- --> return hasdirtywords(dirtyhead, chstr) } //func FilterDirtyWords(filterstr string) {<!-- --> // //}
main.go
package main import ( "bytes" "dirtywords/dityword" "fmt" "github.com/henrylee2cn/mahonia" "log" "os" "path" "regexp" "unicode/utf8" ) func check(src string) bool {<!-- --> str := "(?:')|(?:--)|(/\*(?:.|[\\ \r])*?\ */)|(\b(select|update|and|or|delete|insert|trancate|char|chr|into|substr|ascii|declare|exec|count|master|into|drop|execute)\b )" //Here changed to " re, err := regexp. Compile(str) if err != nil {<!-- --> fmt.Println(err.Error()) return true } b := re.MatchString(src) fmt.Println("llllll", b) //Print out false. return b } func main() {<!-- --> //1 read configuration file even cfgpath,_ := os. Getwd() filename := path.Join(cfgpath, "ditylist.txt") if !dityword.LoadDirtyWordsFile(filename) {<!-- --> os. Exit(1) } for {<!-- --> var input string fmt.Scanln( &input) log.Printf("input=%v len=%v \\ ", input, len(input)) if utf8.ValidString(input) {<!-- --> enc := mahonia. NewEncoder("gbk") gbkstr := enc. ConvertString(input) log.Printf("gbkstr=%v \\ ", []byte(gbkstr)) b := dityword.HasDirtyWords(gbkstr) usrc := bytes.Runes([]byte(input)) log.Printf("check b=%v uscr=%#v %v\\ ", b, usrc, len(usrc)) // 2018/05/26 00:02:12 input=day len=3 // 2018/05/26 00:02:12 gbkstr=[200 213] // 2018/05/26 00:02:12 check b=true uscr=[]int32{26085} 1 //r, size := utf8. DecodeRuneInString(input) //fmt.Printf("%c %v\\ ", r, size) // newdata := string(([]byte(input))[size:]) // fmt.Printf("%c %v data=%v \\ ", r, size, newdata) //str = str[size:] // if data,num := utf8.DecodeRuneInString(input); ok {<!-- --> // b := dityword. HasDirtyWords(input) // fmt.Printf("check b=%v \\ ",b) // } } } } /* The content of ditylist.txt is as follows fyou fky fyou1 */
Directory structure and running results
3: If there is demand, the complete project will be uploaded later
If you found this useful, please like and bookmark it