DirtyWordsFilter (dirty word filter)

Preface
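This article describes how to filter dirty words with a 256-way trie (each node has one child slot per possible byte value).
Two versions are provided: C++ and Go.
The original code was written long ago and published at https://github.com/progtesttes
This version adds a few small optimizations.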

1: c++ code
dirtywords.h

#if !defined DIRTY_WORDS_H_
#define DIRTY_WORDS_H_
//#include <stdio.h>
#include <string.h>
/// Singleton dirty-word filter backed by a 256-way trie (one child slot per byte value).
///
/// Words are loaded from a text file (one word per line) and matched byte-wise,
/// ignoring ASCII case.
class CFilterDirtyWords
{
private:
    // Construction and destruction are private: use GetInstance() / ReleaseByOwner().
    CFilterDirtyWords();
    ~CFilterDirtyWords();
    // The singleton owns the trie through a raw pointer, so copying is forbidden.
    CFilterDirtyWords(const CFilterDirtyWords&) = delete;
    CFilterDirtyWords& operator=(const CFilterDirtyWords&) = delete;
public:
    /// Returns the process-wide instance, creating it on first use.
    static CFilterDirtyWords* GetInstance();
    /// Destroys this instance (and the trie it owns).
    void ReleaseByOwner() { delete this; }
private:
    /// One trie node: `bend` marks the end of a complete word,
    /// `subtree[b]` is the child reached by byte `b` (null when absent).
    typedef struct _dirtytree
    {
        bool bend;
        struct _dirtytree * subtree[256];
        _dirtytree() : bend(false), subtree() {}
    } DIRTYTREE, *PDIRTYTREE;

    PDIRTYTREE m_phead;                            ///< Root of the loaded trie, or null.
    static CFilterDirtyWords* pFilterDirtyWords;   ///< The singleton instance.
private:
    bool loaddirtywords(const char* filepath);
    bool hasdirtywords(const PDIRTYTREE pHead, const char * pstring);
    void filterdirtywords(const PDIRTYTREE pHead, char * pstring);
    void insertdirtywords(PDIRTYTREE & pHead, const char * pstring);
    void releasedirtytree(PDIRTYTREE pHead);
public:
    /// Loads the word list from `filepath`; a null path selects the default file.
    bool LoadDirtyFile(const char* filepath=nullptr);
    /// Returns true when `lpstr` contains at least one loaded dirty word.
    bool HasDirtyWords(const char* lpstr);
    /// Overwrites every dirty word found in `pstring` with '*' characters, in place.
    void FilterDirtyWords(char * pstring);
};
#endif

dirtywords.cpp

#include <stdio.h>
#include <ctype.h>
#include "dirtywords.h"
#define CONFIG_DIRTY_WORDS "dirtywords.txt"

// Storage for the singleton pointer; it stays NULL until GetInstance() allocates the instance.
CFilterDirtyWords* CFilterDirtyWords::pFilterDirtyWords = NULL;

// Creates a filter with no trie; words are only loaded by LoadDirtyFile().
CFilterDirtyWords::CFilterDirtyWords()
    : m_phead(NULL)
{
}

// Frees the whole trie and clears the singleton pointer so that a later
// GetInstance() call does not hand out a dangling pointer.
CFilterDirtyWords::~CFilterDirtyWords()
{
    releasedirtytree(m_phead);
    m_phead = NULL;
    if (pFilterDirtyWords == this) {
        pFilterDirtyWords = NULL;
    }
}

// Returns the singleton, allocating it on the first call.
// No locking is performed here, so the first call must not race with another one.
CFilterDirtyWords* CFilterDirtyWords::GetInstance()
{
    if (pFilterDirtyWords == NULL) {
        pFilterDirtyWords = new CFilterDirtyWords();
    }
    return pFilterDirtyWords;
}


bool CFilterDirtyWords::LoadDirtyFile(const char* filepath)
{<!-- -->
return loaddirtywords(filepath);
}

bool CFilterDirtyWords::loaddirtywords(const char* filepath)
{<!-- -->
FILE * f = fopen(filepath== nullptr? CONFIG_DIRTY_WORDS : filepath, "r");
if (NULL == f) {<!-- -->
return false;
}
char szbuf[256];
PDIRTYTREE phead = NULL;
while (NULL != fgets(szbuf, 256, f)) {<!-- -->
insertdirtywords(phead, szbuf);
}
fclose(f);
m_phead = phead;
if (NULL == m_phead) {<!-- -->
printf("CFilterDirtyWords::loaddirtywords is NULL"); return false;
}
return true;
// return m_phead?true:false;
}
// Replaces every dirty word occurring in pstring with '*' characters, in place.
//
// pHead:   root of the trie to match against (no-op when null).
// pstring: NUL-terminated, writable text (no-op when null).
//
// Every byte offset is tried as a possible start of a word. The previous
// single-pass scan lost matches that began inside a failed partial match
// (for example the word "aab" inside "aaab").
void CFilterDirtyWords::filterdirtywords(const PDIRTYTREE pHead, char * pstring)
{
    if (!pHead || !pstring) return;

    // One past the last byte known to belong to a dirty word.
    size_t maskEnd = 0;
    for (size_t start = 0; pstring[start] != '\0'; ++start) {
        PDIRTYTREE pTree = pHead;
        for (size_t i = start; pstring[i] != '\0'; ++i) {
            // Cast through unsigned char: passing a negative char to tolower() is undefined.
            const unsigned char ch = static_cast<unsigned char>(
                tolower(static_cast<unsigned char>(pstring[i])));
            pTree = pTree->subtree[ch];
            if (!pTree) break;
            if (pTree->bend && i + 1 > maskEnd) {
                maskEnd = i + 1;   // keep the longest word found so far
            }
        }
        // Only bytes at or before `start` are overwritten, so later scans
        // still read the original text.
        if (start < maskEnd) {
            pstring[start] = '*';
        }
    }
}
// Inserts one word into the trie, lower-casing ASCII letters.
//
// pHead:   trie root; allocated here when it is still null.
// pstring: the word; reading stops at NUL, '\r' or '\n', so a raw line
//          returned by fgets() can be passed directly.
void CFilterDirtyWords::insertdirtywords(PDIRTYTREE & pHead, const char * pstring)
{
    if (!pstring) return;
    // Skip blank lines: inserting an empty word would mark the root itself
    // as the end of a word.
    if (*pstring == '\0' || *pstring == '\r' || *pstring == '\n') return;

    if (!pHead) pHead = new DIRTYTREE;
    PDIRTYTREE pTree = pHead;
    for (const char * p = pstring; *p != '\0' && *p != '\r' && *p != '\n'; ++p) {
        // Cast through unsigned char: passing a negative char to tolower() is undefined.
        const unsigned char ch = static_cast<unsigned char>(
            tolower(static_cast<unsigned char>(*p)));
        if (!pTree->subtree[ch]) pTree->subtree[ch] = new DIRTYTREE;
        pTree = pTree->subtree[ch];
    }
    pTree->bend = true;
}
// Recursively deletes the subtree rooted at pHead (children first, then the node).
// A null pHead is ignored.
void CFilterDirtyWords::releasedirtytree(PDIRTYTREE pHead)
{
    if (!pHead) return;
    for (unsigned int i = 0; i < 256; ++i) {
        releasedirtytree(pHead->subtree[i]);
    }
    delete pHead;
}

// Reports whether pstring contains any word stored in the trie (ASCII case-insensitive).
//
// pHead:   root of the trie (returns false when null).
// pstring: NUL-terminated text to inspect (returns false when null).
//
// Every byte offset is tried as a possible start of a word. The previous
// single-pass scan dropped the byte that caused a mismatch instead of retrying
// it from the root, so e.g. "123" was not found inside "1123".
bool CFilterDirtyWords::hasdirtywords(const PDIRTYTREE pHead, const char * pstring)
{
    if (!pHead || !pstring) return false;

    for (const char * start = pstring; *start != '\0'; ++start) {
        PDIRTYTREE pTree = pHead;
        for (const char * p = start; *p != '\0'; ++p) {
            // Cast through unsigned char: passing a negative char to tolower() is undefined.
            const unsigned char ch = static_cast<unsigned char>(
                tolower(static_cast<unsigned char>(*p)));
            pTree = pTree->subtree[ch];
            if (!pTree) break;
            if (pTree->bend) return true;
        }
    }
    return false;
}

bool CFilterDirtyWords::HasDirtyWords(const char * pstring)
{<!-- -->
return hasdirtywords(m_phead, pstring);
}

// Public wrapper: masks dirty words in pstring in place using the currently loaded trie.
void CFilterDirtyWords::FilterDirtyWords(char * pstring)
{
    filterdirtywords(m_phead, pstring);
}

main.cpp

#include "dirtywords.h"
#include <stdio.h>
// Demo: loads the default word list, probes two strings, then destroys the singleton.
int main() {
    if (CFilterDirtyWords::GetInstance()->LoadDirtyFile()) {
        printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("123")); // 1
        printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("12"));  // 0
    }
    CFilterDirtyWords::GetInstance()->ReleaseByOwner();
    return 0;
}


/*
The content of dirtywords.txt is as follows
132
123
121
1221
1121
*/

operation result

2: golang code
dirtyword.go

package dityword

import (
"bufio"
"io"
"log"
"os"
"strings"
)

// dirtytree is one node of a 256-way trie keyed by byte value.
type dirtytree struct {
	bend    bool            // true when the path from the root to this node is a complete word
	subtree [256]*dirtytree // child for each possible next byte; nil when absent
}

var (
// dirtyhead is the root of the loaded trie; it stays nil until loaddirtywords assigns it.
dirtyhead *dirtytree = nil
)

func loaddirtywords(filename string) bool {<!-- -->

fi, err := os. Open(filename)
if err != nil {<!-- -->
log.Printf("filename=%v Error: %s\\
", filename, err)
return false
}
defer fi. Close()

phead := new(dirtytree)

br := bufio. NewReader(fi)
for {<!-- -->
a, _, c := br. ReadLine()
if c == io.EOF {<!-- -->
break
}
// log.Printf("a=%v \\
",string(a))
l := len(a)
if l < 1 {<!-- -->
continue
}
if l > 256 {<!-- -->
a = a[:256]
}
//fmt.Println(string(a))
insert dirty words (phead, a)
}
dirtyhead = phead
return true
}

func hasdirtywords(phead *dirtytree, str string) bool {<!-- -->
if phead == nil {<!-- -->
return false
}
var pTree *dirtytree = phead
//log.Printf("cmp string=%#v \\
",str)
strlower := []byte(strings. ToLower(string(str)))
l := len([]byte(strlower))
if l < 1 {<!-- -->
return false
}
//log.Printf("cmp ToLower string=%#v \\
",string(strlower))

for i := 0; i < l; i + + {<!-- -->
ch := byte(strlower[i])
if pTree.subtree[ch] != nil {<!-- -->
pTree = pTree. subtree[ch]
if pTree.bend {<!-- -->
return true
}
} else {<!-- -->
pTree = phead
}
}
return false
}

//func filterdirtywords(phead *dirtytree, str string) {
//
//}

func insertdirtywords(phead *dirtytree, str []byte) {<!-- -->

// all lowercase
// log.Printf("org string=%#v \\
",str)
strlower := []byte(strings. ToLower(string(str)))
l := len([]byte(strlower))
if l < 1 {<!-- -->
return
}
// log.Printf("org ToLower string=%#v \\
",string(strlower))
// log.Printf("org ToLower string=%#v \\
",strlower)
if phead == nil {<!-- -->
phead = new(dirtytree)
}
pTree := phead

for i := 0; i < l; i + + {<!-- -->
ch := byte(strlower[i])
if pTree.subtree[ch] == nil {<!-- -->
pTree.subtree[ch] = new(dirtytree)
pTree = pTree. subtree[ch]
}
}
pTree.bend = true
}

//func releasedirtytree(phead *dirtytree) {

//}

//api
func LoadDirtyWordsFile(filename string) bool {<!-- -->
return loaddirtywords(filename)
}

func HasDirtyWords(chstr string) bool {<!-- -->

return hasdirtywords(dirtyhead, chstr)
}

//func FilterDirtyWords(filterstr string) {
//
//}



main.go

package main

import (
"bytes"
"dirtywords/dityword"
"fmt"
"github.com/henrylee2cn/mahonia"
"log"
"os"
"path"
"regexp"
"unicode/utf8"
)

// check reports whether src matches a simple SQL-injection pattern: a single
// quote, "--", a /* ... */ comment, or one of the listed SQL keywords as a
// whole word. It also returns true when the pattern fails to compile.
func check(src string) bool {
	// A raw string keeps the regex escapes (\*, \b, \n, \r) intact; they are
	// not valid escapes inside an interpreted Go string.
	// NOTE(review): "trancate" looks like a typo for "truncate" — confirm before changing the pattern.
	str := `(?:')|(?:--)|(/\*(?:.|[\n\r])*?\*/)|(\b(select|update|and|or|delete|insert|trancate|char|chr|into|substr|ascii|declare|exec|count|master|into|drop|execute)\b)`
	re, err := regexp.Compile(str)
	if err != nil {
		fmt.Println(err.Error())
		return true
	}
	b := re.MatchString(src)
	fmt.Println("llllll", b)
	return b
}

func main() {<!-- -->

//1 read configuration file even
cfgpath,_ := os. Getwd()
filename := path.Join(cfgpath, "ditylist.txt")
if !dityword.LoadDirtyWordsFile(filename) {<!-- -->
os. Exit(1)
}

for {<!-- -->
var input string
fmt.Scanln( &input)
log.Printf("input=%v len=%v \\
", input, len(input))
if utf8.ValidString(input) {<!-- -->

enc := mahonia. NewEncoder("gbk")
gbkstr := enc. ConvertString(input)
log.Printf("gbkstr=%v \\
", []byte(gbkstr))
b := dityword.HasDirtyWords(gbkstr)
usrc := bytes.Runes([]byte(input))
log.Printf("check b=%v uscr=%#v %v\\
", b, usrc, len(usrc))

// 2018/05/26 00:02:12 input=day len=3
// 2018/05/26 00:02:12 gbkstr=[200 213]
// 2018/05/26 00:02:12 check b=true uscr=[]int32{26085} 1

//r, size := utf8. DecodeRuneInString(input)
//fmt.Printf("%c %v\\
", r, size)

// newdata := string(([]byte(input))[size:])
// fmt.Printf("%c %v data=%v \\
", r, size, newdata)
//str = str[size:]
// if data,num := utf8.DecodeRuneInString(input); ok {<!-- -->
// b := dityword. HasDirtyWords(input)
// fmt.Printf("check b=%v \\
",b)
// }

}

}

}

/*
The content of ditylist.txt is as follows
fyou
fky
fyou1
*/

Directory structure and run results

3: The complete project can be uploaded later if needed
If you found this useful, please like and bookmark it