java 敏感词的过滤_JAVA_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > JAVA > java 敏感词的过滤

java 敏感词的过滤

 2013/12/9 18:26:09  VitoCorleoneDemo  程序员俱乐部  我要评论(0)
  • 摘要:最近在研究敏感词的过滤,网上看到有不少算法,我觉得这种算法还是不错的。希望跟大家共勉。不说了,先上代码:import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; public class T
  • 标签:Java

最近在研究敏感词的过滤,网上看到有不少算法,我觉得这种算法还是不错的。希望跟大家共勉。不说了,先上代码:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Keyword (sensitive-word) filter backed by a character trie stored in nested maps.
 *
 * <p>Trie layout: every node is a {@code Map}. A {@code Character} key leads to the child
 * node for that character (itself a map); the {@code String} key {@code "isEnd"} holds
 * {@code "1"} when a keyword ends at that node and {@code "0"} otherwise. The root map
 * returned by {@link #addKeywords(List)} has no {@code "isEnd"} entry of its own.
 *
 * <p>Matching works on UTF-16 {@code char} units and is case-sensitive.
 */
public class T {
    /** Match mode 1: stop at the shortest keyword that starts at the probed position. */
    private static final int MATCH_SHORTEST = 1;

    /** Map key under which each trie node stores its end-of-keyword marker. */
    private static final String END_KEY = "isEnd";
    /** Marker value: a keyword ends at this node. */
    private static final String END_YES = "1";
    /** Marker value: no keyword ends at this node (yet). */
    private static final String END_NO = "0";

    /** Match mode used by {@link #getTxtKeyWords(Map, String)}: 1 = shortest, 2 = longest. */
    private static final int DEFAULT_MATCH_TYPE = MATCH_SHORTEST;

    /**
     * Builds a keyword trie from the given list.
     *
     * <p>Each keyword is trimmed first; {@code null} entries and entries that are empty
     * after trimming are skipped.
     *
     * @param keywords the keywords to index; {@code null} is treated as an empty list
     * @return the root node of a newly built trie (never {@code null})
     */
    public static Map<Object, Object> addKeywords(List<String> keywords) {
        Map<Object, Object> root = new HashMap<Object, Object>();
        if (keywords == null) {
            return root;
        }
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String key = raw.trim();
            Map<Object, Object> node = root;
            for (int j = 0; j < key.length(); j++) {
                Character ch = key.charAt(j);
                Object child = node.get(ch);
                Map<Object, Object> next;
                if (child instanceof Map) {
                    // Every value stored under a Character key was created below,
                    // so it is always a Map<Object, Object>.
                    @SuppressWarnings("unchecked")
                    Map<Object, Object> existing = (Map<Object, Object>) child;
                    next = existing;
                } else {
                    next = new HashMap<Object, Object>();
                    next.put(END_KEY, END_NO);
                    node.put(ch, next);
                }
                node = next;
            }
            // Only mark an end when at least one character was consumed; the root
            // itself must never be flagged, otherwise an empty keyword would match.
            if (node != root) {
                node.put(END_KEY, END_YES);
            }
        }
        return root;
    }

    /**
     * Resets a keyword trie by removing every entry from its root map.
     *
     * @param keysMap the trie root to clear; {@code null} is ignored
     */
    public static void clearKeywords(Map<?, ?> keysMap) {
        if (keysMap != null) {
            keysMap.clear();
        }
    }

    /**
     * Checks whether a keyword starts exactly at position {@code begin} of {@code txt}.
     *
     * @param keysMap trie root, non-null
     * @param txt     text to probe, non-null
     * @param begin   index in {@code txt} at which the keyword must start
     * @param flag    1 returns the length of the shortest keyword found; any other value
     *                (conventionally 2) returns the length of the longest one
     * @return the length of the matched keyword, or 0 when none starts at {@code begin}
     */
    private static int checkKeyWords(Map<?, ?> keysMap, String txt, int begin, int flag) {
        Map<?, ?> node = keysMap;
        int longest = 0;
        for (int i = begin; i < txt.length(); i++) {
            Object child = node.get(txt.charAt(i));
            if (!(child instanceof Map)) {
                // No trie edge for this character: the walk ends here.
                break;
            }
            node = (Map<?, ?>) child;
            // Constant-first equals tolerates nodes that lack an "isEnd" entry.
            if (END_YES.equals(node.get(END_KEY))) {
                int matched = i - begin + 1;
                if (flag == MATCH_SHORTEST) {
                    return matched;
                }
                longest = matched;
            }
        }
        return longest;
    }

    /**
     * Returns the distinct keywords that occur in {@code txt}.
     *
     * <p>The text is scanned left to right using the shortest-match mode; after a hit the
     * scan resumes behind the matched keyword, so overlapping occurrences are not reported.
     *
     * @param keysMap trie root as produced by {@link #addKeywords(List)}
     * @param txt     text to scan
     * @return the set of matched keywords; empty when either argument is {@code null}
     */
    public static Set<String> getTxtKeyWords(Map<?, ?> keysMap, String txt) {
        Set<String> found = new HashSet<String>();
        if (keysMap == null || txt == null) {
            return found;
        }
        int pos = 0;
        while (pos < txt.length()) {
            int len = checkKeyWords(keysMap, txt, pos, DEFAULT_MATCH_TYPE);
            if (len > 0) {
                found.add(txt.substring(pos, pos + len));
                pos += len;
            } else {
                pos++;
            }
        }
        return found;
    }

    /**
     * Tells whether {@code txt} contains at least one keyword, without collecting matches.
     *
     * @param keysMap trie root as produced by {@link #addKeywords(List)}
     * @param txt     text to scan
     * @return {@code true} if any keyword occurs in {@code txt}; {@code false} otherwise,
     *         including when either argument is {@code null}
     */
    public static boolean isContentKeyWords(Map<?, ?> keysMap, String txt) {
        if (keysMap == null || txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            // Shortest match suffices: only the existence of a hit matters here.
            if (checkKeyWords(keysMap, txt, i, MATCH_SHORTEST) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Small demo: builds a trie from two keywords, prints it, then prints whether the
     * sample text contains a keyword and which keywords were found.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> keywords = new ArrayList<String>();
        keywords.add("你妹");
        keywords.add("页面加载");
        Map<Object, Object> keysMap = addKeywords(keywords);

        String txt = "不允许说脏话,尤其是你妹这个词。庞大的页面加载的过程中";
        System.out.println(keysMap);

        boolean boo = isContentKeyWords(keysMap, txt);

        System.out.println(boo);

        Set<String> set = getTxtKeyWords(keysMap, txt);

        for (String str : set) {
            System.out.println(str);
        }

        System.out.println(set);
    }
}

代码可以直接运行,大家可以看看运行结果,也可以按照自己的思路进行修改。如果有什么疑问,随时可以发表意见。

?

发表评论
用户名: 匿名