简历打分排序ITeye - AG环亚娱乐集团

简历打分排序ITeye

2019年04月04日15时11分04秒 | 作者: 晗昱 | 标签: 排序,简历,体系 | 浏览: 2667

     部门给我找了点事做，帮忙挑选简历。估计是觉得我加班少了。为了不浪费时间，写了个简单的简历内容打分排序，今后直接排序转发。

     代码如下:

     

package com.lu;
import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class LuceneUtils {
 * 获取分词成果
 * @param 输入的字符串
 * @param 分词器
 * @return 分词成果
 // getWords("体系供给HTTP效劳给其他体系用于实时数据交互,选用WebService与总行进行实时数据交互",
 // analyzer).forEach(System.out::println);
 public static List String getWords(String str, Analyzer analyzer) {
 List String result = new ArrayList String 
 TokenStream stream = null;
 try {
 stream = analyzer.tokenStream("content", new StringReader(str));
 CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
 stream.reset();
 while (stream.incrementToken()) {
 result.add(attr.toString());
 } catch (IOException e) {
 e.printStackTrace();
 } finally {
 if (stream != null) {
 try {
 stream.close();
 } catch (IOException e) {
 e.printStackTrace();
 return result;
 * 运用 Map按value进行排序
 * @param map
 * @return
 public static Map String, Integer sortMapByValue(Map String, Integer scoreMap) {
 if (scoreMap  null || scoreMap.isEmpty()) {
 return null;
 Map String, Integer sortedMap = new LinkedHashMap String, Integer 
 List Map.Entry String, Integer entryList = new ArrayList Map.Entry String, Integer (scoreMap.entrySet());
 entryList.stream().sorted(new Comparator Map.Entry String, Integer () {
 @Override
 public int compare(Entry String, Integer o1, Entry String, Integer o2) {
 return Integer.compare(o1.getValue(), o2.getValue());
 }).forEach(new Consumer Entry String, Integer () {
 @Override
 public void accept(Entry String, Integer t) {
 sortedMap.put(t.getKey(), t.getValue());
 return sortedMap;
 public static Optional String checkGetContent(String content) {
 String regx = "(^[\\s|\\S]*?)Content-Type:text/html;charset=\"([\\s|\\S]*?)\"[\\s|\\S]*?Content-Transfer-Encoding:base64([\\S|\\s]*?)boundary_([\\S|\\s]*?$)";
 Pattern compile = Pattern.compile(regx);
 Matcher matcher = compile.matcher(content);
 if (matcher.matches()) {
 if (matcher.groupCount() 0) {
 String matchCharset = matcher.group(2);
 String matchContent = matcher.group(3);
 return Optional.of(decodeStr(matchContent, matchCharset));
 return Optional.of(content);
 public static String decodeStr(String encodeStr, String charset) {
 byte[] b = encodeStr.getBytes();
 Base64 base64 = new Base64();
 b = base64.decode(b);
 String s;
 try {
 s = new String(b, charset);
 return s;
 } catch (UnsupportedEncodingException e) {
 s = new String(b);
 return s;
 public static Optional Directory openFSDirectory(String indexPath) {
 Path path = Paths.get(indexPath);
 try {
 FSDirectory fsDirectory = FSDirectory.open(path);
 return Optional.of(fsDirectory);
 } catch (IOException e) {
 e.printStackTrace();
 return Optional.empty();

 

package com.lu;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
public class ContentScoror {
 String indexPath = "lucene\\Index\\";
 Map String, Integer scoreMap = new HashMap ();
 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
 public void eval(IndexSearcher searcher, Query query, Integer weight) throws IOException {
 TopDocs topDocs = searcher.search(query, 1000);
 ScoreDoc[] hits = topDocs.scoreDocs;
 for (int i = 0; i hits.length; i++) {
 ScoreDoc hit = hits[i];
 Document hitDoc = searcher.doc(hit.doc);
 System.out.println("(" + hit.doc + "-" + hit.score + ")" + " name:" + hitDoc.get("name"));
 String filename = hitDoc.get("name");
 Integer score = scoreMap.get(filename);
 // 成果依照得分来排序。主要由 关键字的个数和权值来决议
 if (null  score) {
 score = 0;
 scoreMap.put(filename, 0);
 scoreMap.put(filename, score + weight);
 public void checkIndexAndScore(Directory directory, Analyzer analyzer) {
 try {
 IndexReader ir = DirectoryReader.open(directory);
 IndexSearcher searcher = new IndexSearcher(ir);
 QueryParser parse = new QueryParser("content", analyzer);
 Query query = parse.parse("统招本科");
 eval(searcher, query, 1);
 query = parse.parse("计算机数学信息办理");
 eval(searcher, query, 1);
 query = parse.parse("Java Web App");
 eval(searcher, query, 1);
 query = parse.parse("struts");
 eval(searcher, query, 1);
 query = parse.parse("mybatis");
 eval(searcher, query, 1);
 query = parse.parse("ibatis");
 eval(searcher, query, 1);
 query = parse.parse("hibernate");
 eval(searcher, query, 1);
 query = parse.parse("spring");
 eval(searcher, query, 1);
 query = parse.parse("调优");
 eval(searcher, query, 2);
 query = parse.parse("webservice");
 eval(searcher, query, 1);
 query = parse.parse("axis");
 eval(searcher, query, 2);
 query = parse.parse("xfire");
 eval(searcher, query, 1);
 query = parse.parse("cxf");
 eval(searcher, query, 1);
 query = parse.parse("jax-ws jws");
 eval(searcher, query, 1);
 query = parse.parse("xml json");
 eval(searcher, query, 1);
 query = parse.parse("oracle mysql sqlserver db2");
 eval(searcher, query, 1);
 query = parse.parse("redis memcached");
 eval(searcher, query, 1);
 query = parse.parse("组长办理规划架构剖析");
 eval(searcher, query, 1);
 Query pq = new PhraseQuery("content", "训练", "组织");
 eval(searcher, pq, *);
 ir.close();
 } catch (IOException e) {
 e.printStackTrace();
 } catch (ParseException e) {
 e.printStackTrace();
 public void doScore() {
 Optional Directory dir = LuceneUtils.openFSDirectory(indexPath);
 if (dir.isPresent()) {
 checkIndexAndScore(dir.get(), analyzer);
 public void showResult() {
 LuceneUtils.sortMapByValue(scoreMap).forEach((k, v) - System.out.println(k + " " + v));
 public static void main(String[] args) {
 ContentScoror fie = new ContentScoror();
 fie.doScore();
 fie.showResult();

 

 

package com.lu;
import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
public class FileIndexCreator {
 String indexPath = "lucene\\Index\\";
 String contentFilePath = "content";
 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
 public void addDoc(IndexWriter iw, File f) throws IOException {
 String str = FileUtils.readFileToString(f);
 Document doc = new Document();
 doc.add(new StringField("name", f.getName(), Field.Store.YES));
 doc.add(new TextField("content", LuceneUtils.checkGetContent(str).get(), Field.Store.YES));
 iw.addDocument(doc);
 public void content(IndexWriter iw) {
 File file = new File(contentFilePath);
 File[] listFiles = file.listFiles();
 Stream.of(listFiles).forEach(f - {
 try {
 addDoc(iw, f);
 } catch (IOException e) {
 e.printStackTrace();
 public void createIndex() {
 // create index
 Optional Directory dir = LuceneUtils.openFSDirectory(indexPath);
 if (dir.isPresent()) {
 // 也能够存放到内存
 // Directory directory = new RAMDirectory();
 IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
 IndexWriter iw = null;
 try {
 iw = new IndexWriter(dir.get(), iwc);
 content(iw);
 iw.commit();
 iw.close();
 } catch (IOException e) {
 e.printStackTrace();
 public static void main(String[] args) {
 // Analyzer analyzer = new SmartChineseAnalyzer();
 // getWords("体系供给HTTP效劳给其他体系用于实时数据交互,选用WebService与总行进行实时数据交互",
 // analyzer).forEach(System.out::println);
 FileIndexCreator fie = new FileIndexCreator();
 fie.createIndex();

 

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>l.l.h</groupId>
	<artifactId>domjj</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<dependencies>
		<!-- <dependency><groupId>pull-parser</groupId><artifactId>pull-parser</artifactId>
		<version>2</version></dependency> -->
		<dependency>
			<groupId>xml-resolver</groupId>
			<artifactId>xml-resolver</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>pull-parser</groupId>
			<artifactId>pull-parser</artifactId>
			<version>2.1.10</version>
		</dependency>
		<dependency>
			<groupId>org.dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>2.0.0-RC1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>5.3.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
			<version>5.3.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>5.3.1</version>
		</dependency>
		<!-- 高亮 -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>5.3.1</version>
		</dependency>
		<!-- 中文分词器 SmartChineseAnalyzer -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-smartcn</artifactId>
			<version>5.3.1</version>
		</dependency>
		<!-- 文件操作jar包 -->
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.4</version>
		</dependency>
		<dependency>
			<groupId>commons-codec</groupId>
			<artifactId>commons-codec</artifactId>
			<version>1.9</version>
		</dependency>
	</dependencies>
</project>

 

版权声明
本文来源于网络,版权归原作者所有,其内容与观点不代表AG环亚娱乐集团立场。转载文章仅为传播更有价值的信息,如采编人员采编有误或者版权原因,请与我们联系,我们核实后立即修改或删除。

猜您喜欢的文章

阅读排行

  • 1

    简历打分排序ITeye

    排序,简历,体系
  • 2

    如何用Redlock完成分布式锁ITeye

    分布式,完成,获取
  • 3
  • 4

    java 批量推送 iosITeye

    推送,测验,内容
  • 5
  • 6
  • 7
  • 8

    递归算法和文件行列算法ITeye

    文件,行列,文件夹
  • 9

    链表结构ITeye

    结点,保存,删去
  • 10