0
点赞
收藏
分享

微信扫一扫

使用Lucene-Spatial实现集成地理位置的全文检索


Lucene通过Spatial包提供了对基于地理位置的全文检索的支持,最典型的应用场景就是:“搜索中关村附近1公里内的火锅店,并按远近排序”。使用Lucene-Spatial添加对地理位置的支持,和之前普通文本搜索主要有两点区别:

        1. 将坐标信息转化为笛卡尔层,建立索引

 

private void indexLocation(Document document, JSONObject jo)  
throws Exception {  
  
double longitude = jo.getDouble("longitude");  
double latitude = jo.getDouble("latitude");  
  
new Field("lat", NumericUtils  
            .doubleToPrefixCoded(latitude), Field.Store.YES,  
            Field.Index.NOT_ANALYZED));  
new Field("lng", NumericUtils  
            .doubleToPrefixCoded(longitude), Field.Store.YES,  
            Field.Index.NOT_ANALYZED));  
  
for (int tier = startTier; tier <= endTier; tier++) {  
new CartesianTierPlotter(tier, projector,  
                CartesianTierPlotter.DEFALT_FIELD_PREFIX);  
final double boxId = ctp.getTierBoxId(latitude, longitude);  
new Field(ctp.getTierFieldName(), NumericUtils  
                .doubleToPrefixCoded(boxId), Field.Store.YES,  
                Field.Index.NOT_ANALYZED_NO_NORMS));  
    }  
}



        2. 搜索时,使用DistanceQueryBuilder构造距离过滤器(通过getDistanceFilter()获取),用于过滤结果并按距离排序

 

// Build a distance query centred on (latitude, longitude) with a radius of
// `miles`, using the "lat"/"lng" fields and the tier fields written at index time.
DistanceQueryBuilder dq = new DistanceQueryBuilder(latitude,
        longitude, miles, "lat", "lng",
        CartesianTierPlotter.DEFALT_FIELD_PREFIX, true, startTier,
        endTier);
// Sort the hits by the distance computed by the query's distance filter.
DistanceFieldComparatorSource dsort = new DistanceFieldComparatorSource(
        dq.getDistanceFilter());
Sort sort = new Sort(new SortField("geo_distance", dsort));



      下面是基于Lucene3.2.0和JUnit4.8.2的完整代码。

 

<dependencies>  
<dependency>  
<groupId>junit</groupId>  
<artifactId>junit</artifactId>  
<version>4.8.2</version>  
<type>jar</type>  
<scope>test</scope>  
</dependency>  
<dependency>  
<groupId>org.apache.lucene</groupId>  
<artifactId>lucene-core</artifactId>  
<version>3.2.0</version>  
<type>jar</type>  
<scope>compile</scope>  
</dependency>  
<dependency>  
<groupId>org.apache.lucene</groupId>  
<artifactId>lucene-spatial</artifactId>  
<version>3.2.0</version>  
<type>jar</type>  
<scope>compile</scope>  
</dependency>  
<dependency>  
<groupId>org.json</groupId>  
<artifactId>json</artifactId>  
<version>20100903</version>  
<type>jar</type>  
<scope>compile</scope>  
</dependency>  
</dependencies>



 

 

        首先准备测试用的数据:

 


{"id":12,"title":"时尚码头美容美发热烫特价","longitude":116.3838183,"latitude":39.9629015}  
{"id":17,"title":"审美个人美容美发套餐","longitude":116.386564,"latitude":39.966102}  
{"id":23,"title":"海底捞吃300送300","longitude":116.38629,"latitude":39.9629573}  
{"id":26,"title":"仅98元!享原价335元李老爹","longitude":116.3846175,"latitude":39.9629125}  
{"id":29,"title":"都美造型烫染美发护理套餐","longitude":116.38629,"latitude":39.9629573}  
{"id":30,"title":"仅售55元!原价80元的老舍茶馆相声下午场","longitude":116.0799914,"latitude":39.9655391}  
{"id":33,"title":"仅售55元!原价80元的新笑声客栈早场","longitude":116.0799914,"latitude":39.9655391}  
{"id":34,"title":"仅售39元(红色礼盒)!原价80元的平谷桃","longitude":116.0799914,"latitude":39.9655391}  
{"id":46,"title":"仅售38元!原价180元地质礼堂白雪公主","longitude":116.0799914,"latitude":39.9655391}  
{"id":49,"title":"仅99元!享原价342.7元自助餐","longitude":116.0799914,"latitude":39.9655391}  
{"id":58,"title":"桑海教育暑期学生报名培训九折优惠券","longitude":116.0799914,"latitude":39.9655391}  
{"id":59,"title":"全国发货:仅29元!贝玲妃超模粉红高光光","longitude":116.0799914,"latitude":39.9655391}  
{"id":65,"title":"海之屿生态水族用品店抵用券","longitude":116.0799914,"latitude":39.9655391}  
{"id":67,"title":"小区东门时尚烫染个人护理美发套餐","longitude":116.3799914,"latitude":39.9655391}  
{"id":74,"title":"《郭德纲相声专辑》CD套装","longitude":116.0799914,"latitude":39.9655391}



     根据上面的测试数据,编写测试用例,分别搜索坐标(116.3838183,39.9629015)3千米以内的“美发”和全部内容,分别得到的结果应该是4条和6条。

 

 

import static org.junit.Assert.assertEquals;  
import static org.junit.Assert.fail;  
  
import java.util.List;  
  
import org.junit.Test;  
  
public class LuceneSpatialTest {  
      
private static LuceneSpatial spatialSearcher = new LuceneSpatial();  
  
@Test  
public void testSearch() {  
try {  
long start = System.currentTimeMillis();  
"美发", 116.3838183, 39.9629015, 3.0);  
            System.out.println(results.size()  
"个匹配结果,共耗时 "  
"毫秒。\n");  
4, results.size());  
catch (Exception e) {  
"Exception occurs...");  
            e.printStackTrace();  
        }  
    }  
  
@Test  
public void testSearchWithoutKeyword() {  
try {  
long start = System.currentTimeMillis();  
null, 116.3838183, 39.9629015, 3.0);  
            System.out.println( results.size()  
"个匹配结果,共耗时 "  
"毫秒.\n");  
6, results.size());  
catch (Exception e) {  
"Exception occurs...");  
            e.printStackTrace();  
        }  
    }  
}



         下面是LuceneSpatial类,在构造函数中初始化变量和创建索引:

 

public class LuceneSpatial {  
  
private Analyzer analyzer;  
private IndexWriter writer;  
private FSDirectory indexDirectory;  
private IndexSearcher indexSearcher;  
private IndexReader indexReader;  
private String indexPath = "c:/lucene-spatial";  
  
// Spatial  
private IProjector projector;  
private CartesianTierPlotter ctp;  
public static final double RATE_MILE_TO_KM = 1.609344; //英里和公里的比率  
public static final String LAT_FIELD = "lat";  
public static final String LON_FIELD = "lng";  
private static final double MAX_RANGE = 15.0; // 索引支持的最大范围,单位是千米  
private static final double MIN_RANGE = 3.0;  // 索引支持的最小范围,单位是千米  
private int startTier;  
private int endTier;  
  
/**
 * Creates the searcher and runs {@code init()}.
 *
 * <p>Any exception raised during initialisation is printed and not rethrown,
 * so construction itself never throws a checked exception.
 */
public LuceneSpatial() {
    try {
        init();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
  
private void init() throws Exception {  
        initializeSpatialOptions();  
  
new StandardAnalyzer(Version.LUCENE_32);  
  
new File(indexPath);  
  
boolean isNeedCreateIndex = false;  
  
if (path.exists() && !path.isDirectory())  
throw new Exception("Specified path is not a directory");  
  
if (!path.exists()) {  
            path.mkdirs();  
true;  
        }  
  
new File(indexPath));  
  
//建立索引  
if (isNeedCreateIndex) {  
new IndexWriterConfig(  
                    Version.LUCENE_32, analyzer);  
            indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);  
new IndexWriter(indexDirectory, indexWriterConfig);  
            buildIndex();  
        }  
  
true);  
new IndexSearcher(indexReader);  
  
    }  
  
/**
 * Sets up the projector and derives the range of Cartesian tiers to index.
 *
 * <p>{@code startTier} is the best-fit tier for {@code MAX_RANGE} and
 * {@code endTier} the best-fit tier for {@code MIN_RANGE}; both ranges are
 * converted from kilometres to miles before being passed to {@code bestFit}.
 */
@SuppressWarnings("deprecation")
private void initializeSpatialOptions() {
    projector = new SinusoidalProjector();
    ctp = new CartesianTierPlotter(0, projector,
            CartesianTierPlotter.DEFALT_FIELD_PREFIX);
    startTier = ctp.bestFit(MAX_RANGE / RATE_MILE_TO_KM);
    endTier = ctp.bestFit(MIN_RANGE / RATE_MILE_TO_KM);
}
  
  
  
/**
 * Converts a distance in miles to whole metres.
 *
 * @param miles the distance in miles
 * @return the distance in metres, truncated to an {@code int}
 */
private int mile2Meter(double miles) {
    final double kilometres = miles * RATE_MILE_TO_KM;
    return (int) (kilometres * 1000);
}
  
/**
 * Converts a distance in kilometres to miles.
 *
 * @param km the distance in kilometres
 * @return the distance in miles
 */
private double km2Mile(double km) {
    final double miles = km / RATE_MILE_TO_KM;
    return miles;
}

              创建索引的具体实现:

 

private void buildIndex() {  
null;  
try {  
//逐行添加测试数据到索引中,测试数据文件和源文件在同一个目录下  
new BufferedReader(new InputStreamReader(  
class.getResourceAsStream("data")));  
null;  
while ((line = br.readLine()) != null) {  
new JSONObject(line));  
        }  
  
        writer.commit();  
catch (Exception e) {  
        e.printStackTrace();  
finally {  
if (br != null) {  
try {  
                br.close();  
catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
}  
  
private void index(JSONObject jo) throws Exception {  
new Document();  
  
new Field("id", jo.getString("id"), Field.Store.YES,  
            Field.Index.ANALYZED));  
  
new Field("title", jo.getString("title"), Field.Store.YES,  
            Field.Index.ANALYZED));  
  
//将位置信息添加到索引中  
    indexLocation(doc, jo);  
  
    writer.addDocument(doc);  
}  
  
private void indexLocation(Document document, JSONObject jo)  
throws Exception {  
  
double longitude = jo.getDouble("longitude");  
double latitude = jo.getDouble("latitude");  
  
new Field("lat", NumericUtils  
            .doubleToPrefixCoded(latitude), Field.Store.YES,  
            Field.Index.NOT_ANALYZED));  
new Field("lng", NumericUtils  
            .doubleToPrefixCoded(longitude), Field.Store.YES,  
            Field.Index.NOT_ANALYZED));  
  
for (int tier = startTier; tier <= endTier; tier++) {  
new CartesianTierPlotter(tier, projector,  
                CartesianTierPlotter.DEFALT_FIELD_PREFIX);  
final double boxId = ctp.getTierBoxId(latitude, longitude);  
new Field(ctp.getTierFieldName(), NumericUtils  
                .doubleToPrefixCoded(boxId), Field.Store.YES,  
                Field.Index.NOT_ANALYZED_NO_NORMS));  
    }  
}



          搜索的具体实现:

 

 

public List<String> search(String keyword, double longitude,  
double latitude, double range) throws Exception {  
new ArrayList<String>();  
  
double miles = km2Mile(range);  
      
new DistanceQueryBuilder(latitude,  
"lat", "lng",  
true, startTier,  
            endTier);  
  
//按照距离排序  
new DistanceFieldComparatorSource(  
            dq.getDistanceFilter());  
new Sort(new SortField("geo_distance", dsort));  
  
    Query query = buildQuery(keyword);  
  
//搜索结果  
    TopDocs hits = indexSearcher.search(query, dq.getFilter(),  
            Integer.MAX_VALUE, sort);  
//获得各条结果相对应的距离  
    Map<Integer, Double> distances = dq.getDistanceFilter()  
            .getDistances();  
  
for (int i = 0; i < hits.totalHits; i++) {  
final int docID = hits.scoreDocs[i].doc;  
  
final Document doc = indexSearcher.doc(docID);  
  
final StringBuilder builder = new StringBuilder();  
"找到了: ")  
"title"))  
", 距离: ")  
                .append(mile2Meter(distances.get(docID)))  
"米。");  
        System.out.println(builder.toString());  
  
        result.add(builder.toString());  
    }  
  
return result;  
}  
  
/**
 * Builds the text query for a search.
 *
 * @param keyword the text to search for in the {@code title} field
 * @return a match-all query when {@code keyword} is {@code null} or empty;
 *         otherwise the parsed keyword query with AND as the default operator
 * @throws Exception if the keyword cannot be parsed
 */
private Query buildQuery(String keyword) throws Exception {
    // Without a keyword, return every result inside the range
    if (keyword == null || keyword.isEmpty()) {
        return new MatchAllDocsQuery();
    }
    QueryParser parser = new QueryParser(Version.LUCENE_32, "title",
            analyzer);

    parser.setDefaultOperator(Operator.AND);

    return parser.parse(keyword);
}



       

 

             执行测试用例,可以得到下面的结果:

 


找到了: 时尚码头美容美发热烫特价, 距离: 0米。  
找到了: 都美造型烫染美发护理套餐, 距离: 210米。  
找到了: 审美个人美容美发套餐, 距离: 426米。  
找到了: 小区东门时尚烫染个人护理美发套餐, 距离: 439米。  
4个匹配结果,共耗时 119毫秒。  
  
找到了: 时尚码头美容美发热烫特价, 距离: 0米。  
找到了: 仅98元!享原价335元李老爹, 距离: 68米。  
找到了: 海底捞吃300送300, 距离: 210米。  
找到了: 都美造型烫染美发护理套餐, 距离: 210米。  
找到了: 审美个人美容美发套餐, 距离: 426米。  
找到了: 小区东门时尚烫染个人护理美发套餐, 距离: 439米。  
6个匹配结果,共耗时 3毫秒.


            参考文献:

 

            Lucene-Spatial的原理介绍:http://www.nsshutdown.com/projects/lucene/whitepaper/locallucene.htm

            GeoHash:http://en.wikipedia.org/wiki/Geohash

            两篇示例(其中大部分代码就来自于这里):

            Spatial search with Lucene            

      Lucene Spatial Example

 

            

     使用 Apache Lucene 和 Solr 进行位置感知搜索

举报

相关推荐

0 条评论