I am doing a project on search using Lucene.Net. We have created an index which contains 100 000 documents with 5 fields. But while searching I'm unable to track my correct record. Can anybody help me? Why is that so?
My code looks like this
// Question snippet: opens the on-disk Lucene index, builds a multi-field query
// plus a filter on "description", and runs the search into a collector.
List<int> ids = new List<int>();
List<Hits> hitList = new List<Hits>();
List<Document> results = new List<Document>();
// Zero-based offset of the requested page; only referenced by the
// commented-out code further down.
int startPage = (pageIndex.Value - 1) * pageSize.Value;
// NOTE(review): this is a verbatim (@) string, so each "\\" stays as two
// backslashes in the path — confirm that is intended.
string indexFileLocation = @"c:\\ResourceIndex\\"; //Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "ResourceIndex");
var fsDirectory = FSDirectory.Open(new DirectoryInfo(indexFileLocation));
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
// Second argument is the reader's read-only flag.
IndexReader indexReader = IndexReader.Open(fsDirectory, true);
Searcher indexSearch = new IndexSearcher(indexReader);
//ids.AddRange(this.SearchPredicates(indexSearch, startPage, pageSize, query));
/*Searching From the ResourceIndex*/
// The two arrays are passed in matching order: productId -> "productId",
// languagelds -> "resourceLanguageIds", query -> "externalIdentifier".
Query resourceQuery = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29,
new string[] { productId.ToString(), languagelds, query },
new string[] { "productId", "resourceLanguageIds", "externalIdentifier" },
analyzer);
// NOTE(review): the term text is the query wrapped in literal double-quote
// characters. A TermQuery is presumably matched as-is (not parsed/analyzed),
// so the quotes would be part of the term — confirm against how
// "description" was indexed.
TermQuery descriptionQuery = new TermQuery(new Term("description", '"'+query+'"'));
//TermQuery identifierQuery = new TermQuery(new Term("externalIdentifier", query));
BooleanQuery filterQuery = new BooleanQuery();
filterQuery.Add(descriptionQuery, BooleanClause.Occur.MUST);
//filterQuery.Add(identifierQuery,BooleanClause.Occur.MUST_NOT);
// The description query is applied as a filter on the search rather than
// being combined into resourceQuery.
Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(filterQuery));
// NOTE(review): 100 is presumably the maximum number of hits collected — confirm.
TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
//Hits resourceHit = indexSearch.Search(resourceQuery, filter);
indexSearch.Search(resourceQuery, filter, collector);
ScoreDoc[] hits = collector.TopDocs().scoreDocs;
//for (int i = startPage; i <= pageSize && i < resourceHit.Length(); i++)
//{
// ids.Add(Convert.ToInt32(resourceHit.Doc(i).GetField("id")));
//}
// Walk the collected hits and build a "Score / Field" display string for each.
for (int i = 0; i < hits.Length; i++)
{
    int docId = hits[i].doc;
    float score = hits[i].score;
    // Load the document for this hit from the searcher.
    Lucene.Net.Documents.Document doc = indexSearch.Doc(docId);
    // doc.Get("id") reads the "id" field's value from the loaded document.
    string result = "Score: " + score.ToString() +
        " Field: " + doc.Get("id");
}
You're calling Document.Get("id"), which returns the value of a stored field. It won't work unless the field was indexed with Field.Store.YES.
You could use the FieldCache if you've got the field indexed without analyzing (Field.Index.NOT_ANALYZED) or using the KeywordAnalyzer. (Meaning one term per field and document.)
You'll need to use the innermost reader for the FieldCache to work optimally. Here's a code paste from "FieldCache with frequently updated index", which uses the FieldCache properly to read an integer value from the id field.
// Demo values, use existing code somewhere here.
var directory = FSDirectory.Open(new DirectoryInfo("index"));
var reader = IndexReader.Open(directory, readOnly: true);
var documentId = 1337;
// Grab all subreaders.
var subReaders = new List<IndexReader>();
ReaderUtil.GatherSubReaders(subReaders, reader);
// Translate the top-level document id into (subreader, id within subreader).
// A subreader's own ids run from 0 to MaxDoc() - 1, so an id equal to
// MaxDoc() already belongs to the next subreader; the comparison therefore
// has to be strict (id < MaxDoc()) for the current subreader to be chosen.
var subReaderId = documentId;
IndexReader subReader = null;
foreach (var sub in subReaders)
{
    var maxDoc = sub.MaxDoc();
    if (subReaderId < maxDoc)
    {
        subReader = sub;
        break;
    }
    // Not in this subreader: rebase the id and try the next one.
    subReaderId -= maxDoc;
}
if (subReader == null)
{
    // The id is beyond the last subreader (or the index has no subreaders).
    throw new System.InvalidOperationException(
        "Document id " + documentId + " is outside the range of the index.");
}
// Read the cached integer values of the "id" field for that subreader and
// pick the entry for the rebased document id.
var values = FieldCache_Fields.DEFAULT.GetInts(subReader, "id");
var value = values[subReaderId];
精彩评论