1 | /** @file matchspy.h |
2 | * @brief Declarations of the MatchSpy base class and ValueCountMatchSpy. |
3 | */ |
4 | /* Copyright (C) 2007,2008,2009,2010,2012 Olly Betts |
5 | * Copyright (C) 2007,2009 Lemur Consulting Ltd |
6 | * Copyright (C) 2010 Richard Boulton |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License |
19 | * along with this program; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | #ifndef XAPIAN_INCLUDED_MATCHSPY_H |
24 | #define XAPIAN_INCLUDED_MATCHSPY_H |
25 | |
26 | #include <xapian/base.h> |
27 | #include <xapian/enquire.h> |
28 | #include <xapian/termiterator.h> |
29 | #include <xapian/visibility.h> |
30 | |
31 | #include <string> |
32 | #include <map> |
33 | #include <set> |
34 | #include <string> |
35 | #include <vector> |
36 | |
37 | namespace Xapian { |
38 | |
39 | class Document; |
40 | class Registry; |
41 | |
42 | /** Abstract base class for match spies. |
43 | * |
44 | * The subclasses will generally accumulate information seen during the match, |
45 | * to calculate aggregate functions, or other profiles of the matching |
46 | * documents. Copying and assignment of match spies are disabled. |
47 | */ |
48 | class XAPIAN_VISIBILITY_DEFAULT MatchSpy { |
49 | private: |
50 | /// Don't allow assignment (the assignment operator is private). |
51 | void operator=(const MatchSpy &); |
52 | |
53 | /// Don't allow copying (the copy constructor is private). |
54 | MatchSpy(const MatchSpy &); |
55 | |
56 | protected: |
57 | /// Default constructor, needed by subclass constructors. |
58 | MatchSpy() {} |
59 | |
60 | public: |
61 | /** Virtual destructor, because we have virtual methods. */ |
62 | virtual ~MatchSpy(); |
63 | |
64 | /** Register a document with the match spy. |
65 | * |
66 | * This is called by the matcher once with each document seen by the |
67 | * matcher during the match process. Note that the matcher will often not |
68 | * see all the documents which match the query, due to optimisations which |
69 | * allow low-weighted documents to be skipped, and allow the match process |
70 | * to be terminated early. |
71 | * |
72 | * @param doc The document seen by the match spy. |
73 | * @param wt The weight of the document. |
74 | */ |
75 | virtual void operator()(const Xapian::Document &doc, |
76 | Xapian::weight wt) = 0; |
77 | |
78 | /** Clone the match spy. |
79 | * |
80 | * The clone should inherit the configuration of the parent, but need not |
81 | * inherit the state, i.e. the clone does not need to be passed |
82 | * information about the results seen by the parent. |
83 | * |
84 | * If you don't want to support the remote backend in your match spy, you |
85 | * can use the default implementation which simply throws |
86 | * Xapian::UnimplementedError. |
87 | * |
88 | * Note that the returned object will be deallocated by Xapian after use |
89 | * with "delete". If you want to handle the deletion in a special way |
90 | * (for example when wrapping the Xapian API for use from another |
91 | * language) then you can define a static <code>operator delete</code> |
92 | * method in your subclass as shown here: |
93 | * http://trac.xapian.org/ticket/554#comment:1 |
94 | */ |
95 | virtual MatchSpy * clone() const; |
96 | |
97 | /** Return the name of this match spy. |
98 | * |
99 | * This name is used by the remote backend. It is passed with the |
100 | * serialised parameters to the remote server so that it knows which class |
101 | * to create. |
102 | * |
103 | * Return the full namespace-qualified name of your class here - if your |
104 | * class is called MyApp::FooMatchSpy, return "MyApp::FooMatchSpy" from |
105 | * this method. |
106 | * |
107 | * If you don't want to support the remote backend in your match spy, you |
108 | * can use the default implementation which simply throws |
109 | * Xapian::UnimplementedError. |
110 | */ |
111 | virtual std::string name() const; |
112 | |
113 | /** Return this object's parameters serialised as a single string. |
114 | * |
115 | * If you don't want to support the remote backend in your match spy, you |
116 | * can use the default implementation which simply throws |
117 | * Xapian::UnimplementedError. |
118 | */ |
119 | virtual std::string serialise() const; |
120 | |
121 | /** Unserialise parameters. |
122 | * |
123 | * This method unserialises parameters serialised by the @a serialise() |
124 | * method and allocates and returns a new object initialised with them. |
125 | * |
126 | * If you don't want to support the remote backend in your match spy, you |
127 | * can use the default implementation which simply throws |
128 | * Xapian::UnimplementedError. |
129 | * |
130 | * Note that the returned object will be deallocated by Xapian after use |
131 | * with "delete". If you want to handle the deletion in a special way |
132 | * (for example when wrapping the Xapian API for use from another |
133 | * language) then you can define a static <code>operator delete</code> |
134 | * method in your subclass as shown here: |
135 | * http://trac.xapian.org/ticket/554#comment:1 |
136 | * |
137 | * @param s A string containing the serialised parameters. |
138 | * @param context Registry object to use for unserialisation to permit |
139 | * MatchSpy subclasses with sub-MatchSpy objects to be |
140 | * implemented. |
141 | */ |
142 | virtual MatchSpy * unserialise(const std::string & s, |
143 | const Registry & context) const; |
144 | |
145 | /** Serialise the results of this match spy. |
146 | * |
147 | * If you don't want to support the remote backend in your match spy, you |
148 | * can use the default implementation which simply throws |
149 | * Xapian::UnimplementedError. |
150 | */ |
151 | virtual std::string serialise_results() const; |
152 | |
153 | /** Unserialise some results, and merge them into this match spy. |
154 | * |
155 | * The order in which results are merged should not be significant, since |
156 | * this order is not specified (and will vary depending on the speed of |
157 | * the search in each sub-database). |
158 | * |
159 | * If you don't want to support the remote backend in your match spy, you |
160 | * can use the default implementation which simply throws |
161 | * Xapian::UnimplementedError. |
162 | * |
163 | * @param s A string containing the serialised results. |
164 | */ |
165 | virtual void merge_results(const std::string & s); |
166 | |
167 | /** Return a string describing this object. |
168 | * |
169 | * This default implementation returns a generic answer, to avoid forcing |
170 | * those deriving their own MatchSpy subclasses to implement |
171 | * this (they may not care what get_description() gives for their |
172 | * subclass). |
173 | */ |
174 | virtual std::string get_description() const; |
175 | }; |
176 | |
177 | |
178 | /** Class for counting the frequencies of values in the matching documents. |
179 | */ |
180 | class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchSpy { |
181 | public: |
182 | struct Internal; |
183 | |
184 | #ifndef SWIG // SWIG doesn't need to know about the internal class |
185 | struct XAPIAN_VISIBILITY_DEFAULT Internal |
186 | : public Xapian::Internal::RefCntBase |
187 | { |
188 | /// The slot to count. |
189 | Xapian::valueno slot; |
190 | |
191 | /// Total number of documents seen by the match spy. |
192 | Xapian::doccount total; |
193 | |
194 | /// The values seen so far, together with their frequency. |
195 | std::map<std::string, Xapian::doccount> values; |
196 | |
197 | Internal() : slot(Xapian::BAD_VALUENO), total(0) {} /**< Construct with slot set to BAD_VALUENO and a zero total. */ |
198 | Internal(Xapian::valueno slot_) : slot(slot_), total(0) {} /**< Construct for slot @a slot_ with a zero total. */ |
199 | }; |
200 | #endif |
201 | |
202 | protected: |
203 | Xapian::Internal::RefCntPtr<Internal> internal; /**< Reference-counted pointer to the Internal state. */ |
204 | |
205 | public: |
206 | /// Construct an empty ValueCountMatchSpy (no Internal object is allocated). |
207 | ValueCountMatchSpy() : internal() {} |
208 | |
209 | /// Construct a MatchSpy which counts the values in the value slot @a slot_. |
210 | ValueCountMatchSpy(Xapian::valueno slot_) |
211 | : internal(new Internal(slot_)) {} |
212 | |
213 | /** Return the total number of documents tallied (0 if there is no internal state). */ |
214 | size_t get_total() const { |
215 | return internal.get() ? internal->total : 0; |
216 | } |
217 | |
218 | /** Get an iterator over the values seen in the slot. |
219 | * |
220 | * Items will be returned in ascending alphabetical order. |
221 | * |
222 | * During the iteration, the frequency of the current value can be |
223 | * obtained with the get_termfreq() method on the iterator. |
224 | */ |
225 | TermIterator values_begin() const; |
226 | |
227 | /** End iterator corresponding to values_begin(). */ |
228 | TermIterator values_end() const { |
229 | return TermIterator(); |
230 | } |
231 | |
232 | /** Get an iterator over the most frequent values seen in the slot. |
233 | * |
234 | * Items will be returned in descending order of frequency. Values with |
235 | * the same frequency will be returned in ascending alphabetical order. |
236 | * |
237 | * During the iteration, the frequency of the current value can be |
238 | * obtained with the get_termfreq() method on the iterator. |
239 | * |
240 | * @param maxvalues The maximum number of values to return. |
241 | */ |
242 | TermIterator top_values_begin(size_t maxvalues) const; |
243 | |
244 | /** End iterator corresponding to top_values_begin(); the argument is ignored. */ |
245 | TermIterator top_values_end(size_t) const { |
246 | return TermIterator(); |
247 | } |
248 | |
249 | /** Implementation of virtual operator(). |
250 | * |
251 | * This implementation tallies values for a matching document. |
252 | * |
253 | * @param doc The document to tally values for. |
254 | * @param wt The weight of the document (ignored by this class). |
255 | */ |
256 | void operator()(const Xapian::Document &doc, Xapian::weight wt); |
257 | /* Overrides of the remaining MatchSpy virtual methods. */ |
258 | virtual MatchSpy * clone() const; |
259 | virtual std::string name() const; |
260 | virtual std::string serialise() const; |
261 | virtual MatchSpy * unserialise(const std::string & s, |
262 | const Registry & context) const; |
263 | virtual std::string serialise_results() const; |
264 | virtual void merge_results(const std::string & s); |
265 | virtual std::string get_description() const; |
266 | }; |
267 | |
268 | } |
269 | |
270 | #endif // XAPIAN_INCLUDED_MATCHSPY_H |
271 | |