1 | /** \file stem.h |
2 | * \brief stemming algorithms |
3 | */ |
4 | /* Copyright (C) 2005,2007,2010 Olly Betts |
5 | * Copyright (C) 2010 Evgeny Sizikov |
6 | * |
7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License as |
9 | * published by the Free Software Foundation; either version 2 of the |
10 | * License, or (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License |
18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #ifndef XAPIAN_INCLUDED_STEM_H |
23 | #define XAPIAN_INCLUDED_STEM_H |
24 | |
25 | #include <xapian/base.h> |
26 | #include <xapian/visibility.h> |
27 | |
28 | #include <string> |
29 | |
30 | namespace Xapian { |
31 | |
32 | /// Class representing a stemming algorithm implementation. |
33 | struct XAPIAN_VISIBILITY_DEFAULT StemImplementation |
34 | : public Xapian::Internal::RefCntBase |
35 | { |
36 | /// Virtual destructor. |
37 | virtual ~StemImplementation(); |
38 | |
39 | /// Stem the specified word. |
40 | virtual std::string operator()(const std::string & word) = 0; |
41 | |
42 | /// Return a string describing this object. |
43 | virtual std::string get_description() const = 0; |
44 | }; |
45 | |
46 | /// Class representing a stemming algorithm. |
47 | class XAPIAN_VISIBILITY_DEFAULT Stem { |
48 | public: |
49 | /// @private @internal Reference counted internals. |
50 | Xapian::Internal::RefCntPtr<StemImplementation> internal; |
51 | |
52 | /// Copy constructor. |
53 | Stem(const Stem & o); |
54 | |
55 | /// Assignment. |
56 | void operator=(const Stem & o); |
57 | |
58 | /** Construct a Xapian::Stem object which doesn't change terms. |
59 | * |
60 | * Equivalent to Stem("none"). |
61 | */ |
62 | Stem(); |
63 | |
64 | /** Construct a Xapian::Stem object for a particular language. |
65 | * |
66 | * @param language Either the English name for the language |
67 | * or the two letter ISO639 code. |
68 | * |
69 | * The following language names are understood (aliases follow the |
70 | * name): |
71 | * |
72 | * - none - don't stem terms |
73 | * - danish (da) |
74 | * - dutch (nl) |
75 | * - english (en) - Martin Porter's 2002 revision of his stemmer |
76 | * - english_lovins (lovins) - Lovin's stemmer |
77 | * - english_porter (porter) - Porter's stemmer as described in |
78 | * his 1980 paper |
79 | * - finnish (fi) |
80 | * - french (fr) |
81 | * - german (de) |
82 | * - german2 - Normalises umlauts and ß |
83 | * - hungarian (hu) |
84 | * - italian (it) |
85 | * - kraaij_pohlmann - A different Dutch stemmer |
86 | * - norwegian (nb, nn, no) |
87 | * - portuguese (pt) |
88 | * - romanian (ro) |
89 | * - russian (ru) |
90 | * - spanish (es) |
91 | * - swedish (sv) |
92 | * - turkish (tr) |
93 | * |
94 | * @exception Xapian::InvalidArgumentError is thrown if |
95 | * language isn't recognised. |
96 | */ |
97 | explicit Stem(const std::string &language); |
98 | |
99 | /** Construct a Xapian::Stem object with a user-provided stemming algorithm. |
100 | * |
101 | * You can subclass Xapian::StemImplementation to implement your own |
102 | * stemming algorithm (or to wrap a third-party algorithm) and then wrap |
103 | * your implementation in a Xapian::Stem object to pass to the Xapian API. |
104 | * |
105 | * @param p The user-subclassed StemImplementation object. This |
106 | * is reference counted, and so will be automatically |
107 | * deleted by the Xapian::Stem wrapper when no longer |
108 | * required. |
109 | */ |
110 | explicit Stem(StemImplementation * p); |
111 | |
112 | /// Destructor. |
113 | ~Stem(); |
114 | |
115 | /** Stem a word. |
116 | * |
117 | * @param word a word to stem. |
118 | * @return the stem |
119 | */ |
120 | std::string operator()(const std::string &word) const; |
121 | |
122 | /// Return a string describing this object. |
123 | std::string get_description() const; |
124 | |
125 | /** Return a list of available languages. |
126 | * |
127 | * Each stemmer is only included once in the list (not once for |
128 | * each alias). The name included is the English name of the |
129 | * language. |
130 | * |
131 | * The list is returned as a string, with language names separated by |
132 | * spaces. This is a static method, so a Xapian::Stem object is not |
133 | * required for this operation. |
134 | */ |
135 | static std::string get_available_languages(); |
136 | }; |
137 | |
138 | } |
139 | |
140 | #endif // XAPIAN_INCLUDED_STEM_H |
141 | |