1/** \file stem.h
2 * \brief stemming algorithms
3 */
4/* Copyright (C) 2005,2007,2010 Olly Betts
5 * Copyright (C) 2010 Evgeny Sizikov
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#ifndef XAPIAN_INCLUDED_STEM_H
23#define XAPIAN_INCLUDED_STEM_H
24
25#include <xapian/base.h>
26#include <xapian/visibility.h>
27
28#include <string>
29
30namespace Xapian {
31
32/// Class representing a stemming algorithm implementation.
33struct XAPIAN_VISIBILITY_DEFAULT StemImplementation
34 : public Xapian::Internal::RefCntBase
35{
36 /// Virtual destructor.
37 virtual ~StemImplementation();
38
39 /// Stem the specified word.
40 virtual std::string operator()(const std::string & word) = 0;
41
42 /// Return a string describing this object.
43 virtual std::string get_description() const = 0;
44};
45
46/// Class representing a stemming algorithm.
47class XAPIAN_VISIBILITY_DEFAULT Stem {
48 public:
49 /// @private @internal Reference counted internals.
50 Xapian::Internal::RefCntPtr<StemImplementation> internal;
51
52 /// Copy constructor.
53 Stem(const Stem & o);
54
55 /// Assignment.
56 void operator=(const Stem & o);
57
58 /** Construct a Xapian::Stem object which doesn't change terms.
59 *
60 * Equivalent to Stem("none").
61 */
62 Stem();
63
64 /** Construct a Xapian::Stem object for a particular language.
65 *
66 * @param language Either the English name for the language
67 * or the two letter ISO639 code.
68 *
69 * The following language names are understood (aliases follow the
70 * name):
71 *
72 * - none - don't stem terms
73 * - danish (da)
74 * - dutch (nl)
75 * - english (en) - Martin Porter's 2002 revision of his stemmer
76 * - english_lovins (lovins) - Lovin's stemmer
77 * - english_porter (porter) - Porter's stemmer as described in
78 * his 1980 paper
79 * - finnish (fi)
80 * - french (fr)
81 * - german (de)
82 * - german2 - Normalises umlauts and &szlig;
83 * - hungarian (hu)
84 * - italian (it)
85 * - kraaij_pohlmann - A different Dutch stemmer
86 * - norwegian (nb, nn, no)
87 * - portuguese (pt)
88 * - romanian (ro)
89 * - russian (ru)
90 * - spanish (es)
91 * - swedish (sv)
92 * - turkish (tr)
93 *
94 * @exception Xapian::InvalidArgumentError is thrown if
95 * language isn't recognised.
96 */
97 explicit Stem(const std::string &language);
98
99 /** Construct a Xapian::Stem object with a user-provided stemming algorithm.
100 *
101 * You can subclass Xapian::StemImplementation to implement your own
102 * stemming algorithm (or to wrap a third-party algorithm) and then wrap
103 * your implementation in a Xapian::Stem object to pass to the Xapian API.
104 *
105 * @param p The user-subclassed StemImplementation object. This
106 * is reference counted, and so will be automatically
107 * deleted by the Xapian::Stem wrapper when no longer
108 * required.
109 */
110 explicit Stem(StemImplementation * p);
111
112 /// Destructor.
113 ~Stem();
114
115 /** Stem a word.
116 *
117 * @param word a word to stem.
118 * @return the stem
119 */
120 std::string operator()(const std::string &word) const;
121
122 /// Return a string describing this object.
123 std::string get_description() const;
124
125 /** Return a list of available languages.
126 *
127 * Each stemmer is only included once in the list (not once for
128 * each alias). The name included is the English name of the
129 * language.
130 *
131 * The list is returned as a string, with language names separated by
132 * spaces. This is a static method, so a Xapian::Stem object is not
133 * required for this operation.
134 */
135 static std::string get_available_languages();
136};
137
138}
139
140#endif // XAPIAN_INCLUDED_STEM_H
141