1 | /* |
2 | * This file is part of the syndication library |
3 | * |
4 | * Copyright (C) 2006 Frank Osterfeld <osterfeld@kde.org> |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #ifndef SYNDICATION_TOOLS_H |
24 | #define SYNDICATION_TOOLS_H |
25 | |
26 | #include <syndication/person.h> |
27 | #include "ksyndication_export.h" |
28 | |
29 | |
30 | #include <QtCore/QString> |
31 | |
32 | #include <ctime> |
33 | |
34 | class QByteArray; |
35 | class QString; |
36 | |
37 | namespace Syndication { |
38 | |
39 | /** Date formats supported by the date parsers; also used as the format hint of parseDate(). */ |
40 | |
41 | enum DateFormat |
42 | { |
43 | ISODate, /**< ISO 8601 extended format. |
44 | * (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25", |
45 | * datetime with time zone: "2003-12-13T18:30:02.25+01:00") |
46 | */ |
47 | RFCDate /**< RFC 822 format (e.g. "Sat, 07 Sep 2002 00:00:01 GMT"). */ |
48 | }; |
49 | |
50 | /** |
51 | * Parses a date string given in ISO 8601 extended format. |
52 | * (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25", |
53 | * datetime with time zone: "2003-12-13T18:30:02.25+01:00") |
54 | * |
55 | * @param str a string in ISO 8601 extended format |
56 | * @return the parsed date in seconds since the epoch, or 0 if no date |
57 | * could be parsed from the string. |
58 | */ |
59 | //KDE5: use uint instead of time_t |
60 | SYNDICATION_EXPORT |
61 | time_t parseISODate(const QString& str); |
62 | |
63 | /** |
64 | * Parses a date string as defined in RFC 822. |
65 | * (e.g. "Sat, 07 Sep 2002 00:00:01 GMT") |
66 | * |
67 | * @param str a string in RFC 822 format |
68 | * @return the parsed date in seconds since the epoch, or 0 if no date |
69 | * could be parsed from the string. |
70 | */ |
71 | //KDE5: use uint instead of time_t |
72 | SYNDICATION_EXPORT |
73 | time_t parseRFCDate(const QString& str); |
74 | |
75 | /** |
76 | * Parses a date string in ISO 8601 (see parseISODate()) or RFC 822 (see |
77 | * parseRFCDate()) format. |
78 | * Both parsers are tried, and the first valid parsing result found is |
79 | * returned (or 0 otherwise). |
80 | * To speed up parsing, you can pass a hint naming the format you expect; |
81 | * the corresponding parser is then tried first. |
82 | * |
83 | * @param str a date string |
84 | * @param hint the expected format (defaults to RFCDate) |
85 | * @return the parsed date in seconds since the epoch, or 0 if no date |
86 | * could be parsed from the string. |
87 | */ |
88 | //KDE5: use uint instead of time_t |
89 | SYNDICATION_EXPORT |
90 | time_t parseDate(const QString& str, DateFormat hint=RFCDate); |
91 | |
92 | |
93 | /** |
94 | * @internal |
95 | * Returns a string representation of a datetime. |
96 | * This is used internally to create debugging output. |
97 | * |
98 | * @param date the date to convert |
99 | * @return a string representation of the date, or a null string if |
100 | * @c date is 0 |
101 | */ |
102 | //KDE5: use uint instead of time_t |
103 | SYNDICATION_EXPORT |
104 | QString dateTimeToString(time_t date); |
105 | |
106 | /** |
107 | * Resolves entities to their respective Unicode characters. |
108 | * |
109 | * @param str a string that may contain entities |
110 | */ |
111 | SYNDICATION_EXPORT |
112 | QString resolveEntities(const QString& str); |
113 | |
114 | /** |
115 | * Replaces the characters <, >, &, " and ' |
116 | * with &lt;, &gt;, &amp;, &quot; and &apos;, respectively. |
117 | * @param str the string to escape |
118 | */ |
119 | SYNDICATION_EXPORT |
120 | QString escapeSpecialCharacters(const QString& str); |
121 | |
122 | /** |
123 | * Replaces newlines ("\n") with <br/>. |
124 | * @param str the string to convert |
125 | */ |
126 | SYNDICATION_EXPORT |
127 | QString convertNewlines(const QString& str); |
128 | |
129 | /** |
130 | * Converts a plain text string to HTML. |
131 | * |
132 | * @param plainText a string in plain text |
133 | */ |
134 | SYNDICATION_EXPORT |
135 | QString plainTextToHtml(const QString& plainText); |
136 | |
137 | /** |
138 | * Converts an HTML string to plain text. |
139 | * |
140 | * @param html a string in HTML format |
141 | * @return the stripped text |
142 | */ |
143 | SYNDICATION_EXPORT |
144 | QString htmlToPlainText(const QString& html); |
145 | |
146 | /** |
147 | * Guesses whether a string contains plain text or HTML. |
148 | * |
149 | * @param str the string in unknown format |
150 | * @return @c true if the heuristic thinks it is HTML, @c false |
151 | * if it thinks it is plain text |
152 | */ |
153 | SYNDICATION_EXPORT |
154 | bool isHtml(const QString& str); |
155 | |
156 | /** |
157 | * Guesses whether a string contains (HTML) markup or not. This is |
158 | * not an exact check for valid HTML markup, but a |
159 | * simple (and relatively fast) heuristic. |
160 | * |
161 | * @param str the string that might or might not contain markup |
162 | * @return @c true if the heuristic thinks it contains markup, @c false |
163 | * if it thinks it is markup-free plain text |
164 | */ |
165 | SYNDICATION_EXPORT |
166 | bool stringContainsMarkup(const QString& str); |
167 | |
168 | /** |
169 | * Ensures HTML formatting for a string. |
170 | * Guesses via isHtml() whether @c str contains HTML or plain text, and returns |
171 | * plainTextToHtml(str) if it thinks it is plain text, or the unmodified @c str |
172 | * otherwise. |
173 | * |
174 | * @param str a string with unknown content |
175 | * @return the string as HTML (as long as the heuristics work) |
176 | */ |
177 | SYNDICATION_EXPORT |
178 | QString normalize(const QString& str); |
179 | |
180 | /** |
181 | * Normalizes a string based on feed-wide properties of tag content. |
182 | * It relies on the assumption that all items in a feed encode their |
183 | * title/description content in the same way (CDATA or not, plain text |
184 | * vs. HTML). @c isCDATA and @c containsMarkup are determined once by the |
185 | * feed, and then passed to this method. |
186 | * |
187 | * The returned string contains HTML, with the special characters <, >, |
188 | * &, ", and ' escaped, and all other entities resolved. |
189 | * Whitespace is collapsed, and relevant whitespace is replaced by the |
190 | * respective HTML tags (<br/>). |
191 | * |
192 | * @param str a string |
193 | * @param isCDATA whether the feed uses CDATA for the tag @c str was read from |
194 | * @param containsMarkup whether the feed uses HTML markup in the |
195 | * tag @c str was read from. |
196 | * @return the string as HTML (as long as the heuristics work) |
197 | */ |
198 | SYNDICATION_EXPORT |
199 | QString normalize(const QString& str, bool isCDATA, bool containsMarkup); |
200 | |
201 | /** |
202 | * Parses a person object from a string by identifying the name and the email |
203 | * address in the string. Currently detected variants are: |
204 | * "foo@bar.com", "Foo", "Foo <foo@bar.com>", "foo@bar.com (Foo)". |
205 | * |
206 | * @param str the string to parse the person from. |
207 | * @return a PersonPtr for the Person object containing the parsed information. |
208 | */ |
209 | SYNDICATION_EXPORT |
210 | PersonPtr personFromString(const QString& str); |
211 | |
212 | /** |
213 | * @internal |
214 | * Calculates a hash value for a string. Declared without SYNDICATION_EXPORT. |
215 | */ |
216 | unsigned int calcHash(const QString& str); |
217 | |
218 | /** |
219 | * @internal |
220 | * Calculates a hash value for a byte array. Declared without SYNDICATION_EXPORT. |
221 | */ |
222 | unsigned int calcHash(const QByteArray& array); |
223 | |
224 | /** |
225 | * @internal |
226 | * Calculates an MD5 checksum for a string and returns it as a QString. Declared without SYNDICATION_EXPORT. |
227 | */ |
228 | QString calcMD5Sum(const QString& str); |
229 | |
230 | //@cond PRIVATE |
231 | /** |
232 | * @internal |
233 | * Used internally to represent element types; holds a namespace string and a local name. |
234 | */ |
235 | struct ElementType |
236 | { |
237 | ElementType(const QString& localnamep, |
238 | const QString& nsp=QString()); // implicit (not declared explicit): a QString converts to ElementType |
239 | |
240 | bool operator==(const ElementType& other) const; /**< equality comparison with @p other */ |
241 | |
242 | QString ns; /**< namespace; presumably initialized from @c nsp (confirm in the implementation) */ |
243 | QString localname; /**< local name; presumably initialized from @c localnamep (confirm in the implementation) */ |
244 | }; |
245 | //@endcond |
246 | |
247 | } // namespace Syndication |
248 | |
249 | #endif // SYNDICATION_TOOLS_H |
250 | |