1/*
2 * This file is part of the syndication library
3 *
4 * Copyright (C) 2006 Frank Osterfeld <osterfeld@kde.org>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef SYNDICATION_TOOLS_H
24#define SYNDICATION_TOOLS_H
25
26#include <syndication/person.h>
27#include "ksyndication_export.h"
28
29
30#include <QtCore/QString>
31
32#include <ctime>
33
34class QByteArray;
35class QString;
36
37namespace Syndication {
38
/**
 * Date formats understood by the date parsing functions.
 */
enum DateFormat
{
    /**
     * ISO 8601 extended format.
     * (date: "2003-12-13", datetime: "2003-12-13T18:30:02.25",
     * datetime with timezone: "2003-12-13T18:30:02.25+01:00")
     */
    ISODate,
    /** RFC 822 (e.g. "Sat, 07 Sep 2002 00:00:01 GMT"). */
    RFCDate
};
49
50/**
51 * parses a date string in ISO 8601 extended format.
52 * (date: "2003-12-13",datetime: "2003-12-13T18:30:02.25",
53 * datetime with timezone: "2003-12-13T18:30:02.25+01:00")
54 *
55 * @param str a string in ISO 8601 format
56 * @return parsed date in seconds since epoch, 0 if no date could
57 * be parsed from the string.
58 */
59//KDE5: uint, not time_t
60SYNDICATION_EXPORT
61time_t parseISODate(const QString& str);
62
63/**
64 * parses a date string as defined in RFC 822.
65 * (Sat, 07 Sep 2002 00:00:01 GMT)
66 *
67 * @param str a string in RFC 822 format
68 * @return parsed date in seconds since epoch, 0 if no date could
69 * be parsed from the string.
70 */
71//KDE5: uint, not time_t
72SYNDICATION_EXPORT
73time_t parseRFCDate(const QString& str);
74
75/**
76 * parses a date string in ISO (see parseISODate()) or RFC 822 (see
77 * parseRFCDate()) format.
78 * It tries both parsers and returns the first valid parsing result found (or 0
79 * otherwise).
80 * To speed up parsing, you can give a hint which format you expect.
81 * The method will try the corresponding parser first then.
82 *
83 * @param str a date string
84 * @param hint the expected format
85 * @return parsed date in seconds since epoch, 0 if no date could
86 * be parsed from the string.
87 */
88//KDE5: uint, not time_t
89SYNDICATION_EXPORT
90time_t parseDate(const QString& str, DateFormat hint=RFCDate);
91
92
93/**
94 * @internal
95 * returns a string representation of a datetime.
96 * this is used internally to create debugging output.
97 *
98 * @param date the date to convert
99 * @return string representation of the date, or a null string if
100 * @c date is 0
101 */
102//KDE5: uint, not time_t
103SYNDICATION_EXPORT
104QString dateTimeToString(time_t date);
105
106/**
107 * resolves entities to respective unicode chars.
108 *
109 * @param str a string
110 */
111SYNDICATION_EXPORT
112QString resolveEntities(const QString& str);
113
114/**
115 * replaces the characters &lt; >, &, ", '
116 * with &amp;lt; &amp;gt; &amp;amp;, &amp;quot; &amp;apos;.
117 * @param str the string to escape
118 */
119SYNDICATION_EXPORT
120QString escapeSpecialCharacters(const QString& str);
121
122/**
123 * replaces newlines ("\n") by &lt;br/>
124 * @param str string to convert
125 */
126SYNDICATION_EXPORT
127QString convertNewlines(const QString& str);
128
129/**
130 * converts a plain text string to HTML
131 *
132 * @param plainText a string in plain text.
133 */
134SYNDICATION_EXPORT
135QString plainTextToHtml(const QString& plainText);
136
137/**
138 * converts a HTML string to plain text
139 *
140 * @param html string in HTML format
141 * @return stripped text
142 */
143SYNDICATION_EXPORT
144QString htmlToPlainText(const QString& html);
145
146/**
147 * guesses whether a string contains plain text or HTML
148 *
149 * @param str the string in unknown format
150 * @return @c true if the heuristic thinks it's HTML, @c false
151 * if thinks it is plain text
152 */
153SYNDICATION_EXPORT
154bool isHtml(const QString& str);
155
156/**
157 * guesses whether a string contains (HTML) markup or not. This
158 * implements not an exact check for valid HTML markup, but a
159 * simple (and relatively fast) heuristic.
160 *
161 * @param str the string that might or might not contain markup
162 * @return @c true if the heuristic thinks it contains markup, @c false
163 * if thinks it is markup-free plain text
164 */
165SYNDICATION_EXPORT
166bool stringContainsMarkup(const QString& str);
167
168/**
169 * Ensures HTML formatting for a string.
170 * guesses via isHtml() if @c str contains HTML or plain text, and returns
171 * plainTextToHtml(str) if it thinks it is plain text, or the unmodified @c str
172 * otherwise.
173 *
174 * @param str a string with unknown content
175 * @return string as HTML (as long as the heuristics work)
176 */
177SYNDICATION_EXPORT
178QString normalize(const QString& str);
179
180/**
181 * normalizes a string based on feed-wide properties of tag content.
182 * It is based on the assumption that all items in a feed encode their
183 * title/description content in the same way (CDATA or not, plain text
184 * vs. HTML). isCDATA and containsMarkup are determined once by the feed,
185 * and then passed to this method.
186 *
187 * The returned string contains HTML, with special characters &lt;, >,
188 * &, ", and ' escaped, and all other entities resolved.
189 * Whitespace is collapsed, relevant whitespace is replaced by respective
190 * HTML tags (&lt;br/>).
191 *
192 * @param str a string
193 * @param isCDATA whether the feed uses CDATA for the tag @c str was read from
194 * @param containsMarkup whether the feed uses HTML markup in the
195 * tag @c str was read from.
196 * @return string as HTML (as long as the heuristics work)
197 */
198SYNDICATION_EXPORT
199QString normalize(const QString& str, bool isCDATA, bool containsMarkup);
200
201/**
202 * Parses a person object from a string by identifying name and email address
203 * in the string. Currently detected variants are:
204 * "foo@bar.com", "Foo", "Foo &lt;foo@bar.com>", "foo@bar.com (Foo)".
205 *
206 * @param str the string to parse the person from.
207 * @return a Person object containing the parsed information.
208 */
209SYNDICATION_EXPORT
210PersonPtr personFromString(const QString& str);
211
212/**
213 * @internal
214 * calculates a hash value for a string
215 */
216unsigned int calcHash(const QString& str);
217
218/**
219 * @internal
220 * calculates a hash value for a byte array
221 */
222unsigned int calcHash(const QByteArray& array);
223
224/**
225 * @internal
226 * calculates a md5 checksum for a string
227 */
228QString calcMD5Sum(const QString& str);
229
230//@cond PRIVATE
231/**
232 * @internal
233 * used internally to represent element types
234 */
235struct ElementType
236{
237 ElementType(const QString& localnamep,
238 const QString& nsp=QString()); // implicit
239
240 bool operator==(const ElementType& other) const;
241
242 QString ns;
243 QString localname;
244};
245//@endcond
246
247} // namespace Syndication
248
249#endif // SYNDICATION_TOOLS_H
250