1/*
2 * This file is part of the syndication library
3 *
4 * Copyright (C) 2006 Frank Osterfeld <osterfeld@kde.org>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "tools.h"
24#include "personimpl.h"
25
26#include <kcharsets.h>
27#include <kcodecs.h>
28#include <kdatetime.h>
29
30#include <QtCore/QByteArray>
31#include <QtCore/QDateTime>
32#include <QtCore/QRegExp>
33#include <QtCore/QString>
34
35#include <kdebug.h>
36
37namespace Syndication {
38
// Shared KMD5 instance; calcMD5Sum() resets it and reuses it on every call.
// NOTE(review): being a single shared object, concurrent calls to
// calcMD5Sum() would presumably interfere with each other -- confirm
// whether callers are single-threaded.
KMD5 md5Machine;
40
41unsigned int calcHash(const QString& str)
42{
43 return calcHash(str.toUtf8());
44}
45
46unsigned int calcHash(const QByteArray& array)
47{
48 if (array.isEmpty())
49 {
50 return 0;
51 }
52 else
53 {
54 const char* s = array.data();
55 unsigned int hash = 5381;
56 int c;
57 while ( ( c = *s++ ) ) hash = ((hash << 5) + hash) + c; // hash*33 + c
58 return hash;
59 }
60}
61
62#include <KDebug>
63
64static time_t toTimeT(KDateTime& kdt)
65{
66 if (kdt.isValid()) {
67 if (kdt.isDateOnly()) {
68 kdt.setTimeSpec(KDateTime::UTC);
69 kdt.setTime(QTime(12, 0));
70 }
71 return kdt.toTime_t();
72 } else
73 return 0;
74}
75
76time_t parseISODate(const QString& str)
77{
78 KDateTime kdt = KDateTime::fromString(str, KDateTime::ISODate);
79 return toTimeT(kdt);
80}
81
82time_t parseRFCDate(const QString& str)
83{
84 KDateTime kdt = KDateTime::fromString(str, KDateTime::RFCDate);
85 return toTimeT(kdt);
86}
87
88time_t parseDate(const QString& str, DateFormat hint)
89{
90 if (str.isEmpty())
91 return 0;
92
93 if (hint == RFCDate)
94 {
95 time_t t = parseRFCDate(str);
96 return t != 0 ? t : parseISODate(str);
97 }
98 else
99 {
100 time_t t = parseISODate(str);
101 return t != 0 ? t : parseRFCDate(str);
102 }
103}
104
105QString dateTimeToString(time_t date)
106{
107 if (date == 0)
108 return QString();
109
110 QDateTime dt;
111 dt.setTime_t(date);
112 return dt.toUTC().toString();
113}
114
115QString calcMD5Sum(const QString& str)
116{
117 md5Machine.reset();
118 md5Machine.update(str.toUtf8());
119 return QLatin1String(md5Machine.hexDigest().constData());
120}
121
122QString resolveEntities(const QString& str)
123{
124 return KCharsets::resolveEntities(str);
125}
126
127QString escapeSpecialCharacters(const QString& strp)
128{
129 QString str(strp);
130 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
131 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
132 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
133 str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
134 str.replace(QLatin1Char('\''), QLatin1String("&apos;"));
135 return str.trimmed();
136}
137
138QString convertNewlines(const QString& strp)
139{
140 QString str(strp);
141 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
142 return str;
143}
144
145QString plainTextToHtml(const QString& plainText)
146{
147 QString str(plainText);
148 str.replace(QLatin1Char('&'), QLatin1String("&amp;"));
149 str.replace(QLatin1Char('\"'), QLatin1String("&quot;"));
150 str.replace(QLatin1Char('<'), QLatin1String("&lt;"));
151 //str.replace(QLatin1Char('>'), QLatin1String("&gt;"));
152 str.replace(QLatin1Char('\n'), QLatin1String("<br/>"));
153 return str.trimmed();
154}
155
156QString htmlToPlainText(const QString& html)
157{
158 QString str(html);
159 //TODO: preserve some formatting, such as line breaks
160 str.remove(QRegExp(QLatin1String("<[^>]*>"))); // remove tags
161 str = resolveEntities(str);
162 return str.trimmed();
163}
164
165namespace {
166 static QRegExp tagRegExp;
167 static bool tagRegExpSet = false;
168}
169
170bool stringContainsMarkup(const QString& str)
171{
172 //check for entities
173 if (str.contains(QRegExp(QLatin1String("&[a-zA-Z0-9#]+;"))))
174 return true;
175
176 const int ltc = str.count(QLatin1Char('<'));
177 if (ltc == 0)
178 return false;
179
180 if (!tagRegExpSet)
181 {
182 tagRegExp = QRegExp(QLatin1String("<\\w+.*/?>"));
183 tagRegExpSet = true;
184 }
185 return str.contains(tagRegExp);
186}
187
188bool isHtml(const QString& str)
189{
190 //check for entities
191 if (str.contains(QRegExp(QLatin1String("&[a-zA-Z0-9#]+;"))))
192 return true;
193
194 const int ltc = str.count(QLatin1Char('<'));
195 if (ltc == 0)
196 return false;
197
198 if (!tagRegExpSet)
199 {
200 tagRegExp = QRegExp(QLatin1String("<\\w+.*/?>"));
201 tagRegExpSet = true;
202 }
203 if (str.contains(tagRegExp))
204 return true;
205
206 return false;
207}
208
209QString normalize(const QString& str)
210{
211 return isHtml(str) ? str.trimmed() : plainTextToHtml(str);
212}
213
214QString normalize(const QString& strp, bool isCDATA, bool containsMarkup)
215{
216 if (containsMarkup)
217 return strp.trimmed();
218 else
219 {
220 if (isCDATA)
221 {
222 QString str = resolveEntities(strp);
223 str = escapeSpecialCharacters(str);
224 str = convertNewlines(str);
225 str = str.trimmed();
226 return str;
227 }
228 else
229 {
230 QString str = escapeSpecialCharacters(strp);
231 str = str.trimmed();
232 return str;
233 }
234 }
235}
236
237PersonPtr personFromString(const QString& strp)
238{
239 QString str = strp.trimmed();
240 if (str.isEmpty())
241 return PersonPtr(new PersonImpl());
242
243 str = resolveEntities(str);
244 QString name;
245 QString uri;
246 QString email;
247
248 // look for something looking like a mail address ("foo@bar.com",
249 // "<foo@bar.com>") and extract it
250
251 QRegExp remail(QLatin1String("<?([^@\\s<]+@[^>\\s]+)>?")); // FIXME: user "proper" regexp,
252 // search kmail source for it
253
254 int pos = remail.indexIn(str);
255 if (pos != -1)
256 {
257 QString all = remail.cap(0);
258 email = remail.cap(1);
259 str.remove(all); // remove mail address
260 }
261
262 // replace "mailto", "(", ")" (to be extended)
263 email.remove(QLatin1String("mailto:"));
264 email.remove(QRegExp(QLatin1String("[\\(\\)]")));
265
266 // simplify the rest and use it as name
267
268 name = str.simplified();
269
270 // after removing the email, str might have
271 // the format "(Foo M. Bar)". We cut off
272 // parentheses if there are any. However, if
273 // str is of the format "Foo M. Bar (President)",
274 // we should not cut anything.
275
276 QRegExp rename(QLatin1String("^\\(([^\\)]*)\\)"));
277
278 if (rename.exactMatch(name))
279 {
280 name = rename.cap(1);
281 }
282
283 name = name.isEmpty() ? QString() : name;
284 email = email.isEmpty() ? QString() : email;
285 uri = uri.isEmpty() ? QString() : uri;
286
287 if (name.isEmpty() && email.isEmpty() && uri.isEmpty())
288 return PersonPtr(new PersonImpl());
289
290 return PersonPtr(new PersonImpl(name, uri, email));
291}
292
293ElementType::ElementType(const QString& localnamep,
294 const QString& nsp) : ns(nsp), localname(localnamep)
295{
296}
297
298bool ElementType::operator==(const ElementType& other) const
299{
300 return localname == other.localname && ns == other.ns;
301}
302
303} // namespace Syndication
304
305
306