1 | /* |
2 | * This file is part of the syndication library |
3 | * |
4 | * Copyright (C) 2006 Frank Osterfeld <osterfeld@kde.org> |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #include "tools.h" |
24 | #include "personimpl.h" |
25 | |
26 | #include <kcharsets.h> |
27 | #include <kcodecs.h> |
28 | #include <kdatetime.h> |
29 | |
30 | #include <QtCore/QByteArray> |
31 | #include <QtCore/QDateTime> |
32 | #include <QtCore/QRegExp> |
33 | #include <QtCore/QString> |
34 | |
35 | #include <kdebug.h> |
36 | |
37 | namespace Syndication { |
38 | |
// Shared MD5 context used by calcMD5Sum(), which resets it before feeding
// in new data.
KMD5 md5Machine;
40 | |
41 | unsigned int calcHash(const QString& str) |
42 | { |
43 | return calcHash(str.toUtf8()); |
44 | } |
45 | |
46 | unsigned int calcHash(const QByteArray& array) |
47 | { |
48 | if (array.isEmpty()) |
49 | { |
50 | return 0; |
51 | } |
52 | else |
53 | { |
54 | const char* s = array.data(); |
55 | unsigned int hash = 5381; |
56 | int c; |
57 | while ( ( c = *s++ ) ) hash = ((hash << 5) + hash) + c; // hash*33 + c |
58 | return hash; |
59 | } |
60 | } |
61 | |
62 | #include <KDebug> |
63 | |
64 | static time_t toTimeT(KDateTime& kdt) |
65 | { |
66 | if (kdt.isValid()) { |
67 | if (kdt.isDateOnly()) { |
68 | kdt.setTimeSpec(KDateTime::UTC); |
69 | kdt.setTime(QTime(12, 0)); |
70 | } |
71 | return kdt.toTime_t(); |
72 | } else |
73 | return 0; |
74 | } |
75 | |
76 | time_t parseISODate(const QString& str) |
77 | { |
78 | KDateTime kdt = KDateTime::fromString(str, KDateTime::ISODate); |
79 | return toTimeT(kdt); |
80 | } |
81 | |
82 | time_t parseRFCDate(const QString& str) |
83 | { |
84 | KDateTime kdt = KDateTime::fromString(str, KDateTime::RFCDate); |
85 | return toTimeT(kdt); |
86 | } |
87 | |
88 | time_t parseDate(const QString& str, DateFormat hint) |
89 | { |
90 | if (str.isEmpty()) |
91 | return 0; |
92 | |
93 | if (hint == RFCDate) |
94 | { |
95 | time_t t = parseRFCDate(str); |
96 | return t != 0 ? t : parseISODate(str); |
97 | } |
98 | else |
99 | { |
100 | time_t t = parseISODate(str); |
101 | return t != 0 ? t : parseRFCDate(str); |
102 | } |
103 | } |
104 | |
105 | QString dateTimeToString(time_t date) |
106 | { |
107 | if (date == 0) |
108 | return QString(); |
109 | |
110 | QDateTime dt; |
111 | dt.setTime_t(date); |
112 | return dt.toUTC().toString(); |
113 | } |
114 | |
115 | QString calcMD5Sum(const QString& str) |
116 | { |
117 | md5Machine.reset(); |
118 | md5Machine.update(str.toUtf8()); |
119 | return QLatin1String(md5Machine.hexDigest().constData()); |
120 | } |
121 | |
122 | QString resolveEntities(const QString& str) |
123 | { |
124 | return KCharsets::resolveEntities(str); |
125 | } |
126 | |
127 | QString escapeSpecialCharacters(const QString& strp) |
128 | { |
129 | QString str(strp); |
130 | str.replace(QLatin1Char('&'), QLatin1String("&" )); |
131 | str.replace(QLatin1Char('\"'), QLatin1String(""" )); |
132 | str.replace(QLatin1Char('<'), QLatin1String("<" )); |
133 | str.replace(QLatin1Char('>'), QLatin1String(">" )); |
134 | str.replace(QLatin1Char('\''), QLatin1String("'" )); |
135 | return str.trimmed(); |
136 | } |
137 | |
138 | QString convertNewlines(const QString& strp) |
139 | { |
140 | QString str(strp); |
141 | str.replace(QLatin1Char('\n'), QLatin1String("<br/>" )); |
142 | return str; |
143 | } |
144 | |
145 | QString plainTextToHtml(const QString& plainText) |
146 | { |
147 | QString str(plainText); |
148 | str.replace(QLatin1Char('&'), QLatin1String("&" )); |
149 | str.replace(QLatin1Char('\"'), QLatin1String(""" )); |
150 | str.replace(QLatin1Char('<'), QLatin1String("<" )); |
151 | //str.replace(QLatin1Char('>'), QLatin1String(">")); |
152 | str.replace(QLatin1Char('\n'), QLatin1String("<br/>" )); |
153 | return str.trimmed(); |
154 | } |
155 | |
156 | QString htmlToPlainText(const QString& html) |
157 | { |
158 | QString str(html); |
159 | //TODO: preserve some formatting, such as line breaks |
160 | str.remove(QRegExp(QLatin1String("<[^>]*>" ))); // remove tags |
161 | str = resolveEntities(str); |
162 | return str.trimmed(); |
163 | } |
164 | |
165 | namespace { |
166 | static QRegExp tagRegExp; |
167 | static bool tagRegExpSet = false; |
168 | } |
169 | |
170 | bool stringContainsMarkup(const QString& str) |
171 | { |
172 | //check for entities |
173 | if (str.contains(QRegExp(QLatin1String("&[a-zA-Z0-9#]+;" )))) |
174 | return true; |
175 | |
176 | const int ltc = str.count(QLatin1Char('<')); |
177 | if (ltc == 0) |
178 | return false; |
179 | |
180 | if (!tagRegExpSet) |
181 | { |
182 | tagRegExp = QRegExp(QLatin1String("<\\w+.*/?>" )); |
183 | tagRegExpSet = true; |
184 | } |
185 | return str.contains(tagRegExp); |
186 | } |
187 | |
188 | bool isHtml(const QString& str) |
189 | { |
190 | //check for entities |
191 | if (str.contains(QRegExp(QLatin1String("&[a-zA-Z0-9#]+;" )))) |
192 | return true; |
193 | |
194 | const int ltc = str.count(QLatin1Char('<')); |
195 | if (ltc == 0) |
196 | return false; |
197 | |
198 | if (!tagRegExpSet) |
199 | { |
200 | tagRegExp = QRegExp(QLatin1String("<\\w+.*/?>" )); |
201 | tagRegExpSet = true; |
202 | } |
203 | if (str.contains(tagRegExp)) |
204 | return true; |
205 | |
206 | return false; |
207 | } |
208 | |
209 | QString normalize(const QString& str) |
210 | { |
211 | return isHtml(str) ? str.trimmed() : plainTextToHtml(str); |
212 | } |
213 | |
214 | QString normalize(const QString& strp, bool isCDATA, bool containsMarkup) |
215 | { |
216 | if (containsMarkup) |
217 | return strp.trimmed(); |
218 | else |
219 | { |
220 | if (isCDATA) |
221 | { |
222 | QString str = resolveEntities(strp); |
223 | str = escapeSpecialCharacters(str); |
224 | str = convertNewlines(str); |
225 | str = str.trimmed(); |
226 | return str; |
227 | } |
228 | else |
229 | { |
230 | QString str = escapeSpecialCharacters(strp); |
231 | str = str.trimmed(); |
232 | return str; |
233 | } |
234 | } |
235 | } |
236 | |
237 | PersonPtr personFromString(const QString& strp) |
238 | { |
239 | QString str = strp.trimmed(); |
240 | if (str.isEmpty()) |
241 | return PersonPtr(new PersonImpl()); |
242 | |
243 | str = resolveEntities(str); |
244 | QString name; |
245 | QString uri; |
246 | QString email; |
247 | |
248 | // look for something looking like a mail address ("foo@bar.com", |
249 | // "<foo@bar.com>") and extract it |
250 | |
251 | QRegExp remail(QLatin1String("<?([^@\\s<]+@[^>\\s]+)>?" )); // FIXME: user "proper" regexp, |
252 | // search kmail source for it |
253 | |
254 | int pos = remail.indexIn(str); |
255 | if (pos != -1) |
256 | { |
257 | QString all = remail.cap(0); |
258 | email = remail.cap(1); |
259 | str.remove(all); // remove mail address |
260 | } |
261 | |
262 | // replace "mailto", "(", ")" (to be extended) |
263 | email.remove(QLatin1String("mailto:" )); |
264 | email.remove(QRegExp(QLatin1String("[\\(\\)]" ))); |
265 | |
266 | // simplify the rest and use it as name |
267 | |
268 | name = str.simplified(); |
269 | |
270 | // after removing the email, str might have |
271 | // the format "(Foo M. Bar)". We cut off |
272 | // parentheses if there are any. However, if |
273 | // str is of the format "Foo M. Bar (President)", |
274 | // we should not cut anything. |
275 | |
276 | QRegExp rename(QLatin1String("^\\(([^\\)]*)\\)" )); |
277 | |
278 | if (rename.exactMatch(name)) |
279 | { |
280 | name = rename.cap(1); |
281 | } |
282 | |
283 | name = name.isEmpty() ? QString() : name; |
284 | email = email.isEmpty() ? QString() : email; |
285 | uri = uri.isEmpty() ? QString() : uri; |
286 | |
287 | if (name.isEmpty() && email.isEmpty() && uri.isEmpty()) |
288 | return PersonPtr(new PersonImpl()); |
289 | |
290 | return PersonPtr(new PersonImpl(name, uri, email)); |
291 | } |
292 | |
293 | ElementType::ElementType(const QString& localnamep, |
294 | const QString& nsp) : ns(nsp), localname(localnamep) |
295 | { |
296 | } |
297 | |
298 | bool ElementType::operator==(const ElementType& other) const |
299 | { |
300 | return localname == other.localname && ns == other.ns; |
301 | } |
302 | |
303 | } // namespace Syndication |
304 | |
305 | |
306 | |