1 | /* |
2 | * loader.cpp |
3 | * |
4 | * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> |
5 | * |
6 | * This program is distributed in the hope that it will be useful, but WITHOUT |
7 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
8 | * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the |
9 | * accompanying file 'COPYING'. |
10 | */ |
11 | #include "loader.h" |
12 | #include "dataretriever.h" |
13 | #include "documentsource.h" |
14 | #include "feed.h" |
15 | #include "global.h" |
16 | #include "parsercollection.h" |
17 | |
18 | #include <kio/global.h> |
19 | #include <kurl.h> |
20 | |
21 | #include <QtCore/QBuffer> |
22 | #include <QtCore/QRegExp> |
23 | #include <QtCore/QStringList> |
24 | |
25 | #include <boost/shared_ptr.hpp> |
26 | |
// TODO: remove <iostream> together with the std::cout debug output in slotRetrieverDone()
28 | #include <iostream> |
29 | |
30 | namespace Syndication { |
31 | |
32 | struct Loader::LoaderPrivate |
33 | { |
34 | LoaderPrivate() : retriever(0), lastError(Success), |
35 | retrieverError(0) |
36 | { |
37 | } |
38 | |
39 | ~LoaderPrivate() |
40 | { |
41 | delete retriever; |
42 | } |
43 | |
44 | DataRetriever* retriever; |
45 | Syndication::ErrorCode lastError; |
46 | int retrieverError; |
47 | KUrl discoveredFeedURL; |
48 | KUrl url; |
49 | }; |
50 | |
51 | Loader* Loader::create() |
52 | { |
53 | return new Loader; |
54 | } |
55 | |
56 | Loader *Loader::create(QObject* object, const char* slot) |
57 | { |
58 | Loader *loader = create(); |
59 | connect(loader, SIGNAL(loadingComplete(Syndication::Loader*, |
60 | Syndication::FeedPtr, Syndication::ErrorCode)), |
61 | object, slot); |
62 | return loader; |
63 | } |
64 | |
65 | Loader::Loader() : d(new LoaderPrivate) |
66 | { |
67 | } |
68 | |
69 | Loader::~Loader() |
70 | { |
71 | delete d; |
72 | } |
73 | |
74 | void Loader::loadFrom(const KUrl& url) |
75 | { |
76 | loadFrom(url, new FileRetriever); |
77 | } |
78 | |
79 | void Loader::loadFrom(const KUrl &url, DataRetriever *retriever) |
80 | { |
81 | if (d->retriever != 0L) |
82 | return; |
83 | |
84 | d->url = url; |
85 | d->retriever = retriever; |
86 | |
87 | connect(d->retriever, SIGNAL(dataRetrieved(QByteArray,bool)), |
88 | this, SLOT(slotRetrieverDone(QByteArray,bool))); |
89 | |
90 | d->retriever->retrieveData(url); |
91 | } |
92 | |
93 | int Loader::retrieverError() const |
94 | { |
95 | return d->retrieverError; |
96 | } |
97 | |
98 | Syndication::ErrorCode Loader::errorCode() const |
99 | { |
100 | return d->lastError; |
101 | } |
102 | |
103 | void Loader::abort() |
104 | { |
105 | if (d && d->retriever) |
106 | { |
107 | d->retriever->abort(); |
108 | delete d->retriever; |
109 | d->retriever = 0L; |
110 | } |
111 | |
112 | emit loadingComplete(this, FeedPtr(), Aborted); |
113 | delete this; |
114 | } |
115 | |
116 | KUrl Loader::discoveredFeedURL() const |
117 | { |
118 | return d->discoveredFeedURL; |
119 | } |
120 | |
121 | void Loader::slotRetrieverDone(const QByteArray& data, bool success) |
122 | { |
123 | d->retrieverError = d->retriever->errorCode(); |
124 | ErrorCode status = Success; |
125 | FeedPtr feed; |
126 | bool isFileRetriever = dynamic_cast<FileRetriever*>(d->retriever) != 0; |
127 | delete d->retriever; |
128 | d->retriever = 0; |
129 | |
130 | if (success) |
131 | { |
132 | DocumentSource src(data, d->url.url()); |
133 | feed = parserCollection()->parse(src); |
134 | |
135 | if (parserCollection()->lastError() != Syndication::Success) |
136 | { |
137 | status = parserCollection()->lastError(); |
138 | discoverFeeds(data); |
139 | } |
140 | } |
141 | else |
142 | { |
143 | if (isFileRetriever) |
144 | { |
145 | // retriever is a FileRetriever, so we interpret the |
146 | // error code and set lastError accordingly |
147 | status = FileNotFound; // TODO |
148 | std::cout << "file retriever error: " << d->retrieverError << std::endl; |
149 | } |
150 | else |
151 | { |
152 | // retriever is a custom impl, so we set OtherRetrieverError |
153 | status = OtherRetrieverError; |
154 | } |
155 | } |
156 | |
157 | emit loadingComplete(this, feed, status); |
158 | |
159 | delete this; |
160 | } |
161 | |
162 | void Loader::discoverFeeds(const QByteArray &data) |
163 | { |
164 | QString str = QString::fromLatin1(data.constData()).simplified(); |
165 | QString s2; |
166 | //QTextStream ts( &str, QIODevice::WriteOnly ); |
167 | //ts << data.data(); |
168 | |
169 | // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>" |
170 | // "type[\\s]=[\\s]\\\"application/rss+xml\\\"" |
171 | // "href[\\s]=[\\s]\\\"application/rss+xml\\\"" |
172 | QRegExp rx( QLatin1String("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)" ), Qt::CaseInsensitive ); |
173 | if (rx.indexIn(str)!=-1) |
174 | s2=rx.cap(1); |
175 | else{ |
176 | // does not support Atom/RSS autodiscovery.. try finding feeds by brute force.... |
177 | int pos=0; |
178 | QStringList feeds; |
179 | QString host=d->url.host(); |
180 | rx.setPattern(QLatin1String("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)" )); |
181 | while ( pos >= 0 ) { |
182 | pos = rx.indexIn( str, pos ); |
183 | s2=rx.cap(1); |
184 | if (s2.endsWith(QLatin1String(".rdf" )) || |
185 | s2.endsWith(QLatin1String(".rss" )) || |
186 | s2.endsWith(QLatin1String(".xml" ))) |
187 | feeds.append(s2); |
188 | if ( pos >= 0 ) { |
189 | pos += rx.matchedLength(); |
190 | } |
191 | } |
192 | |
193 | KUrl testURL; |
194 | // loop through, prefer feeds on same host |
195 | QStringList::const_iterator end( feeds.constEnd() ); |
196 | for ( QStringList::const_iterator it = feeds.constBegin(); it != end; ++it ) { |
197 | testURL=*it; |
198 | if (testURL.host()==host) |
199 | { |
200 | s2=*it; |
201 | break; |
202 | } |
203 | } |
204 | } |
205 | |
206 | if (s2.isNull()) |
207 | { |
208 | return; |
209 | } |
210 | |
211 | if (KUrl::isRelativeUrl(s2)) |
212 | { |
213 | if (s2.startsWith(QLatin1String("//" ))) |
214 | { |
215 | s2=s2.prepend(d->url.protocol()+QLatin1Char(':')); |
216 | d->discoveredFeedURL=s2; |
217 | } |
218 | else if (s2.startsWith(QLatin1Char('/'))) |
219 | { |
220 | d->discoveredFeedURL=d->url; |
221 | d->discoveredFeedURL.setPath(s2); |
222 | } |
223 | else |
224 | { |
225 | d->discoveredFeedURL=d->url; |
226 | d->discoveredFeedURL.addPath(s2); |
227 | } |
228 | d->discoveredFeedURL.cleanPath(); |
229 | } |
230 | else |
231 | d->discoveredFeedURL=s2; |
232 | |
233 | d->discoveredFeedURL.cleanPath(); |
234 | } |
235 | |
236 | } // namespace Syndication |
237 | |
238 | |