1/*
2 * loader.cpp
3 *
4 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
5 *
6 * This program is distributed in the hope that it will be useful, but WITHOUT
7 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
9 * accompanying file 'COPYING'.
10 */
11#include "loader.h"
12#include "dataretriever.h"
13#include "documentsource.h"
14#include "feed.h"
15#include "global.h"
16#include "parsercollection.h"
17
18#include <kio/global.h>
19#include <kurl.h>
20
21#include <QtCore/QBuffer>
22#include <QtCore/QRegExp>
23#include <QtCore/QStringList>
24
25#include <boost/shared_ptr.hpp>
26
// TODO: remove together with the std::cout debug output in Loader::slotRetrieverDone()
28#include <iostream>
29
30namespace Syndication {
31
32struct Loader::LoaderPrivate
33{
34 LoaderPrivate() : retriever(0), lastError(Success),
35 retrieverError(0)
36 {
37 }
38
39 ~LoaderPrivate()
40 {
41 delete retriever;
42 }
43
44 DataRetriever* retriever;
45 Syndication::ErrorCode lastError;
46 int retrieverError;
47 KUrl discoveredFeedURL;
48 KUrl url;
49};
50
51Loader* Loader::create()
52{
53 return new Loader;
54}
55
56Loader *Loader::create(QObject* object, const char* slot)
57{
58 Loader *loader = create();
59 connect(loader, SIGNAL(loadingComplete(Syndication::Loader*,
60 Syndication::FeedPtr, Syndication::ErrorCode)),
61 object, slot);
62 return loader;
63}
64
65Loader::Loader() : d(new LoaderPrivate)
66{
67}
68
69Loader::~Loader()
70{
71 delete d;
72}
73
74void Loader::loadFrom(const KUrl& url)
75{
76 loadFrom(url, new FileRetriever);
77}
78
79void Loader::loadFrom(const KUrl &url, DataRetriever *retriever)
80{
81 if (d->retriever != 0L)
82 return;
83
84 d->url = url;
85 d->retriever = retriever;
86
87 connect(d->retriever, SIGNAL(dataRetrieved(QByteArray,bool)),
88 this, SLOT(slotRetrieverDone(QByteArray,bool)));
89
90 d->retriever->retrieveData(url);
91}
92
93int Loader::retrieverError() const
94{
95 return d->retrieverError;
96}
97
98Syndication::ErrorCode Loader::errorCode() const
99{
100 return d->lastError;
101}
102
103void Loader::abort()
104{
105 if (d && d->retriever)
106 {
107 d->retriever->abort();
108 delete d->retriever;
109 d->retriever = 0L;
110 }
111
112 emit loadingComplete(this, FeedPtr(), Aborted);
113 delete this;
114}
115
116KUrl Loader::discoveredFeedURL() const
117{
118 return d->discoveredFeedURL;
119}
120
121void Loader::slotRetrieverDone(const QByteArray& data, bool success)
122{
123 d->retrieverError = d->retriever->errorCode();
124 ErrorCode status = Success;
125 FeedPtr feed;
126 bool isFileRetriever = dynamic_cast<FileRetriever*>(d->retriever) != 0;
127 delete d->retriever;
128 d->retriever = 0;
129
130 if (success)
131 {
132 DocumentSource src(data, d->url.url());
133 feed = parserCollection()->parse(src);
134
135 if (parserCollection()->lastError() != Syndication::Success)
136 {
137 status = parserCollection()->lastError();
138 discoverFeeds(data);
139 }
140 }
141 else
142 {
143 if (isFileRetriever)
144 {
145 // retriever is a FileRetriever, so we interpret the
146 // error code and set lastError accordingly
147 status = FileNotFound; // TODO
148 std::cout << "file retriever error: " << d->retrieverError << std::endl;
149 }
150 else
151 {
152 // retriever is a custom impl, so we set OtherRetrieverError
153 status = OtherRetrieverError;
154 }
155 }
156
157 emit loadingComplete(this, feed, status);
158
159 delete this;
160}
161
162void Loader::discoverFeeds(const QByteArray &data)
163{
164 QString str = QString::fromLatin1(data.constData()).simplified();
165 QString s2;
166 //QTextStream ts( &str, QIODevice::WriteOnly );
167 //ts << data.data();
168
169 // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
170 // "type[\\s]=[\\s]\\\"application/rss+xml\\\""
171 // "href[\\s]=[\\s]\\\"application/rss+xml\\\""
172 QRegExp rx( QLatin1String("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"), Qt::CaseInsensitive );
173 if (rx.indexIn(str)!=-1)
174 s2=rx.cap(1);
175 else{
176 // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
177 int pos=0;
178 QStringList feeds;
179 QString host=d->url.host();
180 rx.setPattern(QLatin1String("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"));
181 while ( pos >= 0 ) {
182 pos = rx.indexIn( str, pos );
183 s2=rx.cap(1);
184 if (s2.endsWith(QLatin1String(".rdf")) ||
185 s2.endsWith(QLatin1String(".rss")) ||
186 s2.endsWith(QLatin1String(".xml")))
187 feeds.append(s2);
188 if ( pos >= 0 ) {
189 pos += rx.matchedLength();
190 }
191 }
192
193 KUrl testURL;
194 // loop through, prefer feeds on same host
195 QStringList::const_iterator end( feeds.constEnd() );
196 for ( QStringList::const_iterator it = feeds.constBegin(); it != end; ++it ) {
197 testURL=*it;
198 if (testURL.host()==host)
199 {
200 s2=*it;
201 break;
202 }
203 }
204 }
205
206 if (s2.isNull())
207 {
208 return;
209 }
210
211 if (KUrl::isRelativeUrl(s2))
212 {
213 if (s2.startsWith(QLatin1String("//")))
214 {
215 s2=s2.prepend(d->url.protocol()+QLatin1Char(':'));
216 d->discoveredFeedURL=s2;
217 }
218 else if (s2.startsWith(QLatin1Char('/')))
219 {
220 d->discoveredFeedURL=d->url;
221 d->discoveredFeedURL.setPath(s2);
222 }
223 else
224 {
225 d->discoveredFeedURL=d->url;
226 d->discoveredFeedURL.addPath(s2);
227 }
228 d->discoveredFeedURL.cleanPath();
229 }
230 else
231 d->discoveredFeedURL=s2;
232
233 d->discoveredFeedURL.cleanPath();
234}
235
236} // namespace Syndication
237
238