1/****************************************************************************
2 * Copyright (C) 2012-2016 Woboq GmbH
3 * Olivier Goffart <contact at woboq.com>
4 * https://woboq.com/codebrowser.html
5 *
6 * This file is part of the Woboq Code Browser.
7 *
8 * Commercial License Usage:
9 * Licensees holding valid commercial licenses provided by Woboq may use
10 * this file in accordance with the terms contained in a written agreement
11 * between the licensee and Woboq.
12 * For further information see https://woboq.com/codebrowser.html
13 *
14 * Alternatively, this work may be used under a Creative Commons
15 * Attribution-NonCommercial-ShareAlike 3.0 (CC-BY-NC-SA 3.0) License.
16 * http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en_US
17 * This license does not allow you to use the code browser to assist the
18 * development of your commercial software. If you intent to do so, consider
19 * purchasing a commercial licence.
20 ****************************************************************************/
21
22#include "commenthandler.h"
23#include "generator.h"
24#include "stringbuilder.h"
25#include "annotator.h"
26#include <clang/AST/RawCommentList.h>
27#include <clang/AST/CommentParser.h>
28#include <clang/AST/CommentVisitor.h>
29#include <clang/AST/ASTContext.h>
30#include <clang/AST/DeclTemplate.h>
31#include <clang/Lex/Preprocessor.h>
32#include <clang/Basic/Version.h>
33#include <clang/Sema/Sema.h>
34#include <clang/Sema/Lookup.h>
35#include <clang/Basic/SourceManager.h>
36#include <cctype>
37#include <iostream>
38
39clang::NamedDecl *parseDeclarationReference(llvm::StringRef Text, clang::Sema &Sema, bool isFunction) {
40
41 clang::Preprocessor &PP = Sema.getPreprocessor();
42
43 auto Buf = llvm::MemoryBuffer::getMemBufferCopy(Text);
44 llvm::MemoryBuffer *Buf2 = &*Buf;
45#if CLANG_VERSION_MAJOR == 3 && CLANG_VERSION_MINOR <= 4
46 auto FID = PP.getSourceManager().createFileIDForMemBuffer(Buf);
47#else
48 auto FID = PP.getSourceManager().createFileID(std::move(Buf));
49#endif
50 clang::Lexer Lex(FID, Buf2, PP.getSourceManager(), PP.getLangOpts());
51
52 auto TuDecl = Sema.getASTContext().getTranslationUnitDecl();
53 clang::CXXScopeSpec SS;
54 clang::Token Tok, Next;
55 Lex.LexFromRawLexer(Tok);
56
57 for (; !Tok.is(clang::tok::eof); Tok = Next) {
58 Lex.LexFromRawLexer(Next);
59 clang::IdentifierInfo* II = nullptr;
60 if (Tok.is(clang::tok::raw_identifier)) {
61 II = PP.LookUpIdentifierInfo(Tok);
62 }
63
64 if (Tok.is(clang::tok::coloncolon)) {
65 SS.MakeGlobal(Sema.getASTContext(), Tok.getLocation());
66 continue;
67 } else if (Tok.is(clang::tok::identifier)) {
68
69 if (Next.is(clang::tok::coloncolon)) {
70
71 clang::Sema::TemplateTy Template;
72 clang::UnqualifiedId Name;
73 Name.setIdentifier(II, Tok.getLocation());
74 bool dummy;
75 auto TemplateKind = Sema.isTemplateName(Sema.getScopeForContext(TuDecl), SS, false, Name, {}, false, Template, dummy);
76 if (TemplateKind == clang::TNK_Non_template) {
77#if CLANG_VERSION_MAJOR >= 4
78 clang::Sema::NestedNameSpecInfo nameInfo(II, Tok.getLocation(), Next.getLocation());
79 if (Sema.ActOnCXXNestedNameSpecifier(Sema.getScopeForContext(TuDecl), nameInfo , false, SS))
80#else
81 if (Sema.ActOnCXXNestedNameSpecifier(Sema.getScopeForContext(TuDecl), *II, Tok.getLocation(), Next.getLocation(), {}, false, SS))
82#endif
83 {
84 SS.SetInvalid(Tok.getLocation());
85 }
86 } else if (auto T = Template.get().getAsTemplateDecl()) {
87 // FIXME: For template, it is a bit tricky
88 // It is still a bit broken but works in some cases for most normal functions
89 auto T2 = llvm::dyn_cast_or_null<clang::CXXRecordDecl>(T->getTemplatedDecl());
90 if (T2) {
91 Lex.LexFromRawLexer(Tok);
92 if (!Tok.is(clang::tok::raw_identifier))
93 return nullptr;
94 II = PP.LookUpIdentifierInfo(Tok);
95 Lex.LexFromRawLexer(Next);
96 if (!Next.is(clang::tok::eof) && !Next.is(clang::tok::l_paren))
97 return nullptr;
98 auto Result = T2->lookup(II);
99 if (Result.size() != 1)
100 return nullptr;
101 auto D = Result.front();
102 if (isFunction && (llvm::isa<clang::RecordDecl>(D)
103 || llvm::isa<clang::ClassTemplateDecl>(D))) {
104 // TODO constructor
105 return nullptr;
106 }
107 return D;
108 }
109 }
110 Lex.LexFromRawLexer(Next);
111 continue;
112 }
113
114 if (Next.is(clang::tok::eof) || Next.is(clang::tok::l_paren)) {
115 clang::LookupResult Found(Sema, II, Tok.getLocation(), clang::Sema::LookupOrdinaryName);
116 Found.suppressDiagnostics();
117
118 if (SS.isEmpty())
119 Sema.LookupQualifiedName(Found, TuDecl);
120 else {
121 clang::DeclContext* DC = Sema.computeDeclContext(SS);
122 Sema.LookupQualifiedName(Found, DC ? DC : TuDecl);
123 }
124
125
126 if (Found.isSingleResult()) {
127 auto Decl = Found.getFoundDecl();
128 if (isFunction && (llvm::isa<clang::RecordDecl>(Decl)
129 || llvm::isa<clang::ClassTemplateDecl>(Decl))) {
130 // TODO handle constructors.
131 return nullptr;
132 }
133 return Decl;
134 }
135
136 if (Found.isOverloadedResult() && Next.is(clang::tok::l_paren)) {
137 // TODO
138 }
139 return nullptr;
140 }
141 }
142 if (Tok.is(clang::tok::tilde) || Tok.is(clang::tok::kw_operator)) {
143 //TODO
144 return nullptr;
145 }
146
147 if (!isFunction)
148 return nullptr;
149 SS = {};
150 // Then it is probably the return type, just skip it.
151 }
152 return nullptr;
153}
154
155struct CommentHandler::CommentVisitor : clang::comments::ConstCommentVisitor<CommentVisitor> {
156 typedef clang::comments::ConstCommentVisitor<CommentVisitor> Base;
157 CommentVisitor(Annotator &annotator, Generator &generator, const clang::comments::CommandTraits &traits, clang::Sema &Sema)
158 : annotator(annotator), generator(generator) , traits(traits), Sema(Sema) {}
159 Annotator &annotator;
160 Generator &generator;
161 const clang::comments::CommandTraits &traits;
162 clang::Sema &Sema;
163
164 clang::NamedDecl *Decl = nullptr;
165 std::string DeclRef;
166 std::vector<std::pair<std::string, Doc>> SubDocs; // typically for enum values
167
168 void visit(const clang::comments::Comment *C) {
169 Base::visit(C);
170 for (auto it = C->child_begin(); it != C->child_end(); ++it)
171 visit(*it);
172 }
173
174 // Inline content.
175 //void visitTextComment(const clang::comments::TextComment *C);
176 void visitInlineCommandComment(const clang::comments::InlineCommandComment *C) {
177 tag("command", C->getCommandNameRange());
178 for (unsigned int i = 0; i < C->getNumArgs(); ++i)
179 tag("arg", C->getArgRange(i));
180 }
181 void visitHTMLStartTagComment(const clang::comments::HTMLStartTagComment *C) {
182 tag("tag", C->getSourceRange());
183 /*for (int i = 0; i < C->getNumAttrs(); ++i) {
184 auto attr = C->getAttr(i);
185 tag("attr", attr.getNameRange());
186 }*/
187 }
188 void visitHTMLEndTagComment(const clang::comments::HTMLEndTagComment *C) {
189 tag("tag", C->getSourceRange());
190 }
191
192 // Block content.
193 //void visitParagraphComment(const clang::comments::ParagraphComment *C);
194 void visitBlockCommandComment(const clang::comments::BlockCommandComment *C) {
195 auto nameRange = C->getCommandNameRange(traits);
196 tag("command", {C->getSourceRange().getBegin(), nameRange.getEnd().getLocWithOffset(-1)});
197 for (unsigned int i = 0; i < C->getNumArgs(); ++i)
198 tag("arg", C->getArgRange(i));
199 if (C->getCommandName(traits) == "value")
200 parseEnumValue(C);
201 }
202 //void visitParamCommandComment(const clang::comments::ParamCommandComment *C);
203 //void visitTParamCommandComment(const clang::comments::TParamCommandComment *C);
204 /*void visitVerbatimBlockComment(const clang::comments::VerbatimBlockComment *C) {
205 Base::visitVerbatimBlockComment(C);
206 FIXME
207 // highlight the closing command
208 auto end = C->getLocEnd();
209 tag("arg", {end.getLocWithOffset(-C->getCloseName().size()) ,end});
210 }*/
211 void visitVerbatimBlockLineComment(const clang::comments::VerbatimBlockLineComment *C) {
212 tag("verb", C->getSourceRange());
213
214 }
215 void visitVerbatimLineComment(const clang::comments::VerbatimLineComment *C) {
216 auto R = C->getTextRange();
217 // We need to adjust because the text starts right after the name, which overlap with the
218 // command. And also includes the end of line, which is useless.
219 Base::visitVerbatimLineComment(C);
220
221 std::string ref;
222 auto Info = traits.getCommandInfo(C->getCommandID());
223 if (Info->IsDeclarationCommand) {
224 auto D = parseDeclarationReference(C->getText(), Sema,
225 Info->IsFunctionDeclarationCommand || Info->getID() == clang::comments::CommandTraits::KCI_fn);
226 if (D) {
227 Decl = D;
228 DeclRef = annotator.getVisibleRef(Decl);
229 ref = DeclRef;
230 }
231 }
232 tag("verb", {R.getBegin().getLocWithOffset(+1), R.getEnd().getLocWithOffset(-1)}, ref);
233 }
234
235 //void visitFullComment(const clang::comments::FullComment *C);
236
237private:
238 void tag(llvm::StringRef className, clang::SourceRange range, llvm::StringRef ref = llvm::StringRef()) {
239 int len = range.getEnd().getRawEncoding() - range.getBegin().getRawEncoding() + 1;
240 if (len > 0) {
241 std::string attr;
242 if (ref.empty()) {
243 attr = "class=\"" % className % "\"";
244 } else {
245 attr = "class=\"" % className % "\" data-ref=\"" % ref % "\"";
246 }
247 auto offset = annotator.getSourceMgr().getFileOffset(range.getBegin());
248 generator.addTag("span", attr, offset, len);
249 }
250 }
251
252 // Parse the \value command (for enum values)
253 void parseEnumValue(const clang::comments::BlockCommandComment *C) {
254 auto ED = llvm::dyn_cast_or_null<clang::EnumDecl>(Decl);
255 if (!ED)
256 return;
257 auto P = C->getParagraph();
258 if (!P)
259 return;
260 auto valueStartLoc = P->getSourceRange().getBegin();
261 const char *data = annotator.getSourceMgr().getCharacterData(valueStartLoc);
262 auto begin = data;
263 while(clang::isWhitespace(*begin))
264 begin++;
265 auto end = begin;
266 while(clang::isIdentifierBody(*end))
267 end++;
268 llvm::StringRef value(begin, end-begin);
269
270 auto it = std::find_if(ED->enumerator_begin(), ED->enumerator_end(),
271 [&value](const clang::EnumConstantDecl *EC)
272 { return value == EC->getName(); } );
273 if (it == ED->enumerator_end())
274 return;
275 auto ref = annotator.getVisibleRef(*it);
276
277 tag("arg", {valueStartLoc.getLocWithOffset(begin-data),
278 valueStartLoc.getLocWithOffset(end-data)}, ref);
279
280 auto range = C->getSourceRange();
281 auto len = range.getEnd().getRawEncoding() - range.getBegin().getRawEncoding() + 1;
282 auto ctn = std::string(annotator.getSourceMgr().getCharacterData(range.getBegin()), len);
283 SubDocs.push_back({std::move(ref), Doc{ std::move(ctn) , range.getBegin() } });
284 }
285};
286
287static void handleUrlsInComment(Generator& generator, llvm::StringRef rawString, int commentStart)
288{
289 std::size_t pos = 0;
290 while ((pos = rawString.find("http", pos)) != llvm::StringRef::npos) {
291 int begin = pos;
292 pos +=4;
293 if (begin != 0 && llvm::StringRef(" \t/*[]()<>|:\"'{}").find(rawString[begin-1]) == llvm::StringRef::npos) {
294 // the URL need to be the first character, or follow a space or one of the character
295 continue;
296 }
297 if (pos < rawString.size() && rawString[pos] == 's') pos++;
298 if (!rawString.substr(pos).startswith("://"))
299 continue;
300 pos+=3;
301 // We have an URL
302
303 llvm::StringRef urlChars = "-._~:/?#[]@!$&'()*+,;=%"; // chars valid in the URL
304 while(pos < rawString.size() && (std::isalnum(rawString[pos]) ||
305 urlChars.find(rawString[pos]) != llvm::StringRef::npos))
306 pos++;
307
308 // don't end with a period
309 if (rawString[pos-1]=='.') pos--;
310
311 // Don't end with a closing parenthese or bracket unless the URL contains an opening one
312 // (e.g. wikipedia urls)
313 auto candidate = rawString.substr(begin, pos-begin);
314 if (rawString[pos-1]==')' && candidate.find('(') == llvm::StringRef::npos) pos--;
315 if (rawString[pos-1]==']' && candidate.find('[') == llvm::StringRef::npos) pos--;
316
317 // don't end with a period
318 if (rawString[pos-1]=='.') pos--;
319
320 auto len = pos - begin;
321 generator.addTag("a", "href=\"" % rawString.substr(begin, len) % "\"",
322 commentStart+begin, len);
323 }
324
325}
326
327void CommentHandler::handleComment(Annotator &A, Generator& generator, clang::Sema &Sema,
328 const char *bufferStart, int commentStart, int len,
329 clang::SourceLocation searchLocBegin, clang::SourceLocation searchLocEnd,
330 clang::SourceLocation commentLoc)
331{
332 llvm::StringRef rawString(bufferStart+commentStart, len);
333
334 handleUrlsInComment(generator, rawString, commentStart);
335
336 std::string attributes;
337
338 if ((rawString.ltrim().startswith("/**") && !rawString.ltrim().startswith("/***"))
339 || rawString.ltrim().startswith("/*!") || rawString.ltrim().startswith("//!")
340 || (rawString.ltrim().startswith("///") && !rawString.ltrim().startswith("////")))
341#if CLANG_VERSION_MAJOR==3 && CLANG_VERSION_MINOR<=4
342 if (rawString.find("deprecated") == rawString.npos) // workaround crash in comments::Sema::checkDeprecatedCommand
343#endif
344 {
345 attributes = "class=\"doc\"";
346
347 clang::Preprocessor &PP = Sema.getPreprocessor();
348 clang::comments::CommandTraits traits(PP.getPreprocessorAllocator(), clang::CommentOptions());
349 traits.registerBlockCommand("value"); // enum value
350#if CLANG_VERSION_MAJOR==3 && CLANG_VERSION_MINOR<=4
351 traits.registerBlockCommand("deprecated"); // avoid typo correction leading to crash.
352#endif
353 clang::comments::Lexer lexer(PP.getPreprocessorAllocator(), PP.getDiagnostics(), traits,
354 commentLoc, bufferStart + commentStart, bufferStart + commentStart + len);
355 clang::comments::Sema sema(PP.getPreprocessorAllocator(), PP.getSourceManager(), PP.getDiagnostics(), traits, &PP);
356 clang::comments::Parser parser(lexer, sema, PP.getPreprocessorAllocator(), PP.getSourceManager(),
357 PP.getDiagnostics(), traits);
358 auto fullComment = parser.parseFullComment();
359 CommentVisitor visitor{A, generator, traits, Sema};
360 visitor.visit(fullComment);
361 if (!visitor.DeclRef.empty()) {
362 for (auto &p : visitor.SubDocs)
363 docs.insert(std::move(p));
364 docs.insert({std::move(visitor.DeclRef), { rawString.str() , commentLoc }});
365 generator.addTag("i", attributes, commentStart, len);
366 return;
367 }
368 }
369
370
371 // Try to find a matching declaration
372 const auto &dof = decl_offsets;
373 //is there one and one single decl in that range.
374 auto it_before = dof.lower_bound(searchLocBegin);
375 auto it_after = dof.upper_bound(searchLocEnd);
376 if (it_before != dof.end() && it_after != dof.begin() && it_before == (--it_after)) {
377 if (it_before->second.second) {
378 docs.insert({it_before->second.first, { rawString.str() , commentLoc }});
379 } else {
380 attributes %= " data-doc=\"" % it_before->second.first % "\"";
381 }
382 }
383
384 generator.addTag("i", attributes, commentStart, len);
385}
386