startgit: Static page generator for git repositories |
git clone git://git.dimitrijedobrota.com/startgit.git |
Log | Files | Refs | README | LICENSE | HACKING | CONTRIBUTING | CODE_OF_CONDUCT | BUILDING | |
html.cpp (20079B)
#include <array>
#include <cstddef>
#include <format>
#include <functional>
#include <string>

#include <md4c-html.h>

#include "arguments.hpp"

/* ASCII-only character classifiers. They are constexpr so that they can be
 * used while building md_html::escape_map at compile time. */

/* True when chr is an ASCII decimal digit. */
constexpr bool isdigit(char chr)
{
  return '0' <= (chr) && (chr) <= '9';
}

/* True when chr is an ASCII lowercase letter. */
constexpr bool islower(char chr)
{
  return 'a' <= (chr) && (chr) <= 'z';
}

/* True when chr is an ASCII uppercase letter. */
constexpr bool isupper(char chr)
{
  return 'A' <= (chr) && (chr) <= 'Z';
}

/* True when chr is an ASCII letter or digit. */
constexpr bool isalnum(char chr)
{
  return islower(chr) || isupper(chr) || isdigit(chr);
}

/* Renderer state shared by all MD4C callbacks below. Every piece of generated
 * HTML is handed to process_output together with userdata. */
class md_html
{
public:
  /* True when chr must be replaced by an entity in HTML text/attributes.
   * The byte is converted through unsigned char before indexing: a plain
   * (possibly signed) char >= 0x80 would otherwise sign-extend into an
   * out-of-range index. */
  static bool need_html_esc(char chr)
  {
    return (escape_map[static_cast<unsigned char>(chr)] & esc_flag::html)
        != 0;  // NOLINT
  }

  /* True when chr must be escaped inside a URL attribute. Indexed through
   * unsigned char for the same reason as need_html_esc(). */
  static bool need_url_esc(char chr)
  {
    return (escape_map[static_cast<unsigned char>(chr)] & esc_flag::url)
        != 0;  // NOLINT
  }

  /* Member function used to emit a chunk of text with a chosen escaping. */
  using append_fn = void (md_html::*)(const MD_CHAR*, MD_SIZE);

  void render_verbatim(const std::string& text);
  void render_verbatim(const MD_CHAR* text, MD_SIZE size);

  void render_html_escaped(const MD_CHAR* data, MD_SIZE size);
  void render_url_escaped(const MD_CHAR* data, MD_SIZE size);
  void render_utf8_codepoint(unsigned codepoint, append_fn fn_append);
  void render_entity(const MD_CHAR* text, MD_SIZE size, append_fn fn_append);
  void render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append);
  void render_open_ol_block(const MD_BLOCK_OL_DETAIL* det);
  void render_open_li_block(const MD_BLOCK_LI_DETAIL* det);
  void render_open_code_block(const MD_BLOCK_CODE_DETAIL* det);
  void render_open_td_block(const MD_CHAR* cell_type,
                            const MD_BLOCK_TD_DETAIL* det);
  void render_open_a_span(const MD_SPAN_A_DETAIL* det);
  void render_open_img_span(const MD_SPAN_IMG_DETAIL* det);
  void render_close_img_span(const MD_SPAN_IMG_DETAIL* det);
  void render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det);

  /* Output sink; receives (text, size, userdata) for every emitted chunk. */
  void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
  void* userdata;
  /* Renderer flags as passed to startgit::md_html(). */
  unsigned flags;
  /* Number of currently open image spans; markup is suppressed while > 0
   * because that text ends up inside the alt="" attribute. */
  int image_nesting_level;

private:
  enum esc_flag : unsigned char
  {
    html = 0x1U,
    url = 0x2U
  };

  /* Per-byte escaping flags, computed at compile time. */
  static constexpr const std::array<unsigned char, 256> escape_map = []()
  {
    std::array<unsigned char, 256> res = {};
    /* Non-alphanumeric characters that may appear unescaped in a URL. */
    const std::string url_esc = "~-_.+!*(),%#@?=;:/,+$";
    /* Characters that must become entities in HTML. */
    const std::string html_esc = "\"&<>";

    for (size_t i = 0; i < res.size(); ++i) {
      const auto chr = static_cast<char>(i);

      if (html_esc.find(chr) != std::string::npos) {
        res[i] |= esc_flag::html;  // NOLINT
      }

      if (!isalnum(chr) && url_esc.find(chr) == std::string::npos) {
        res[i] |= esc_flag::url;  // NOLINT
      }
    }

    return res;
  }();
};

/*****************************************
 ***  HTML rendering helper functions  ***
 *****************************************/

/* Emit size bytes of text unchanged. */
void md_html::render_verbatim(const MD_CHAR* text, MD_SIZE size)  // NOLINT
{
  process_output(text, size, userdata);
}

/* Emit the whole string unchanged. */
void md_html::render_verbatim(const std::string& text)  // NOLINT
{
  process_output(text.data(), static_cast<MD_SIZE>(text.size()), userdata);
}

/* Emit data with the characters & < > " replaced by their HTML entities.
 * Runs of characters that need no escaping are flushed in one call. */
void md_html::render_html_escaped(const MD_CHAR* data, MD_SIZE size)
{
  MD_OFFSET beg = 0;
  MD_OFFSET off = 0;

  while (true) {
    /* Optimization: Use some loop unrolling. */
    while (off + 3 < size && !md_html::need_html_esc(data[off + 0])  // NOLINT
           && !md_html::need_html_esc(data[off + 1])  // NOLINT
           && !md_html::need_html_esc(data[off + 2])  // NOLINT
           && !md_html::need_html_esc(data[off + 3]))  // NOLINT
    {
      off += 4;
    }

    while (off < size && !md_html::need_html_esc(data[off])) {  // NOLINT
      off++;
    }

    if (off > beg) {
      render_verbatim(data + beg, off - beg);  // NOLINT
    }

    if (off < size) {
      switch (data[off]) {  // NOLINT
        case '&':
          render_verbatim("&amp;");
          break;
        case '<':
          render_verbatim("&lt;");
          break;
        case '>':
          render_verbatim("&gt;");
          break;
        case '"':
          render_verbatim("&quot;");
          break;
      }
      off++;
    } else {
      break;
    }
    beg = off;
  }
}

/* Rewrite a link target so that it points into the generated site.
 *
 *  - Links starting with "http" or "www" that contain
 *    "github.com/<args.github>" are re-rooted at args.base_url; a "/blob"
 *    component is dropped and the path is mapped to the matching .html page.
 *    Other absolute links are returned unchanged.
 *  - Any other link is treated as a path inside the repository and mapped to
 *    "./file/<path>.html", or "./<name>.html" for entries of args.special.
 *
 * NOTE(review): args.special presumably holds std::filesystem::path values
 * (replace_extension()/string() are called on its elements) - confirm. */
std::string translate_url(const MD_CHAR* data, MD_SIZE size)
{
  auto url = std::string(data, size);

  if (url.starts_with("http") || url.starts_with("www")) {
    const std::string github = "github.com/" + startgit::args.github;
    const std::size_t gpos = url.find(github);
    if (gpos != std::string::npos) {
      url = startgit::args.base_url + url.substr(gpos + github.size());

      static const std::string blob = "/blob";
      const std::size_t bpos = url.find(blob);
      if (bpos != std::string::npos) {
        url.replace(bpos, blob.size(), "");

        const std::size_t rslash = url.rfind('/');

        auto itr = startgit::args.special.find(url.substr(rslash + 1));
        if (itr != startgit::args.special.end()) {
          auto cpy = *itr;
          url = std::format("{}/{}.html",
                            url.substr(0, rslash),
                            cpy.replace_extension().string());
        } else {
          const std::size_t slash = url.find('/', bpos + 1);
          if (slash != std::string::npos) {
            url.replace(slash, 1, "/file/");
            url += ".html";
          } else {
            /* ".../blob/<branch>" without a file path: there is no slash to
             * rewrite (replace() would throw std::out_of_range), so link to
             * the log page of that branch instead. */
            url += "/log.html";
          }
        }
      } else {
        url += "/master/log.html";
      }
    }
  } else {
    auto itr = startgit::args.special.find(url);
    if (itr != startgit::args.special.end()) {
      auto cpy = *itr;
      url = std::format("./{}.html", cpy.replace_extension().string());
    } else {
      url = std::format("./file/{}.html", url);
    }
  }

  return url;
}

/* Translate the link with translate_url() and emit it for use inside an
 * href/src attribute: '&' becomes an HTML entity and every other byte
 * flagged in escape_map is percent-encoded. */
void md_html::render_url_escaped(const MD_CHAR* data, MD_SIZE size)
{
  static const MD_CHAR* hex_chars = "0123456789ABCDEF";
  MD_OFFSET beg = 0;
  MD_OFFSET off = 0;

  /* From here on data/size refer to the translated URL. */
  const auto url = translate_url(data, size);
  size = static_cast<unsigned>(url.size());
  data = url.data();

  while (true) {
    while (off < size && !md_html::need_url_esc(data[off])) {  // NOLINT
      off++;
    }

    if (off > beg) {
      render_verbatim(data + beg, off - beg);  // NOLINT
    }

    if (off < size) {
      std::array<char, 3> hex = {0};

      switch (data[off]) {  // NOLINT
        case '&':
          render_verbatim("&amp;");
          break;
        default:
          hex[0] = '%';
          hex[1] = hex_chars[(static_cast<unsigned>(data[off]) >> 4)  // NOLINT
                             & 0xf];  // NOLINT
          hex[2] = hex_chars[(static_cast<unsigned>(data[off]) >> 0)  // NOLINT
                             & 0xf];  // NOLINT
          render_verbatim(hex.data(), 3);
          break;
      }
      off++;
    } else {
      break;
    }

    beg = off;
  }
}

/* Numeric value of a hexadecimal digit. The caller must pass a valid hex
 * digit; anything that is not a digit or an uppercase letter is treated as a
 * lowercase letter. */
unsigned hex_val(char chr)
{
  if ('0' <= chr && chr <= '9') {
    return static_cast<unsigned>(chr - '0');  // NOLINT
  }

  if ('A' <= chr && chr <= 'Z') {
    return static_cast<unsigned>(chr - 'A' + 10);  // NOLINT
  }

  return static_cast<unsigned>(chr - 'a' + 10);  // NOLINT
}

// NOLINTBEGIN
/* Encode codepoint as UTF-8 and emit it through fn_append. Codepoint 0 and
 * values above 0x10ffff are replaced by U+FFFD (replacement character). */
void md_html::render_utf8_codepoint(unsigned codepoint, append_fn fn_append)
{
  static const MD_CHAR utf8_replacement_char[] = {
      char(0xef), char(0xbf), char(0xbd)};

  unsigned char utf8[4];
  size_t n;

  if (codepoint <= 0x7f) {
    n = 1;
    utf8[0] = static_cast<unsigned char>(codepoint);
  } else if (codepoint <= 0x7ff) {
    n = 2;
    utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
    utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
  } else if (codepoint <= 0xffff) {
    n = 3;
    utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
    utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
    utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
  } else {
    n = 4;
    utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
    utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
    utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
    utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
  }

  if (0 < codepoint && codepoint <= 0x10ffff) {
    std::invoke(fn_append,
                this,
                reinterpret_cast<char*>(utf8),
                static_cast<MD_SIZE>(n));  // NOLINT
  } else {
    std::invoke(fn_append, this, utf8_replacement_char, 3);
  }
}
// NOLINTEND

/* Translate entity to its UTF-8 equivalent, or output the verbatim one
 * if such entity is unknown (or if the translation is disabled). */
void md_html::render_entity(const MD_CHAR* text,
                            MD_SIZE size,
                            append_fn fn_append)
{
  /* We assume UTF-8 output is what is desired. */
  if (size > 3 && text[1] == '#') {  // NOLINT
    unsigned codepoint = 0;

    if (text[2] == 'x' || text[2] == 'X') {  // NOLINT
      /* Hexadecimal entity (e.g. "&#x1234abcd;")). */
      for (MD_SIZE idx = 3; idx < size - 1; idx++) {
        codepoint = 16 * codepoint + hex_val(text[idx]);  // NOLINT
      }
    } else {
      /* Decimal entity (e.g. "&#1234;") */
      for (MD_SIZE idx = 2; idx < size - 1; idx++) {
        codepoint =
            10 * codepoint + static_cast<unsigned>(text[idx] - '0');  // NOLINT
      }
    }

    render_utf8_codepoint(codepoint, fn_append);
    return;
  }

  /* Named entities are passed through fn_append untouched. */
  std::invoke(fn_append, this, text, size);
}

/* Emit an MD4C attribute piece by piece: entities are decoded, null
 * characters are replaced, everything else goes through fn_append. */
void md_html::render_attribute(const MD_ATTRIBUTE* attr, append_fn fn_append)
{
  for (int i = 0; attr->substr_offsets[i] < attr->size; i++) {  // NOLINT
    MD_TEXTTYPE type = attr->substr_types[i];  // NOLINT
    MD_OFFSET off = attr->substr_offsets[i];  // NOLINT
    MD_SIZE size = attr->substr_offsets[i + 1] - off;  // NOLINT
    const MD_CHAR* text = attr->text + off;  // NOLINT

    switch (type) {
      case MD_TEXT_NULLCHAR:
        render_utf8_codepoint(0x0000, &md_html::render_verbatim);
        break;
      case MD_TEXT_ENTITY:
        render_entity(text, size, fn_append);
        break;
      default:
        std::invoke(fn_append, this, text, size);
        break;
    }
  }
}

/* Emit "<ol>", adding a start attribute when the list does not begin at 1. */
void md_html::render_open_ol_block(const MD_BLOCK_OL_DETAIL* det)
{
  if (det->start == 1) {
    render_verbatim("<ol>\n");
    return;
  }

  /* Deliberately not a raw string literal: "\n" has to be a real newline. */
  const auto buf = std::format("<ol start=\"{}\">\n", det->start);
  render_verbatim(buf);
}

/* Emit "<li>"; task-list items additionally get a disabled checkbox. */
void md_html::render_open_li_block(const MD_BLOCK_LI_DETAIL* det)
{
  if (det->is_task != 0) {
    render_verbatim(
        "<li class=\"task-list-item\">"
        "<input type=\"checkbox\" "
        "class=\"task-list-item-checkbox\" disabled");
    if (det->task_mark == 'x' || det->task_mark == 'X') {
      render_verbatim(" checked");
    }
    render_verbatim(">");
  } else {
    render_verbatim("<li>");
  }
}

/* Emit "<pre><code>", tagging the language when the block declares one. */
void md_html::render_open_code_block(const MD_BLOCK_CODE_DETAIL* det)
{
  render_verbatim("<pre><code");

  /* If known, output the HTML 5 attribute class="language-LANGNAME". */
  if (det->lang.text != nullptr) {
    render_verbatim(" class=\"language-");
    render_attribute(&det->lang, &md_html::render_html_escaped);
    render_verbatim("\"");
  }

  render_verbatim(">");
}

/* Emit the opening tag of a table cell; cell_type is "th" or "td". */
void md_html::render_open_td_block(const MD_CHAR* cell_type,
                                   const MD_BLOCK_TD_DETAIL* det)
{
  render_verbatim("<");
  render_verbatim(cell_type);

  switch (det->align) {
    case MD_ALIGN_LEFT:
      render_verbatim(" align=\"left\">");
      break;
    case MD_ALIGN_CENTER:
      render_verbatim(" align=\"center\">");
      break;
    case MD_ALIGN_RIGHT:
      render_verbatim(" align=\"right\">");
      break;
    default:
      render_verbatim(">");
      break;
  }
}

/* Emit "<a href=... [title=...]>" with a translated, escaped target. */
void md_html::render_open_a_span(const MD_SPAN_A_DETAIL* det)
{
  render_verbatim("<a href=\"");
  render_attribute(&det->href, &md_html::render_url_escaped);

  if (det->title.text != nullptr) {
    render_verbatim("\" title=\"");
    render_attribute(&det->title, &md_html::render_html_escaped);
  }

  render_verbatim("\">");
}

/* Emit the start of an <img> tag. The alt attribute is left open: the text
 * callbacks fill it in and render_close_img_span() terminates the tag. */
void md_html::render_open_img_span(const MD_SPAN_IMG_DETAIL* det)
{
  render_verbatim("<img src=\"");
  render_attribute(&det->src, &md_html::render_url_escaped);

  render_verbatim("\" alt=\"");
}

/* Close the alt attribute opened by render_open_img_span(), append the
 * optional title and terminate the tag. */
void md_html::render_close_img_span(const MD_SPAN_IMG_DETAIL* det)
{
  if (det->title.text != nullptr) {
    render_verbatim("\" title=\"");
    render_attribute(&det->title, &md_html::render_html_escaped);
  }

  render_verbatim("\">");
}

/* Emit the opening <x-wikilink> tag carrying the link target. */
void md_html::render_open_wikilink_span(const MD_SPAN_WIKILINK_DETAIL* det)
{
  render_verbatim("<x-wikilink data-target=\"");
  render_attribute(&det->target, &md_html::render_html_escaped);

  render_verbatim("\">");
}

/**************************************
 ***  HTML renderer implementation  ***
 **************************************/

/* MD4C callback: emit the opening tag for a block. userdata is the md_html
 * renderer. Always returns 0. */
int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
  static const MD_CHAR* head[] = {// NOLINT
                                  "<h1>",
                                  "<h2>",
                                  "<h3>",
                                  "<h4>",
                                  "<h5>",
                                  "<h6>"};
  auto* data = static_cast<class md_html*>(userdata);

  switch (type) {
    case MD_BLOCK_DOC: /* noop */
      break;
    case MD_BLOCK_QUOTE:
      data->render_verbatim("<blockquote>\n");
      break;
    case MD_BLOCK_UL:
      data->render_verbatim("<ul>\n");
      break;
    case MD_BLOCK_OL:
      data->render_open_ol_block(
          static_cast<const MD_BLOCK_OL_DETAIL*>(detail));
      break;
    case MD_BLOCK_LI:
      data->render_open_li_block(
          static_cast<const MD_BLOCK_LI_DETAIL*>(detail));
      break;
    case MD_BLOCK_HR:
      data->render_verbatim("<hr>\n");
      break;
    case MD_BLOCK_H:
      data->render_verbatim(
          head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1]);  // NOLINT
      break;
    case MD_BLOCK_CODE:
      data->render_open_code_block(
          static_cast<const MD_BLOCK_CODE_DETAIL*>(detail));
      break;
    case MD_BLOCK_HTML: /* noop */
      break;
    case MD_BLOCK_P:
      data->render_verbatim("<p>");
      break;
    case MD_BLOCK_TABLE:
      data->render_verbatim("<table>\n");
      break;
    case MD_BLOCK_THEAD:
      data->render_verbatim("<thead>\n");
      break;
    case MD_BLOCK_TBODY:
      data->render_verbatim("<tbody>\n");
      break;
    case MD_BLOCK_TR:
      data->render_verbatim("<tr>\n");
      break;
    case MD_BLOCK_TH:
      data->render_open_td_block("th",
                                 static_cast<MD_BLOCK_TD_DETAIL*>(detail));
      break;
    case MD_BLOCK_TD:
      data->render_open_td_block("td",
                                 static_cast<MD_BLOCK_TD_DETAIL*>(detail));
      break;
  }

  return 0;
}

/* MD4C callback: emit the closing tag for a block. userdata is the md_html
 * renderer. Always returns 0. */
int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
  static const MD_CHAR* head[] = {// NOLINT
                                  "</h1>\n",
                                  "</h2>\n",
                                  "</h3>\n",
                                  "</h4>\n",
                                  "</h5>\n",
                                  "</h6>\n"};
  auto* data = static_cast<class md_html*>(userdata);

  switch (type) {
    case MD_BLOCK_DOC: /*noop*/
      break;
    case MD_BLOCK_QUOTE:
      data->render_verbatim("</blockquote>\n");
      break;
    case MD_BLOCK_UL:
      data->render_verbatim("</ul>\n");
      break;
    case MD_BLOCK_OL:
      data->render_verbatim("</ol>\n");
      break;
    case MD_BLOCK_LI:
      data->render_verbatim("</li>\n");
      break;
    case MD_BLOCK_HR: /*noop*/
      break;
    case MD_BLOCK_H:
      data->render_verbatim(
          head[static_cast<MD_BLOCK_H_DETAIL*>(detail)->level - 1]);  // NOLINT
      break;
    case MD_BLOCK_CODE:
      data->render_verbatim("</code></pre>\n");
      break;
    case MD_BLOCK_HTML: /* noop */
      break;
    case MD_BLOCK_P:
      data->render_verbatim("</p>\n");
      break;
    case MD_BLOCK_TABLE:
      data->render_verbatim("</table>\n");
      break;
    case MD_BLOCK_THEAD:
      data->render_verbatim("</thead>\n");
      break;
    case MD_BLOCK_TBODY:
      data->render_verbatim("</tbody>\n");
      break;
    case MD_BLOCK_TR:
      data->render_verbatim("</tr>\n");
      break;
    case MD_BLOCK_TH:
      data->render_verbatim("</th>\n");
      break;
    case MD_BLOCK_TD:
      data->render_verbatim("</td>\n");
      break;
  }

  return 0;
}

/* MD4C callback: emit the opening tag for an inline span. Nothing is emitted
 * for spans nested inside an image, since their text lands in alt="".
 * Always returns 0. */
int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
  auto* data = static_cast<class md_html*>(userdata);
  /* Sampled before the counter is bumped so that the outermost image itself
   * still gets its <img> tag. */
  const bool inside_img = (data->image_nesting_level > 0);

  if (type == MD_SPAN_IMG) {
    data->image_nesting_level++;
  }

  if (inside_img) {
    return 0;
  }

  switch (type) {
    case MD_SPAN_EM:
      data->render_verbatim("<em>");
      break;
    case MD_SPAN_STRONG:
      data->render_verbatim("<strong>");
      break;
    case MD_SPAN_U:
      data->render_verbatim("<u>");
      break;
    case MD_SPAN_A:
      data->render_open_a_span(static_cast<MD_SPAN_A_DETAIL*>(detail));
      break;
    case MD_SPAN_IMG:
      data->render_open_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
      break;
    case MD_SPAN_CODE:
      data->render_verbatim("<code>");
      break;
    case MD_SPAN_DEL:
      data->render_verbatim("<del>");
      break;
    case MD_SPAN_LATEXMATH:
      data->render_verbatim("<x-equation>");
      break;
    case MD_SPAN_LATEXMATH_DISPLAY:
      data->render_verbatim("<x-equation type=\"display\">");
      break;
    case MD_SPAN_WIKILINK:
      data->render_open_wikilink_span(
          static_cast<MD_SPAN_WIKILINK_DETAIL*>(detail));
      break;
  }

  return 0;
}

/* MD4C callback: emit the closing tag for an inline span. Spans that are
 * still nested inside an image produce no output. Always returns 0. */
int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
  auto* data = static_cast<class md_html*>(userdata);

  /* Decrement first so that the outermost image is closed below. */
  if (type == MD_SPAN_IMG) {
    data->image_nesting_level--;
  }

  if (data->image_nesting_level > 0) {
    return 0;
  }

  switch (type) {
    case MD_SPAN_EM:
      data->render_verbatim("</em>");
      break;
    case MD_SPAN_STRONG:
      data->render_verbatim("</strong>");
      break;
    case MD_SPAN_U:
      data->render_verbatim("</u>");
      break;
    case MD_SPAN_A:
      data->render_verbatim("</a>");
      break;
    case MD_SPAN_IMG:
      data->render_close_img_span(static_cast<MD_SPAN_IMG_DETAIL*>(detail));
      break;
    case MD_SPAN_CODE:
      data->render_verbatim("</code>");
      break;
    case MD_SPAN_DEL:
      data->render_verbatim("</del>");
      break;
    case MD_SPAN_LATEXMATH: /*fall through*/
    case MD_SPAN_LATEXMATH_DISPLAY:
      data->render_verbatim("</x-equation>");
      break;
    case MD_SPAN_WIKILINK:
      data->render_verbatim("</x-wikilink>");
      break;
  }

  return 0;
}

/* MD4C callback: emit a run of text. Raw HTML is passed through unchanged,
 * entities are decoded, everything else is HTML-escaped. Inside an image the
 * line breaks collapse to a single space. Always returns 0. */
int text_callback(MD_TEXTTYPE type,
                  const MD_CHAR* text,
                  MD_SIZE size,
                  void* userdata)
{
  auto* data = static_cast<class md_html*>(userdata);

  switch (type) {
    case MD_TEXT_NULLCHAR:
      data->render_utf8_codepoint(0x0000, &md_html::render_verbatim);
      break;
    case MD_TEXT_BR:
      data->render_verbatim(
          (data->image_nesting_level == 0 ? ("<br>\n") : " "));
      break;
    case MD_TEXT_SOFTBR:
      data->render_verbatim((data->image_nesting_level == 0 ? "\n" : " "));
      break;
    case MD_TEXT_HTML:
      data->render_verbatim(text, size);
      break;
    case MD_TEXT_ENTITY:
      data->render_entity(text, size, &md_html::render_html_escaped);
      break;
    default:
      data->render_html_escaped(text, size);
      break;
  }

  return 0;
}

namespace startgit
{

/* Render Markdown input to HTML.
 *
 * input/input_size  Markdown text handed to md_parse().
 * process_output    called with (chunk, size, userdata) for each piece of
 *                   generated HTML.
 * parser_flags      forwarded to the MD4C parser.
 * renderer_flags    stored in the renderer state.
 *
 * Returns the result of md_parse(). */
int md_html(const MD_CHAR* input,
            MD_SIZE input_size,
            void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
            void* userdata,
            unsigned parser_flags,
            unsigned renderer_flags)
{
  /* "class" is required: inside this namespace the name md_html refers to
   * this function, not to the renderer class. */
  class md_html render = {process_output, userdata, renderer_flags, 0};

  const MD_PARSER parser = {0,
                            parser_flags,
                            enter_block_callback,
                            leave_block_callback,
                            enter_span_callback,
                            leave_span_callback,
                            text_callback,
                            nullptr,
                            nullptr};

  return md_parse(input, input_size, &parser, &render);
}

}  // namespace startgit