334 lines
7.7 KiB
C++
334 lines
7.7 KiB
C++
/*
 * Copyright (C) 2022-2023 luca0N!
 *
 * This file is part of Static Website Generator (swg).
 *
 * Static Website Generator (swg) is free software: you can redistribute it
 * and/or modify it under the terms of the version 3 of the GNU Lesser General
 * Public License as published by the Free Software Foundation.
 *
 * Static Website Generator (swg) is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Static Website Generator (swg). If not, see
 * <https://www.gnu.org/licenses/>.
 *
 * Contact luca0N! by e-mail: <luca0n [at] luca0n [dot] com>.
 */
|
|
|
|
#include "MarkdownParser.hxx"
|
|
|
|
#include <assert.h>
|
|
#include <iostream>
|
|
|
|
// ASCII code of the character '0'. Adding a single-digit number to it yields
// the character for that digit (used to build the "N" of <hN> header tags).
#define ASCII_DIGIT_START 48
|
|
|
|
/*
 * Progress of the parser through a Markdown hyperlink of the form
 * "[contents](url)".
 */
enum HyperlinkStage {
	// Not inside a hyperlink.
	NONE,
	// Between '[' and ']': collecting the contents (like text) of the link.
	READING_CONTENTS,
	// The contents are complete; the URL is expected to be specified next.
	EXPECTING_URL,
	// Between '(' and ')': collecting the hyperlink URL.
	READING_URL
};
|
|
|
|
std::string html,
|
|
tag_a_text_buf, // used for storing the text contents of a hyperlink
|
|
tag_a_buf; // used for storing the hyperlink address
|
|
enum HyperlinkStage tag_a = NONE;
|
|
|
|
void append(std::string const &s) {
|
|
// TODO: add proper error handling.
|
|
assert(tag_a != EXPECTING_URL);
|
|
switch(tag_a) {
|
|
case NONE:
|
|
html += s;
|
|
break;
|
|
case READING_CONTENTS:
|
|
tag_a_text_buf += s;
|
|
break;
|
|
case READING_URL:
|
|
tag_a_buf += s;
|
|
break;
|
|
}
|
|
}
|
|
void append(const char c) {
|
|
std::string tmp_str;
|
|
tmp_str += c;
|
|
append(tmp_str);
|
|
}
|
|
|
|
void cleanup() {
|
|
html = "";
|
|
}
|
|
|
|
namespace MarkdownParser {
|
|
|
|
std::string make_html(std::filesystem::path const &path) {
|
|
cleanup();
|
|
FILE *mdFile = fopen(path.string().c_str(), "r");
|
|
// TODO: Add proper error handling.
|
|
assert(mdFile != NULL);
|
|
|
|
int buflen = 64;
|
|
char buf[buflen];
|
|
|
|
// Tag flags
|
|
bool tag_b = false,
|
|
tag_i = false,
|
|
tag_p = false,
|
|
tag_li = false,
|
|
tag_ul = false,
|
|
tag_s = false,
|
|
tag_comment = false,
|
|
newline = true,
|
|
manualBreak = false,
|
|
// Used to ignore spaces at the beginning of header titles.
|
|
ignoreSpace = false;
|
|
unsigned short char_skip = 0;
|
|
|
|
// For counting sub-headers (h1, h2, h3, and so on)
|
|
int tag_h = 0;
|
|
while (fgets(buf, buflen, mdFile) != NULL) {
|
|
manualBreak = false;
|
|
|
|
if (!tag_comment && tag_p && buf[0] == '\n') {
|
|
// Empty newline; end paragraph.
|
|
html += "</p>\n";
|
|
tag_p = false;
|
|
continue;
|
|
}
|
|
|
|
// End ul tag if it's active and a new line doesn't contain an
|
|
// item.
|
|
if (tag_ul && newline && buf[0] != '-' && buf[0] == '\n') {
|
|
tag_ul = false;
|
|
tag_li = false;
|
|
html += "</li></ul>";
|
|
}
|
|
|
|
// Read character by character
|
|
for (int x = 0; x < buflen; x++) {
|
|
if (char_skip > 0) {
|
|
char_skip--;
|
|
continue;
|
|
}
|
|
char c = buf[x];
|
|
if (c == '\0') break;
|
|
else if (!tag_comment && c == '\n') {
|
|
// The next buffer iteration will hold the
|
|
// first (buflen) bytes of the next new line.
|
|
newline = true;
|
|
manualBreak = true;
|
|
append(' ');
|
|
|
|
// If we were in the middle of inserting a header tag, close it here.
|
|
if (tag_h > 0) {
|
|
html += "</h";
|
|
html += (ASCII_DIGIT_START + tag_h);
|
|
html += ">";
|
|
tag_h = 0;
|
|
}
|
|
if (!tag_li)
|
|
html += '\n';
|
|
break;
|
|
}
|
|
|
|
// Start paragraph if newline and no
|
|
// special characters were matched.
|
|
if (!tag_comment && !tag_ul && (!tag_p && newline && x == 0 &&
|
|
c != '#' && c != '-'))
|
|
html += "<p>",
|
|
tag_p = true;
|
|
|
|
switch (c) {
|
|
case '<':
|
|
// Check for HTML comment
|
|
if (buf[x+1] == '!' &&
|
|
buf[x+2] == '-' &&
|
|
buf[x+3] == '-')
|
|
char_skip = 3,
|
|
tag_comment = true,
|
|
html += "<!--";
|
|
else
|
|
append(c);
|
|
break;
|
|
case '*':
|
|
// Bold check
|
|
// Check whether this character has been escaped.
|
|
if (tag_comment || buf[x-1] == '\\') {
|
|
append(c);
|
|
break;
|
|
}
|
|
|
|
append(tag_b ? "</b>" : "<b>");
|
|
tag_b = !tag_b;
|
|
break;
|
|
case '_':
|
|
// Italics check
|
|
// Check whether this character has
|
|
// been escaped.
|
|
|
|
if (tag_comment || buf[x-1] == '\\') {
|
|
append(c);
|
|
break;
|
|
}
|
|
|
|
append(tag_i ? "</i>" : "<i>");
|
|
tag_i = !tag_i;
|
|
break;
|
|
case '#':
|
|
// Header check
|
|
|
|
// Headers must be declared at the
|
|
// beginning of a new line. Ignore it
|
|
// if this is not a new line.
|
|
if (tag_comment || !newline) {
|
|
append(c);
|
|
break;
|
|
}
|
|
|
|
// Check whether this character has
|
|
// been escaped.
|
|
if (buf[x-1] == '\\' ||
|
|
// Check if this header was specified
|
|
// right at the beginning of the line.
|
|
(tag_h == 0 && x != 0)) {
|
|
append(c);
|
|
break;
|
|
}
|
|
|
|
// This seems like a header
|
|
// declaration.
|
|
//
|
|
// Increase the header count (for
|
|
// subheader support) and add it to the
|
|
// HTML output.
|
|
//
|
|
// Support up to 6 levels of headers.
|
|
// After that, ignore '#' characters
|
|
// and add them directly to the HTML
|
|
// output.
|
|
if (tag_h >= 6) {
|
|
html += "<h";
|
|
html += (ASCII_DIGIT_START + tag_h);
|
|
html += ">";
|
|
html += '#';
|
|
ignoreSpace = true;
|
|
break;
|
|
} else tag_h++;
|
|
|
|
// If we are done reading header
|
|
// characters, finally add the tag and
|
|
// then move on.
|
|
|
|
if (buf[x+1] != '#') {
|
|
html += "<h";
|
|
html += (ASCII_DIGIT_START + tag_h);
|
|
html += ">";
|
|
ignoreSpace = true;
|
|
break;
|
|
}
|
|
break;
|
|
case '~':
|
|
// Escape character
|
|
if (tag_comment || (x > 0 && buf[x-1] == '\\')) {
|
|
append(c);
|
|
break;
|
|
}
|
|
if (x > 0 && buf[x-1] == '~') {
|
|
append(tag_s ? "</s>" : "<s>");
|
|
tag_s = !tag_s;
|
|
break;
|
|
}
|
|
if (buf[x+1] == '~') break;
|
|
append(c);
|
|
break;
|
|
case '-':
|
|
if (tag_comment &&
|
|
buf[x+1] == '-' &&
|
|
buf[x+2] == '>') {
|
|
tag_comment = false,
|
|
html += "-->",
|
|
char_skip = 2;
|
|
continue;
|
|
}
|
|
if (tag_comment || x != 0) {
|
|
append(c);
|
|
break;
|
|
}
|
|
|
|
// Start unordered list tag if it's not active.
|
|
if (!tag_ul) html += "<ul>", tag_ul = true;
|
|
|
|
// End previous list item, if active.
|
|
if (tag_li) html += "</li>\n", tag_li = false;
|
|
|
|
html += "<li>";
|
|
ignoreSpace = true;
|
|
tag_li = true;
|
|
break;
|
|
case '[':
|
|
// Hyperlink text declaration has begun
|
|
if (tag_comment || tag_a != NONE || buf[x-1] == '\\') {
|
|
// Cannot add hyperlinks inside of hyperlinks;
|
|
append(c);
|
|
break;
|
|
}
|
|
tag_a_buf = "";
|
|
tag_a_text_buf = "";
|
|
tag_a = READING_CONTENTS;
|
|
break;
|
|
case ']':
|
|
// Hyperlink text declaration ended
|
|
if (tag_comment || tag_a != READING_CONTENTS || buf[x-1] == '\\') {
|
|
// Ignore if not reading hyperlink.
|
|
append(c);
|
|
break;
|
|
}
|
|
tag_a = EXPECTING_URL;
|
|
break;
|
|
case '(':
|
|
// Hyperlink address declaration has begun
|
|
if (tag_comment || tag_a != EXPECTING_URL) {
|
|
append(c);
|
|
break;
|
|
}
|
|
tag_a = READING_URL;
|
|
break;
|
|
case ')':
|
|
// Hyperlink address declaration ended
|
|
if (tag_comment || tag_a != READING_URL) {
|
|
append(c);
|
|
break;
|
|
}
|
|
tag_a = NONE;
|
|
append("<a href=\"");
|
|
append(tag_a_buf);
|
|
append("\">");
|
|
append(tag_a_text_buf);
|
|
append("</a>");
|
|
break;
|
|
case ' ':
|
|
if (!tag_comment && ignoreSpace) {
|
|
ignoreSpace = false;
|
|
break;
|
|
} else append(" ");
|
|
break;
|
|
case '\\':
|
|
break;
|
|
default:
|
|
append(c);
|
|
break;
|
|
}
|
|
}
|
|
if (!manualBreak) newline = false;
|
|
}
|
|
|
|
fclose(mdFile);
|
|
return html;
|
|
}
|
|
|
|
};
|