swg/src/MarkdownParser.cxx

334 lines
7.7 KiB
C++

/*
* Copyright (C) 2022-2023 luca0N!
*
* This file is part of Static Website Generator (swg).
*
* Static Website Generator (swg) is free software: you can redistribute it
* and/or modify it under the terms of the version 3 of the GNU Lesser General
* Public License as published by the Free Software Foundation.
*
* Static Website Generator (swg) is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
* General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Static Website Generator (swg). If not, see
* <https://www.gnu.org/licenses/>.
*
* Contact luca0N! by e-mail: <luca0n [at] luca0n [dot] com>.
*/
#include "MarkdownParser.hxx"
#include <assert.h>
#include <iostream>
#define ASCII_DIGIT_START 48
// Position of the parser inside a Markdown hyperlink of the form
// "[text](address)".
enum HyperlinkStage {
	// Outside of any hyperlink.
	NONE = 0,
	// After '[': the link text is being collected.
	READING_CONTENTS = 1,
	// After ']': the link text is complete and '(' is expected next.
	EXPECTING_URL = 2,
	// After '(': the link address is being collected until ')'.
	READING_URL = 3
};
std::string html,
tag_a_text_buf, // used for storing the text contents of a hyperlink
tag_a_buf; // used for storing the hyperlink address
enum HyperlinkStage tag_a = NONE;
// Appends `s` to whichever buffer the current hyperlink stage selects:
//   NONE             -> html (regular output)
//   READING_CONTENTS -> tag_a_text_buf (link text)
//   READING_URL      -> tag_a_buf (link address)
//
// EXPECTING_URL means a "[text]" was closed but the '(' that would start
// its address did not follow. That is not a hyperlink after all, so the
// bracketed text is written to html literally, the stage returns to NONE
// and `s` is written as regular output. (This used to trip an assertion,
// or silently discard the text when assertions are compiled out.)
void append(std::string const &s) {
	switch (tag_a) {
	case NONE:
		html += s;
		break;
	case READING_CONTENTS:
		tag_a_text_buf += s;
		break;
	case EXPECTING_URL:
		html += '[';
		html += tag_a_text_buf;
		html += ']';
		// Write `s` before clearing the buffer so that the call
		// stays correct even if `s` refers to tag_a_text_buf.
		html += s;
		tag_a_text_buf.clear();
		tag_a = NONE;
		break;
	case READING_URL:
		tag_a_buf += s;
		break;
	}
}
// Single-character convenience overload: wraps `c` in a one-character
// string and forwards it to the string overload of append().
void append(const char c) {
	append(std::string(1, c));
}
// Empties the HTML output buffer. Only `html` is reset here; the hyperlink
// buffers and the hyperlink stage are left as they are.
void cleanup() {
	html.clear();
}
namespace MarkdownParser {
std::string make_html(std::filesystem::path const &path) {
cleanup();
FILE *mdFile = fopen(path.string().c_str(), "r");
// TODO: Add proper error handling.
assert(mdFile != NULL);
int buflen = 64;
char buf[buflen];
// Tag flags
bool tag_b = false,
tag_i = false,
tag_p = false,
tag_li = false,
tag_ul = false,
tag_s = false,
tag_comment = false,
newline = true,
manualBreak = false,
// Used to ignore spaces at the beginning of header titles.
ignoreSpace = false;
unsigned short char_skip = 0;
// For counting sub-headers (h1, h2, h3, and so on)
int tag_h = 0;
while (fgets(buf, buflen, mdFile) != NULL) {
manualBreak = false;
if (!tag_comment && tag_p && buf[0] == '\n') {
// Empty newline; end paragraph.
html += "</p>\n";
tag_p = false;
continue;
}
// End ul tag if it's active and a new line doesn't contain an
// item.
if (tag_ul && newline && buf[0] != '-' && buf[0] == '\n') {
tag_ul = false;
tag_li = false;
html += "</li></ul>";
}
// Read character by character
for (int x = 0; x < buflen; x++) {
if (char_skip > 0) {
char_skip--;
continue;
}
char c = buf[x];
if (c == '\0') break;
else if (!tag_comment && c == '\n') {
// The next buffer iteration will hold the
// first (buflen) bytes of the next new line.
newline = true;
manualBreak = true;
append(' ');
// If we were in the middle of inserting a header tag, close it here.
if (tag_h > 0) {
html += "</h";
html += (ASCII_DIGIT_START + tag_h);
html += ">";
tag_h = 0;
}
if (!tag_li)
html += '\n';
break;
}
// Start paragraph if newline and no
// special characters were matched.
if (!tag_comment && !tag_ul && (!tag_p && newline && x == 0 &&
c != '#' && c != '-'))
html += "<p>",
tag_p = true;
switch (c) {
case '<':
// Check for HTML comment
if (buf[x+1] == '!' &&
buf[x+2] == '-' &&
buf[x+3] == '-')
char_skip = 3,
tag_comment = true,
html += "<!--";
else
append(c);
break;
case '*':
// Bold check
// Check whether this character has been escaped.
if (tag_comment || buf[x-1] == '\\') {
append(c);
break;
}
append(tag_b ? "</b>" : "<b>");
tag_b = !tag_b;
break;
case '_':
// Italics check
// Check whether this character has
// been escaped.
if (tag_comment || buf[x-1] == '\\') {
append(c);
break;
}
append(tag_i ? "</i>" : "<i>");
tag_i = !tag_i;
break;
case '#':
// Header check
// Headers must be declared at the
// beginning of a new line. Ignore it
// if this is not a new line.
if (tag_comment || !newline) {
append(c);
break;
}
// Check whether this character has
// been escaped.
if (buf[x-1] == '\\' ||
// Check if this header was specified
// right at the beginning of the line.
(tag_h == 0 && x != 0)) {
append(c);
break;
}
// This seems like a header
// declaration.
//
// Increase the header count (for
// subheader support) and add it to the
// HTML output.
//
// Support up to 6 levels of headers.
// After that, ignore '#' characters
// and add them directly to the HTML
// output.
if (tag_h >= 6) {
html += "<h";
html += (ASCII_DIGIT_START + tag_h);
html += ">";
html += '#';
ignoreSpace = true;
break;
} else tag_h++;
// If we are done reading header
// characters, finally add the tag and
// then move on.
if (buf[x+1] != '#') {
html += "<h";
html += (ASCII_DIGIT_START + tag_h);
html += ">";
ignoreSpace = true;
break;
}
break;
case '~':
// Escape character
if (tag_comment || (x > 0 && buf[x-1] == '\\')) {
append(c);
break;
}
if (x > 0 && buf[x-1] == '~') {
append(tag_s ? "</s>" : "<s>");
tag_s = !tag_s;
break;
}
if (buf[x+1] == '~') break;
append(c);
break;
case '-':
if (tag_comment &&
buf[x+1] == '-' &&
buf[x+2] == '>') {
tag_comment = false,
html += "-->",
char_skip = 2;
continue;
}
if (tag_comment || x != 0) {
append(c);
break;
}
// Start unordered list tag if it's not active.
if (!tag_ul) html += "<ul>", tag_ul = true;
// End previous list item, if active.
if (tag_li) html += "</li>\n", tag_li = false;
html += "<li>";
ignoreSpace = true;
tag_li = true;
break;
case '[':
// Hyperlink text declaration has begun
if (tag_comment || tag_a != NONE || buf[x-1] == '\\') {
// Cannot add hyperlinks inside of hyperlinks;
append(c);
break;
}
tag_a_buf = "";
tag_a_text_buf = "";
tag_a = READING_CONTENTS;
break;
case ']':
// Hyperlink text declaration ended
if (tag_comment || tag_a != READING_CONTENTS || buf[x-1] == '\\') {
// Ignore if not reading hyperlink.
append(c);
break;
}
tag_a = EXPECTING_URL;
break;
case '(':
// Hyperlink address declaration has begun
if (tag_comment || tag_a != EXPECTING_URL) {
append(c);
break;
}
tag_a = READING_URL;
break;
case ')':
// Hyperlink address declaration ended
if (tag_comment || tag_a != READING_URL) {
append(c);
break;
}
tag_a = NONE;
append("<a href=\"");
append(tag_a_buf);
append("\">");
append(tag_a_text_buf);
append("</a>");
break;
case ' ':
if (!tag_comment && ignoreSpace) {
ignoreSpace = false;
break;
} else append(" ");
break;
case '\\':
break;
default:
append(c);
break;
}
}
if (!manualBreak) newline = false;
}
fclose(mdFile);
return html;
}
};