swg/src/WebsiteBuilder.cxx

364 lines
12 KiB
C++

/*
* Copyright (C) 2022 luca0N!
*
* This file is part of Static Website Generator (swg).
*
* Static Website Generator (swg) is free software: you can redistribute it
* and/or modify it under the terms of the version 3 of the GNU Lesser General
* Public License as published by the Free Software Foundation.
*
* Static Website Generator (swg) is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
* General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Static Website Generator (swg). If not, see
* <https://www.gnu.org/licenses/>.
*
* Contact luca0N! by e-mail: <luca0n [at] luca0n [dot] com>.
*/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <filesystem>
#include <iostream>
#include <list>
#include <regex>
#include <string>
#include <system_error>
#include "time.h"
#include "SwgRuntime.hxx"
#include "SwgContext.hxx"
#include "ConfigUtils.hxx"
// #include "Blog.h"
#include "Common.hxx"
#include "MarkdownParser.hxx"
#include "Article.hxx"
/**
 * Drops the blog directory prefix from an article path.
 *
 * The cut is made one character before the end of the prefix, so the last
 * character of `pathPrefix` stays at the front of the result.
 *
 * @param pathPrefix prefix to remove from `path`
 * @param path       path to shorten
 * @return `path` starting at offset `pathPrefix.length() - 1`
 */
std::string blog_relative_path(std::string const &pathPrefix, std::string const &path) {
    std::string::size_type const cutAt = pathPrefix.length() - 1;
    return path.substr(cutAt, std::string::npos);
}
/**
 * Extracts the last component of a '/'-separated path.
 *
 * @param path path to inspect; a path without any '/' is treated as a bare
 *             filename
 * @param ext  when true (default) the extension is kept; when false,
 *             everything from the last '.' of the filename onwards is removed
 * @return the filename, with or without its extension
 */
std::string getFilename(std::string const &path, bool const ext = true) {
    // Everything after the last directory separator is the filename. A
    // separator at index 0 (e.g. "/foo") counts as well.
    std::string::size_type const separator = path.rfind('/');
    std::string const filename =
        (separator == std::string::npos) ? path : path.substr(separator + 1);
    if (ext) return filename;

    // Only the filename is searched, so dots in directory names are ignored.
    std::string::size_type const dot = filename.rfind('.');
    return (dot == std::string::npos) ? filename : filename.substr(0, dot);
}
/**
 * Tells whether an article sits in the expected "/YYYY/MM/<name>.md" layout
 * once the blog prefix has been stripped with blog_relative_path().
 *
 * @param pathPrefix blog directory prefix
 * @param path       full path of the candidate article
 * @return true when the relative path matches the layout
 */
bool isValidArticle(std::string const &pathPrefix, std::string const &path) {
    std::regex const articleLayout("^/\\d{4}/\\d{2}/.*\\.md");
    std::string const relative = blog_relative_path(pathPrefix, path);
    return std::regex_search(relative, articleLayout);
}
/**
 * Builds the location of the generated website.
 *
 * @param path website root directory
 * @return `path` with an "output" component appended
 */
std::filesystem::path get_output_path(std::string const &path) {
    return std::filesystem::path(path) / "output";
}
/**
 * Prepares the output directory of the website rooted at `path`.
 *
 * Creates `<path>/output` when it is missing. Unless the "overwrite_existing"
 * runtime flag is set, the process exits with
 * RETURN_FAILED_WEBSITE_BUILD_EXISTS if a `.swg_built` marker is already
 * present in the output directory. A filesystem error while creating the
 * directory exits with RETURN_FAILED_UNKNOWN_ERROR.
 *
 * @param path website root directory
 */
void build_dir_structure(std::string const &path) {
    // Create directory tree, which will be used for the website.
    std::filesystem::path const rootDir = get_output_path(path);

    // Create output directory if it doesn't exist.
    try {
        if (!std::filesystem::exists(rootDir)) std::filesystem::create_directory(rootDir);
    } catch (std::filesystem::filesystem_error const &e) {
        std::cerr << "error: fs error while generating output website directory structure on \""
                  << path << "\": " << e.what() << std::endl;
        exit(RETURN_FAILED_UNKNOWN_ERROR);
    }

    // Check if a website has already been built.
    //
    // This is a safe measure to prevent overwriting an existing website
    // output, but it can be skipped if explicitly asked by the end-user.
    if (!swg_rt_global_bool("overwrite_existing", false)) {
        // operator/ (not /=) so that rootDir itself is left untouched.
        std::filesystem::path const websiteLock = rootDir / ".swg_built";
        if (std::filesystem::exists(websiteLock)) {
            // Errors go to stderr, like every other error in this file.
            std::cerr << "error: existing website build detected, exiting...\n";
            exit(RETURN_FAILED_WEBSITE_BUILD_EXISTS);
        }
    }
}
/**
 * Loads the SWG HTML template of a website into memory.
 *
 * The template is read from `<path>/__swg_template.html`. If the file cannot
 * be opened, an error is printed and the process exits with
 * RETURN_FAILED_INVALID_DIRECTORY.
 *
 * @param path website root directory
 * @return the full contents of the template file
 */
std::string get_template(std::string const &path) {
    // Template lookup
    std::string stPath = path;
    stPath += "/__swg_template.html";
    FILE *swgTemplate = fopen(stPath.c_str(), "r");
    if (swgTemplate == nullptr) {
        std::cerr << "error: couldn't open the SWG HTML template file; does it exist?\n";
        perror(stPath.c_str());
        exit(RETURN_FAILED_INVALID_DIRECTORY);
    }

    // Read the file in fixed-size chunks. The buffer size is a compile-time
    // constant (variable-length arrays are not standard C++), and appending
    // by byte count keeps the data intact even if it contains NUL bytes.
    constexpr std::size_t chunkSize = 4096;
    char chunk[chunkSize];
    std::string htmlTemplate;
    std::size_t bytesRead = 0;
    while ((bytesRead = fread(chunk, 1, chunkSize, swgTemplate)) > 0) {
        htmlTemplate.append(chunk, bytesRead);
    }
    fclose(swgTemplate);
    std::cout << "Loaded HTML template into memory.\n";
    return htmlTemplate;
}
void compile_markdown(std::string const &path, std::string const &md, std::string const &to) {
FILE *articleOutput = fopen(to.c_str(), "w");
std::string htmlTemplate = get_template(path);
// NOTE: std::regex requires the C++11 standard.
std::regex contentPlaceholder("<!--\\[_SWG: \\$CONTENT\\]-->");
std::string articleHtml = MarkdownParser::make_html(md);
std::string articleContents = std::regex_replace(htmlTemplate, contentPlaceholder, articleHtml);
fputs(articleContents.c_str(), articleOutput);
fclose(articleOutput);
}
void build_blog_structure(std::string const &path, std::string const &prefix, std::list<std::string> const &articles, Blog *blog) {
std::filesystem::path obp = get_output_path(path); // Output Blog Path
obp /= "blog";
obp /= blog->dir;
try {
// Create blog directory
if (!std::filesystem::exists(obp)) std::filesystem::create_directories(obp);
} catch (std::filesystem::filesystem_error const &e) {
std::cerr << "error: failed to create directory for blog \"" << blog->name << "\": " << e.what() << std::endl;
exit(RETURN_FAILED_UNKNOWN_ERROR);
}
std::list<Article::Metadata*> am;
//std::map<std::string, std::list<std::string>> sorted_articles;
for (std::string const &a : articles) {
std::string articlePath = blog_relative_path(prefix, a);
std::regex yearMonth("(\\d+)");
auto match = std::sregex_iterator(articlePath.begin(), articlePath.end(), yearMonth);
std::string year = match->str(),
month = (++match)->str();
// Go ahead and parse article metadata.
Article::Metadata *articleMetadata = (Article::Metadata*) malloc(sizeof(Article::Metadata));
Article::get_metadata(a, articleMetadata);
std::cout << "Parsed metadata for article \"" << articleMetadata->title << "\"\n\tPublished on "
<< ctime(&(articleMetadata->publish_ts)) << "\n";
// TODO: This code could be optimized by removing directory
// checks for every single article. Instead, add
// directory-checks for years and months to a queue (skipping
// existing ones) and then checking and creating the
// directories later as needed.
try {
// Create directory for the year of this article if it doesn't exist.
std::filesystem::path oad = obp, // Output Article Directory
rap; // Relative Article Path
oad /= year;
rap /= year;
if (!std::filesystem::exists(oad)) std::filesystem::create_directory(oad);
// Do the same for the article month.
oad /= month;
rap /= month;
if (!std::filesystem::exists(oad)) std::filesystem::create_directory(oad);
// Now create the article file.
std::string new_article_filename = getFilename(a, false);
new_article_filename += ".html";
oad /= new_article_filename;
rap /= new_article_filename;
compile_markdown(path, a, oad);
strncpy(articleMetadata->path, rap.c_str(), sizeof(articleMetadata->path));
am.push_back(articleMetadata);
} catch (std::filesystem::filesystem_error const &e) {
std::cerr << "error: failed to create directory for an article from blog \""
<< blog->name << "\": " << e.what() << std::endl;
exit(RETURN_FAILED_UNKNOWN_ERROR);
}
}
// Sort am list.
am.sort(Article::Comparator::comp);
// Generate blog catalog.
std::string blog_html_catalog = "<ul>",
last_date = "";
std::string hr_month[] = {
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December"
};
for (Article::Metadata *m : am) {
struct tm *article_time = gmtime(&(m->publish_ts));
std::string hr_date = hr_month[article_time->tm_mon];
hr_date += " ";
hr_date += std::to_string(article_time->tm_year + 1900);
// Check if this article belongs to the same "month + year" group as
// the last article. If it doesn't, add a new group to the catalog.
if (hr_date != last_date) {
last_date = hr_date;
blog_html_catalog += "<span><b>";
blog_html_catalog += hr_date;
blog_html_catalog += "</b></span>";
}
blog_html_catalog += "<li><a href=\"./";
blog_html_catalog += m->path;
blog_html_catalog += "\">";
blog_html_catalog += m->title;
blog_html_catalog += "</a></li>";
// Free memory as it's no longer needed.
free(m);
}
blog_html_catalog += "</ul>";
}
/**
 * Decides whether a file is internal to swg and must therefore be kept out of
 * the output directory: the "swg.cfg" config file and every file whose name
 * starts with "__swg_".
 *
 * @param filename bare filename (no directory part)
 * @return true when the file must not be copied
 */
bool is_special_file(std::string const &filename) {
    bool const isConfig = (filename == "swg.cfg");
    // rfind anchored at position 0 can only match at the very start.
    bool const hasSwgPrefix = (filename.rfind("__swg_", 0) == 0);
    return isConfig || hasSwgPrefix;
}
/** Returns true when `filename` ends with the ".md" extension. */
static bool has_markdown_extension(std::string const &filename) {
    static std::string const extension = ".md";
    return filename.length() >= extension.length() &&
           filename.compare(filename.length() - extension.length(),
                            extension.length(), extension) == 0;
}

/**
 * Builds the whole website rooted at `path`.
 *
 * 1. Prepares the output directory (build_dir_structure()).
 * 2. Walks the website tree: regular files that are not special
 *    (is_special_file()) are mirrored under `<path>output/`; Markdown files
 *    are compiled to HTML, everything else is copied as-is.
 * 3. For every blog in `ctx.blogs`, collects the Markdown articles found in
 *    the blog directory and hands the valid ones to build_blog_structure().
 *
 * `path` is expected to end with a directory separator: the skip prefixes
 * are formed as `path + "output/"` and `path + ".swg_ignore/"`.
 *
 * Exits with RETURN_FAILED_UNKNOWN_ERROR when assets cannot be copied, and
 * with RETURN_FAILED_CONFIG_INVALID_SYNTAX when a blog directory could not be
 * read.
 *
 * @param ctx  website context; its `blogs` entries are freed as they are
 *             processed
 * @param path website root directory
 */
void build_website(SwgContext &ctx, std::string const &path) {
    build_dir_structure(path);

    // Build regular webpages (index, privacy notice, etc)
    std::filesystem::path ws = path;
    std::string const outputPrefix = path + "output/";
    std::string const ignorePrefix = path + ".swg_ignore/";

    // Copy all non-MD files to output (to include assets, for instance).
    try {
        for (auto const &ws_entry : std::filesystem::recursive_directory_iterator(ws)) {
            std::string const entryPath = ws_entry.path().string();

            // Skip all files inside the "output" and ".swg_ignore" directories.
            if (entryPath.compare(0, outputPrefix.length(), outputPrefix) == 0 ||
                entryPath.compare(0, ignorePrefix.length(), ignorePrefix) == 0) continue;

            std::string const currentFile = getFilename(entryPath);
            if (!ws_entry.is_regular_file() || is_special_file(currentFile)) continue;

            // Subdirectories should be respected! If this file is in a
            // subdirectory, it should also be created in the output.
            std::filesystem::path copied_subdir = path;
            copied_subdir /= "output";
            copied_subdir /= entryPath.substr(path.length(),
                    entryPath.length() - path.length() - currentFile.length());
            if (!std::filesystem::exists(copied_subdir)) {
                std::filesystem::create_directories(copied_subdir);
                std::cout << "Creating: " << copied_subdir << "\n";
            }

            bool compiled = false;
            if (!has_markdown_extension(currentFile)) {
                // If this isn't a Markdown file, just copy it.
                try {
                    std::filesystem::copy(ws_entry, copied_subdir);
                } catch (std::filesystem::filesystem_error const &cpyErr) {
                    // Ignore the error if the destination file already
                    // exists; rethrow anything else unchanged.
                    if (cpyErr.code() != std::errc::file_exists)
                        throw;
                }
            } else {
                // This is a Markdown file. Compile a HTML version.
                std::filesystem::path output_html = copied_subdir;
                // Remove the .md extension and use the HTML extension instead
                output_html /= currentFile.substr(0, currentFile.length() - 3) + ".html";
                compile_markdown(path, entryPath, output_html);
                compiled = true;
            }
            std::cout << (compiled ? "Compiled: " : "Copied: ") <<
                entryPath.substr(path.length()) << "\n";
        }
    } catch (std::filesystem::filesystem_error const &e) {
        std::cerr << "error: could not copy website assets to output: " << e.what() << std::endl;
        exit(RETURN_FAILED_UNKNOWN_ERROR);
    }

    // Blog lookup
    bool failed = false;
    for (Blog *b : ctx.blogs) {
        std::list<std::string> parsedArticles;
        std::list<std::string> failedArticles;

        // std::filesystem requires the C++17 standard.
        std::string relativePath = path;
        relativePath += "/";
        relativePath += b->path;
        const std::filesystem::path blogPath(relativePath);
        try {
            for (auto const &dir_entry : std::filesystem::recursive_directory_iterator(blogPath)) {
                std::string const entryPath = dir_entry.path().string();

                // Skip all files inside the "output" directory.
                if (entryPath.compare(0, outputPrefix.length(), outputPrefix) == 0) continue;

                // Directory item iteration
                std::cout << "\t" << dir_entry << std::endl;

                // Check if the item is a Markdown file.
                std::string const filename = getFilename(entryPath);
                if (dir_entry.is_regular_file() && has_markdown_extension(filename)) {
                    std::cout << "\t\tIs file: " << filename << "\n";
                    // Markdown files should be inside a YYYY/MM directory.
                    if (!isValidArticle(relativePath, entryPath))
                        failedArticles.push_back(entryPath);
                    else
                        parsedArticles.push_back(entryPath);
                }
            }
        } catch (std::filesystem::filesystem_error const &e) {
            std::cerr << "error: fs error while attempting to read path for "
                      << b->name << ": " << e.what() << std::endl;
            failed = true;
        }

        /*std::cout << "Parsed " << parsedArticles << " articles;\n" <<
            "Failed to parse " << failedArticles << " articles\n";*/
        for (std::string const &a : parsedArticles)
            std::cout << "Parsed: " << a << "\n";
        for (std::string const &a : failedArticles)
            std::cout << "Unable to parse: " << a << "\n";

        build_blog_structure(path, relativePath, parsedArticles, b);
        // NOTE(review): this releases the Blog while ctx.blogs still holds
        // the pointer -- confirm nothing reads ctx.blogs afterwards and that
        // the Blog was allocated with malloc.
        free(b);
    }

    if (failed) {
        std::cerr << "Refusing to proceed due to previous errors\n";
        exit(RETURN_FAILED_CONFIG_INVALID_SYNTAX);
    }

    // Website blog
    // Append blog to website
    //ctx.appendBlog(blog1);
    // Generate website
    //ctx.generateWebsite();
}