// swg/src/WebsiteBuilder.cxx

/*
 * Copyright (C) 2022 luca0N!
 *
 * This file is part of Static Website Generator (swg).
 *
 * Static Website Generator (swg) is free software: you can redistribute it
 * and/or modify it under the terms of the version 3 of the GNU Lesser General
 * Public License as published by the Free Software Foundation.
 *
 * Static Website Generator (swg) is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Static Website Generator (swg). If not, see
 * <https://www.gnu.org/licenses/>.
 *
 * Contact luca0N! by e-mail: <luca0n [at] luca0n [dot] com>.
 */

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <iostream>
#include <list>
#include <regex>
#include <string>
#include <system_error>
#include "time.h"

#include "SwgRuntime.hxx"
#include "SwgContext.hxx"
#include "ConfigUtils.hxx"
// #include "Blog.h"
#include "Common.hxx"
#include "MarkdownParser.hxx"
#include "Article.hxx"

/**
 * Strips a directory prefix from a path, keeping the directory separator
 * that follows the prefix.
 *
 * Example: ("site/blog/", "site/blog/2022/01/a.md") -> "/2022/01/a.md".
 * The same result is produced when the prefix has no trailing '/'.
 *
 * @param pathPrefix Prefix to drop.  It is not verified that "path" actually
 *                   begins with it; only its length and last character are
 *                   looked at.
 * @param path       Path to shorten.
 * @return The remainder of "path"; "path" itself when the prefix is empty,
 *         and an empty string when the prefix is longer than the path.
 */
std::string blog_relative_path(std::string const &pathPrefix, std::string const &path) {
	// An empty prefix strips nothing (length() - 1 would wrap around).
	if (pathPrefix.empty()) return path;

	// When the prefix already ends with a separator, step back onto it so
	// the result still starts with '/'.  Otherwise the separator is the
	// first character after the prefix.
	std::string::size_type start = pathPrefix.length();
	if (pathPrefix.back() == '/') --start;

	if (start > path.length()) return std::string();
	return path.substr(start);
}

/**
 * Returns the last component of a '/'-separated path.
 *
 * @param path Path to inspect.  A path without any '/' is treated as a bare
 *             filename.
 * @param ext  When true (the default) the filename is returned as is.  When
 *             false, everything from the last '.' of the filename onwards is
 *             removed (so "a/b.tar.gz" yields "b.tar", and a name whose only
 *             dot is its first character yields an empty string).
 * @return The filename, optionally without its extension.
 */
std::string getFilename(std::string const &path, bool const ext = true) {
	// Everything after the last separator is the filename; without a
	// separator the whole path is.
	std::string::size_type const separator = path.rfind('/');
	std::string const filename =
		separator == std::string::npos ? path : path.substr(separator + 1);
	if (ext) return filename;

	// Only dots inside the filename count; dots in directory names were
	// already cut off above.
	std::string::size_type const extSeparator = filename.rfind('.');
	return extSeparator == std::string::npos ? filename : filename.substr(0, extSeparator);
}

/**
 * Tells whether a Markdown file sits in the YYYY/MM layout expected for blog
 * articles.
 *
 * The path is first made relative with blog_relative_path() and then
 * searched for "/<4 digits>/<2 digits>/<anything>.md".  The pattern is
 * anchored at the start of the relative path only.
 *
 * @param pathPrefix Blog directory prefix, forwarded to blog_relative_path().
 * @param path       Path of the candidate article.
 * @return true when the relative path matches the layout.
 */
bool isValidArticle(std::string const &pathPrefix, std::string const &path) {
	std::regex const articleLayout("^/\\d{4}/\\d{2}/.*\\.md");
	std::string const relative = blog_relative_path(pathPrefix, path);
	return std::regex_search(relative, articleLayout);
}

/**
 * Builds the path of the "output" directory that lives directly inside the
 * given directory.
 *
 * @param path Directory that contains (or will contain) "output".
 * @return "path" with the "output" component appended.
 */
std::filesystem::path get_output_path(std::string const &path) {
	return std::filesystem::path(path) / "output";
}

/**
 * Prepares the output directory of the website.
 *
 * Creates "<path>/output" when it does not exist yet.  Unless the runtime
 * flag "overwrite_existing" is set, the presence of a ".swg_built" file in
 * that directory is treated as an existing build and terminates the process
 * with RETURN_FAILED_WEBSITE_BUILD_EXISTS.  Any filesystem error terminates
 * the process with RETURN_FAILED_UNKNOWN_ERROR.
 *
 * @param path Website root directory.
 */
void build_dir_structure(std::string const &path) {
	// Root of the directory tree that will hold the generated website.
	std::filesystem::path const rootDir = get_output_path(path);
	bool existingBuild = false;

	try {
		// Create output directory if it doesn't exist.
		if (!std::filesystem::exists(rootDir)) std::filesystem::create_directory(rootDir);

		// Check if a website has already been built.
		//
		// This is a safe measure to prevent overwriting an existing
		// website output, but it can be skipped if explicitly asked by
		// the end-user.  The lookup stays inside the try block because
		// exists() may throw as well.
		if (!swg_rt_global_bool("overwrite_existing", false))
			existingBuild = std::filesystem::exists(rootDir / ".swg_built");
	} catch (std::filesystem::filesystem_error const &e) {
		std::cerr << "error: fs error while generating output website directory structure on \""
			<< path << "\": " << e.what() << std::endl;
		exit(RETURN_FAILED_UNKNOWN_ERROR);
	}

	if (existingBuild) {
		// Reported on stderr like every other error in this file.
		std::cerr << "error: existing website build detected, exiting...\n";
		exit(RETURN_FAILED_WEBSITE_BUILD_EXISTS);
	}
}

/**
 * Loads the SWG HTML template of a website into memory.
 *
 * The template is read from "<path>/__swg_template.html".  If the file
 * cannot be opened or read, an error is printed and the process exits with
 * RETURN_FAILED_INVALID_DIRECTORY.
 *
 * @param path Website root directory.
 * @return The full contents of the template file.
 */
std::string get_template(std::string const &path) {
	// Template lookup
	std::string const stPath = path + "/__swg_template.html";
	FILE *swgTemplate = fopen(stPath.c_str(), "r");
	if (swgTemplate == nullptr) {
		std::cerr << "error: couldn't open the SWG HTML template file; does it exist?\n";
		perror(stPath.c_str());
		exit(RETURN_FAILED_INVALID_DIRECTORY);
	}

	// Read the file in fixed-size chunks.  The chunk length returned by
	// fread() is used when appending, so the buffer never needs to be
	// NUL-terminated.
	char buf[4096];
	std::string htmlTemplate;
	std::size_t chunk = 0;
	while ((chunk = fread(buf, 1, sizeof(buf), swgTemplate)) > 0)
		htmlTemplate.append(buf, chunk);

	// fread() returning 0 means either end-of-file or an error; tell them
	// apart so a truncated template is never used silently.
	if (ferror(swgTemplate)) {
		std::cerr << "error: couldn't read the SWG HTML template file\n";
		perror(stPath.c_str());
		fclose(swgTemplate);
		exit(RETURN_FAILED_INVALID_DIRECTORY);
	}

	fclose(swgTemplate);
	std::cout << "Loaded HTML template into memory.\n";
	return htmlTemplate;
}

/**
 * Renders one Markdown source into an HTML page based on the website
 * template.
 *
 * Every occurrence of the placeholder "<!--[_SWG: $CONTENT]-->" in the
 * template returned by get_template() is replaced, verbatim, by the HTML
 * produced by MarkdownParser::make_html().
 *
 * @param path Website root directory; used to locate the template.
 * @param md   Markdown input handed to MarkdownParser::make_html() (the
 *             callers in this file pass the path of the Markdown file).
 * @param to   Path of the HTML file to write.
 * @throws std::filesystem::filesystem_error when the output file cannot be
 *         opened or written.
 */
void compile_markdown(std::string const &path, std::string const &md, std::string const &to) {
	static std::string const placeholder = "<!--[_SWG: $CONTENT]-->";

	std::string const htmlTemplate = get_template(path);
	std::string const articleHtml = MarkdownParser::make_html(md);

	// Plain substring substitution: the article HTML is inserted as is, so
	// '$' sequences inside the article are never interpreted as
	// replacement-format references.
	std::string articleContents;
	articleContents.reserve(htmlTemplate.length() + articleHtml.length());
	std::string::size_type cursor = 0;
	for (std::string::size_type hit = htmlTemplate.find(placeholder, cursor);
			hit != std::string::npos;
			hit = htmlTemplate.find(placeholder, cursor)) {
		articleContents.append(htmlTemplate, cursor, hit - cursor);
		articleContents += articleHtml;
		cursor = hit + placeholder.length();
	}
	articleContents.append(htmlTemplate, cursor, std::string::npos);

	// Open the output only once the page is ready, so a failure above
	// doesn't leave a truncated file behind.
	FILE *articleOutput = fopen(to.c_str(), "w");
	if (articleOutput == nullptr) {
		int const openErr = errno;
		throw std::filesystem::filesystem_error("cannot open article output file", to,
				std::error_code(openErr, std::generic_category()));
	}

	bool failed = fwrite(articleContents.data(), 1, articleContents.size(), articleOutput)
			!= articleContents.size();
	int writeErr = failed ? errno : 0;
	// fclose() flushes buffered data, so its result matters too.
	if (fclose(articleOutput) != 0 && !failed) {
		failed = true;
		writeErr = errno;
	}
	if (failed)
		throw std::filesystem::filesystem_error("cannot write article output file", to,
				std::error_code(writeErr, std::generic_category()));
}

void build_blog_structure(std::string const &path, std::string const &prefix, std::list<std::string> const &articles, Blog *blog) {
	std::filesystem::path obp = get_output_path(path); // Output Blog Path
	obp /= "blog";
	obp /= blog->dir;
	try {
		// Create blog directory
		if (!std::filesystem::exists(obp)) std::filesystem::create_directories(obp);
	} catch (std::filesystem::filesystem_error const &e) {
		std::cerr << "error: failed to create directory for blog \"" << blog->name << "\": " << e.what() << std::endl;
		exit(RETURN_FAILED_UNKNOWN_ERROR);
	}

	std::list<Article::Metadata*> am;
	//std::map<std::string, std::list<std::string>> sorted_articles;

	for (std::string const &a : articles) {
		std::string articlePath = blog_relative_path(prefix, a);
		std::regex yearMonth("(\\d+)");
		auto match = std::sregex_iterator(articlePath.begin(), articlePath.end(), yearMonth);

		std::string year = match->str(),
			month = (++match)->str();

		// Go ahead and parse article metadata.
		Article::Metadata *articleMetadata = (Article::Metadata*) malloc(sizeof(Article::Metadata));
		Article::get_metadata(a, articleMetadata);
		std::cout << "Parsed metadata for article \"" << articleMetadata->title << "\"\n\tPublished on "
				<< ctime(&(articleMetadata->publish_ts)) << "\n";

		// TODO: This code could be optimized by removing directory
		// checks for every single article.  Instead, add
		// directory-checks for years and months to a queue (skipping
		// existing ones) and then checking and creating the
		// directories later as needed.
		try {
			// Create directory for the year of this article if it doesn't exist.
			std::filesystem::path oad = obp, // Output Article Directory
								  rap;		 // Relative Article Path
			oad /= year;
			rap /= year;
			if (!std::filesystem::exists(oad)) std::filesystem::create_directory(oad);

			// Do the same for the article month.
			oad /= month;
			rap /= month;
			if (!std::filesystem::exists(oad)) std::filesystem::create_directory(oad);

			// Now create the article file.
			std::string new_article_filename = getFilename(a, false);
			new_article_filename += ".html";
			oad /= new_article_filename;
			rap /= new_article_filename;
			compile_markdown(path, a, oad);
			strncpy(articleMetadata->path, rap.c_str(), sizeof(articleMetadata->path));
			am.push_back(articleMetadata);
		} catch (std::filesystem::filesystem_error const &e) {
			std::cerr << "error: failed to create directory for an article from blog \""
				<< blog->name << "\": " << e.what() << std::endl;
			exit(RETURN_FAILED_UNKNOWN_ERROR);
		}
	}
	// Sort am list.
	am.sort(Article::Comparator::comp);
	// Generate blog catalog.
	std::string blog_html_catalog = "<ul>",
			last_date = "";
	std::string hr_month[] = {
		"January",
		"February",
		"March",
		"April",
		"May",
		"June",
		"July",
		"August",
		"September",
		"October",
		"November",
		"December"
	};

	for (Article::Metadata *m : am) {
		struct tm *article_time = gmtime(&(m->publish_ts));
		std::string hr_date = hr_month[article_time->tm_mon];
		hr_date += " ";
		hr_date += std::to_string(article_time->tm_year + 1900);
		// Check if this article belongs to the same "month + year" group as
		// the last article.  If it doesn't, add a new group to the catalog.
		if (hr_date != last_date) {
			last_date = hr_date;
			blog_html_catalog += "<span><b>";
			blog_html_catalog += hr_date;
			blog_html_catalog += "</b></span>";
		}

		blog_html_catalog += "<li><a href=\"./";
		blog_html_catalog += m->path;
		blog_html_catalog += "\">";
		blog_html_catalog += m->title;
		blog_html_catalog += "</a></li>";
		// Free memory as it's no longer needed.
		free(m);
	}
	blog_html_catalog += "</ul>";
}

/**
 * Decides whether a file is reserved for swg itself and therefore must not
 * be copied onto the output directory.
 *
 * A file is special when it is named exactly "swg.cfg" (the config file) or
 * when its name begins with "__swg_".
 *
 * @param filename Name of the file, without any directory part.
 * @return true for special files, false for everything else.
 */
bool is_special_file(std::string const &filename) {
	static char const reservedPrefix[] = "__swg_";

	bool const isConfigFile = filename == "swg.cfg";
	// rfind() limited to position 0 only ever matches at the very start.
	bool const hasReservedPrefix = filename.rfind(reservedPrefix, 0) == 0;
	return isConfigFile || hasReservedPrefix;
}

// Tells whether a filename ends with the ".md" extension.
static bool has_md_extension(std::string const &filename) {
	static std::string const extension = ".md";
	return filename.length() >= extension.length() &&
		filename.compare(filename.length() - extension.length(), extension.length(), extension) == 0;
}

// Tells whether "str" begins with "prefix".
static bool has_prefix(std::string const &str, std::string const &prefix) {
	return str.compare(0, prefix.length(), prefix) == 0;
}

/**
 * Builds the whole website found at "path" into "<path>/output".
 *
 * Steps:
 *  1. build_dir_structure() prepares the output directory.
 *  2. Every regular file below "path" (except special files and anything in
 *     the "output" and ".swg_ignore" directories) is mirrored into the
 *     output: Markdown files are compiled to HTML, everything else is
 *     copied.  Files that already exist in the output are left untouched.
 *  3. For every blog in ctx.blogs, the Markdown files below the blog
 *     directory are classified with isValidArticle() and the valid ones are
 *     handed to build_blog_structure().
 *
 * @param ctx  Context whose "blogs" are built.  Each Blog is released with
 *             free() once it has been processed.
 * @param path Website root directory, with or without a trailing '/'.
 *
 * A filesystem error while mirroring terminates the process with
 * RETURN_FAILED_UNKNOWN_ERROR; a filesystem error while scanning a blog
 * terminates it with RETURN_FAILED_CONFIG_INVALID_SYNTAX after all blogs
 * have been processed.
 */
void build_website(SwgContext &ctx, std::string const &path) {
	build_dir_structure(path);

	// Directory entries are reported as "<path>/<relative path>".  Work
	// with a prefix that always ends in '/', so that stripping it yields
	// a relative path no matter how "path" was spelled.  (A leading '/'
	// in the remainder would make path::operator/= discard the output
	// directory and write outside the website.)
	std::string root = path;
	if (!root.empty() && root.back() != '/') root += '/';
	std::string const outputPrefix = root + "output/";
	std::string const ignorePrefix = root + ".swg_ignore/";

	// Build regular webpages (index, privacy notice, etc)
	std::filesystem::path ws = path;
	// Copy all non-MD files to output (to include assets, for instance).
	try {
		for (auto const &ws_entry : std::filesystem::recursive_directory_iterator(ws)) {
			std::string const entryPath = ws_entry.path().string();
			// Skip all files inside the "output" and ".swg_ignore"
			// directories.
			if (has_prefix(entryPath, outputPrefix) || has_prefix(entryPath, ignorePrefix)) continue;

			std::string const currentFile = getFilename(entryPath);
			if (!ws_entry.is_regular_file() || is_special_file(currentFile)) continue;

			// Subdirectories should be respected!  If this file is
			// in a subdirectory, it should also be created in the
			// output.  The subdirectory is the entry path without
			// the root prefix and without the filename.
			std::filesystem::path copied_subdir = path;
			copied_subdir /= "output";
			copied_subdir /= entryPath.substr(root.length(),
					entryPath.length() - root.length() - currentFile.length());
			if (!std::filesystem::exists(copied_subdir)) {
				std::filesystem::create_directories(copied_subdir);
				std::cout << "Creating: " << copied_subdir << "\n";
			}

			bool compiled = false;
			if (!has_md_extension(currentFile)) {
				// Not a Markdown file, just copy it.  An already
				// existing target is kept as is instead of being
				// reported as an error.
				std::filesystem::copy(ws_entry.path(), copied_subdir,
						std::filesystem::copy_options::skip_existing);
			} else {
				// This is a Markdown file. Compile a HTML version.
				std::filesystem::path output_html = copied_subdir;
				// Remove the .md extension and use the HTML extension instead
				output_html /= currentFile.substr(0, currentFile.length() - 3) + ".html";
				compile_markdown(path, entryPath, output_html);
				compiled = true;
			}
			std::cout << (compiled ? "Compiled: " : "Copied: ") <<
					entryPath.substr(root.length()) << "\n";
		}
	} catch (std::filesystem::filesystem_error const &e) {
		std::cerr << "error: could not copy website assets to output: " << e.what() << std::endl;
		exit(RETURN_FAILED_UNKNOWN_ERROR);
	}

	// Blog lookup
	bool failed = false;

	for (Blog *b : ctx.blogs) {
		std::list<std::string> parsedArticles = { },
			failedArticles = { };

		// std::filesystem requires the C++17 standard.
		std::string relativePath = path;
		relativePath += "/";
		relativePath += b->path;
		const std::filesystem::path blogPath(relativePath);
		try {
			for (auto const &dir_entry : std::filesystem::recursive_directory_iterator(blogPath)) {
				std::string const entryPath = dir_entry.path().string();
				// Skip all files inside the "output" directory.
				if (has_prefix(entryPath, outputPrefix)) continue;
				// Directory item iteration
				std::cout << "\t" << dir_entry << std::endl;

				// Check if the item is a Markdown file.
				std::string const filename = getFilename(entryPath);
				if (dir_entry.is_regular_file() && has_md_extension(filename)) {
					std::cout << "\t\tIs file: " << filename << "\n";
					// Markdown files should be inside a YYYY/MM directory.
					if (!isValidArticle(relativePath, entryPath))
						failedArticles.push_back(entryPath);
					else parsedArticles.push_back(entryPath);
				}
			}
		} catch (std::filesystem::filesystem_error const &e) {
			std::cerr << "error: fs error while attempting to read path for "
				<< b->name << ": " << e.what() << std::endl;
			failed = true;
		}

		/*std::cout << "Parsed " << parsedArticles << " articles;\n" <<
			"Failed to parse " << failedArticles << " articles\n";*/

		for (std::string const &a : parsedArticles)
			std::cout << "Parsed: " << a << "\n";
		for (std::string const &a : failedArticles)
			std::cout << "Unable to parse: " << a << "\n";

		build_blog_structure(path, relativePath, parsedArticles, b);

		// NOTE(review): this releases a Blog that ctx.blogs still points
		// to, and presumes it was allocated with malloc() -- confirm
		// against the code that fills ctx.blogs.
		free(b);
	}

	if (failed) {
		std::cerr << "Refusing to proceed due to previous errors\n";
		exit(RETURN_FAILED_CONFIG_INVALID_SYNTAX);
	}

	// Website blog

	// Append blog to website
	//ctx.appendBlog(blog1);

	// Generate website
	//ctx.generateWebsite();
}