Separate indexing class supporting cmd line args - stamd

commit b014ac73d73be5ae8a85f2ff3d43ff07134e6714
parent 4f9cded954e4bcb29d9dce3b9b24f461e9e6755f
Author: Dimitrije Dobrota <mail@dimitrijedobrota.com>
Date:   Thu, 27 Jun 2024 03:01:03 +0200

Separate indexing class supporting cmd line args

Diffstat:
M CMakeLists.txt  | 2 +-
M source/article.cpp  | 10 ++++++----
M source/article.hpp  | 4 ++++
D source/index.cpp  | 202 -------------------------------------------------------------------------------
D source/index.hpp  | 30 ------------------------------
A source/indexer.cpp  | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/indexer.hpp  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M source/main.cpp  | 72 ++++++++++++++++++++++++++++++++++++++++++++----------------------------

8 files changed, 322 insertions(+), 265 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -25,7 +25,7 @@ find_package(hemplate 0.1 CONFIG REQUIRED)
 add_library(
     stamd_lib OBJECT
     source/article.cpp
-    source/index.cpp
+    source/indexer.cpp
 )
 
 target_link_libraries(stamd_lib PUBLIC hemplate::hemplate)
diff --git a/source/article.cpp b/source/article.cpp
@@ -9,12 +9,14 @@
 
 #include "utility.hpp"
 
+namespace stamd {
+
 std::optional<std::string> article::get(const std::string& key) const
 {
   const auto itr = m_symbols.find(key);
   if (itr == end(m_symbols))
   {
-    std::cerr << "Warning: getting invalid value for: " << key << std::endl;
+    // std::cerr << "Warning: getting invalid value for: " << key << std::endl;
     return {};
   }
   return itr->second;
@@ -158,9 +160,9 @@ void article::write_footer(std::ostream& ost) const
   }
 
   ost << html::div();
-  ost << html::script(" ")
-             .set("type", "text/javascript")
-             .set("src", "/scripts/main.js");
+  ost << html::script(" ").set("src", "/scripts/main.js");
   ost << html::body();
   ost << html::html();
 }
+
+}  // namespace stamd
diff --git a/source/article.hpp b/source/article.hpp
@@ -6,6 +6,8 @@
 #include <unordered_map>
 #include <vector>
 
+namespace stamd {
+
 class article
 {
 public:
@@ -52,3 +54,5 @@ private:
   categories_t m_categories;
   symbols_t m_symbols;
 };
+
+}  // namespace stamd
diff --git a/source/index.cpp b/source/index.cpp
@@ -1,202 +0,0 @@
-#include <algorithm>
-#include <chrono>
-#include <format>
-#include <fstream>
-#include <numeric>
-#include <sstream>
-
-#include "index.hpp"
-
-#include <hemplate/attribute.hpp>
-#include <hemplate/classes.hpp>
-
-namespace stamd {
-
-std::tm get_time(const std::string& date)
-{
-  int year  = 0;
-  int month = 0;
-  int day   = 0;
-
-  std::sscanf(date.c_str(), "%d-%d-%d", &year, &month, &day);
-
-  tm time = {
-      .tm_sec  = 0,
-      .tm_min  = 0,
-      .tm_hour = 0,
-      .tm_mday = day,
-      .tm_mon  = month - 1,
-      .tm_year = year - 1900,
-  };
-
-  return time;
-}
-
-#define rfc882_f "{:%a, %d %b %Y %H:%M:%S %z}"  // NOLINT
-#define rfc3339_f "{:%FT%H:%M:%SZ}"  // NOLINT
-
-std::string to_rfc882(const std::string& date)
-{
-  using namespace std::chrono;  // NOLINT
-
-  tm time = get_time(date);
-
-  const auto tmp = std::mktime(&time);
-  const auto chrono_time =
-      time_point_cast<seconds>(system_clock::from_time_t(tmp));
-
-  return std::format(rfc882_f, chrono_time);
-}
-
-std::string to_rfc3339(const std::string& date)
-{
-  using namespace std::chrono;  // NOLINT
-
-  tm time = get_time(date);
-
-  const auto tmp = std::mktime(&time);
-  const auto chrono_time =
-      time_point_cast<seconds>(system_clock::from_time_t(tmp));
-
-  return std::format(rfc3339_f, chrono_time);
-}
-
-std::shared_ptr<article> create_index(std::ostream& ost,
-                                      const std::string& name,
-                                      const article_list& articles,
-                                      const categories_t& categories)
-{
-  using namespace hemplate;  // NOLINT
-
-  auto index = std::make_shared<article>(name, categories);
-
-  index->write_header(ost);
-  ost << html::h1(name);
-  ost << html::ul().set("class", "index");
-  for (const auto& article : articles)
-  {
-    if (article->is_hidden()) continue;
-
-    const auto& filename = article->get_filename();
-    const auto& title    = article->get_title();
-    const auto& date     = article->get_date();
-
-    ost << html::li()
-               .add(html::span(std::format("{} -&nbsp", date)))
-               .add(html::a(title).set("href", filename));
-  };
-  ost << html::ul();
-  index->write_footer(ost);
-
-  return index;
-}
-
-void create_atom(std::ostream& ost,
-                 const std::string& name,
-                 const article_list& articles)
-{
-  using namespace hemplate;  // NOLINT
-
-  static const char* base    = "https://dimitrijedobrota.com/blog";
-  static const char* loc     = "https://dimitrijedobrota.com/blog/atom.xml";
-  static const char* summary = "Click on the article link to read...";
-
-  auto const time =
-      std::chrono::current_zone()->to_local(std::chrono::system_clock::now());
-
-  ost << xml();
-  ost << atom::feed();
-  ost << atom::title(name);
-  ost << atom::id(base);
-  ost << atom::updated(std::format(rfc3339_f, time));
-  ost << atom::author().add(atom::name(name));
-  ost << atom::link(" ", {{"rel", "self"}, {"href", loc}});
-  ost << atom::link(
-      " ", {{"href", base}, {"rel", "alternate"}, {"type", "text/html"}});
-
-  for (const auto& article : articles)
-  {
-    const auto filename = article->get_filename();
-    const auto title    = article->get_title();
-    const auto date     = article->get_date();
-    const auto path     = std::format("{}/{}", base, filename);
-
-    ost << atom::entry()
-               .add(atom::title(title))
-               .add(atom::id(path))
-               .add(atom::link(" ").set("href", path))
-               .add(atom::updated(to_rfc3339(date)))
-               .add(atom::summary(summary));
-  }
-
-  ost << atom::feed();
-}
-
-void create_rss(std::ostream& ost,
-                const std::string& name,
-                const article_list& articles)
-{
-  using namespace hemplate;  // NOLINT
-
-  static const char* author      = "Dimitrije Dobrota";
-  static const char* email       = "mail@dimitrijedobrota.com";
-  static const char* base        = "https://dimitrijedobrota.com/blog";
-  static const char* description = "Contents of Dimitrije Dobrota's webpage";
-  static const char* loc         = "https://dimitrijedobrota.com/blog/rss.xml";
-
-  ost << xml();
-  ost << rss::rss();
-  ost << rss::channel();
-  ost << rss::title(name);
-  ost << rss::link(base);
-  ost << rss::description(description);
-  ost << rss::generator("stamd");
-  ost << rss::language("en-us");
-  ost << rss::atomLink().set("href", loc);
-
-  for (const auto& article : articles)
-  {
-    const auto filename = article->get_filename();
-    const auto date     = article->get_date();
-
-    ost << rss::item()
-               .add(rss::title(filename))
-               .add(rss::link(std::format("{}/{}", base, filename)))
-               .add(rss::guid(std::format("{}/{}", base, filename)))
-               .add(rss::pubDate(to_rfc882(date)))
-               .add(rss::author(std::format("{} ({})", email, author)));
-  }
-
-  ost << rss::channel();
-  ost << rss::rss();
-}
-
-void create_sitemap(std::ostream& ost, const article_list& articles)
-{
-  using namespace hemplate;  // NOLINT
-
-  static const char* base = "https://dimitrijedobrota.com/blog";
-
-  ost << xml();
-  ost << sitemap::urlset();
-  for (const auto& article : articles)
-  {
-    const auto& name = article->get_filename();
-    const auto& date = article->get_date();
-
-    ost << sitemap::url()
-               .add(sitemap::loc(std::format("{}/{}.html", base, name)))
-               .add(sitemap::lastmod(date));
-  }
-  ost << sitemap::urlset();
-}
-
-void create_robots(std::ostream& ost)
-{
-  static const char* base = "https://dimitrijedobrota.com/blog";
-
-  ost << "User-agent: *";
-  ost << std::format("Sitemap: {}/sitemap.xml", base);
-}
-
-}  // namespace stamd
diff --git a/source/index.hpp b/source/index.hpp
@@ -1,30 +0,0 @@
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include "article.hpp"
-
-namespace stamd {
-
-using article_list = std::vector<std::shared_ptr<article>>;
-using categories_t = article::categories_t;
-
-void create_robots(std::ostream& ost);
-
-void create_sitemap(std::ostream& ost, const article_list& articles);
-
-void create_atom(std::ostream& ost,
-                 const std::string& name,
-                 const article_list& articles);
-
-void create_rss(std::ostream& ost,
-                const std::string& name,
-                const article_list& articles);
-
-std::shared_ptr<article> create_index(std::ostream& ost,
-                                      const std::string& name,
-                                      const article_list& articles,
-                                      const categories_t& categories);
-
-}  // namespace stamd
diff --git a/source/indexer.cpp b/source/indexer.cpp
@@ -0,0 +1,210 @@
+#include <algorithm>
+#include <chrono>
+#include <format>
+#include <fstream>
+#include <numeric>
+#include <sstream>
+
+#include "indexer.hpp"
+
+#include <hemplate/attribute.hpp>
+#include <hemplate/classes.hpp>
+
+namespace stamd {
+
+/// Appends a copy of the given shared pointer to the stored article list.
+///
+/// @return reference to the element now stored in m_articles; because that
+///         list is a std::vector, a later add() may invalidate it.
+indexer::article_s& indexer::add(const article_s& article)
+{
+  // emplace_back hands back a reference to the freshly inserted element.
+  return m_articles.emplace_back(article);
+}
+
+/// Reorders the stored articles so that larger get_date() values come first.
+void indexer::sort()
+{
+  const auto newer_first = [](const auto& lhs, const auto& rhs)
+  { return lhs->get_date() > rhs->get_date(); };
+
+  std::sort(m_articles.begin(), m_articles.end(), newer_first);
+}
+
+/// Parses a date written as "<year>-<month>-<day>" into a broken-down time.
+///
+/// The time-of-day fields are left at zero. The sscanf result is not checked:
+/// components that fail to parse keep their initial value of 0.
+std::tm get_time(const std::string& date)
+{
+  int year  = 0;
+  int month = 0;
+  int day   = 0;
+  std::sscanf(date.c_str(), "%d-%d-%d", &year, &month, &day);
+
+  std::tm result {};  // value-initialised: every field starts at zero
+  result.tm_mday = day;
+  result.tm_mon  = month - 1;  // tm_mon counts months from 0
+  result.tm_year = year - 1900;  // tm_year counts years from 1900
+  return result;
+}
+
+#define rfc882_f "{:%a, %d %b %Y %H:%M:%S %z}"  // NOLINT
+#define rfc3339_f "{:%FT%H:%M:%SZ}"  // NOLINT
+
+/// Formats a date string accepted by get_time() with the rfc882_f pattern.
+///
+/// The broken-down time is converted with std::mktime, so the date is taken
+/// as midnight in the local time zone before being formatted.
+std::string to_rfc882(const std::string& date)
+{
+  namespace chr = std::chrono;
+
+  std::tm broken_down = get_time(date);
+  const auto stamp    = std::mktime(&broken_down);
+
+  const auto instant =
+      chr::time_point_cast<chr::seconds>(chr::system_clock::from_time_t(stamp));
+  return std::format(rfc882_f, instant);
+}
+
+/// Formats a date string accepted by get_time() with the rfc3339_f pattern.
+///
+/// The broken-down time is converted with std::mktime, so the date is taken
+/// as midnight in the local time zone before being formatted.
+std::string to_rfc3339(const std::string& date)
+{
+  namespace chr = std::chrono;
+
+  std::tm broken_down = get_time(date);
+  const auto stamp    = std::mktime(&broken_down);
+
+  const auto instant =
+      chr::time_point_cast<chr::seconds>(chr::system_clock::from_time_t(stamp));
+  return std::format(rfc3339_f, instant);
+}
+
+/// Writes an HTML index page listing every non-hidden stored article, then
+/// stores the index page itself as an article via add().
+///
+/// @param ost        stream receiving the page
+/// @param name       page heading; also passed to the index article
+/// @param categories passed to the index article's constructor
+void indexer::create_index(std::ostream& ost,
+                           const std::string& name,
+                           const categories_t& categories)
+{
+  using namespace hemplate;  // NOLINT
+
+  auto page = std::make_shared<stamd::article>(name, categories);
+
+  page->write_header(ost);
+  ost << html::h1(name);
+  ost << html::ul().set("class", "index");
+
+  for (const auto& entry : m_articles)
+  {
+    if (!entry->is_hidden())
+    {
+      const auto& href  = entry->get_filename();
+      const auto& label = entry->get_title();
+      const auto& when  = entry->get_date();
+
+      ost << html::li()
+                 .add(html::span(when + " -&nbsp"))
+                 .add(html::a(label).set("href", href));
+    }
+  }
+
+  ost << html::ul();
+  page->write_footer(ost);
+
+  // Registered only after the loop, so the page never lists itself.
+  add(page);
+}
+
+/// Writes an Atom feed with one entry per stored article to `ost`.
+///
+/// @param ost  stream receiving the XML document
+/// @param name used as both the feed title and the feed author name
+void indexer::create_atom(std::ostream& ost, const std::string& name) const
+{
+  using namespace hemplate;  // NOLINT
+
+  const std::string& base_url = m_options.base_url;
+
+  // rfc3339_f ends in a literal "Z" (the UTC designator), so the timestamp
+  // must come from the UTC-based system clock rather than from a local-time
+  // conversion. It is truncated to whole seconds, as to_rfc3339() does.
+  const auto now = std::chrono::time_point_cast<std::chrono::seconds>(
+      std::chrono::system_clock::now());
+
+  ost << xml();
+  ost << atom::feed();
+  ost << atom::title(name);
+  ost << atom::id(base_url);
+  ost << atom::updated(std::format(rfc3339_f, now));
+  ost << atom::author().add(atom::name(name));
+  // NOTE(review): entries below are addressed as base_url + filename; confirm
+  // that the extra "blog/" segment in the self link is intended.
+  ost << atom::link(" ",
+                    {{"rel", "self"}, {"href", base_url + "blog/atom.xml"}});
+  ost << atom::link(
+      " ", {{"href", base_url}, {"rel", "alternate"}, {"type", "text/html"}});
+
+  for (const auto& article : m_articles)
+  {
+    const auto& filename = article->get_filename();
+    const auto& title    = article->get_title();
+    const auto& date     = article->get_date();
+    // Per-article "summary" wins; the indexer option is the fallback.
+    const auto summary = article->get("summary").value_or(m_options.summary);
+
+    ost << atom::entry()
+               .add(atom::title(title))
+               .add(atom::id(base_url + filename))
+               .add(atom::link(" ").set("href", base_url + filename))
+               .add(atom::updated(to_rfc3339(date)))
+               .add(atom::summary(summary));
+  }
+
+  ost << atom::feed();
+}
+
+/// Writes an RSS feed with one item per stored article to `ost`.
+///
+/// @param ost  stream receiving the XML document
+/// @param name used as the channel title
+void indexer::create_rss(std::ostream& ost, const std::string& name) const
+{
+  using namespace hemplate;  // NOLINT
+
+  const std::string& base_url    = m_options.base_url;
+  const std::string& description = m_options.description;
+
+  ost << xml();
+  ost << rss::rss();
+  ost << rss::channel();
+
+  ost << rss::title(name);
+  ost << rss::link(base_url);
+  ost << rss::description(description);
+  ost << rss::generator("stamd");
+  ost << rss::language("en-us");
+  // NOTE(review): items below are addressed as base_url + filename; confirm
+  // that the extra "blog/" segment in this link is intended.
+  ost << rss::atomLink().set("href", base_url + "blog/rss.xml");
+
+  for (const auto& article : m_articles)
+  {
+    const auto& filename = article->get_filename();
+    const auto& title    = article->get_title();
+    const auto& date     = article->get_date();
+    // Per-article "author"/"email" win; the indexer options are fallbacks.
+    const auto author = article->get("author").value_or(m_options.author);
+    const auto email  = article->get("email").value_or(m_options.email);
+
+    // The item title is the article's title (as in create_atom), not the
+    // name of the file it was written to.
+    ost << rss::item()
+               .add(rss::title(title))
+               .add(rss::link(base_url + filename))
+               .add(rss::guid(base_url + filename))
+               .add(rss::pubDate(to_rfc882(date)))
+               .add(rss::author(std::format("{} ({})", email, author)));
+  }
+
+  ost << rss::channel();
+  ost << rss::rss();
+}
+
+/// Writes an XML sitemap with one <url> element per stored article to `ost`.
+///
+/// Each location is base_url followed by the article's file name; the
+/// article's date is used as the last-modification value.
+void indexer::create_sitemap(std::ostream& ost) const
+{
+  using namespace hemplate;  // NOLINT
+
+  // Bound on every call: a function-local static reference would stay bound
+  // to the options of whichever indexer called this function first.
+  const std::string& base_url = m_options.base_url;
+
+  ost << xml();
+  ost << sitemap::urlset();
+  for (const auto& article : m_articles)
+  {
+    const auto& filename = article->get_filename();
+    const auto& date     = article->get_date();
+
+    ost << sitemap::url()
+               .add(sitemap::loc(base_url + filename))
+               .add(sitemap::lastmod(date));
+  }
+  ost << sitemap::urlset();
+}
+
+/// Writes a robots.txt to `ost`: a record addressed to every user agent
+/// followed by the location of the sitemap under base_url.
+void indexer::create_robots(std::ostream& ost) const
+{
+  // Bound on every call: a function-local static reference would stay bound
+  // to the options of whichever indexer called this function first.
+  const std::string& base_url = m_options.base_url;
+
+  // Each directive goes on its own line. The constructor guarantees that
+  // base_url ends with '/', so no separator is added before the file name.
+  ost << "User-agent: *\n";
+  ost << "Sitemap: " << base_url << "sitemap.xml\n";
+}
+
+}  // namespace stamd
diff --git a/source/indexer.hpp b/source/indexer.hpp
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "article.hpp"
+
+namespace stamd {
+
+/// Collects articles and writes the index page, feeds, sitemap and robots
+/// file that describe them.
+class indexer
+{
+public:
+  using article_s    = std::shared_ptr<article>;
+  using article_list = std::vector<article_s>;
+  using categories_t = article::categories_t;
+
+  /// Settings shared by the generated files.
+  struct options_t
+  {
+    std::string base_url;  ///< prefix of every generated URL
+    std::string author;  ///< RSS fallback when an article has no "author"
+    std::string email;  ///< RSS fallback when an article has no "email"
+    std::string description;  ///< RSS channel description
+    std::string summary;  ///< Atom fallback when an article has no "summary"
+  };
+
+  /// Takes ownership of the options and makes sure base_url ends with '/'.
+  explicit indexer(options_t options)
+      : m_options(std::move(options))
+  {
+    std::string& url          = m_options.base_url;
+    const bool ends_with_slash = !url.empty() && url.back() == '/';
+    if (!ends_with_slash)
+    {
+      url += '/';
+    }
+  }
+
+  /// Stores the article and returns a reference to the stored pointer.
+  article_s& add(const article_s& article);
+
+  /// Orders the stored articles by date, most recent first.
+  void sort();
+
+  /// Writes robots.txt content to the stream.
+  void create_robots(std::ostream& ost) const;
+  /// Writes an XML sitemap of the stored articles to the stream.
+  void create_sitemap(std::ostream& ost) const;
+
+  /// Writes an Atom feed of the stored articles to the stream.
+  void create_atom(std::ostream& ost, const std::string& name) const;
+  /// Writes an RSS feed of the stored articles to the stream.
+  void create_rss(std::ostream& ost, const std::string& name) const;
+  /// Writes an HTML index page and stores that page as an article.
+  void create_index(std::ostream& ost,
+                    const std::string& name,
+                    const categories_t& categories);
+
+  // NOTE(review): no definition is visible next to this declaration; confirm
+  // it is implemented before calling it.
+  void create_categories() const;
+
+private:
+  options_t m_options;
+
+  article_list m_articles;
+};
+
+}  // namespace stamd
diff --git a/source/main.cpp b/source/main.cpp
@@ -7,10 +7,10 @@
 #include <poafloc/poafloc.hpp>
 
 #include "article.hpp"
-#include "index.hpp"
+#include "indexer.hpp"
 #include "utility.hpp"
 
-void preprocess(article& article, std::istream& ist)
+void preprocess(stamd::article& article, std::istream& ist)
 {
   std::string line;
   std::string key;
@@ -47,7 +47,7 @@ struct arguments_t
   std::vector<std::filesystem::path> files;
   bool index = false;
 
-  std::string base = "https://dimitrijedobrota.com/blog";
+  stamd::indexer::options_t options;
 };
 
 int parse_opt(int key, const char* arg, poafloc::Parser* parser)
@@ -58,12 +58,24 @@ int parse_opt(int key, const char* arg, poafloc::Parser* parser)
     case 'o':
       args->output_dir = arg;
       break;
-    case 'b':
-      args->base = arg;
-      break;
     case 'i':
       args->index = true;
       break;
+    case 'b':
+      args->options.base_url = arg;
+      break;
+    case 'a':
+      args->options.author = arg;
+      break;
+    case 'e':
+      args->options.email = arg;
+      break;
+    case 'd':
+      args->options.description = arg;
+      break;
+    case 's':
+      args->options.summary = arg;
+      break;
     case poafloc::ARG:
       args->files.emplace_back(arg);
       break;
@@ -74,12 +86,21 @@ int parse_opt(int key, const char* arg, poafloc::Parser* parser)
 }
 
 // NOLINTBEGIN
+// clang-format off
 static const poafloc::option_t options[] = {
+    {0, 0, 0, 0, "Output mode", 1},
     {"output", 'o', "DIR", 0, "Output directory"},
     {"index", 'i', 0, 0, "Generate all of the indices"},
+    {0, 0, 0, 0, "General information", 2},
     {"base", 'b', "URL", 0, "Base URL for the content"},
+    {"author", 'a', "NAME", 0, "Name of the author, if not specified in article"},
+    {"email", 'e', "EMAIL", 0, "Email of the author, if not specified in article"},
+    {"summary", 's', "SMRY", 0, "A summary, if not specified in article"},
+    {"description", 'd', "DESC", 0, "Description of RSS feed"},
+    {0, 0, 0, 0, "Informational Options", -1},
     {0},
 };
+// clang-format on
 
 static const poafloc::arg_t arg {
     options,
@@ -99,8 +120,6 @@ int main(int argc, char* argv[])
 {
   using namespace stamd;  // NOLINT
 
-  using category_map_t = std::unordered_map<std::string, article_list>;
-
   arguments_t args;
 
   if (poafloc::parse(&arg, argc, argv, 0, &args) != 0)
@@ -109,23 +128,24 @@ int main(int argc, char* argv[])
     return 1;
   }
 
+  using category_map_t =
+      std::unordered_map<std::string, indexer::article_list>;
+
+  stamd::indexer::categories_t categories;
   category_map_t category_map;
-  categories_t all_categories;
-  article_list all_articles;
+  indexer indexer(args.options);
 
   for (const auto& path : args.files)
   {
     const std::string filename = path.stem().string() + ".html";
 
     std::ifstream ifs(path.string());
-    all_articles.push_back(make_shared<article>(filename));
+    auto& article = indexer.add(make_shared<stamd::article>(filename));
 
-    auto& article = all_articles.back();
     preprocess(*article, ifs);
 
     // filename can change in preprocessing phase
-    std::filesystem::path out = args.output_dir / article->get_filename();
-    std::ofstream ofs(out);
+    std::ofstream ofs(args.output_dir / article->get_filename());
     std::stringstream sst;
 
     sst << ifs.rdbuf();
@@ -141,41 +161,37 @@ int main(int argc, char* argv[])
 
     if (!article->is_hidden())
     {
-      all_categories.merge(article->get_categories());
-      for (const auto& ctgry : article->get_categories())
-        category_map[ctgry].push_back(article);
+      categories.merge(article->get_categories());
+      for (const auto& category : article->get_categories())
+        category_map[category].emplace_back(article);
     }
   }
 
   if (!args.index) return 0;
 
-  sort(begin(all_articles),
-       end(all_articles),
-       [](const auto& lft, const auto& rht)
-       { return lft->get_date() > rht->get_date(); });
+  indexer.sort();
 
   std::ofstream rss(args.output_dir / "rss.xml");
-  create_rss(rss, "index", all_articles);
+  indexer.create_rss(rss, "index");
 
   std::ofstream atom(args.output_dir / "atom.xml");
-  create_atom(atom, "index", all_articles);
+  indexer.create_atom(atom, "index");
 
   std::ofstream index(args.output_dir / "index.html");
+  indexer.create_index(index, "blog index", categories);
 
-  all_articles.push_back(
-      create_index(index, "index", all_articles, all_categories));
   for (const auto& [category, articles] : category_map)
   {
     auto ctgry = category;
     std::ofstream ost(args.output_dir / (normalize(ctgry) + ".html"));
-    all_articles.push_back(create_index(ost, category, articles, {}));
+    indexer.create_index(ost, category, {});
   }
 
   std::ofstream robots(args.output_dir / "robots.txt");
-  create_robots(robots);
+  indexer.create_robots(robots);
 
   std::ofstream sitemap(args.output_dir / "sitemap.xml");
-  create_sitemap(sitemap, all_articles);
+  indexer.create_sitemap(sitemap);
 
   return 0;
 }

	stamd Static Markdown Page Generator
	git clone git://git.dimitrijedobrota.com/stamd.git
	Log \| Files \| Refs \| README \| LICENSE

M	CMakeLists.txt	\|	2	+-
M	source/article.cpp	\|	10	++++++----
M	source/article.hpp	\|	4	++++
D	source/index.cpp	\|	202	-------------------------------------------------------------------------------
D	source/index.hpp	\|	30	------------------------------
A	source/indexer.cpp	\|	210	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/indexer.hpp	\|	57	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	source/main.cpp	\|	72	++++++++++++++++++++++++++++++++++++++++++++----------------------------