initial commit

Basically works, but some things are still missing, and some (much?) of the code is kinda janky.
This commit is contained in:
Gered 2023-06-27 17:15:58 -04:00
commit 4c3fc25e50
9 changed files with 2889 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/target
.DS_Store
/.idea

2248
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

22
Cargo.toml Normal file
View file

@ -0,0 +1,22 @@
[package]
name = "pbe"
description = "Personal Blog Engine. Gered's umpteenth take on a custom blog."
version = "0.1.0"
edition = "2021"
[dependencies]
actix-web = "4.3.1"
actix-files = "0.6.2"
anyhow = "1.0.71"
chrono = "0.4.26"
itertools = "0.11.0"
log = "0.4.19"
pulldown-cmark = "0.9.3"
rss = "2.0.4"
serde = { version = "1.0.164", features = ["derive"]}
serde_json = "1.0.97"
simple-log = "1.6.0"
syntect = "5.0.0"
tera = "1.19.0"
thiserror = "1.0.40"
url = "2.4.0"

2
rust-toolchain.toml Normal file
View file

@ -0,0 +1,2 @@
[toolchain]
channel = "stable"

12
rustfmt.toml Normal file
View file

@ -0,0 +1,12 @@
#blank_lines_upper_bound = 3
#format_code_in_doc_comments = true
#group_imports = "StdExternalCrate"
hard_tabs = true
#hex_literal_case = "Lower"
#imports_granularity = "Module"
max_width = 120
#single_line_if_else_max_width = 0
tab_spaces = 4
use_field_init_shorthand = true
use_small_heuristics = "Max"
#wrap_comments = true

100
src/config.rs Normal file
View file

@ -0,0 +1,100 @@
use std::fs::File;
use std::io::BufReader;
use std::path::PathBuf;
/// Server runtime settings loaded from `server.json`. All four paths are
/// re-rooted against the site root by `load_server`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Server {
	pub bind_addr: String,
	pub bind_port: u16,
	// directory whose contents are served directly as static files
	pub static_files_path: PathBuf,
	// directory containing the tera templates
	pub templates_path: PathBuf,
	// base directories that page/post content file paths are resolved against
	pub pages_path: PathBuf,
	pub posts_path: PathBuf,
}
/// Rss feed settings, loaded as part of `posts.json` (see `Posts`).
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Rss {
	pub title: String,
	pub description: String,
	// base url of the site; also used to build absolute links to posts in the feed
	pub url: String,
	// maximum number of posts to include in the feed
	pub count: usize,
}
/// A single page entry from `pages.json`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Page {
	// path to the page's content file, relative to the configured pages path
	pub file_path: PathBuf,
	pub title: String,
	// the url this page is served at
	pub url: String,
	// optional previous urls for this page; requests to these are redirected to `url`
	pub old_urls: Option<Vec<String>>,
}
/// Top-level structure of `pages.json`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Pages {
	pub pages: Vec<Page>,
}
/// A single post entry from `posts.json`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Post {
	// path to the post's content file, relative to the configured posts path
	pub file_path: PathBuf,
	pub title: String,
	// publish date, given as a "YYYY-MM-DD" string in the json
	#[serde(deserialize_with = "crate::util::deserialize_naivedate")]
	pub date: chrono::NaiveDate,
	// url slug; the post's final url is built from the date plus this slug
	pub slug: String,
	// optional previous urls for this post; requests to these are redirected
	pub old_urls: Option<Vec<String>>,
	pub tags: Option<Vec<String>>,
}
/// Top-level structure of `posts.json`. Carries the rss feed settings too.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Posts {
	pub posts: Vec<Post>,
	pub rss: Rss,
}
/// Errors that can occur while loading json config files.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
	// the file at the given path could not be opened/read
	#[error("Json config I/O error with path {0}")]
	IOError(PathBuf, #[source] std::io::Error),
	// the file was readable but its contents failed to deserialize
	#[error("Json deserialization error: {0}")]
	SerdeJsonError(String),
}
/// Opens and deserializes the JSON config file at `path` into any
/// `DeserializeOwned` type.
///
/// Returns `ConfigError::IOError` if the file cannot be opened, or
/// `ConfigError::SerdeJsonError` if its contents fail to deserialize.
fn load_json_config<T>(path: &PathBuf) -> Result<T, ConfigError>
where
	T: serde::de::DeserializeOwned,
{
	let file = File::open(path).map_err(|e| ConfigError::IOError(path.clone(), e))?;
	let reader = BufReader::new(file);
	// map_err instead of a match that just re-wraps both arms; the reader does
	// not need to be mut when passed by value
	serde_json::from_reader(reader).map_err(|err| ConfigError::SerdeJsonError(err.to_string()))
}
/// Loads the server json config from `path`, then re-roots every configured
/// path so it is resolved relative to `site_root`.
pub fn load_server(path: &PathBuf, site_root: &PathBuf) -> Result<Server, ConfigError> {
	log::info!("Loading server json config from {:?}", path);
	let mut server_config: Server = load_json_config(path)?;
	// Path::join is the idiomatic form of the previous manual
	// `[a, b].iter().collect()` component concatenation (same semantics:
	// an absolute right-hand path replaces the left-hand one)
	server_config.static_files_path = site_root.join(&server_config.static_files_path);
	server_config.templates_path = site_root.join(&server_config.templates_path);
	server_config.pages_path = site_root.join(&server_config.pages_path);
	server_config.posts_path = site_root.join(&server_config.posts_path);
	Ok(server_config)
}
/// Loads and deserializes the pages and posts json configs, resolving each
/// content file path against the matching base path from the server config.
pub fn load_content(
	pages_path: &PathBuf,
	posts_path: &PathBuf,
	server_config: &Server,
) -> Result<(Pages, Posts), ConfigError> {
	log::info!("Loading pages json config from {:?}", pages_path);
	let mut pages: Pages = load_json_config(pages_path)?;
	for page in pages.pages.iter_mut() {
		// Path::join over manual component collection (same semantics)
		page.file_path = server_config.pages_path.join(&page.file_path);
	}

	log::info!("Loading posts json config from {:?}", posts_path);
	let mut posts: Posts = load_json_config(posts_path)?;
	for post in posts.posts.iter_mut() {
		post.file_path = server_config.posts_path.join(&post.file_path);
	}

	Ok((pages, posts))
}

120
src/main.rs Normal file
View file

@ -0,0 +1,120 @@
use actix_files::Files;
use actix_web::web::Redirect;
use actix_web::{web, App, Either, HttpRequest, HttpResponse, HttpServer, Responder};
use anyhow::Context;
use std::env;
use std::path::{Path, PathBuf};
mod config;
mod site;
mod util;
/// Builds the generic 404 response used whenever no content matches a request.
fn not_found() -> HttpResponse {
	let mut response = HttpResponse::NotFound();
	response.body("not found")
}
/// GET / — the front page, which serves the latest post.
#[actix_web::get("/")]
async fn latest_posts(data: web::Data<site::SiteService>) -> impl Responder {
	log::debug!("GET / -> latest_posts()");
	data.serve_latest_post()
}
/// GET /tag/{tag}/ — serves the list of posts carrying the given tag.
#[actix_web::get("/tag/{tag}/")]
async fn latest_posts_by_tag(path: web::Path<(String,)>, data: web::Data<site::SiteService>) -> impl Responder {
	let tag = path.into_inner().0;
	log::debug!("GET /tag/{0}/ -> latest_posts_by_tag(), tag = {0}", tag);
	data.serve_posts_by_tag(&tag)
}
/// GET /archive/ — serves the archive listing of all posts.
#[actix_web::get("/archive/")]
async fn posts_archive(data: web::Data<site::SiteService>) -> impl Responder {
	log::debug!("GET /archive/ -> posts_archive()");
	data.serve_posts_archive()
}
/// GET /rss/ — serves the site's rss feed.
#[actix_web::get("/rss/")]
async fn rss_feed(data: web::Data<site::SiteService>) -> impl Responder {
	log::debug!("GET /rss/ -> rss_feed()");
	data.serve_rss_feed()
}
/// Fallback handler for GET requests that matched no registered route: serves
/// page/post content or an old-url redirect, otherwise responds 404.
async fn site_content(req: HttpRequest, data: web::Data<site::SiteService>) -> Either<HttpResponse, Redirect> {
	log::debug!("GET {} -> fallback to site_content()", req.path());
	match data.serve_content_by_url(&req) {
		Some(response) => response,
		None => Either::Left(not_found()),
	}
}
/// Entry point: sets up logging, reads the single optional SITE_ROOT argument,
/// loads the json configs, builds the site content, and runs the HTTP server.
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
	// set up logging first so everything after this point can log freely
	simple_log::new(
		simple_log::LogConfigBuilder::builder() //
			.level("debug")
			.output_console()
			.build(),
	)
	.map_err(|err| anyhow::anyhow!(err))?;

	println!("PBE - Personal Blog Engine - https://github.com/gered/pbe");

	// manually handling args because
	// 1) i have very simple needs
	// 2) the common crate options are kinda complex and/or have poorly laid out docs (especially so when it comes to
	//    the typical simple use-case "getting started" stuff ... bleh)
	// so ... whatever!
	let mut args: Vec<String> = env::args().collect();
	args.remove(0); // normally the path of the executable itself. TODO: when is this not true? probably only some exotic environments i don't give a shit about ...?
	let first_arg = args.first().unwrap_or(&String::new()).to_lowercase();

	if first_arg == "-h" || first_arg == "--help" {
		println!("Usage: pbe <SITE_ROOT>");
		println!("Where SITE_ROOT is a path that contains the json config files and all content and web resources.");
		Ok(())
	} else {
		// no argument at all means "use the current directory as the site root"
		let site_root = if first_arg.is_empty() {
			env::current_dir()? //
		} else {
			Path::new(&first_arg).canonicalize()?
		};
		log::info!("Using site root {:?}", site_root);

		// the three json config files are expected at the top of the site root
		let server_config_path: PathBuf = [&site_root, &"server.json".into()].iter().collect();
		let pages_config_path: PathBuf = [&site_root, &"pages.json".into()].iter().collect();
		let posts_config_path: PathBuf = [&site_root, &"posts.json".into()].iter().collect();

		log::info!("Loading config ...");
		let server_config = config::load_server(&server_config_path, &site_root) //
			.context("Loading server json config")?;
		let (pages_config, posts_config) = config::load_content(&pages_config_path, &posts_config_path, &server_config) //
			.context("Loading content json configs")?;

		log::info!("Initializing site data and content ...");
		let site_service = site::SiteService::new(server_config.clone(), pages_config, posts_config)
			.context("Constructing SiteService instance")?;
		let data = web::Data::new(site_service);

		log::info!(
			"Starting HTTP server for site, listening on {}:{} ...",
			server_config.bind_addr,
			server_config.bind_port
		);
		// NOTE(review): static files are registered as a service before the default
		// service, so unmatched requests appear to be tried against static files
		// first and only then fall through to site_content — confirm intended
		HttpServer::new(move || {
			App::new() //
				.app_data(data.clone())
				.service(latest_posts)
				.service(latest_posts_by_tag)
				.service(posts_archive)
				.service(rss_feed)
				.service(Files::new("/", &server_config.static_files_path))
				.default_service(web::get().to(site_content))
		})
		.bind((server_config.bind_addr.clone(), server_config.bind_port))
		.with_context(|| format!("Binding HTTP server on {}:{}", server_config.bind_addr, server_config.bind_port))?
		.run()
		.await
		.map_err(anyhow::Error::from)
	}
}

366
src/site.rs Normal file
View file

@ -0,0 +1,366 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::RwLock;
use actix_web::web::Redirect;
use actix_web::{Either, HttpRequest, HttpResponse};
use chrono::{Datelike, TimeZone};
use itertools::Itertools;
use crate::{config, util};
// url paths and tags are plain strings; these aliases exist only for readability
type UriPath = String;
type Tag = String;

/// Errors that can occur while rendering a content file to html.
#[derive(Debug, thiserror::Error)]
pub enum ContentError {
	#[error("Content rendering I/O error with path {0}")]
	IOError(PathBuf, #[source] std::io::Error),
}
/// Reads the content file at `path` and renders it to html. Markdown (".md")
/// files are converted; any other extension (html included) is passed through
/// unchanged.
fn render_content(path: &PathBuf) -> Result<String, ContentError> {
	let raw_content = std::fs::read_to_string(path).map_err(|e| ContentError::IOError(path.clone(), e))?;
	match path.extension().and_then(|ext| ext.to_str()) {
		Some("md") => {
			let parser = pulldown_cmark::Parser::new_ext(&raw_content, pulldown_cmark::Options::all());
			let mut output = String::new();
			// TODO: use write_html() instead because that can actually return errors instead of just panicking
			pulldown_cmark::html::push_html(&mut output, parser);
			Ok(output)
		}
		// html/htm and any unknown extension are served as-is; the previous
		// separate "html"/"htm" arm was identical to the catch-all
		_ => Ok(raw_content),
	}
}
/// Top-level error type covering content rendering and template problems.
#[derive(Debug, thiserror::Error)]
pub enum SiteError {
	#[error("Content rendering error")]
	ContentError(#[from] ContentError),
	#[error("Tera templates error")]
	TeraError(#[from] tera::Error),
}
/// Lookup table mapping old/obsolete urls to the current url that should be
/// served in their place.
pub struct OldUrlMappings {
	mapping: HashMap<UriPath, UriPath>,
}

impl OldUrlMappings {
	pub fn new() -> Self {
		Self { mapping: HashMap::new() }
	}

	/// Returns the current url registered for the given old url, if any.
	#[inline]
	pub fn get(&self, old_url: &UriPath) -> Option<&UriPath> {
		self.mapping.get(old_url)
	}

	/// Registers a single old url as mapping to the given new url.
	#[inline]
	pub fn add_mapping(&mut self, old_url: &UriPath, new_url: &UriPath) {
		self.mapping.insert(old_url.to_owned(), new_url.to_owned());
	}

	/// Registers a batch of old urls which all map to the same new url.
	pub fn add_mappings(&mut self, old_urls: &[UriPath], new_url: &UriPath) {
		for old in old_urls {
			self.add_mapping(old, new_url);
		}
	}
}
/// Lookup table mapping a tag to the list of urls carrying that tag, kept in
/// the order the urls were added.
pub struct UrlsByTag {
	mapping: HashMap<Tag, Vec<UriPath>>,
}

impl UrlsByTag {
	pub fn new() -> Self {
		Self { mapping: HashMap::new() }
	}

	/// Returns the urls registered under the given tag, if any.
	#[inline]
	pub fn get(&self, tag: &Tag) -> Option<&[UriPath]> {
		self.mapping.get(tag).map(Vec::as_slice)
	}

	/// Appends a url to the given tag's list, creating the list if needed.
	pub fn add_mapping(&mut self, url: &UriPath, tag: &Tag) {
		match self.mapping.get_mut(tag) {
			Some(urls) => urls.push(url.clone()),
			None => {
				self.mapping.insert(tag.clone(), vec![url.clone()]);
			}
		}
	}

	/// Registers a url under each of the given tags.
	pub fn add_mappings(&mut self, url: &UriPath, tags: &[Tag]) {
		for tag in tags {
			self.add_mapping(url, tag);
		}
	}
}
/// A fully-rendered blog post, ready to be passed into templates.
#[derive(serde::Serialize)]
pub struct Post {
	// canonical url of the post, in the form "/YYYY/MM/DD/slug"
	pub url: UriPath,
	pub title: String,
	// the post body, already rendered to html
	pub content_html: String,
	#[serde(serialize_with = "crate::util::serialize_naivedate")]
	pub date: chrono::NaiveDate,
	pub tags: Vec<Tag>,
}
impl TryFrom<config::Post> for Post {
	type Error = SiteError;

	/// Renders the post's content file to html and derives its canonical url
	/// from the post date and slug ("/YYYY/MM/DD/slug").
	fn try_from(value: config::Post) -> Result<Self, Self::Error> {
		let url = format!("/{:04}/{:02}/{:02}/{}", value.date.year(), value.date.month(), value.date.day(), value.slug);
		let content_html = render_content(&value.file_path)?;
		// `value.tags` is owned here, so the previous
		// `map_or_else(|| Vec::new(), |x| x.clone())` cloned an already-owned Vec
		let tags = value.tags.unwrap_or_default();
		Ok(Post {
			url, //
			title: value.title,
			content_html,
			date: value.date,
			tags,
		})
	}
}
/// A fully-rendered site page, ready to be passed into templates.
#[derive(serde::Serialize)]
pub struct Page {
	pub url: UriPath,
	pub title: String,
	// the page body, already rendered to html
	pub content_html: String,
}
impl TryFrom<config::Page> for Page {
type Error = SiteError;
fn try_from(value: config::Page) -> Result<Self, Self::Error> {
let content_html = render_content(&value.file_path)?;
Ok(Page {
url: value.url, //
title: value.title,
content_html,
})
}
}
/// Feed-level rss settings, mirrored from the `config::Rss` structure.
pub struct RssMetadata {
	pub title: String,
	pub description: String,
	pub url: String,
	pub count: usize,
}
impl From<config::Rss> for RssMetadata {
	/// Carries the rss feed settings over from the config struct verbatim.
	fn from(value: config::Rss) -> Self {
		let config::Rss { title, description, url, count } = value;
		RssMetadata { title, description, url, count }
	}
}
/// The kinds of content that can be found at a url: a page, a post, or a
/// redirect (for content that used to live at this url).
pub enum Content<'a> {
	Page(&'a Page),
	Post(&'a Post),
	Redirect(UriPath),
}
/// All rendered site content plus the lookup tables pre-computed from it.
pub struct SiteContent {
	pub pages: HashMap<UriPath, Page>,
	pub posts: HashMap<UriPath, Post>,
	// maps old urls to the current url that should be redirected to
	pub old_url_mappings: OldUrlMappings,
	// per-tag post url lists; each list is ordered newest-first (see `new`)
	pub post_tag_mappings: UrlsByTag,
	// all post urls ordered newest-first
	pub sorted_post_urls: Vec<UriPath>,
	pub rss: RssMetadata,
}
impl SiteContent {
	/// Builds the full set of site content from the pages and posts configs:
	/// renders every page and post, records old-url redirect mappings, and
	/// pre-computes the date-ordered and per-tag post url lists.
	pub fn new(pages_config: config::Pages, posts_config: config::Posts) -> Result<Self, SiteError> {
		let mut old_url_mappings = OldUrlMappings::new();
		let mut post_tag_mappings = UrlsByTag::new();

		// render all pages, recording any old-url redirects along the way
		let mut pages = HashMap::<UriPath, Page>::new();
		for page_config in pages_config.pages.iter() {
			let page = Page::try_from(page_config.clone())?;
			if let Some(old_urls) = &page_config.old_urls {
				old_url_mappings.add_mappings(old_urls, &page.url);
			}
			pages.insert(page.url.clone(), page);
		}

		// render all posts, likewise recording old-url redirects
		let mut posts = HashMap::<UriPath, Post>::new();
		for post_config in posts_config.posts.iter() {
			let post = Post::try_from(post_config.clone())?;
			if let Some(old_urls) = &post_config.old_urls {
				old_url_mappings.add_mappings(old_urls, &post.url);
			}
			posts.insert(post.url.clone(), post);
		}

		// pre-sort post urls newest-first. building the tag mapping from this
		// same pass means each tag's url list ends up in date order too
		let mut sorted_post_urls = Vec::with_capacity(posts.len());
		for post in posts.values().sorted_by(|a, b| b.date.cmp(&a.date)) {
			sorted_post_urls.push(post.url.clone());
			post_tag_mappings.add_mappings(&post.url, &post.tags);
		}

		let rss = RssMetadata::from(posts_config.rss);
		Ok(SiteContent { pages, posts, old_url_mappings, post_tag_mappings, sorted_post_urls, rss })
	}

	/// Resolves a url to its content: old-url redirects take priority, then
	/// posts, then pages. Returns None when nothing matches.
	pub fn get_content_at(&self, url: &UriPath) -> Option<Content> {
		if let Some(new_url) = self.old_url_mappings.get(url) {
			return Some(Content::Redirect(new_url.clone()));
		}
		if let Some(post) = self.posts.get(url) {
			return Some(Content::Post(post));
		}
		self.pages.get(url).map(Content::Page)
	}

	/// Returns all posts, newest first.
	pub fn get_posts_ordered_by_date(&self) -> Vec<&Post> {
		self.sorted_post_urls.iter().map(|url| &self.posts[url]).collect()
	}

	/// Returns the posts carrying the given tag, newest first. An unknown tag
	/// yields an empty list.
	pub fn get_posts_with_tag_ordered_by_date(&self, tag: &Tag) -> Vec<&Post> {
		match self.post_tag_mappings.get(tag) {
			Some(urls) => urls.iter().map(|url| &self.posts[url]).collect(),
			None => Vec::new(),
		}
	}

	/// Returns the most recently dated post, if any posts exist at all.
	pub fn get_latest_post(&self) -> Option<&Post> {
		let url = self.sorted_post_urls.first()?;
		Some(&self.posts[url])
	}
}
/// The main application service: owns the server config, the tera template
/// renderer, and the site content (kept behind a RwLock).
pub struct SiteService {
	pub server_config: config::Server,
	pub renderer: tera::Tera,
	pub content: RwLock<SiteContent>,
}
impl SiteService {
	/// Constructs the service: builds all site content from the configs and
	/// loads/parses every tera template found under the configured templates path.
	pub fn new(
		server_config: config::Server,
		pages_config: config::Pages,
		posts_config: config::Posts,
	) -> Result<Self, SiteError> {
		let content = SiteContent::new(pages_config, posts_config)?;
		let mut templates_path = PathBuf::from(&server_config.templates_path);
		templates_path.push("**/*"); // tera takes a glob pattern, not a directory
		log::debug!("Using templates path: {:?}", templates_path);
		// NOTE: panics if the templates path is not valid utf-8
		let renderer = tera::Tera::new(templates_path.as_path().to_str().unwrap())?;
		log::debug!(
			"Templates loaded and parsed from the templates path: {:?}",
			renderer.get_template_names().collect::<Vec<&str>>()
		);
		Ok(SiteService {
			server_config, //
			renderer,
			content: RwLock::new(content),
		})
	}

	/// Renders the most recent post with the "post.html" template (front page).
	/// If there are no posts at all, the template is rendered without a "post" value.
	pub fn serve_latest_post(&self) -> HttpResponse {
		let content = self.content.read().expect("SiteContent read lock failed"); // TODO: better error handling
		let post = content.get_latest_post();
		let mut context = tera::Context::new();
		if let Some(post) = post {
			context.insert("post", post);
		}
		HttpResponse::Ok().body(self.renderer.render("post.html", &context).unwrap())
	}

	/// Renders the list of posts carrying `tag` (newest first) with "tag.html".
	pub fn serve_posts_by_tag(&self, tag: &Tag) -> HttpResponse {
		let content = self.content.read().expect("SiteContent read lock failed"); // TODO: better error handling
		let posts = content.get_posts_with_tag_ordered_by_date(tag);
		let mut context = tera::Context::new();
		context.insert("tag", tag);
		context.insert("posts", &posts);
		HttpResponse::Ok().body(self.renderer.render("tag.html", &context).unwrap())
	}

	/// Renders all posts (newest first) with the "archive.html" template.
	pub fn serve_posts_archive(&self) -> HttpResponse {
		let content = self.content.read().expect("SiteContent read lock failed"); // TODO: better error handling
		let posts = content.get_posts_ordered_by_date();
		let mut context = tera::Context::new();
		context.insert("posts", &posts);
		HttpResponse::Ok().body(self.renderer.render("archive.html", &context).unwrap())
	}

	/// Builds and serves the rss feed from the most recent posts, up to the
	/// configured item count.
	pub fn serve_rss_feed(&self) -> HttpResponse {
		let content = self.content.read().expect("SiteContent read lock failed"); // TODO: better error handling
		let base_url = url::Url::parse(&content.rss.url).unwrap(); // TODO: validate this at config load time instead
		let posts = content.get_posts_ordered_by_date();
		let mut channel = rss::ChannelBuilder::default() //
			.title(&content.rss.title)
			.description(&content.rss.description)
			.link(&content.rss.url)
			.build();
		channel.set_items(
			posts
				.iter()
				.take(content.rss.count)
				.map(|post| {
					rss::ItemBuilder::default() //
						.title(post.title.clone())
						.content(post.content_html.clone())
						// Url::join borrows self, so no clone of base_url is needed
						.link(base_url.join(&post.url).unwrap().to_string())
						// rss 2.0 requires rfc 822/2822 dates; posts only carry a
						// date, so use local midnight. this also drops the
						// deprecated TimeZone::from_local_date api
						.pub_date(
							chrono::Local
								.from_local_datetime(&post.date.and_hms_opt(0, 0, 0).unwrap())
								.unwrap()
								.to_rfc2822(),
						)
						.build()
				})
				.collect::<Vec<rss::Item>>(),
		);
		HttpResponse::Ok().content_type("application/rss+xml").body(channel.to_string())
	}

	/// Looks up content matching the request path: a rendered page or post, or
	/// a permanent redirect for a known old url. Returns None when nothing matches.
	pub fn serve_content_by_url(&self, req: &HttpRequest) -> Option<Either<HttpResponse, Redirect>> {
		let content = self.content.read().expect("SiteContent read lock failed"); // TODO: better error handling
		let url = String::from(req.path());
		match content.get_content_at(&url) {
			Some(Content::Page(page)) => {
				log::debug!("Found page content at {}", req.path());
				let mut context = tera::Context::new();
				context.insert("page", page);
				let rendered = self.renderer.render("page.html", &context).unwrap();
				Some(Either::Left(HttpResponse::Ok().body(rendered)))
			}
			Some(Content::Post(post)) => {
				log::debug!("Found post content at {}", req.path());
				let mut context = tera::Context::new();
				context.insert("post", post);
				let rendered = self.renderer.render("post.html", &context).unwrap();
				Some(Either::Left(HttpResponse::Ok().body(rendered)))
			}
			Some(Content::Redirect(url)) => {
				log::debug!("Found redirect at {}", req.path());
				Some(Either::Right(Redirect::to(url).permanent()))
			}
			None => {
				log::debug!("No matching content at {}", req.path());
				None
			}
		}
	}
}

16
src/util.rs Normal file
View file

@ -0,0 +1,16 @@
pub fn deserialize_naivedate<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<chrono::NaiveDate, D::Error> {
let s: String = serde::Deserialize::deserialize(deserializer)?;
chrono::NaiveDate::parse_from_str(&s, "%Y-%m-%d").map_err(serde::de::Error::custom)
}
/// Serde serializer writing a `chrono::NaiveDate` via its Display impl, which
/// produces the same "YYYY-MM-DD" form as `to_string()`.
pub fn serialize_naivedate<S: serde::Serializer>(value: &chrono::NaiveDate, serializer: S) -> Result<S::Ok, S::Error> {
	serializer.collect_str(value)
}
/// Returns a subslice of up to `count` elements starting at `start`, clamped
/// to the bounds of `slice`. Returns `None` if `start` is out of range.
///
/// Unlike direct range indexing this never panics: a `count` running past the
/// end simply yields a shorter subslice, and `count == 0` yields an empty one.
/// (The previous `start + count - 1` underflowed on `count == 0` and could
/// overflow for very large `count`.)
pub fn safe_subslice<T>(slice: &[T], start: usize, count: usize) -> Option<&[T]> {
	if start >= slice.len() {
		return None;
	}
	// saturating_add guards against overflow; min clamps to the slice end
	let end = std::cmp::min(start.saturating_add(count), slice.len());
	Some(&slice[start..end])
}