Created
June 27, 2015 19:52
-
-
Save KMahoney/dcc12d3ff6a49c11cdc9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- So as an example of using a log orientated approach in PostgreSQL, | |
-- let's write a simple blog application. We will want to be able to: | |
-- * Write and edit blog posts | |
-- * Publish revisions of posts for public viewing | |
-- * Delete posts | |
-- * Add or remove tags to posts | |
-- Let's start by creating a schema. | |
DROP SCHEMA IF EXISTS logblog CASCADE; | |
CREATE SCHEMA logblog; | |
-- Article Table | |
-- Create a table representing the set of article slugs. This allows us | |
-- to reference a slug as a foriegn key and maintain | |
-- consistency. PostgreSQL would not allow you to reference the `slug` | |
-- field in the `article_revision` table in a foreign key as it is not | |
-- unique in that table. | |
CREATE TABLE logblog.article ( | |
slug TEXT NOT NULL PRIMARY KEY CHECK(slug SIMILAR TO '[-a-z]+') | |
); | |
-- Revision Table | |
-- Next is an immutable log of article revisions. This should look fairly | |
-- straight forward. We can create new articles by inserting the slug | |
-- into `article` and the revision into `article_revision` inside a | |
-- transaction, and update an article by simply inserting the revision | |
-- (I'll show you later). | |
CREATE SEQUENCE logblog.revision_id_seq; | |
CREATE TABLE logblog.article_revision ( | |
revision_id INTEGER PRIMARY KEY DEFAULT nextval('logblog.revision_id_seq'), | |
timestamp TIMESTAMP NOT NULL DEFAULT now(), | |
slug TEXT NOT NULL REFERENCES logblog.article, | |
title TEXT NOT NULL, | |
content TEXT NOT NULL | |
); | |
-- Publishing | |
-- Now an immutable log of article publish events. The public will be | |
-- able to see the last published version of an article. This means you | |
-- can publish an earlier revision to 'rollback' an article. | |
-- Note the timestamp when an article was published or deleted is | |
-- distinct from when the revision was created. Readers are probably more | |
-- interested in when an article was first published than when it was | |
-- first drafted. | |
CREATE TABLE logblog.article_publish ( | |
timestamp TIMESTAMP NOT NULL DEFAULT now(), | |
revision_id INTEGER REFERENCES logblog.article_revision | |
); | |
-- Deleting | |
-- An immutable log of article deletion events. Articles are only | |
-- considered deleted when the deletion timestamp is later than any | |
-- publish actions. This means articles can be 'undeleted' by | |
-- re-publishing them. In a log orientated database no data is every | |
-- truly removed. This is something you'll have to take into account | |
-- if you're handling sensitive data. | |
CREATE TABLE logblog.article_deletion ( | |
timestamp TIMESTAMP NOT NULL DEFAULT now(), | |
slug TEXT NOT NULL REFERENCES logblog.article | |
); | |
-- Tagging | |
-- An immutable log of tag events. It's awkward to create a tag table | |
-- with a set of unique tag names like we do with articles, so instead | |
-- we just record tag events. This is a bit lazy as it doesn't enforce | |
-- consistency with revoked tags (i.e. you can revoke a non-existant | |
-- tag). | |
CREATE TYPE logblog.tag_event_type AS ENUM ('add', 'revoke'); | |
CREATE TABLE logblog.tag_event ( | |
timestamp TIMESTAMP NOT NULL DEFAULT now(), | |
slug TEXT NOT NULL REFERENCES logblog.article, | |
event logblog.tag_event_type NOT NULL, | |
tag TEXT NOT NULL | |
); | |
-- Building Views | |
-- In order to easily query the current state of our application we can | |
-- build up some PostgreSQL views to make it easier for us. We're going | |
-- to make heavy use of `DISTINCT ON` to find the latest state of each | |
-- component. | |
-- This view is the latest deletion date for an article (if applicable) | |
CREATE VIEW logblog.last_deleted_view AS | |
SELECT DISTINCT ON (slug) timestamp AS deleted_on, slug | |
FROM logblog.article_deletion | |
ORDER BY slug, deleted_on DESC; | |
-- We will want to show users the latest published content of an article. | |
CREATE VIEW logblog.last_published_view AS | |
SELECT DISTINCT ON (rev.slug) | |
rev.revision_id, | |
pub.timestamp AS last_updated_on, | |
rev.slug, | |
rev.title, | |
rev.content | |
FROM logblog.article_publish AS pub | |
INNER JOIN logblog.article_revision AS rev ON rev.revision_id = pub.revision_id | |
ORDER BY rev.slug, last_updated_on DESC; | |
-- We'll also want to know when an article was first published, as this | |
-- is the date you usually show on an article (maybe you could use the | |
-- last published timestamp to show when it was last updated). | |
CREATE VIEW logblog.first_published_view AS | |
SELECT DISTINCT ON (rev.slug) | |
rev.slug, | |
pub.timestamp AS first_published_on | |
FROM logblog.article_publish AS pub | |
INNER JOIN logblog.article_revision AS rev ON rev.revision_id = pub.revision_id | |
ORDER BY rev.slug, first_published_on; | |
-- We'll aggregate the tags as a PostgreSQL array for convenience. | |
CREATE VIEW logblog.article_tag_view AS | |
WITH last_tag_event AS | |
(SELECT DISTINCT ON (slug, tag) * | |
FROM logblog.tag_event | |
ORDER BY slug, tag, timestamp DESC) | |
SELECT slug, array_agg(tag) AS tags | |
FROM last_tag_event | |
WHERE event = 'add' | |
GROUP BY slug; | |
-- The Public's View | |
-- Here we use the previous views as building blocks to create our public | |
-- article view. Note we don't show articles that have a deletion date | |
-- later than the last published date. | |
CREATE VIEW logblog.public_article_view AS | |
SELECT last_pub.slug, | |
first_pub.first_published_on, | |
last_pub.last_updated_on, | |
last_pub.title, | |
last_pub.content, | |
COALESCE(tags.tags, '{}'::TEXT[]) AS tags | |
FROM logblog.last_published_view AS last_pub | |
LEFT JOIN logblog.last_deleted_view AS del | |
ON del.slug = last_pub.slug | |
LEFT JOIN logblog.first_published_view AS first_pub | |
ON first_pub.slug = last_pub.slug | |
LEFT JOIN logblog.article_tag_view AS tags | |
ON tags.slug = last_pub.slug | |
WHERE NOT COALESCE(del.deleted_on > last_pub.last_updated_on, false) | |
ORDER BY first_pub.first_published_on; | |
-- The Life of a Blog Post | |
-- To finish, a fun query to show the entire history of an article. | |
CREATE VIEW logblog.article_history_view AS | |
WITH | |
revision_events AS | |
(SELECT timestamp, | |
slug, | |
('Created article revision ' || revision_id)::TEXT AS event | |
FROM logblog.article_revision), | |
publish_events AS | |
(SELECT pub.timestamp, | |
rev.slug, | |
('Published revision ' || pub.revision_id)::TEXT AS event | |
FROM logblog.article_publish AS pub | |
INNER JOIN logblog.article_revision AS rev | |
ON rev.revision_id = pub.revision_id), | |
deletion_events AS | |
(SELECT timestamp, | |
slug, | |
'Deleted article'::TEXT AS event | |
FROM logblog.article_deletion), | |
tag_events AS | |
(SELECT timestamp, | |
slug, | |
(CASE | |
WHEN event = 'add' THEN ('Added tag ' || tag) | |
WHEN event = 'revoke' THEN ('Deleted tag ' || tag) | |
END)::TEXT AS event | |
FROM logblog.tag_event) | |
(SELECT * FROM revision_events) | |
UNION (SELECT * FROM publish_events) | |
UNION (SELECT * FROM deletion_events) | |
UNION (SELECT * FROM tag_events); | |
-- Improving read performance | |
CREATE TABLE logblog.public_article_state ( | |
slug TEXT PRIMARY KEY REFERENCES logblog.article, | |
title TEXT, | |
content TEXT | |
); | |
CREATE FUNCTION logblog.insert_slug() RETURNS trigger AS $$ | |
BEGIN | |
INSERT INTO logblog.public_article_state (slug) VALUES (NEW.slug); | |
RETURN NEW; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE TRIGGER insert_slug | |
AFTER INSERT ON logblog.article | |
FOR EACH ROW EXECUTE PROCEDURE logblog.insert_slug(); | |
CREATE FUNCTION logblog.update_content() RETURNS trigger AS $$ | |
BEGIN | |
UPDATE logblog.public_article_state | |
SET title = rev.title, content = rev.content | |
FROM logblog.article_revision AS rev | |
WHERE rev.slug = logblog.public_article_state.slug | |
AND rev.revision_id = NEW.revision_id; | |
RETURN NEW; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE TRIGGER update_content | |
AFTER INSERT ON logblog.article_publish | |
FOR EACH ROW EXECUTE PROCEDURE logblog.update_content(); | |
-- Test Data | |
-- This is how you create a new article. If the article slug already | |
-- exists the transaction will abort. | |
BEGIN; | |
INSERT INTO logblog.article VALUES ('simple'); | |
INSERT INTO logblog.article_revision (revision_id, timestamp, slug, title, content) VALUES | |
(1, '2015-01-01 00:00:00', 'simple', | |
'A Simple Title', | |
'This is a simple published article'); | |
COMMIT; | |
-- Pubish the article | |
INSERT INTO logblog.article_publish (timestamp, revision_id) VALUES | |
('2015-01-01 01:00:00', 1); | |
-- Tag the article | |
INSERT INTO logblog.tag_event (timestamp, slug, event, tag) VALUES | |
('2015-01-01 02:00:00', 'simple', 'add', 'simple-tag'); | |
-- Some more articles | |
INSERT INTO logblog.article VALUES | |
('revised'), ('deleted'), ('unpublished'), ('unpublished-revision'), ('republished'); | |
INSERT INTO logblog.article_revision (revision_id, timestamp, slug, title, content) VALUES | |
(2, '2015-01-01 00:00:00', 'revised', | |
'Revised', | |
'You will not see this content because it has been revised.'), | |
(3, '2015-01-01 02:00:00', 'revised', | |
'Revised', | |
'This is revised and published content.'), | |
(4, '2015-01-01 00:00:00', 'deleted', | |
'Deleted', | |
'This is a deleted article. You should not see this.'), | |
(5, '2015-01-01 00:00:00', 'unpublished', | |
'Unpublished', | |
'This content was never published :('), | |
(6, '2015-01-01 00:00:00', 'unpublished-revision', | |
'Unpublished Revision', | |
'This article has revised content you can not see yet.'), | |
(7, '2015-01-01 02:00:00', 'unpublished-revision', | |
'Unpublished Revision', | |
'This is revised content you can not see.'), | |
(8, '2015-01-01 00:00:00', 'republished', | |
'Republished', | |
'This article was deleted then re-published.'); | |
INSERT INTO logblog.article_publish (timestamp, revision_id) VALUES | |
('2015-01-01 01:00:00', 2), | |
('2015-01-01 03:00:00', 3), | |
('2015-01-01 01:00:00', 4), | |
('2015-01-01 01:00:00', 6), | |
('2015-01-01 01:00:00', 8), | |
('2015-01-01 03:00:00', 8); | |
INSERT INTO logblog.article_deletion (timestamp, slug) VALUES | |
('2015-01-01 02:00:00', 'deleted'), | |
('2015-01-01 02:00:00', 'republished'); | |
INSERT INTO logblog.tag_event (timestamp, slug, event, tag) VALUES | |
('2015-01-01 01:00:00', 'revised', 'add', 'lots'), | |
('2015-01-01 02:00:00', 'revised', 'add', 'of'), | |
('2015-01-01 03:00:00', 'revised', 'add', 'tags'), | |
('2015-01-01 04:00:00', 'revised', 'add', 'deleted-tag'), | |
('2015-01-01 05:00:00', 'revised', 'revoke', 'deleted-tag'); | |
-- Just in case you want to add further revisions | |
ALTER SEQUENCE logblog.revision_id_seq RESTART WITH 9; | |
SELECT slug, first_published_on, content FROM logblog.public_article_view; | |
SELECT * FROM logblog.article_history_view ORDER BY slug, timestamp; | |
EXPLAIN SELECT * FROM logblog.public_article_view; | |
SELECT * FROM logblog.public_article_state; | |
EXPLAIN SELECT * FROM logblog.public_article_state; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment