From 18f43c5f83670b66893dabd821e944ed229b827c Mon Sep 17 00:00:00 2001 From: Macoy Madson Date: Wed, 13 Mar 2019 20:43:35 -0700 Subject: [PATCH] Added wordpress XML to HTML converter * ContentConverter now copies HTML files over * Home.html has slightly improved formatting --- ContentConverter.py | 7 +++++- WordPressXmlToOrgMode.py | 52 ++++++++++++++++++++++++++++++++++++++++ templates/Home.html | 4 +++- 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 WordPressXmlToOrgMode.py diff --git a/ContentConverter.py b/ContentConverter.py index dd0a3f7..9bc5419 100644 --- a/ContentConverter.py +++ b/ContentConverter.py @@ -5,7 +5,8 @@ contentDirectory = "content" renderedDirectory = "renderedContent" # Pairs of extension, pandoc read type -convertableContentTypes = [(".org", "org")] +# HTML->HTML is essentially a copy, with some pandoc sugar added (see pandoc docs) +convertableContentTypes = [(".org", "org"), (".html", "html")] contentExtensions = [] for contentType in convertableContentTypes: contentExtensions.append(contentType[0]) @@ -45,6 +46,10 @@ def renderContent(contentFilename): print("\tRendering {}".format(contentFilename)) outputFilename = contentFilenameToRenderedFilename(contentFilename) + + # Make subdirectory if necessary + if not os.path.exists(os.path.dirname(outputFilename)): + os.makedirs(os.path.dirname(outputFilename)) # TODO: Support multiple output formats? subprocess.run(["pandoc", diff --git a/WordPressXmlToOrgMode.py b/WordPressXmlToOrgMode.py new file mode 100644 index 0000000..956c29e --- /dev/null +++ b/WordPressXmlToOrgMode.py @@ -0,0 +1,52 @@ +""" +Dead stupid converter for Wordpress XML dumps. + +To get a dump of your Wordpress blog, go to "My Sites"->Configure->Settings->Export->Export your Content. +Extract that and feed this the .xml's filename +""" + +import xml.dom.minidom as minidom +import os + +wordpressFilename = "/home/macoy/Downloads/au79games.wordpress.com-2019-03-14-02_02_05-kzxvzzu42xryynuzqbm9c23f7gtdtyk3/au79games.wordpress.com-2019-03-14-02_02_03/au79games.wordpress.2019-03-14.001.xml" + +outputDir = "content/gamedev" + +contentFormat = "

{}

{}

" + +def titleToFilename(title): + filename = "" + for char in title: + if char.isalnum(): + filename += char + + return filename + +def main(): + domTree = minidom.parse(wordpressFilename) + collection = domTree.documentElement + items = collection.getElementsByTagName("item") + + for item in items: + # Ignore anything which isn't a post + if not item.getElementsByTagName("wp:post_type")[0].childNodes[0].data == "post": + continue + + title = item.getElementsByTagName("title")[0].childNodes[0].data + content = item.getElementsByTagName("content:encoded")[0].childNodes[0].data + + print(title) + print(titleToFilename(title)) + print(content) + + # Write to .html + outputFilename = "{}/{}.html".format(outputDir, titleToFilename(title)) + if not os.path.exists(os.path.dirname(outputFilename)): + os.makedirs(os.path.dirname(outputFilename)) + outFile = open(outputFilename, "w") + outFile.write(contentFormat.format(title, content)) + outFile.close() + print("Wrote {}".format(outputFilename)) + +if __name__ == "__main__": + main() diff --git a/templates/Home.html b/templates/Home.html index f09f81b..0a230b8 100644 --- a/templates/Home.html +++ b/templates/Home.html @@ -4,8 +4,10 @@ +

Simple Blog

+

The owner hasn't set up their templates/Home.html template yet.

{% for post in allPosts %} - {{post}} + {{post}}
{% end %}