Browse Source

Added wordpress XML to HTML converter

* ContentConverter now copies HTML files over
* Home.html has slightly improved formatting
master
Macoy Madson 3 years ago
parent
commit
18f43c5f83
  1. 7
      ContentConverter.py
  2. 52
      WordPressXmlToOrgMode.py
  3. 4
      templates/Home.html

7
ContentConverter.py

@ -5,7 +5,8 @@ contentDirectory = "content"
renderedDirectory = "renderedContent"
# Pairs of extension, pandoc read type
convertableContentTypes = [(".org", "org")]
# HTML->HTML is essentially a copy, with some pandoc sugar added (see pandoc docs)
convertableContentTypes = [(".org", "org"), (".html", "html")]
contentExtensions = []
for contentType in convertableContentTypes:
contentExtensions.append(contentType[0])
@ -45,6 +46,10 @@ def renderContent(contentFilename):
print("\tRendering {}".format(contentFilename))
outputFilename = contentFilenameToRenderedFilename(contentFilename)
# Make subdirectory if necessary
if not os.path.exists(os.path.dirname(outputFilename)):
os.makedirs(os.path.dirname(outputFilename))
# TODO: Support multiple output formats?
subprocess.run(["pandoc",

52
WordPressXmlToOrgMode.py

@ -0,0 +1,52 @@
"""
Dead stupid converter for WordPress XML dumps: writes each post out as an .html file.
To get a dump of your WordPress blog, go to "My Sites"->Configure->Settings->Export->Export your Content.
Extract that and set wordpressFilename (below) to the .xml's filename
"""
import xml.dom.minidom as minidom
import os
# Path of the extracted WordPress export .xml that main() parses
wordpressFilename = "/home/macoy/Downloads/au79games.wordpress.com-2019-03-14-02_02_05-kzxvzzu42xryynuzqbm9c23f7gtdtyk3/au79games.wordpress.com-2019-03-14-02_02_03/au79games.wordpress.2019-03-14.001.xml"
# Directory the generated .html files are written into (created by main() if missing)
outputDir = "content/gamedev"
# Template for each written file; filled with (title, content) via str.format()
contentFormat = "<h1>{}</h1><p>{}</p>"
def titleToFilename(title):
    """Return `title` reduced to only its alphanumeric characters.

    Every character for which `str.isalnum()` is false (spaces, punctuation,
    path separators, ...) is dropped, so the result is usable as a file name
    stem. A title containing no alphanumeric characters yields "".
    """
    # join() builds the result in one pass instead of repeated string +=
    return "".join(char for char in title if char.isalnum())
def _getElementText(parent, tagName):
    """Return the text of the first `tagName` element found under `parent`.

    All text and CDATA child nodes are concatenated, so content that the
    export split over several CDATA sections is returned in full. Returns ""
    when the element is missing or has no text (e.g. an untitled post).
    """
    elements = parent.getElementsByTagName(tagName)
    if not elements:
        return ""
    return "".join(
        node.data
        for node in elements[0].childNodes
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    )


def main():
    """Write every post in the WordPress export to its own .html file.

    Parses `wordpressFilename`, skips every <item> whose wp:post_type is not
    "post", and writes `contentFormat` filled with the post's title and
    content to "<outputDir>/<titleToFilename(title)>.html". The title,
    file name stem, content and output path are printed along the way.
    """
    domTree = minidom.parse(wordpressFilename)
    collection = domTree.documentElement
    for item in collection.getElementsByTagName("item"):
        # Ignore anything which isn't a post
        if _getElementText(item, "wp:post_type") != "post":
            continue

        title = _getElementText(item, "title")
        content = _getElementText(item, "content:encoded")
        filenameStem = titleToFilename(title)
        print(title)
        print(filenameStem)
        print(content)

        # Write to .html
        outputFilename = "{}/{}.html".format(outputDir, filenameStem)
        outputSubdirectory = os.path.dirname(outputFilename)
        if outputSubdirectory:
            # exist_ok avoids the check-then-create race of exists() + makedirs()
            os.makedirs(outputSubdirectory, exist_ok=True)
        # Explicit UTF-8 so non-ASCII post text does not depend on the locale;
        # the with-block closes the file even if write() raises
        with open(outputFilename, "w", encoding="utf-8") as outFile:
            outFile.write(contentFormat.format(title, content))
        print("Wrote {}".format(outputFilename))
# Run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
    main()

4
templates/Home.html

@ -4,8 +4,10 @@
<link rel="stylesheet" type="text/css" href="/webResources/styles.css">
</head>
<body>
<h1>Simple Blog</h1>
<p>The owner hasn't set up their templates/Home.html template yet.</p>
{% for post in allPosts %}
<a href="blog/{{post}}">{{post}}</a>
<a href="blog/{{post}}">{{post}}</a><br />
{% end %}
</body>
</html>

Loading…
Cancel
Save