From f80b7de2d8d614fa85bfa5121a91eab94597c89d Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Tue, 18 Jul 2017 18:17:45 -0400 Subject: [PATCH] Import feed into Nikola --- .gitlab-ci.yml | 4 ++++ import_feed.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100755 import_feed.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4851294..516a35e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,12 +2,16 @@ image: registry.gitlab.com/paddy-hack/nikola test: script: + - pip3 install python-dateutil feedparser + - ./import_feed.py - nikola build except: - master pages: script: + - pip3 install python-dateutil feedparser + - ./import_feed.py - nikola build artifacts: paths: diff --git a/import_feed.py b/import_feed.py new file mode 100755 index 0000000..a53dda8 --- /dev/null +++ b/import_feed.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +import dateutil.parser +import io +import feedparser +import re +from urllib.parse import urlparse + + +def main(): + non_slug_chars = re.compile(r'[^a-zA-Z0-9-_]') + + # Get feed + + for d in feed['entries']: + # Generate file name + url = urlparse(d['link']).path + + slug = url + while non_slug_chars.match(slug[0]): + slug = slug[1:] + slug = non_slug_chars.sub('_', slug) + + file_name = 'scholarship/%s.html' % slug + + # Parse date + date = dateutil.parser.parse(d['date']) + + # Open output file + with io.open(file_name, 'w', encoding='utf-8') as fp: + fp.write(u"""\ + + + + + +

{text}

+ +""".format(title=d['title'].strip(), slug=slug, + date=date.strftime('%Y-%m-%d %H:%M:%S'), + link=d['link'].strip(), + text=d['summary'].strip())) + + +if __name__ == '__main__': + main()