diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4851294..5dd9cec 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,12 +2,16 @@ image: registry.gitlab.com/paddy-hack/nikola test: script: + - pip3 install requests python-dateutil feedparser + - ./import_feed.py - nikola build except: - master pages: script: + - pip3 install requests python-dateutil feedparser + - ./import_feed.py - nikola build artifacts: paths: diff --git a/import_feed.py b/import_feed.py new file mode 100755 index 0000000..2affb46 --- /dev/null +++ b/import_feed.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import dateutil.parser +import io +import feedparser +import re +import requests +from urllib.parse import urlparse + + +def main(): + non_slug_chars = re.compile(r'[^a-zA-Z0-9-_]') + + # Get feed + req = requests.get('https://share.osf.io/api/v2/atom/?elasticQuery=%7B%22bool%22%3A%7B%22must%22%3A%7B%22query_string%22%3A%7B%22query%22%3A%22*%22%7D%7D%2C%22filter%22%3A%5B%7B%22term%22%3A%7B%22sources%22%3A%22LIS%20Scholarship%20Archive%22%7D%7D%2C%7B%22term%22%3A%7B%22types%22%3A%22preprint%22%7D%7D%5D%7D%7D') + feed = feedparser.parse(req.content) + + for d in feed['entries']: + # Generate file name + url = urlparse(d['link']).path + + slug = url + while non_slug_chars.match(slug[0]): + slug = slug[1:] + slug = non_slug_chars.sub('_', slug) + + file_name = 'scholarship/%s.html' % slug + + # Parse date + date = dateutil.parser.parse(d['date']) + + # Open output file + with io.open(file_name, 'w', encoding='utf-8') as fp: + fp.write(u"""\ + + + + + +

{text}

+ +""".format(title=d['title'].strip(), slug=slug, + date=date.strftime('%Y-%m-%d %H:%M:%S'), + link=d['link'].strip(), + text=d['summary'].strip())) + + +if __name__ == '__main__': + main() diff --git a/scholarship/.gitkeep b/scholarship/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/scholarship/coming-soon.html b/scholarship/coming-soon.html deleted file mode 100644 index 85cf1df..0000000 --- a/scholarship/coming-soon.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - -

This page will be propagated by an RSS feed of new materials in LISSA. Check back during our soft launch to see what's being added!

- - -