#!/usr/bin/env python3
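"""Fetch the LIS Scholarship Archive preprint feed from SHARE and write
each entry out as an HTML file under scholarship/, prefixed with a
metadata comment block (the reST-style fields used by Nikola posts)."""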

import io
import os
import re
from urllib.parse import urlparse

import dateutil.parser
import feedparser
import requests


def main():
    # Characters that are not safe in a file-name slug.
    non_slug_chars = re.compile(r'[^a-zA-Z0-9_-]')
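
    # The script writes into scholarship/; create the directory up front
    # so the open() call below cannot fail on a missing directory.
    os.makedirs('scholarship', exist_ok=True)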

    # Fetch the SHARE Atom feed, filtered to preprints from the
    # LIS Scholarship Archive. The elasticQuery parameter is URL-encoded
    # JSON: {"bool": {"must": {"query_string": {"query": "*"}},
    #        "filter": [{"term": {"sources": "LIS Scholarship Archive"}},
    #                   {"term": {"types": "preprint"}}]}}
    req = requests.get('https://share.osf.io/api/v2/atom/?elasticQuery=%7B%22bool%22%3A%7B%22must%22%3A%7B%22query_string%22%3A%7B%22query%22%3A%22*%22%7D%7D%2C%22filter%22%3A%5B%7B%22term%22%3A%7B%22sources%22%3A%22LIS%20Scholarship%20Archive%22%7D%7D%2C%7B%22term%22%3A%7B%22types%22%3A%22preprint%22%7D%7D%5D%7D%7D')
    req.raise_for_status()
    feed = feedparser.parse(req.content)

    for d in feed['entries']:
        # Build a file-name slug from the entry's URL path: strip leading
        # non-slug characters (the leading '/'), then replace any remaining
        # unsafe characters with underscores.
        slug = urlparse(d['link']).path
        while slug and non_slug_chars.match(slug[0]):
            slug = slug[1:]
        slug = non_slug_chars.sub('_', slug)
        file_name = 'scholarship/%s.html' % slug

        # Parse the entry's date string into a datetime
        date = dateutil.parser.parse(d['date'])

        # Open output file and write the post
        with io.open(file_name, 'w', encoding='utf-8') as fp:
            fp.write(u"""\
<!--
.. title: {title}
.. slug: {slug}
.. date: {date}
.. link: {link}
.. description:
-->
<!DOCTYPE html>
<html lang="en">
<body>

<p>{text}</p>

""".format(title=d['title'].strip(), slug=slug,
           date=date.strftime('%Y-%m-%d %H:%M:%S'),
           link=d['link'].strip(),
           text=d['summary'].strip()))


if __name__ == '__main__':
    main()