Merge branch 'feed-fix' into 'master'

Little tweaks to import_feed.py

See merge request !2
This commit is contained in:
Vicky Steeves 2017-07-20 14:19:01 +00:00
commit d8dd378129
1 changed file with 9 additions and 1 deletion

View File

@@ -15,7 +15,11 @@ def main():
req = requests.get('https://share.osf.io/api/v2/atom/?elasticQuery=%7B%22bool%22%3A%7B%22must%22%3A%7B%22query_string%22%3A%7B%22query%22%3A%22*%22%7D%7D%2C%22filter%22%3A%5B%7B%22term%22%3A%7B%22sources%22%3A%22LIS%20Scholarship%20Archive%22%7D%7D%2C%7B%22term%22%3A%7B%22types%22%3A%22preprint%22%7D%7D%5D%7D%7D') req = requests.get('https://share.osf.io/api/v2/atom/?elasticQuery=%7B%22bool%22%3A%7B%22must%22%3A%7B%22query_string%22%3A%7B%22query%22%3A%22*%22%7D%7D%2C%22filter%22%3A%5B%7B%22term%22%3A%7B%22sources%22%3A%22LIS%20Scholarship%20Archive%22%7D%7D%2C%7B%22term%22%3A%7B%22types%22%3A%22preprint%22%7D%7D%5D%7D%7D')
feed = feedparser.parse(req.content) feed = feedparser.parse(req.content)
print("Downloaded feed, %d entries" % len(feed['entries']))
for d in feed['entries']: for d in feed['entries']:
print("Title: %s" % d['title'].strip())
# Generate file name # Generate file name
url = urlparse(d['link']).path url = urlparse(d['link']).path
@@ -23,11 +27,13 @@ def main():
while non_slug_chars.match(slug[0]): while non_slug_chars.match(slug[0]):
slug = slug[1:] slug = slug[1:]
slug = non_slug_chars.sub('_', slug) slug = non_slug_chars.sub('_', slug)
print("Filename: %s" % slug)
file_name = 'scholarship/%s.html' % slug file_name = 'scholarship/%s.html' % slug
# Parse date # Parse date
date = dateutil.parser.parse(d['date']) date = dateutil.parser.parse(d['published'])
print("Date: %s" % date.strftime('%Y-%m-%d %H:%M:%S'))
# Open output file # Open output file
with io.open(file_name, 'w', encoding='utf-8') as fp: with io.open(file_name, 'w', encoding='utf-8') as fp:
@@ -50,6 +56,8 @@ def main():
link=d['link'].strip(), link=d['link'].strip(),
text=d['summary'].strip())) text=d['summary'].strip()))
print()
if __name__ == '__main__': if __name__ == '__main__':
main() main()