huan/rss.py

95 lines
2.4 KiB
Python

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import feedparser
import time
def check():
    """Poll the fefe blog RSS feed and record posts that were not seen before.

    Each feed entry is reduced to a short title (the text before the first
    "."). Entries whose short title is already stored in the database file
    with a timestamp older than 12 hours are skipped. The remaining entries
    are appended to the database as "<title>|<timestamp in ms>" lines unless
    their title is already stored.

    Side effects: fetches the feed through ``feedparser.parse`` and appends
    to ``rss_feeds.db`` (a relative path, so it is resolved against the
    current working directory).

    Returns:
        "<title> - <link>" for the last entry newly written to the database
        during this call, or None when nothing new was written.
    """
    url = 'https://blog.fefe.de/rss.xml'
    db = 'rss_feeds.db'
    limit = 12 * 3600 * 1000  # 12 hours, expressed in milliseconds
    blogresult = None
    current_timestamp = int(round(time.time() * 1000))

    # Read the database once instead of re-scanning the file for every post.
    seen = _load_seen_posts(db)

    # Get the feed data from the url.
    feed = feedparser.parse(url)

    # Figure out which posts are candidates: everything except titles that
    # have been in the database for longer than `limit`.
    posts_to_print = []
    for post in feed.entries:
        title = post.title.split(".")[0]
        first_seen = seen.get(title)
        if first_seen is not None and current_timestamp - first_seen > limit:
            continue
        # Keep title and link as a pair; joining them with ";" and splitting
        # again would return the wrong link for titles that contain ";".
        posts_to_print.append((title, post.link))

    # Add the candidates to the database with the current timestamp, but only
    # if they are not already in there. The `with` block guarantees the file
    # is flushed and closed even if a write fails.
    with open(db, 'a') as database:
        for title, link in posts_to_print:
            if title in seen:
                continue
            try:
                database.write(title + "|" + str(current_timestamp) + "\n")
            except UnicodeError:
                # The file's encoding cannot represent the title. Fall back
                # to the cp1252 -> utf-8 round trip this code has always used
                # for that case and retry with the converted strings.
                title = title.encode("cp1252").decode("utf-8", "ignore")
                link = link.encode("cp1252").decode("utf-8", "ignore")
                if title in seen:
                    continue
                database.write(title + "|" + str(current_timestamp) + "\n")
            # Remember the title so a second entry with the same short title
            # in this run is not written twice.
            seen[title] = current_timestamp
            # NOTE(review): the original indentation was lost; this assumes
            # the result is only set for posts newly written to the database
            # -- confirm against the caller.
            blogresult = str(title) + " - " + str(link)
    return blogresult


def _load_seen_posts(db_path):
    """Return ``{title: oldest stored timestamp in ms}`` read from *db_path*.

    Lines have the form "<title>|<timestamp>". The line is split at the last
    "|" so titles that contain "|" still parse. Lines without a separator or
    with a non-numeric timestamp are ignored. A missing file (first run)
    yields an empty mapping.
    """
    seen = {}
    try:
        database = open(db_path, 'r')
    except FileNotFoundError:
        return seen
    with database:
        for line in database:
            title, separator, ts_as_string = line.rstrip("\n").rpartition('|')
            if not separator:
                continue
            try:
                ts = int(ts_as_string)
            except ValueError:
                continue
            # Keep the oldest timestamp: a title counts as old as soon as any
            # of its stored entries is older than the limit.
            if title not in seen or ts < seen[title]:
                seen[title] = ts
    return seen
def main():
    """Script entry point: run a single feed check and discard its result."""
    _ = check()
# Only run the check when this file is executed as a script, so importing the
# module has no side effects.
if __name__ == "__main__":
    main()