95 lines
2.4 KiB
Python
95 lines
2.4 KiB
Python
#!/usr/bin/python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import feedparser
|
|
import time
|
|
|
|
def _load_recorded_posts(db_path):
    """Read the feed database into a ``{title: oldest_timestamp_ms}`` dict.

    Every record is one ``title|timestamp`` line.  The line is split on its
    LAST ``|`` so that a title containing the separator still parses.  Lines
    without a separator or with a non-numeric timestamp are ignored, and a
    database file that does not exist yet is treated as empty.
    """
    recorded = {}
    try:
        # errors='replace' keeps a record written in another encoding from
        # aborting the whole check.
        with open(db_path, 'r', encoding='utf-8', errors='replace') as database:
            for line in database:
                title, separator, ts_as_string = line.rstrip('\n').rpartition('|')
                if not separator:
                    continue
                try:
                    ts = int(ts_as_string)
                except ValueError:
                    continue
                # If a title was stored more than once, the oldest entry wins.
                recorded[title] = min(ts, recorded.get(title, ts))
    except FileNotFoundError:
        # First run: nothing has been recorded yet.
        pass
    return recorded


def check():
    """Fetch the fefe blog RSS feed and record the posts not seen before.

    Each feed entry is reduced to a short title (the text before the first
    ``.`` of the entry title) and looked up in the plain-text database file
    ``rss_feeds.db`` in the current working directory, which stores one
    ``title|timestamp_in_ms`` record per line.  Titles that are not recorded
    yet are appended with the current timestamp.

    Returns:
        ``"<title> - <link>"`` for the last newly recorded post in feed
        order, or ``None`` if no post was newly recorded.
    """
    url = 'https://blog.fefe.de/rss.xml'
    db = 'rss_feeds.db'
    limit = 12 * 3600 * 1000  # 12 hours, in milliseconds

    blogresult = None
    current_timestamp = int(round(time.time() * 1000))

    feed = feedparser.parse(url)

    # Load the database once instead of re-reading the file for every post.
    recorded = _load_recorded_posts(db)

    # Stage 1: drop every post that was recorded more than `limit` ms ago.
    candidates = []
    for post in feed.entries:
        title = post.title.split(".")[0]
        first_seen = recorded.get(title)
        if first_seen is not None and current_timestamp - first_seen > limit:
            continue
        candidates.append((title, post.link))

    # Stage 2: record the candidates that are not in the database at all.
    new_lines = []
    for title, link in candidates:
        if title in recorded:
            continue
        # Remember the title immediately so a duplicate inside the same feed
        # is not written twice.
        recorded[title] = current_timestamp
        new_lines.append(title + "|" + str(current_timestamp) + "\n")
        # NOTE(review): the source reached review with its indentation
        # stripped; this assumes the result is only set for posts that are
        # newly recorded (not for every candidate) - confirm.
        blogresult = str(title) + " - " + str(link)

    if new_lines:
        # The context manager guarantees the file is flushed and closed.
        with open(db, 'a', encoding='utf-8') as database:
            database.writelines(new_lines)

    return blogresult
|
|
|
|
|
|
def main():
    """Script entry point: run one feed check and ignore its return value."""
    _ = check()
|
|
|
|
# Run the feed check only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
|