update
This commit is contained in:
99
rss.py
Normal file
99
rss.py
Normal file
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import feedparser
|
||||
import time
|
||||
from subprocess import check_output
|
||||
import sys
|
||||
|
||||
def check():
|
||||
feed_name = 'fefe'
|
||||
url = 'https://blog.fefe.de/rss.xml'
|
||||
|
||||
#feed_name = sys.argv[1]
|
||||
#url = sys.argv[2]
|
||||
|
||||
db = 'rss_feeds.db'
|
||||
limit = 12 * 3600 * 1000
|
||||
|
||||
blogresult = None
|
||||
|
||||
#
|
||||
# function to get the current time
|
||||
#
|
||||
current_time_millis = lambda: int(round(time.time() * 1000))
|
||||
current_timestamp = current_time_millis()
|
||||
|
||||
def post_is_in_db(title):
|
||||
with open(db, 'r') as database:
|
||||
for line in database:
|
||||
if title in line:
|
||||
return True
|
||||
database.close()
|
||||
return False
|
||||
|
||||
# return true if the title is in the database with a timestamp > limit
|
||||
def post_is_in_db_with_old_timestamp(title):
|
||||
with open(db, 'r') as database:
|
||||
for line in database:
|
||||
if title in line:
|
||||
ts_as_string = line.split('|', 1)[1]
|
||||
ts = int(ts_as_string)
|
||||
if current_timestamp - ts > limit:
|
||||
return True
|
||||
database.close()
|
||||
return False
|
||||
|
||||
#
|
||||
# get the feed data from the url
|
||||
#
|
||||
feed = feedparser.parse(url)
|
||||
|
||||
#
|
||||
# figure out which posts to print
|
||||
#
|
||||
posts_to_print = []
|
||||
posts_to_skip = []
|
||||
|
||||
for post in feed.entries:
|
||||
# if post is already in the database, skip it
|
||||
# TODO check the time
|
||||
title = post.title.split(".")[0]
|
||||
link = post.link
|
||||
|
||||
if post_is_in_db_with_old_timestamp(title):
|
||||
posts_to_skip.append(title+";"+link)
|
||||
else:
|
||||
posts_to_print.append(title+";"+link)
|
||||
|
||||
#
|
||||
# add all the posts we're going to print to the database with the current timestamp
|
||||
# (but only if they're not already in there)
|
||||
#
|
||||
f = open(db, 'a')
|
||||
for line in posts_to_print:
|
||||
content = line.split(";")
|
||||
try:
|
||||
title = content[0]
|
||||
link = content[1]
|
||||
if not post_is_in_db(title):
|
||||
f.write(title + "|" + str(current_timestamp) + "\n")
|
||||
blogresult = str(title)+" - "+str(link)
|
||||
except:
|
||||
title = content[0].encode("cp1252").decode("utf-8", "ignore")
|
||||
link = content[1].encode("cp1252").decode("utf-8", "ignore")
|
||||
if not post_is_in_db(title):
|
||||
f.write(title + "|" + str(current_timestamp) + "\n")
|
||||
blogresult = str(title)+" - "+str(link)
|
||||
f.close
|
||||
|
||||
del feed
|
||||
|
||||
return blogresult
|
||||
|
||||
|
||||
def main():
|
||||
check()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Reference in New Issue
Block a user