source: feed2telegram/trunk/feed2telegram.py

Last change on this file was 13, checked in by cheese, 6 years ago

#1 apply cloudflare-scrape

File size: 5.6 KB
Line 
1#-*- coding: utf-8 -*-
2import datetime
3import email
4import json
5import os
6import sys
7import threading
8import time
9import traceback
10import urlparse
11
12import feedparser
13import requests
14
class Telegram:
    """Minimal Telegram Bot API client bound to one bot token and one chat.

    Every call is described by a `Telegram.API` object and executed by
    `request`, which returns the decoded JSON reply.
    """

    class API:
        """Description of one Bot API call: method name, HTTP verb, parameters."""

        # Endpoint template; the `url` property substitutes both placeholders.
        URL = 'https://api.telegram.org/bot<token>/<method>'

        def __init__(self, token, method, http_method='POST'):
            """Remember the token, the API method and the upper-cased HTTP verb."""
            self.token = token
            self.method = method
            self.http_method = http_method.upper()
            self.param = {}

        def setParam(self, **kwargs):
            """Merge `kwargs` into the call parameters; return self for chaining."""
            self.param.update(kwargs)
            return self

        @property
        def url(self):
            """Endpoint URL with the token and the method name filled in."""
            with_token = Telegram.API.URL.replace('<token>', self.token)
            return with_token.replace('<method>', self.method)

        @property
        def header(self):
            """HTTP headers sent along with every request."""
            return {
                'accept': 'application/x-www-form-urlencoded',
            }

    def __init__(self, token, chat_id):
        """Bind the client to a bot `token` and a destination `chat_id`."""
        self.token = token
        self.chatId = chat_id

    def request(self, api):
        """Execute `api` and return the decoded JSON response.

        GET calls carry their parameters in the query string; every other
        verb is sent as a POST with the parameters as the form body.

        Raises:
            Exception: with the reply's 'description' when 'ok' is false.
        """
        if api.http_method == 'GET':
            req = requests.get(api.url, headers=api.header, params=api.param)
        else:
            # Was requests.get(..., data=...): the form body belongs to a POST.
            req = requests.post(api.url, headers=api.header, data=api.param)

        response = req.json()

        if not response['ok']:
            raise Exception(response['description'])

        return response

    def getUpdates(self, limit=None):
        """Call getUpdates via GET; `limit` is only sent when it is truthy."""
        api = Telegram.API(token=self.token, method='getUpdates', http_method='GET')

        if limit:
            api.setParam(limit=limit)

        return self.request(api)

    def sendMessage(self, text):
        """Send `text` to the configured chat with the sendMessage method."""
        api = Telegram.API(token=self.token, method='sendMessage')
        api.setParam(chat_id=self.chatId, text=text)
        return self.request(api)
69
class Datetime:
    """Holder for a date parsed from an RFC 2822 string.

    The setter returns self, so calls chain:
    `Datetime().setRfc2822(text).toTimestamp()`.
    """

    def __init__(self):
        # Naive datetime.datetime once setRfc2822() has run; None until then.
        self.datetime = None

    def setRfc2822(self, value):
        """Parse the RFC 2822 date string `value` and store it; return self.

        Only the first six fields (year .. second) of the parsed tuple are
        used, so the stored datetime is naive.

        Raises:
            ValueError: if `value` cannot be parsed as an RFC 2822 date.
        """
        # A bare `import email` does not guarantee the `utils` submodule is
        # loaded, so import it explicitly where it is needed.
        import email.utils

        parsed = email.utils.parsedate(value)
        if parsed is None:
            # parsedate() signals failure with None; fail with a clear message
            # instead of an opaque TypeError from slicing None.
            raise ValueError('invalid RFC 2822 date: %r' % (value,))

        self.datetime = datetime.datetime(*parsed[:6])
        return self

    def toTimestamp(self):
        """Return the stored datetime as a float timestamp via time.mktime()."""
        return time.mktime(self.datetime.timetuple())
82
class Cloudflare:
    """Helpers for spotting and working around Cloudflare's challenge page."""

    @staticmethod
    def check(response):
        """Tell whether `response` is a 503 whose body holds the Cloudflare banner."""
        if response.status_code != 503:
            return False

        banner = 'DDoS protection by Cloudflare'
        return banner in response.content

    @staticmethod
    def get(*args, **kwargs):
        """Issue a GET through a new cfscrape scraper, forwarding all arguments."""
        # Imported here rather than at module level, so cfscrape is only
        # needed when this method is actually called.
        import cfscrape

        scraper = cfscrape.create_scraper()
        return scraper.get(*args, **kwargs)
92
class Feed:
    """A feed URL together with the change markers remembered between runs.

    The markers (`etag`, `modified`) are stored as JSON in a file under
    `save_dir` and are reloaded on construction when that file exists.
    """

    def __init__(self, url):
        """Set up the feed for `url` and restore saved markers, if any."""
        self.url = url
        self.etag = None
        self.modified = None
        self.feed = None

        self.save_dir = '.changes'

        if os.path.isfile(self.filepath):
            self.load()

    @property
    def filepath(self):
        """Path of the state file: `save_dir` joined with the URL's host part."""
        # NOTE(review): the file is keyed by host only, so two feeds served
        # from the same host would share one state file.
        domain = urlparse.urlparse(self.url).netloc
        return os.path.join(self.save_dir, domain)

    def save(self):
        """Write url, etag and modified to the state file as indented JSON."""
        data = {
            'url': self.url,
            'etag': self.etag,
            'modified': self.modified
        }
        jsonstr = json.dumps(data, indent=2)

        if not os.path.isdir(self.save_dir):
            os.makedirs(self.save_dir)

        with open(self.filepath, 'w') as f:
            f.write(jsonstr)

    def load(self):
        """Restore etag and modified from the state file."""
        with open(self.filepath, 'r') as f:
            jsonobj = json.load(f)

        # .get() keeps a state file that lacks a key from aborting start-up.
        self.etag = jsonobj.get('etag')
        self.modified = jsonobj.get('modified')

    def get(self):
        """Download and parse the feed, refresh the markers, return the result.

        The URL is fetched with a 10 second timeout; when `Cloudflare.check`
        matches the reply, the download is repeated through `Cloudflare.get`.
        Exceptions from the download propagate to the caller unchanged.
        """
        resp = requests.get(self.url, timeout=10.0)
        if Cloudflare.check(resp):
            resp = Cloudflare.get(self.url, timeout=10.0)

        self.feed = feedparser.parse(resp.content)

        # The parsed result may not expose these attributes; default to None
        # rather than catching every exception.
        self.etag = getattr(self.feed, 'etag', None)

        modified = getattr(self.feed, 'modified', None)
        if modified is None:
            # Fall back to the newest entry's publication date.
            modified = self._latestPublished(self.feed.entries)

        # With no usable date at all, the previous marker is kept.
        if modified is not None:
            self.modified = modified

        return self.feed

    @staticmethod
    def _latestPublished(entries):
        """Return the `published` string of the newest entry, or None."""
        dates = [entry.published for entry in entries
                 if getattr(entry, 'published', None)]
        if not dates:
            return None

        # max() returns the first maximal item, like index(max(...)) would.
        return max(dates, key=lambda rfc2822: Datetime().setRfc2822(rfc2822).toTimestamp())
157
class Feed2Telegram:
    """Poll a feed periodically and forward its entries to a Telegram chat.

    Args:
        feed_url: URL of the feed to poll (required).
        telegram_token: bot token used for sending (required).
        chat_id: destination chat (required).
        callback_get_message: called with an entry, returns the text to send
            (required).
        callback_get_entries: called with the feed's entry list, returns the
            entries to iterate; the default walks them in reverse order.
        check_interval: seconds to wait between two polls.
        new_entries_only: only send entries published after the `modified`
            marker the feed had before the poll.
        send_error: write the traceback of a failed poll to stderr.
        continue_on_error: keep polling after a failed poll instead of
            re-raising.
        threaded: make `start()` run the loop in a background thread.
    """

    def __init__(self,
                 feed_url,  # required
                 telegram_token,  # required
                 chat_id,  # required
                 callback_get_message,  # required
                 callback_get_entries=lambda entries: reversed(entries),
                 check_interval=60*60,
                 new_entries_only=True,
                 send_error=True,
                 continue_on_error=False,
                 threaded=False):
        self.url = feed_url
        self.feed = Feed(self.url)

        self.token = telegram_token
        self.chat_id = chat_id
        self.telegram = Telegram(self.token, self.chat_id)

        self.get_message = callback_get_message
        self.get_entries = callback_get_entries

        # Previously accepted but dropped; run() waited a hard-coded hour.
        self.check_interval = check_interval
        self.new_entries_only = new_entries_only
        self.send_error = send_error
        self.continue_on_error = continue_on_error

        self.thread = None
        if threaded:
            self.thread = threading.Thread(target=self.run)

        self.stop_event = threading.Event()
        self.last_modified = None

    def isSendingEntry(self, entry):
        """Return True if `entry` should be sent.

        Without `new_entries_only` every entry qualifies. With it, an entry
        qualifies only when a previous `last_modified` marker exists and the
        entry was published after it.
        """
        if not self.new_entries_only:
            return True

        if not self.last_modified:
            return False

        published = Datetime().setRfc2822(entry.published).toTimestamp()
        last = Datetime().setRfc2822(self.last_modified).toTimestamp()
        return published > last

    def once(self):
        """Run one poll: fetch the feed, send qualifying entries, save state."""
        # Remember the marker from before the fetch; get() overwrites it.
        self.last_modified = self.feed.modified
        feed = self.feed.get()

        for entry in self.get_entries(feed.entries):
            if self.stop_event.is_set():
                break

            if not self.isSendingEntry(entry):
                continue

            message = self.get_message(entry)
            self.telegram.sendMessage(message)

        self.feed.save()

    def run(self):
        """Poll until `stop()` is called, waiting `check_interval` between polls.

        A failed poll is reported on stderr when `send_error` is set and is
        re-raised immediately unless `continue_on_error` is set.
        """
        while not self.stop_event.is_set():
            try:
                self.once()
            except Exception:
                # Not a bare except: KeyboardInterrupt / SystemExit must still
                # be able to end the loop when continue_on_error is set.
                if self.send_error:
                    # Reported on stderr only; nothing is sent to the chat.
                    sys.stderr.write(traceback.format_exc())

                if not self.continue_on_error:
                    raise

            # Wait after successful polls too (the old code only waited when
            # continue_on_error was set, otherwise it polled in a tight loop).
            # Event.wait() returns at once when stop() has been called.
            self.stop_event.wait(self.check_interval)

    def start(self):
        """Start polling: in the background when threaded, else blocking."""
        self.stop_event.clear()

        if self.thread is None:
            self.run()
            return

        if self.thread.is_alive():
            return

        if self.thread.ident is not None:
            # A Thread object can only be started once; rebuild it to restart.
            self.thread = threading.Thread(target=self.run)

        # start(), not run(): run() would execute the loop in the caller.
        self.thread.start()

    def join(self, timeout=None):
        """Wait for the background thread, if one has been started."""
        # Joining a never-started thread raises RuntimeError, hence the check.
        if self.thread is not None and self.thread.ident is not None:
            self.thread.join(timeout)

    def stop(self):
        """Ask the polling loop to finish; also wakes a pending wait."""
        self.stop_event.set()
253
Note: See TracBrowser for help on using the repository browser.