Changeset 158 for calcium/trunk
- Timestamp:
- 02/12/06 19:16:24 (3 years ago)
- Files:
- calcium/trunk/calcium.py (modified) (4 diffs)
- calcium/trunk/coralcache.py (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
calcium/trunk/calcium.py
r156  r158
   6     6  from BeautifulSoup import BeautifulSoup
   7     7  import feedparser
         8  import coralcache
   8     9
   9    10  class Feed:
   …     …
  18    19      opener = urllib2.build_opener()
  19    20      return opener.open(req).read(length)
        21
        22  def coral_seed(url):
        23      sys.stdout.write('[%s]' % coralcache.coralize(url))
        24      page = urlfetch(coral_url, 1)
        25      return True
  20    26
  21    27  CRAWL_FEEDS = [
   …     …
  33    39  print "Calcium: loaded %d old URLs" % len(links_seen)
  34    40
  35        def coralize(url):
  36            parts = list(urlparse.urlsplit(url))
  37            if parts[0] != 'http': return False
  38            parts[1] = '%s.nyud.net:8090' % parts[1]
  39            coral_url = urlparse.urlunsplit(parts)
  40            sys.stdout.write('[%s]' % coral_url)
  41            page = urlfetch(coral_url, 1)
  42            return True
  43    41
  44    42  def feedtree_fetch(url):
   …     …
  73    71  sys.stdout.write("\n => Coralizing new URL: %s "
  74    72      % coral_url)
  75        if coralize(coral_url):
  76            sys.stdout.write(" (OK)\n")
  77        else:
  78            sys.stdout.write(" (ERR)\n")
        73
        74  coral_seed(coral_url)
        75  sys.stdout.write(" (OK)\n")
  79    76
  80    77  links_seen[link] = True
