|
Revision 159, 0.6 kB
(checked in by dsandler, 2 years ago)
|
The refactoring monster was here. Now Digg's link-extraction code is factored out, so other sites can be added.
|
| Line | |
|---|
| 1 |
import sys ; sys.path.append('lib') |
|---|
| 2 |
from BeautifulSoup import BeautifulSoup |
|---|
| 3 |
from utils import * |
|---|
| 4 |
|
|---|
| 5 |
class DefaultExtractor(object):
    """Base link extractor: treats the given link as the final link.

    Subclasses override get_link() to resolve a site-specific page URL
    into the link that page points at.
    """

    def get_link(self, link):
        """Return `link` unchanged.

        link -- the URL (or any value) handed to the extractor.
        """
        return link
|---|
| 9 |
|
|---|
| 10 |
class DiggExtractor(DefaultExtractor):
    """Extractor that pulls the headline link out of a fetched page.

    NOTE(review): the class name suggests the page is a Digg story page
    whose first <h3> wraps the story link -- confirm against the caller.
    """

    def get_link(self, link):
        """Return the href of the anchor inside the first <h3> of the page.

        link -- URL of the page to fetch (via urlfetch) and parse.

        If the page has no <h3>, the <h3> has no anchor, or the anchor has
        no href, `link` itself is returned, matching DefaultExtractor,
        instead of failing with an opaque attribute/subscript error.
        Errors raised by urlfetch or the parser are not caught here.
        """
        page = urlfetch(link)
        soup = BeautifulSoup(page)

        title = soup.first('h3')
        if not title:
            # No headline element on the page: nothing to extract.
            return link

        anchor = title.a
        if not anchor:
            # Headline exists but does not wrap a link.
            return link

        # Anchor without an href falls back to the original link as well.
        return anchor.get('href', link)
|---|