开发者

Python Scrapy Framework Posting Wrong Images - Why/How Can I fix this?

开发者 https://www.devze.com 2023-03-19 10:14 出处:网络
I am working with the Scrapy framework for Python to scrape several entries, including text and images, from one site and post them to another, one by one. It all works well, except that the images are posting with the wrong corresponding text.

I am working with the Scrapy framework for Python to scrape several entries including text and images from one site and post them to another, one by one. It all works well, except that the images are posting with the wrong corresponding text. I can't, for the life of me, figure out what to do differently.

Here is the code. If anyone could help me figure this out, I would greatly appreciate it:

from flexmls.items import FlexmlsItem

# Spider (subclass of BaseSpider) that logs in to epropertysites.com and
# re-posts listings read from a CSV file.
# NOTE(review): in this paste the attributes and methods that follow are at
# column 0; they presumably belong inside the class body - confirm and
# re-indent before running.
class Epropertysites(BaseSpider):

# Spider name.
name = 'epropertysites'

# First page fetched; parse() submits the login form found on it.
start_urls = ['http://www.epropertysites.com/']
# Site root; post_images() prepends it to the form action it extracts.
URL  = 'http://www.epropertysites.com'

def parse(self, response):
    """Submit the login form found on the start page.

    The credentials are read from the ``EPROP_USER`` and ``EPROP_PASSW``
    settings (empty string when unset). The server's reply is handed to
    ``after_login``.
    """
    credentials = {
        'i_login': settings.get('EPROP_USER', u''),
        'i_password': settings.get('EPROP_PASSW', u''),
    }
    return FormRequest.from_response(
        response,
        formdata=credentials,
        callback=self.after_login
    )

def after_login(self, response):
    """Check the login result, then request one "add property" page per CSV row.

    If the response body contains ``'is incorrect'`` the login is treated as
    failed: a message is printed and logged, the spider waits for Enter on
    stdin, and nothing is yielded.

    Otherwise every row of ``results/flexmls.csv`` is read and a ``Request``
    for the "add property" page is yielded for it, carrying the row in
    ``meta['item']``; each response is handled by ``post_ad``.

    NOTE(review): all rows are yielded at once, so the multi-step posting
    flows of different listings may be interleaved by the downloader. If the
    site tracks "the listing being created" in the login session, that could
    attach images to the wrong listing - confirm, e.g. by limiting the crawl
    to one concurrent request.
    """
    if 'is incorrect' in response.body:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Failed to login with\r\n press enter')
        self.log('Login failes', log.ERROR)
        # Deliberate pause so the operator notices the failed login.
        raw_input()
        return

    csv_path = os.path.join("results", 'flexmls.csv')
    # Load the rows up front so the file handle is closed right away instead
    # of staying open until the interpreter collects it.
    with open(csv_path) as csv_file:
        rows = list(csv.DictReader(csv_file))

    for row in rows:
        # dont_filter: the same URL is requested once per row on purpose.
        yield Request('http://www.epropertysites.com/myprop_add.htm',
                      meta={'item': row},
                      dont_filter=True,
                      callback=self.post_ad)

def post_ad(self, response):
    """Build and submit the listing form for the row stored in the request meta.

    Reads the CSV row from ``response.request.meta['item']``, maps its
    columns onto the site's ``i_*`` form fields and returns a ``FormRequest``
    to the "add property" endpoint. The row and the submitted form data are
    passed on in ``meta`` (``'item'`` and ``'form'``); the response is
    handled by ``post_images``.
    """
    item = response.request.meta['item']
    try:
        print('posting %s' % item['address'].encode())
    except UnicodeError:
        # Progress output is best-effort: an address that cannot be encoded
        # must not stop the posting. Other errors are no longer swallowed.
        pass

    formdata = {
        'i_address': item['address'],
        'i_city': item['city'],
        # Empty price / beds / baths fall back to a default value.
        'i_price': item['price'] if item['price'] else u'0',
        'i_state': item['state'].strip(),
        'i_zip': item['zip'].strip(),
        'i_county': item['county'],
        'i_mls': item['id'].strip(),
        'i_type': '1',
        'i_br': item['beds'] if item['beds'] else u'1',
        'i_ba': item['baths'] if item['baths'] else u'1',
        'i_sqft': item['sqft'],
        'i_year_blt': item['year_built'],
        'i_tagline': item['address'],
        # Newlines are converted to CRLF for the description field.
        'i_desc': item['description'].replace("\n", '\r\n'),
        # Site key: the address with spaces turned into dashes and dots removed.
        # NOTE(review): strip() runs after spaces are already dashes, so
        # leading/trailing spaces become dashes - confirm whether the strip
        # was meant to come first.
        'i_site_key': item['address'].replace(u" ", u'-').replace(u".", u'').strip(),
        'i_domain': 'ePropertySites.com',
        # Random two-digit layout (02-05) and colour (01-12) codes.
        'i_layout': '%.2d' % random.randint(2, 5),
        'i_color02': '%.2d' % random.randint(1, 12),
    }

    return FormRequest('http://www.epropertysites.com/myprop_add.htm?&f=3',
                       formdata=formdata,
                       meta={'item': item, 'form': formdata},
                       callback=self.post_images)


def encode_multipart_formdata(self, fields, files):
    """Encode form fields and files as a ``multipart/form-data`` body.

    fields -- iterable of ``(name, value)`` pairs for plain form fields.
    files  -- iterable of ``(filename, value)`` pairs. ``filename`` is used
              as both the part's field name and its file name, and every
              file part is labelled ``image/jpeg``.

    Returns a ``(content_type, body)`` tuple: the Content-Type header value
    (including the boundary) and the body, whose lines are joined with CRLF.
    Values are inserted as given, so they must already be strings.
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    delimiter = '--' + BOUNDARY

    # Accumulator for the body lines (the original binding was garbled, so
    # the appends below hit an undefined name).
    lines = []
    for (key, value) in fields:
        lines.append(delimiter)
        lines.append('Content-Disposition: form-data; name="%s"' % key)
        lines.append('')  # blank line separates part headers from content
        lines.append(value)
    for (filename, value) in files:
        lines.append(delimiter)
        lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (filename, filename))
        lines.append('Content-Type: image/jpeg')
        lines.append('')
        lines.append(value)
    # Closing delimiter, followed by a final CRLF.
    lines.append(delimiter + '--')
    lines.append('')

    body = CRLF.join(lines)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body

def post_images(self, response):
    """Upload the listing's images as a hand-built multipart POST.

    Returns ``None`` (ending the flow for this listing) when the response
    says the website key is already in use. Otherwise reads the image paths
    from ``item['images']``, loads each file from the ``IMAGES_STORE``
    directory, encodes them with ``encode_multipart_formdata`` and posts
    them to the first form action found on the page. The response is
    handled by ``get_change_page``.
    """
    if 'That Website Key is already being used' in response.body:
        return
    page = HtmlXPathSelector(response)

    item = response.request.meta['item']
    # SECURITY: eval() executes whatever text is in the CSV column. Only
    # acceptable while the CSV is produced by a trusted step; if the column
    # holds a plain literal, ast.literal_eval would be the safe replacement.
    images = eval(item['images'])
    # NOTE(review): the third field name is a whole header line, not just
    # "mode" - it looks pasted from a captured request; confirm the intended
    # name.
    fields = [('i_caption_1', ''), ('v_max', '1'),
              ('Content-Disposition: form-data; name="mode"', 'send')]

    image_root = settings.get("IMAGES_STORE")
    files = []
    for image in images:
        # Binary mode so image bytes are read untouched (text mode can alter
        # them on some platforms); `with` closes each handle promptly.
        with open(os.path.join(image_root, image), 'rb') as image_file:
            files.append((os.path.basename(image), image_file.read()))

    content_type, body = self.encode_multipart_formdata(fields, files)
    return FormRequest(self.URL + page.select("//form/@action").extract()[0],
                       body=body,
                       method='POST',
                       headers={'Content-Type': content_type,
                                'content-length': len(body)},
                       meta={"item": item, 'form': response.request.meta['form']},
                       callback=self.get_change_page)

def get_change_page(self, response):
    """Request the edit page of the listing that was just created.

    The listing key is taken from the first ``&key=<digits>&`` match in the
    page's form actions. The item and form data from the previous request
    are forwarded in ``meta`` together with the key under ``'id'``; the
    response is handled by ``post_rest_info``.
    """
    selector = HtmlXPathSelector(response)
    key_matches = selector.select("//form/@action").re(r"&key=(\d+)&")
    ad_id = key_matches[0].strip()

    edit_url = "http://www.epropertysites.com/myproperties.htm?&f=mod&key=%s" % ad_id
    forwarded_meta = {
        'item': response.request.meta['item'],
        'form': response.request.meta['form'],
        'id': ad_id,
    }
    return Request(edit_url, meta=forwarded_meta, callback=self.post_rest_info)
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号