can't figure this up this function (part of class for scraping internet site into a pdf) supposed to merge the pdf file generated from web pages using pypdf.
this is the method code:
def mergePdf(self,mainname,inputlist=0):
"""merging the pdf pages
getting an inputlist to merge or defaults to the class instance self.pdftomerge list"""
from pyPdf import PdfFileWriter, PdfFileReader
self._mergelist = inputlist or self.pdftomerge
self.pdfoutput = PdfFileWriter()
for name in self._mergelist:
print "merging %s into main pdf file: %s" % (name,mainname)
self._filestream = file(name,"rb")
self.pdfinput = PdfFileReader(self._filestream)
for p in self.pdfinput.pages:
self.pdfoutput.addPage(p)
self._filestream.close()
self._pdfstream = file(mainname,"wb")
self._pdfstream.open()
self.pdfoutput.write(self._pdfstream)
self._pdfstream.close()
I keep getting this error:
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 264, in write
self._sweepIndirectReferences(externalReferenceMap, self._root)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 339, in _sweepIndirectReferences
self._sweepIndirectReferences(externMap, realdata)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 315, in _sweepIndirectReferences
value = self._sweepIndirectReferences(externMap, value)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 339, in _sweepIndirectReferences
self._sweepIndirectReferences(externMap, realdata)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 315, in _sweepIndirectReferences
value = self._sweepIndirectReferences(externMap, value)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 324, in _sweepIndirectReferences
value = self._sweepIndirectReferences(externMap, data[i])
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 339, in _sweepIndirectReferences
self._sweepIndirectReferences(externMap, realdata)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 315, in _sweepIndirectReferences
value = self._sweepIndirectReferences(externMap, value)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 345, in _sweepIndirectReferences
newobj = data.pdf.getObject(data)
File "c:\tmp\easy_install-iik9vj\pyPdf-1.13-py2.7-win32.egg.tmp\pyPdf\pdf.py", line 645, in getObject
self.stream.seek(start, 0)
ValueError: I/O operation 开发者_开发知识库on closed file
but when I check the status of self._pdfstream I get:
<open file 'c:\python27\learn\dive.pdf', mode 'wb' at 0x013B2020>
what am I doing wrong?
i'll be glad for any help
OK, I found your problem. You were right to call file(). Don't try to call open() at all.
Your problem is the input file still needs to be open when you call self.pdfoutput.write(self._pdfstream), so you need to remove the line self._filestream.close().
Edit: This script will trigger the problem. The first write will succeed and the second will fail.
from pyPdf import PdfFileReader as PfR, PdfFileWriter as PfW
input_filename = 'in.PDF' # replace with a real file
output_filename = 'out.PDF' # something that doesn't exist
infile = file(input_filename, 'rb')
reader = PfR(infile)
writer = PfW()
writer.addPage(reader.getPage(0))
outfile = file(output_filename, 'wb')
writer.write(outfile)
print "First Write Successful!"
infile.close()
outfile.close()
infile = file(input_filename, 'rb')
reader = PfR(infile)
writer = PfW()
writer.addPage(reader.getPage(0))
outfile = file(output_filename, 'wb')
infile.close() # BAD!
writer.write(outfile)
print "You'll get an IOError Before this line"
outfile.close()
加载中,请稍侯......
精彩评论