我想使用 PDFBox 库删除页面上显示的某些图像。
据我所知,识别一个图像最合适的方法是在 XObject 字典中找到它的"名称"。
因此,从理论上讲,需要做的就是从 XObject 字典中删除该图像,并删除显示该图像的操作符。
这就是我处理"从页面中删除图像"这一问题的思路。使用 PDFBox Debugger,
我找到了图像的 id(Im3)以及在页面上显示它的指令:
我的问题是:我应该如何处理显示图像的指令(Do 用于显示图像)之前和之后的那些指令?
我决定删除从 /Gs1 gs 到 /Im3 Do 的所有内容(包括这两条指令本身)。
这是正确的做法吗?下面是我的代码。它工作正常,即处理后不再有页面包含不需要的图像。我使用的是最新版本的 PDFBox(即 V3)。
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import static org.apache.pdfbox.contentstream.operator.OperatorName.DRAW_OBJECT;
import static org.apache.pdfbox.contentstream.operator.OperatorName.SET_GRAPHICS_STATE_PARAMS;
/**
 * Removes every drawing of one fixed, named image XObject ({@code Im3}) from the pages of a PDF
 * document, together with the matching entry in the page's XObject resource dictionary.
 *
 * <p>For each {@code /Im3 Do} instruction found in a page's content stream, the tokens from the
 * closest preceding {@code /<name> gs} instruction up to and including the {@code Do} operator
 * are deleted. If no preceding {@code gs} instruction exists, only the {@code /Im3 Do} pair is
 * deleted.
 *
 * <p>NOTE(review): deleting everything between {@code gs} and {@code Do} assumes that run holds
 * nothing but set-up for the image (no unrelated drawing, no unbalanced {@code q}/{@code Q}).
 * Confirm this against the actual content streams being processed.
 */
public class GraphicRemover {

    /** Resource name of the image XObject whose drawings are removed. */
    private static final COSName X_OBJECT_NAME_TO_REMOVE = COSName.getPDFName("Im3");

    /**
     * Strips all drawings of the target image from every page of {@code document}.
     *
     * <p>Pages whose content stream does not draw the image are left untouched. For every other
     * page the content stream is replaced by a newly written, Flate-compressed stream and the
     * image is removed from the page's XObject resources.
     *
     * @param document the document to modify in place
     * @throws IOException if a content stream cannot be parsed or rewritten
     */
    public void remove(final PDDocument document) throws IOException {
        // Look the operators up once instead of on every loop iteration.
        final Operator setGraphicsState = Operator.getOperator(SET_GRAPHICS_STATE_PARAMS);
        final Operator drawObject = Operator.getOperator(DRAW_OBJECT);

        for (final PDPage page : document.getPages()) {
            final List<Object> tokens = new PDFStreamParser(page).parse();
            if (!removeDrawInstructions(tokens, setGraphicsState, drawObject)) {
                continue;
            }

            final PDStream newContents = new PDStream(document);
            // try-with-resources closes the stream even when writing the tokens fails.
            try (OutputStream newContentOutput =
                    newContents.createOutputStream(COSName.FLATE_DECODE)) {
                new ContentStreamWriter(newContentOutput).writeTokens(tokens);
            }
            page.setContents(newContents);
            removeWatermarkObject(page);
        }
    }

    /**
     * Deletes, in place, every instruction run that ends in {@code /Im3 Do}.
     *
     * @param tokens parsed content stream tokens; modified in place
     * @param setGraphicsState the {@code gs} operator
     * @param drawObject the {@code Do} operator
     * @return {@code true} if at least one drawing was removed
     */
    private static boolean removeDrawInstructions(final List<Object> tokens,
            final Operator setGraphicsState, final Operator drawObject) {
        boolean removedAny = false;
        // Walk backwards so that deleting a range never shifts tokens still to be visited.
        for (int i = tokens.size() - 2; i >= 0; i--) {
            final boolean drawsTargetImage = X_OBJECT_NAME_TO_REMOVE.equals(tokens.get(i))
                    && drawObject.equals(tokens.get(i + 1));
            if (!drawsTargetImage) {
                continue;
            }
            final int start = findRemovalStart(tokens, i, setGraphicsState);
            // subList's upper bound is exclusive: i + 2 makes the Do operator part of the range.
            tokens.subList(start, i + 2).clear();
            removedAny = true;
            // Resume just below the removed range; clamp in case the range reached the list end.
            i = Math.min(start, tokens.size() - 1);
        }
        return removedAny;
    }

    /**
     * Finds the first token to delete for the drawing whose name operand is at {@code nameIndex}.
     *
     * @param tokens parsed content stream tokens
     * @param nameIndex index of the image name operand of the {@code Do} instruction
     * @param setGraphicsState the {@code gs} operator
     * @return index of the operand of the closest preceding {@code gs} operator, or
     *         {@code nameIndex} when there is no preceding {@code gs} with an operand
     */
    private static int findRemovalStart(final List<Object> tokens, final int nameIndex,
            final Operator setGraphicsState) {
        // Stop at index 1 so the operand in front of the gs operator is always in range.
        for (int j = nameIndex - 1; j >= 1; j--) {
            if (setGraphicsState.equals(tokens.get(j))) {
                return j - 1;
            }
        }
        return nameIndex;
    }

    /**
     * Removes the target image from the page's XObject resources and then drops the XObject
     * dictionary itself if it is missing or empty.
     *
     * @param page the page whose resources are cleaned up
     */
    private void removeWatermarkObject(final PDPage page) {
        final COSDictionary xObjects = xObjectDictionary(resourceDictionary(page));
        // The entry may already be gone, e.g. when several pages share one resource dictionary.
        if (xObjects != null) {
            xObjects.removeItem(X_OBJECT_NAME_TO_REMOVE);
        }
        removeEmptyXobjects(page);
    }

    /**
     * Removes the XObject entry from the page's resources when it is absent or has no items.
     *
     * @param page the page whose resources are cleaned up
     */
    private void removeEmptyXobjects(final PDPage page) {
        final COSDictionary resources = resourceDictionary(page);
        if (resources == null) {
            return;
        }
        final COSDictionary xObjects = xObjectDictionary(resources);
        if (xObjects == null || xObjects.size() == 0) {
            resources.removeItem(COSName.XOBJECT);
        }
    }

    /** Returns the page's resource dictionary, or {@code null} if the page has no resources. */
    private static COSDictionary resourceDictionary(final PDPage page) {
        return page.getResources() == null ? null : page.getResources().getCOSObject();
    }

    /** Returns the XObject sub-dictionary of {@code resources}, or {@code null} if absent. */
    private static COSDictionary xObjectDictionary(final COSDictionary resources) {
        if (resources == null) {
            return null;
        }
        final Object xObjects = resources.getDictionaryObject(COSName.XOBJECT);
        return xObjects instanceof COSDictionary ? (COSDictionary) xObjects : null;
    }
}