package net.sourceforge.docfetcher.model.parse;

import com.google.common.io.Closeables;
import de.schlichtherle.truezip.file.TFile;
import de.schlichtherle.truezip.file.TFileInputStream;
import de.schlichtherle.truezip.file.TVFS;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.TextExtractor;
import net.sourceforge.docfetcher.TestFiles;
import net.sourceforge.docfetcher.util.Util;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.EmptyFileException;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:net/sourceforge/docfetcher/model/parse/TestParseFromZip.class */
public final class TestParseFromZip {

    /* loaded from: input_file:net/sourceforge/docfetcher/model/parse/TestParseFromZip$ZipAndRun.class */
    /**
     * Fixture that runs a stream-based check against a zipped copy of a test file.
     * <p>
     * The constructor does all the work: it copies the given test file into a
     * new {@code archive.zip} inside a temporary directory, opens the zipped
     * entry as an input stream and passes it to
     * {@link #handleInputStream(InputStream)}. Afterwards the stream is closed,
     * the archive is unmounted and the temporary directory is deleted, whether
     * or not any of the previous steps failed.
     * <p>
     * Note: {@code handleInputStream} is invoked from the constructor, i.e.
     * before field initializers of the subclass have run, so implementations
     * must not rely on instance fields they declare themselves.
     */
    private static abstract class ZipAndRun {
        /**
         * Zips the given test file and feeds the zipped entry to
         * {@link #handleInputStream(InputStream)}.
         *
         * @param testFiles the test file to copy into the temporary archive
         * @throws Exception whatever the setup, the callback or the cleanup throws
         */
        public ZipAndRun(TestFiles testFiles) throws Exception {
            TFile original = new TFile(testFiles.getPath());
            File tempDir = Util.createTempDir();
            TFile archive = new TFile(tempDir, "archive.zip");
            try {
                // Calling mkdir() on the archive path creates the (empty) zip archive.
                archive.mkdir();
                TFile zippedCopy = new TFile(archive, original.getName());
                original.cp(zippedCopy);
                TFileInputStream zippedStream = new TFileInputStream(zippedCopy);
                try {
                    handleInputStream(zippedStream);
                } finally {
                    Closeables.closeQuietly(zippedStream);
                }
            } finally {
                // Unmount first so the archive file is released, but delete the
                // temporary directory even if unmounting fails.
                try {
                    TVFS.umount(archive);
                } finally {
                    Util.deleteRecursively(tempDir);
                }
            }
        }

        /**
         * Callback that receives the open stream over the zipped copy of the
         * test file. The stream is closed by the constructor after this method
         * returns or throws.
         *
         * @param inputStream stream reading the test file from inside the zip archive
         * @throws Exception any failure; it propagates out of the constructor
         */
        protected abstract void handleInputStream(InputStream inputStream) throws Exception;
    }

    /**
     * Runs two independent scenarios against a zipped copy of the Word test
     * document: an event-based read with {@link POIFSReader}, and a text
     * extraction with {@link WordExtractor}. Each scenario gets its own fresh
     * stream.
     */
    @Test
    public void testZippedOffice() throws Exception {
        // Scenario 1: event-based read with a listener that ignores every event.
        new ZipAndRun(TestFiles.doc) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                POIFSReaderListener ignoreEvents = new POIFSReaderListener() {
                    @Override
                    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                        // Intentionally empty.
                    }
                };
                POIFSReader reader = new POIFSReader();
                reader.registerListener(ignoreEvents, "\u0005SummaryInformation");
                reader.read(in);
            }
        };

        // Scenario 2: plain text extraction; the extractor is always closed.
        new ZipAndRun(TestFiles.doc) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                WordExtractor extractor = null;
                try {
                    extractor = new WordExtractor(in);
                    extractor.getText();
                } finally {
                    Closeables.closeQuietly(extractor);
                }
            }
        };
    }

    /**
     * Feeds a zipped copy of the {@code doc_old} test file to
     * {@link WordExtractor} and expects an {@link OldWordFileFormatException}.
     */
    @Test(expected = OldWordFileFormatException.class)
    public void testOldWordDoc() throws Exception {
        new ZipAndRun(TestFiles.doc_old) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                WordExtractor extractor = null;
                try {
                    extractor = new WordExtractor(in);
                    extractor.getText();
                } finally {
                    // Stays null if the constructor threw; closeQuietly is
                    // called with null in that case, exactly as before.
                    Closeables.closeQuietly(extractor);
                }
            }
        };
    }

    /**
     * Reads the zipped Word document with {@link POIFSReader} and then hands
     * the very same stream to {@link WordExtractor}. The test expects an
     * {@link EmptyFileException} (presumably because the first read has already
     * consumed the stream).
     */
    @Test(expected = EmptyFileException.class)
    public void testZippedOfficeFail() throws Exception {
        new ZipAndRun(TestFiles.doc) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                // First pass over the stream: event-based read, events ignored.
                POIFSReaderListener ignoreEvents = new POIFSReaderListener() {
                    @Override
                    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                        // Intentionally empty.
                    }
                };
                POIFSReader reader = new POIFSReader();
                reader.registerListener(ignoreEvents, "\u0005SummaryInformation");
                reader.read(in);

                // Second pass over the same stream.
                WordExtractor extractor = null;
                try {
                    extractor = new WordExtractor(in);
                    extractor.getText();
                } finally {
                    Closeables.closeQuietly(extractor);
                }
            }
        };
    }

    /**
     * Runs two independent scenarios against a zipped copy of the docx test
     * file: text extraction through {@link ExtractorFactory}, and reading the
     * package properties through {@link OPCPackage}. Each scenario gets its own
     * fresh stream.
     */
    @Test
    public void testZippedOffice2007() throws Exception {
        // Scenario 1: the extracted text must be exactly 659 characters long.
        new ZipAndRun(TestFiles.docx) {
            @Override
            protected void handleInputStream(InputStream inputStream) throws Exception {
                // NOTE(review): the extractor returned by createExtractor is never
                // closed here; consider closing it once its type is imported.
                String text = ExtractorFactory.createExtractor(inputStream).getText();
                Assert.assertEquals(659L, text.length());
            }
        };

        // Scenario 2: open the package and read its properties.
        new ZipAndRun(TestFiles.docx) {
            @Override
            protected void handleInputStream(InputStream inputStream) throws Exception {
                OPCPackage pkg = OPCPackage.open(inputStream);
                try {
                    pkg.getPackageProperties();
                } finally {
                    // Close the package even when reading the properties fails.
                    Closeables.closeQuietly(pkg);
                }
            }
        };
    }

    /**
     * Extracts the text of the zipped docx file and then tries to open the
     * very same stream again as an {@link OPCPackage}. The test expects an
     * {@link IOException}.
     */
    @Test(expected = IOException.class)
    public void testZippedOffice2007Fail() throws Exception {
        new ZipAndRun(TestFiles.docx) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                // First use of the stream: text extraction.
                String text = ExtractorFactory.createExtractor(in).getText();
                Assert.assertEquals(659L, text.length());

                // Second use of the same stream.
                OPCPackage pkg = OPCPackage.open(in);
                pkg.getPackageProperties();
                Closeables.closeQuietly(pkg);
            }
        };
    }

    /**
     * Parses a zipped copy of the HTML test file and checks that the extracted
     * text (attributes included) contains the phrase {@code "HTML file"}.
     */
    @Test
    public void testZippedHtml() throws Exception {
        new ZipAndRun(TestFiles.html) {
            @Override
            protected void handleInputStream(InputStream in) throws Exception {
                Source html = new Source(in);
                html.fullSequentialParse();

                TextExtractor extractor = html.getTextExtractor();
                extractor.setIncludeAttributes(true);

                String extractedText = extractor.toString();
                Assert.assertTrue(extractedText.contains("HTML file"));
            }
        };
    }

    /**
     * Loads a zipped copy of the multi-page PDF test file, extracts its text
     * sorted by position, wraps text and document metadata in a
     * {@link ParseResult}, and checks that the trimmed content equals the three
     * lines {@code "page 1"}, {@code "page 2"}, {@code "page 3"} joined with
     * {@code Util.LS}.
     */
    @Test
    public void testZippedPdf() throws Exception {
        new ZipAndRun(TestFiles.multi_page_pdf) {
            @Override
            protected void handleInputStream(InputStream inputStream) throws Exception {
                PDDocument pdf = PDDocument.load(inputStream);
                try {
                    PDFTextStripper stripper = new PDFTextStripper();
                    StringWriter extracted = new StringWriter();
                    stripper.setSortByPosition(true);
                    stripper.writeText(pdf, extracted);

                    PDDocumentInformation info = pdf.getDocumentInformation();
                    String expected = Util.join(Util.LS, "page 1", "page 2", "page 3");
                    ParseResult result = new ParseResult(extracted.getBuffer())
                            .setTitle(info.getTitle())
                            .addAuthor(info.getAuthor())
                            .addMiscMetadata(info.getSubject())
                            .addMiscMetadata(info.getKeywords());
                    String actual = result.getContent().toString().trim();

                    Assert.assertEquals(expected, actual);
                } finally {
                    // The loaded document holds resources of its own; release
                    // them even when extraction or the assertion fails.
                    Closeables.closeQuietly(pdf);
                }
            }
        };
    }
}
