Monday, March 3

Reading Content from a PDF file






How to extract content from a PDF file in java

Here I am extracting the last 200 characters of a PDF file.



import java.io.File;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

public class PDFReader {

    /**
     * @param args
     */
    public static void main(String[] args) {

        {
            try
            {
            PDDocument pddDocument=PDDocument.load(new File("C:/Users/mkum63/Desktop/vit-strategic-growth-inst-sp.pdf"));
           System.out.println(pddDocument.getNumberOfPages());
           PDFTextStripper textStripper=new PDFTextStripper();
          String text=textStripper.getText(pddDocument);
          //System.out.println(textStripper.getEndPage());
           System.out.println(text.subSequence(text.length()-200, text.length()));
            pddDocument.close();
            }
            catch(Exception ex)
            {
            ex.printStackTrace();
            }
            }
    }

}

Jar files required :

commons-logging-api-1.1.1
fontbox-1.2.1
pdfbox-1.3.1









JCR SQL2 Query to exclude a path

JCR query to select data from one parent path while excluding a child path




For example

I want to select all nt:unstructured PDF nodes under the en folder, except the nodes that fall under the investor-resources folder.









     select * from [nt:unstructured] as p
            where
               (isdescendantnode (p, [/content/dam/gsam/pdfs/us/en/])
               AND NOT isdescendantnode (p, [/content/dam/gsam/pdfs/us/en/investor-resources]))
                         and contains(p.*, 'application/pdf')



Sunday, March 2

Jersey web service + File Upload + Maven + Tomcat +Java + Rest Web service









------------------------------------------------------------------------------------------

package com.gs.gsam.lo.du;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

import com.sun.jersey.core.header.FormDataContentDisposition;
import com.sun.jersey.multipart.FormDataParam;

// Plain old Java Object it does not extend as class or implements
// an interface

// The class registers its methods for the HTTP GET request using the @GET annotation.
// Using the @Produces annotation, it defines that it can deliver several MIME types,
// text, XML and HTML.

// The browser requests per default the HTML MIME type.

//Sets the path to base URL + /hello
@Path("/upload")
public class DocumentUploader {

    /*
     * // @Produces(MediaType.TEXT_HTML)
     *
     * @GET public String sayHtmlHello() { return " " + " " + "Hello Jerse y  " + "" + "

"
     * + "Hello Jersey" + "

" + " "; }
     */
    @POST
    @Path("/pdf")
    @Consumes(MediaType.MULTIPART_FORM_DATA)
    public Response uploadFile(@FormDataParam("file") File file) {
        InputStream IS = null;;
        String uploadedFileLocation = "d://" + "Test.zip";

        try {
            IS = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {

        }

        // save it
        writeToFile(IS, uploadedFileLocation);

        String output = "The PDF File uploaded to : " + uploadedFileLocation;

        return Response.status(200).entity(output).build();

    }

    // save uploaded file to new location
    private void writeToFile(InputStream uploadedInputStream, String uploadedFileLocation) {
        OutputStream out = null;
        try {
            out = new FileOutputStream(new File(uploadedFileLocation));
            int read = 0;
            byte[] bytes = new byte[1024];

            out = new FileOutputStream(new File(uploadedFileLocation));
            while ((read = uploadedInputStream.read(bytes)) != -1) {
                out.write(bytes, 0, read);
            }
            out.flush();
            out.close();
            uploadedInputStream.close();
        } catch (IOException e) {

            e.printStackTrace();
        } finally {
            try {
                if (out != null) {
                    out.close();

                }
                if (uploadedInputStream != null) {
                    uploadedInputStream.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }

}

-------------------------------------------------------------------------

package com.gs.gsam.lo.client;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.ws.rs.core.MediaType;

public class ClientJ {

  public static void main(String[] args) throws IOException {

      InputStream is=null;
  try {

    Client client = Client.create();

    WebResource webResource = client
       .resource("http://localhost:8080/FileUpload-0.0.1-SNAPSHOT/rest/upload/pdf");

    InputStream IS = null;;

    File file =new File("D:/file/file.zip");
   
    String uploadedFileLocation = "d://" + "Test.zip";

    try {
        IS = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } finally {

    }

    // save it
    writeToFile(IS, uploadedFileLocation);

    String output = "The PDF File uploaded to : " + uploadedFileLocation;
   
    // is=new FileInputStream(file);
   /* ClientResponse response = webResource.type(MediaType.MULTIPART_FORM_DATA)
       .post(ClientResponse.class,file );

    if (response.getStatus() != 200) {
      throw new RuntimeException("Failed : HTTP error code : "
           + response.getStatus());
    }

    System.out.println("Output from Server .... \n");
    String output = response.getEntity(String.class);
    System.out.println(output);

 */   System.exit(0);
    } catch (Exception e) {

    e.printStackTrace();

    }finally{
        if(null!=is){
            is.close();
        }
    }

  }
 
//save uploaded file to new location
  private static  void writeToFile(InputStream uploadedInputStream, String uploadedFileLocation) {
      OutputStream out = null;
      try {
          out = new FileOutputStream(new File(uploadedFileLocation));
          int read = 0;
          byte[] bytes = new byte[1024];

          out = new FileOutputStream(new File(uploadedFileLocation));
          while ((read = uploadedInputStream.read(bytes)) != -1) {
              out.write(bytes, 0, read);
          }
          out.flush();
          out.close();
          uploadedInputStream.close();
      } catch (IOException e) {

          e.printStackTrace();
      } finally {
          try {
              if (out != null) {
                  out.close();

              }
              if (uploadedInputStream != null) {
                  uploadedInputStream.close();
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }

  }

}

Friday, February 21

Heuristic exceptions






Heuristic exceptions are inconsistent exceptions. These exceptions do not occur all the time and may occur suddenly due to some unavoidable condition.







For example, you are calling a service to do some transaction on a database and the data-source configuration is missing. This happened because the data-source configuration was removed the last time the server was restarted. So this can be restored by setting the configuration right.

For example, in a distributed system, multiple parties taking part in a two-phase commit transaction are waiting for the transaction manager to tell them whether they should all commit their individual transactions or not. The transaction manager takes a long time to respond, and some participants commit their individual transactions before the answer arrives. A situation can then arise where some participants have already committed while the others roll back because the transaction manager tells them to roll back. Thus the data in the distributed system becomes highly inconsistent.
Another example: you put a message in a JMS queue, but all of a sudden the queue connection breaks and you fail to get any proper response.

Such kinds of strange and inconsistent exceptions fall under heuristic exceptions.


-->

Thursday, February 20

Creating a zip file in java

Creating a zip file in java 




Converting a String into a text file and putting that in a zip file


  1. Here I am creating the File.zip file at location D:/File.zip
  2. Two text entries are created by reading an existing txt file
  3. One PDF entry is created by reading an existing PDF file
  4. One txt entry is created from a String
  5. All are zipped together and put in the zip file




import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;


/**
 * Builds {@code D:\File.zip} from two copies of a text file, one PDF file and
 * one text entry generated from an in-memory String.
 */
public class Zipper {

    /** Size in bytes of the buffer used when copying data into the archive. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Creates the archive and prints {@code Done} on success. An
     * {@link IOException} is reported by printing its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            ZipOutputStream zos = new ZipOutputStream(new FileOutputStream("D:\\File.zip"));
            try {
                addFile(zos, "spy.txt", "D:\\spy.txt");
                addFile(zos, "spy1.txt", "D:\\spy.txt");
                addFile(zos, "Promote PDF.pdf", "C:/Users/mkum63/Desktop/Promote PDF.pdf");

                StringBuilder sb = new StringBuilder();
                sb.append("dfdgfdgf");
                sb.append("\n");
                sb.append("dsfsdfs");
                sb.append("\n");
                sb.append("dsfsfsdfdffdf");

                // Encode explicitly so the entry does not depend on the platform charset.
                addEntry(zos, "spy4.txt",
                        new ByteArrayInputStream(sb.toString().getBytes("UTF-8")));
            } finally {
                // Closing the zip stream also closes the underlying file stream.
                zos.close();
            }

            System.out.println("Done");

        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Adds the file at {@code sourcePath} to the archive under {@code entryName}.
     *
     * @param zos        the archive being written
     * @param entryName  name of the entry inside the archive
     * @param sourcePath path of the file whose bytes become the entry content
     * @throws IOException if the file cannot be read or the entry cannot be written
     */
    private static void addFile(ZipOutputStream zos, String entryName, String sourcePath)
            throws IOException {
        addEntry(zos, entryName, new FileInputStream(sourcePath));
    }

    /**
     * Writes all bytes of {@code in} as a new archive entry and closes
     * {@code in}, even when the copy fails.
     *
     * @param zos       the archive being written
     * @param entryName name of the entry inside the archive
     * @param in        source of the entry content; closed by this method
     * @throws IOException if reading the source or writing the entry fails
     */
    private static void addEntry(ZipOutputStream zos, String entryName, InputStream in)
            throws IOException {
        try {
            zos.putNextEntry(new ZipEntry(entryName));
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                zos.write(buffer, 0, len);
            }
            zos.closeEntry();
        } finally {
            in.close();
        }
    }
}