2/25/2016

Zip Files Using Java ExecutorService: Real-Time Example


The two programs below zip all the files in a folder concurrently using Java.


import java.io.File;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.google.common.collect.Lists;

public class ZipIt {

    public static void main(String[] args) {
        try {
            File dir = new File("<DIR PATH>");
            File[] xmlFiles = null;
            if (dir.isDirectory()) {
                xmlFiles = dir.listFiles(new FilenameFilter() {
                    @Override
                    public boolean accept(File folder, String name) {
                        return name.toLowerCase().endsWith(".xml");
                    }
                });
            }
            if (xmlFiles == null || xmlFiles.length == 0) {
                System.out.println("No .xml files found in the given directory.");
                return;
            }

            // Guava's Lists.partition splits the file list into consecutive
            // sublists of at most 60 files; each sublist is zipped by its own Callable.
            ExecutorService executorService = Executors.newFixedThreadPool(60);
            List<List<File>> smallerLists = Lists.partition(Arrays.asList(xmlFiles), 60);
            List<Callable<Integer>> callables = new ArrayList<Callable<Integer>>();
            for (List<File> list : smallerLists) {
                callables.add(new ZipFilesThread(list));
            }

            List<Future<Integer>> futures = null;
            try {
                // invokeAll blocks until every task has completed.
                futures = executorService.invokeAll(callables);
            } finally {
                executorService.shutdown();
            }

            for (Future<Integer> future : futures) {
                System.out.println("Files converted to zip: " + future.get());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}





import java.io.File;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.logging.Logger;

import com.mercuria.etl.util.ZipUtil;

public class ZipFilesThread implements Callable<Integer> {

    private static Logger logger = Logger.getLogger(ZipFilesThread.class.getName());

    private final List<File> files;

    public ZipFilesThread(List<File> files) {
        this.files = files;
    }

    @Override
    public Integer call() throws Exception {
        // Zip each file in this task's sublist, then delete the original.
        for (File xmlFile : files) {
            ZipUtil.zipFile(xmlFile.getAbsolutePath());
            xmlFile.delete();
        }
        return files.size();
    }
}
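ZipUtil here is an internal utility class (com.mercuria.etl.util.ZipUtil), so its implementation is not shown. For reference, a minimal sketch of what such a zipFile helper could look like with java.util.zip, assuming it writes <file>.zip next to the source file:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class ZipUtil {

    // Zips a single file to "<originalPath>.zip" next to the source file.
    public static void zipFile(String filePath) throws Exception {
        File source = new File(filePath);
        FileInputStream in = new FileInputStream(source);
        ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(filePath + ".zip"));
        try {
            zipOut.putNextEntry(new ZipEntry(source.getName()));
            byte[] buffer = new byte[8192];
            int len;
            while ((len = in.read(buffer)) > 0) {
                zipOut.write(buffer, 0, len);
            }
            zipOut.closeEntry();
        } finally {
            in.close();
            zipOut.close();
        }
    }
}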

9/01/2015

Execute PuTTY (Plink) commands on a remote Linux machine from Java


1. Before executing this program, download PuTTY and Plink and put both downloaded files in one folder (e.g., C:\putty). After downloading, install or run putty.exe and plink.exe once.


2. Either add this folder to the PATH, or set the correct folder path in the command string inside the Java program below, and then execute the program.



Below is the example Java program. You can test your command by filling in the correct host, username, and password, and by replacing the command in the out.write(...) call with your own.

Note: Make sure that your command ends with \n.





import java.io.InputStream;
import java.io.OutputStream;

public class PuttyTest {
    private static String host = "***";
    private static String userName = "***";
    private static String password = "****";

    public static void main(String[] args) throws Exception {
        PuttyTest test = new PuttyTest();
        System.out.println(test.getLimServerStatus());
    }

    public String getLimServerStatus() throws Exception {
        try {
            // Start an interactive Plink session; the remote command is then written to its stdin.
            String command = "c:/putty/plink -v " + host + " -l " + userName + " -pw " + password;
            Runtime r = Runtime.getRuntime();
            Process p = r.exec(command);
            Thread.sleep(1000);
            InputStream std = p.getInputStream();
            OutputStream out = p.getOutputStream();
            // InputStream err = p.getErrorStream();

            // The remote command must end with \n so the remote shell executes it.
            out.write("tail -n 1000 config/load_updates.hst | grep \"Error\" | wc -l\n".getBytes());
            out.flush();

            Thread.sleep(3000);

            // Drain whatever output has accumulated on stdout.
            String otherString = "";
            while (std.available() > 0) {
                otherString += String.valueOf((char) std.read());
            }

            int count = 0;
            String[] lines = otherString.split("\r\n|\r|\n");
            for (String string : lines) {
                System.out.println(string + " :" + count++);
            }
            p.destroy();
            if (lines.length >= 2) {
                return lines[lines.length - 2]; // the needed output is on the second-to-last line
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}
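As a side note, Plink can also take the remote command as a trailing argument, which avoids writing to the process's stdin and the fixed sleeps. A minimal sketch under the same assumptions (plink in c:/putty, placeholder credentials); the -batch flag disables interactive prompts:

import java.io.BufferedReader;
import java.io.InputStreamReader;

public class PlinkOneShot {
    public static void main(String[] args) throws Exception {
        // Pass the remote command directly; plink exits when the command finishes.
        String[] cmd = { "c:/putty/plink", "-batch", "-l", "user", "-pw", "password",
                "host", "tail -n 1000 config/load_updates.hst | grep Error | wc -l" };
        Process p = new ProcessBuilder(cmd).redirectErrorStream(true).start();
        BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
        p.waitFor();
    }
}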

7/02/2015

Extract Text From (Image, PDF, Image embedded in PDF)
----------------------------------------------------------------------------------------------------
Extracting text from a normal PDF is easy, but extracting text from a PDF that you received through a scan is a bit more difficult, because each scanned page is embedded in the PDF as an image.

Logic
-------
So for these kinds of PDFs, we first have to extract the images from the PDF and then extract the text from the images.

Step 1:

If you have a Maven project, add the dependency below to your pom. This dependency is required to extract images from the PDF.

<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>2.0.0</version>
</dependency>

Step 2:

To extract text from the images we need to install Tesseract OCR. Download the EXE from https://code.google.com/p/tesseract-ocr/ and install it.

After installing the EXE, add its home directory to the PATH. I added the installation folder path to the PATH system variable (Properties -> Advanced settings -> Environment Variables):

C:\Program Files (x86)\Tesseract-OCR

Step 3:

Open the Windows command prompt and run the command "tesseract". You should not get a "command not found" error here; if you do, check how to set the PATH.

Step 4:

Once everything is set, restart Eclipse and execute the program below with your PDF. It generates one text file per page image in the given folder.


import java.io.File;

import net.sourceforge.tess4j.util.PdfUtilities;

import org.apache.commons.io.FileUtils;



public class TesseractExample {

    public static void main(String[] args) {
        try {
            // Convert each page of the scanned PDF into a PNG image.
            File[] imageFiles = PdfUtilities.convertPdf2Png(new File(
                    "C:/santosh/1999_001.pdf"));
            File dir = new File("C:/santosh/IMAGE_TEXT");
            if (!dir.exists()) {
                if (dir.mkdir()) {
                    System.out.println("Directory is created!");
                }
            } else {
                FileUtils.cleanDirectory(dir);
            }
            int i = 1;
            for (File file : imageFiles) {
                // Run the installed tesseract EXE on each page image and wait for it
                // to finish; tesseract appends ".txt" to the output base name itself.
                Runtime.getRuntime()
                        .exec("tesseract " + file.getAbsolutePath() + " "
                                + dir + File.separator + "imageText" + i)
                        .waitFor();
                i++;
            }
        } catch (Exception e) {
            System.err.println(e.getMessage());
        }
    }
}
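Alternatively, tess4j also exposes the OCR engine in-process through its Tesseract class, so shelling out to the EXE is not strictly required. A minimal sketch, assuming the same sample PDF and that the Tesseract language data can be found (you may need setDatapath to point at your tessdata folder):

import java.io.File;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.util.PdfUtilities;

public class Tess4jApiExample {

    public static void main(String[] args) throws Exception {
        ITesseract ocr = new Tesseract();
        // ocr.setDatapath("C:/Program Files (x86)/Tesseract-OCR/tessdata"); // if needed
        // Convert the scanned PDF to page images, then OCR each page in-process.
        for (File page : PdfUtilities.convertPdf2Png(new File("C:/santosh/1999_001.pdf"))) {
            System.out.println(ocr.doOCR(page));
        }
    }
}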

4/01/2015

SFTP/FTP: Java file upload and download from an FTP server

The upload and download examples below use the JSch library (com.jcraft.jsch).


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

import com.jcraft.jsch.Channel;
import com.jcraft.jsch.ChannelSftp;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;

// Class name added here only to make the snippet compilable.
public class SftpFileTransfer {

    private String host = "host";
    private String user = "user";
    private String pwd = "password";
    private String localFileFullPath = "C:/temp/file.txt";
    private String remoteFileName = "ftpFileName";
    private String sftpWorkingDir = "/ftpFolder/test";

    public void uploadFile() throws Exception {
        JSch jsch = new JSch();
        Session session = jsch.getSession(user, host, 22);
        session.setPassword(pwd);
        java.util.Properties config = new java.util.Properties();
        // Skips host-key verification; for production, use a known_hosts file instead.
        config.put("StrictHostKeyChecking", "no");
        session.setConfig(config);
        session.connect();
        Channel channel = session.openChannel("sftp");
        channel.connect();
        ChannelSftp channelSftp = (ChannelSftp) channel;
        channelSftp.cd(sftpWorkingDir);
        channelSftp.put(new FileInputStream(new File(localFileFullPath)), remoteFileName);
        channelSftp.exit();
        session.disconnect();
    }

    public void downloadFile() throws Exception {
        JSch jsch = new JSch();
        Session session = jsch.getSession(user, host, 22);
        session.setPassword(pwd);
        java.util.Properties config = new java.util.Properties();
        config.put("StrictHostKeyChecking", "no");
        session.setConfig(config);
        session.connect();
        Channel channel = session.openChannel("sftp");
        channel.connect();
        ChannelSftp channelSftp = (ChannelSftp) channel;
        channelSftp.cd(sftpWorkingDir);
        channelSftp.get(remoteFileName, new FileOutputStream(new File(localFileFullPath)));
        channelSftp.exit();
        session.disconnect();
    }
}
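A minimal usage sketch, assuming the class name SftpFileTransfer added above and that the connection fields are filled in with real values:

public class SftpFileTransferDemo {
    public static void main(String[] args) throws Exception {
        SftpFileTransfer transfer = new SftpFileTransfer();
        transfer.uploadFile();   // pushes C:/temp/file.txt to /ftpFolder/test/ftpFileName
        transfer.downloadFile(); // pulls it back to the local path
    }
}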

3/06/2015

Split a large multi-header .csv file into multiple files in Java

Problem
----------------
Suppose we have a large .csv file with multiple headers, something like the example below. In a multi-threaded ETL process, we need to split this file into multiple files at each header.

AAA
Column1 | Column2 | Column3 ..................
row1
row2
....
;;;;;

BBB
Column1 | Column2 | Column3 ..................
row1
row2








For the above format, I used a regular expression as the split point:

String regex = "^.*[A-Z]$";

You can change this expression in the function below to match your own column headers before using it for your problem.

The split function below goes through the large file line by line and, whenever a header is encountered, moves the accumulated lines into a new file. The saveFile function does the actual creation and saving of each new file.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.logging.Logger;
import java.util.regex.Pattern;

// NEWLINE and logger are referenced by the methods below; shown here with plausible definitions.
private static final String NEWLINE = System.getProperty("line.separator");
private Logger logger = Logger.getLogger(getClass().getName());
private String parentFolder = "C:/ETL/copy/";
public void split(String fileName) throws IOException {
    try {
        // Open the file for line-by-line reading.
        File headFile = new File(parentFolder + fileName + ".csv");
        BufferedReader bufferedReader = new BufferedReader(new FileReader(headFile));
        StringBuffer stringBuffer = new StringBuffer();

        // Performs the splitting.
        String line;
        int row = 0;
        int counter = 1;
        while ((line = bufferedReader.readLine()) != null) {
            String regex = "^.*[A-Z]$";
            boolean isMatch = Pattern.matches(regex, line.trim());
            if (isMatch) {
                logger.info(line);
            }
            if (isMatch && row != 0) {
                // A new header starts here: flush everything collected so far
                // into its own file, then start a fresh buffer with this header.
                saveFile(stringBuffer, fileName + counter + ".csv", headFile.lastModified());
                counter++;
                stringBuffer = new StringBuffer();
                stringBuffer.append(line);
                stringBuffer.append(NEWLINE);
            } else {
                stringBuffer.append(line);
                stringBuffer.append(NEWLINE);
            }
            row++;
        }
        // Flush the final block.
        saveFile(stringBuffer, fileName + counter + ".csv", headFile.lastModified());
        bufferedReader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}


 
private void saveFile(StringBuffer stringBuffer, String filename, long lastModifiedTime)
        throws IOException {
    File dir = new File(parentFolder + "splittedFile");
    dir.mkdir();

    File file = new File(parentFolder + "splittedFile/" + filename);
    FileWriter output = null;
    try {
        output = new FileWriter(file);
        output.write(stringBuffer.toString());
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (output != null) {
            try {
                output.close();
            } catch (IOException e) {
                // nothing to do: the file could not be closed
            }
        }
    }
    // Carry the source file's timestamp over to the split file; this must happen
    // after the writer is closed, or the write itself resets the timestamp.
    file.setLastModified(lastModifiedTime);
}
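A minimal usage sketch, assuming the two methods above live in a class named, say, CsvSplitter (a placeholder name) and that C:/ETL/copy/bigfile.csv exists:

public class CsvSplitterDemo {
    public static void main(String[] args) throws Exception {
        CsvSplitter splitter = new CsvSplitter();
        // Splits C:/ETL/copy/bigfile.csv into bigfile1.csv, bigfile2.csv, ...
        // under C:/ETL/copy/splittedFile/.
        splitter.split("bigfile");
    }
}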

1/21/2015

From Java to HDFS file operations: read, write, copy, delete, create

1. Creating a directory in HDFS
2. Deleting a directory in HDFS
3. Copying a file from local to HDFS
4. Reading a file from HDFS
5. Writing a file to HDFS

Change the configuration below to point to your own .xml files and use it.


Configuration conf = new Configuration();
conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/core-site.xml"));
conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/hdfs-site.xml"));

-----------------------------------------------------------------------


package com.my.cert.example;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSFileOperations {

 public static void main(String args[]) throws IOException {
  Configuration conf = new Configuration();
  conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/core-site.xml"));
  conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/hdfs-site.xml"));
  FileSystem hdfs = FileSystem.get(conf);
  System.out.println("Home Dir: " + getHomedirectory(hdfs));
  System.out.println("Create Directory : "+ createDirectory(hdfs, "/testFolder"));
  System.out.println("copy File From Local: "+ copyFileFromLocal(hdfs, "testFolder","C:/hadoop/test/test.txt"));
  readDataFromHDFSFile(hdfs, "testFolder/test.txt");
  writingDataToHDFS(hdfs, "testFolder/test.txt");

 }

 public static Path getHomedirectory(FileSystem hdfs) throws IOException {
  return hdfs.getHomeDirectory();
 }

 /*
  * creating the directory in the HDFS
  */

 public static boolean createDirectory(FileSystem hdfs, String dirName)
   throws IOException {
  Path homeDir = getHomedirectory(hdfs);
  Path newFolderPath = new Path(dirName);
  newFolderPath = Path.mergePaths(homeDir, newFolderPath);
  // If the folder already exists, delete it first so a fresh, empty directory is created.
  if (hdfs.exists(newFolderPath)) {
   hdfs.delete(newFolderPath, true);
  }

  return hdfs.mkdirs(newFolderPath);
 }

 /*
  * deleting the directory in the HDFS
  */
 public static boolean deleteDirectory(FileSystem hdfs, String dirName)
   throws IOException {
  Path deleteFolderName = new Path(dirName);
  if (hdfs.exists(deleteFolderName)) {
   return hdfs.delete(deleteFolderName, true);
  }
  return false;
 }

 /*
  * copying file from local to HDFS
  */

 public static boolean copyFileFromLocal(FileSystem hdfs,
   String hdfsFolderName, String localFileAbsPath) throws IOException {
  Path localFilePath = new Path(localFileAbsPath);
  String localFileName = new File(localFileAbsPath).getName();
  Path hdfsFolderpath = new Path(hdfsFolderName + "/" + localFileName);

  // copyFromLocalFile creates (or overwrites) the destination file itself,
  // so no explicit createNewFile call is needed.
  hdfs.copyFromLocalFile(localFilePath, hdfsFolderpath);
  return true;
 }

 public static void readDataFromHDFSFile(FileSystem hdfs, String filePath)
   throws IllegalArgumentException, IOException {
  BufferedReader bfr = new BufferedReader(new InputStreamReader(
    hdfs.open(new Path(filePath))));
  String str = null;
  while ((str = bfr.readLine()) != null) {
   System.out.println(str);
  }
  bfr.close();
 }

 public static void writingDataToHDFS(FileSystem hdfs, String filePath)
   throws IllegalArgumentException, IOException {
  StringBuilder sb = new StringBuilder();
  for (int i = 1; i <= 5; i++) {
   sb.append("Test creating file" + i);
   sb.append("\n");
  }
  byte[] byt = sb.toString().getBytes();
  FSDataOutputStream fsOutStream = hdfs.create(new Path(filePath));
  fsOutStream.write(byt);

  fsOutStream.close();
 }

}
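deleteDirectory is defined above but never called from main. To exercise it (note that it removes the test folder, so run it last), a one-line addition to main would be:

System.out.println("Delete Directory: " + deleteDirectory(hdfs, "testFolder"));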