/* CleanGenbankFastaComment.java -- A tool to reduce fasta comment to a
 * single field
 * Copyright (C) 2003 Jean Marie ROUILLARD (jmrouill@umich.edu)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this program; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation Inc., 59 Temple Place - Suite 330,
 * Boston, MA  02111-1307 USA
 */

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.FileOutputStream;
import java.util.StringTokenizer;

/**
 * Command-line tool that rewrites a FASTA file so that every comment
 * (header) line is reduced to a single field.
 *
 * <p>Each line containing {@code >} is split on the given separator
 * characters and replaced by {@code >} followed by the requested field;
 * every other line is copied unchanged. The result is written next to the
 * input file, under the same name with {@code .clean} appended.
 */
public class CleanGenbankFastaComment {

    /**
     * Entry point.
     *
     * @param arg exactly three arguments: the FASTA file path, the 1-based
     *            index of the field to keep, and the separator characters
     *            handed to {@link StringTokenizer}. With any other number of
     *            arguments a usage message is printed and nothing is written.
     * @throws java.io.IOException if the input cannot be read or the output
     *                             file cannot be created
     * @throws NumberFormatException if the field index is not an integer
     */
    static public void main (String arg[]) throws java.io.IOException{

        if (arg.length != 3) {
            //Error message if the expected number of arguments is not provided
            System.out.println("Syntax is java CleanGenbankFastaComment Fasta_file field separator\n");
            System.out.println("Example: java CleanGenbankFastaComment Fasta_file 4 \"|\" will transform");
            System.out.println(">gi|30089998|ref|NM_178042.1| Homo sapiens BAF53... into >NM_178042.1\n");
            return;
        }

        //Which field and separator to use with the StringTokenizer.
        //Parsed before any file is opened so that a malformed field index
        //does not leave an empty .clean file (or an open stream) behind.
        int field = Integer.parseInt(arg[1]);
        String separator = arg[2];

        //Create input and output stream; both are closed in finally blocks
        //so the buffered output is always flushed to disk.
        BufferedReader readBuff = new BufferedReader(new FileReader(arg[0]));
        try {
            PrintWriter saveAs = new PrintWriter(new FileOutputStream(arg[0] + ".clean"));
            try {
                //Read each line from the input file
                String line = readBuff.readLine();
                while (line != null) {
                    if (line.indexOf(">") != -1) {
                        //Line is a fasta comment. Need to be processed
                        String newComment = extractField(line, field, separator);

                        //save the new comment
                        saveAs.println(">" + newComment);
                        System.out.println("Write >" + newComment);
                    }
                    else {
                        //Line is sequence. Do not need to be processed
                        saveAs.println(line);
                    }
                    line = readBuff.readLine();
                }
            }
            finally {
                saveAs.close();
            }
        }
        finally {
            readBuff.close();
        }

        System.out.println("\nThe new fasta file has been saved as " + arg[0] + ".clean");
    }

    /**
     * Returns the trimmed {@code field}-th token (1-based) of {@code line}
     * when split on the characters of {@code separator}, or the string
     * {@code "null"} when the line has fewer tokens than that.
     *
     * <p>Example: field 4 of
     * {@code >gi|30089998|ref|NM_178042.1| Homo sapiens BAF53...} with
     * separator {@code |} is {@code NM_178042.1}.
     */
    private static String extractField(String line, int field, String separator) {
        StringTokenizer st = new StringTokenizer(line, separator);

        //Discard all the non relevant fields; stop early on short lines so
        //nextToken() cannot throw NoSuchElementException.
        for (int i = 0; i < field - 1 && st.hasMoreTokens(); i++) {
            st.nextToken();
        }

        //Read the relevant field or replace it by null
        if (st.hasMoreTokens()) {
            return st.nextToken().trim();
        }
        return "null";
    }
}