本文共 22509 字,大约阅读时间需要 75 分钟。
1.序列比对指将两个或多个序列排列在一起,标明其相似之处。序列中可以插入间隔(通常用短横线“-”表示)。对应的相同或相似的符号(在核酸中是A, T(或U), C, G,在蛋白质中是氨基酸残基的单字母表示)排列在同一列上。这一方法常用于研究由共同祖先进化而来的序列,特别是如蛋白质序列或DNA序列等生物序列。进行相似度分析。
2.分类
3.双序列局部比对。
/** 1. Design an algorithm to find the Longest Common Subsequence, Longest Common Substring between two sequences. * Created by zoutai on 2017/10/1. * 从后向前遍历查找,迭代/遍历+动态规划 * 但是递归的方式有一个坏处,就是内部往往那个存在很多重复计算,最大时间复杂度2^n * 所以我们采用空间换时间的办法:直接将所有的L[i][j]遍历出来,时间复杂度即为(mn) */public class LongestCommonSubsequence { public static void main(String[] args) { String s1 = "AGGTAB"; String s2 = "GXTXAYB"; int m = s1.length(),n=s2.length(); String result = getLongestSubStr(s1,s2,m,n); System.out.println(result); } // 从后向前遍历 public static String getLongestSubStr(String s1,String s2,int m,int n) { if (m == 0 || n == 0) return ""; if (s1.charAt(m - 1) == s2.charAt(n - 1)) return getLongestSubStr(s1, s2, m - 1, n - 1)+s1.charAt(m - 1); else return max(getLongestSubStr(s1, s2, m, n - 1), getLongestSubStr(s1, s2, m - 1, n)); } // 比较字符串长度 public static String max(String a, String b) { if(a==null||b==null){ return ""; } return (a.length() > b.length()) ? a : b; }}
import java.util.LinkedList;import java.util.List;/** * Created by zoutai on 2017/10/2. * 2.Tandem repeats. Let P be a pattern of length n and T a text of length m. * Let Pm be the concatenation of P with itself m times, so Pm has length mn. * We want to compute a local alignment between Pm and T . */public class Alignment02 { public static void main(String[] args) { Listlist = new LinkedList (); /* 样例1: 输出为: G-CTAGCT GACTA-CT */ String P = new String("AGCT"); String T = new String("GACTACT"); String Pm = new String(); Pm = pToPm(P,T); list = localAlig(Pm,T); /* (不复制)样例2: 输出为: GTT-AC GTTGAC */// String t1 = new String("TGTTACGG");// String t2 = new String("GGTTGACTA");// list = localAlig(t1, t2); for (String str : list) { System.out.println(str.toString()); } } private static List localAlig(String A, String B) { List twoList = new LinkedList (); int allMax = 0; int col = A.length(); int row = B.length(); int[][] mat = new int[row + 1][col + 1]; for (int i = 0; i < col + 1; i++) { mat[0][i] = 0; } for (int j = 0; j < row + 1; j++) { mat[j][0] = 0; } for (int i = 1; i < col+1; i++) { for (int j = 1; j < row+1; j++) { int Match = mat[j - 1][i - 1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3); int Delete = mat[j - 1][i] + (0 - 2); int Insert = mat[j][i - 1] + (0 - 2); mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete)); } } // 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点 int tempMax = 0; int temprow =0,tempcol = 0; for (int i =0;i 0 && j > 0) { if (i > 0 && j > 0 && mat[j][i] == mat[j-1][i-1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3)) { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; i = i - 1; j = j - 1; } else if (i == 1 || j == 1) { break; } else if (j > 0 && mat[j][i] == mat[j - 1][i] + (0 - 2)) { AlignmentA = "-" + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; j = j - 1; } else { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = "-" + AlignmentB; i = i - 1; } } twoList.add(AlignmentA); twoList.add(AlignmentB); return twoList; } // 将p字符串转为pm private static String pToPm(String p, String t) { // TODO Auto-generated method stub int n = p.length(); int m = t.length(); StringBuffer sb = new StringBuffer(); for (int i=0;i
* 线性空白函数:
import java.util.LinkedList;import java.util.List;/** * Created by zoutai on 2017/10/2. * Implementation of simple local alignment algorithm * * 线性罚分,Ws=2;Wd=0; */public class Alignment0301 { public static final int[][] PAM250 = { { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0}, {-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3}, {-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3}, {-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3}, { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1}, {-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2}, {-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2}, { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3}, {-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3}, {-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3}, {-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1}, {-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2}, {-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1}, {-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1}, {-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2}, { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2}, { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0}, {-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3}, {-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1}, { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}}; public static void main(String[] args) { String t1 = new String("TGTTACGG"); String t2 = new String("GGTTGACTA"); Listlist = new LinkedList (); list = localAlig(t1, t2); for (String str : list) { System.out.println(str.toString()); } } private static List localAlig(String A, String B) { List twoList = new LinkedList (); int allMax = 0; int col = A.length(); int row = B.length(); int[][] mat = new int[row + 1][col + 1]; for (int i = 0; i < col + 1; i++) { mat[0][i] = 0; } for (int j = 0; j < row + 1; j++) { mat[j][0] = 0; } for (int i = 1; i < col+1; i++) { for (int j = 1; j < row+1; j++) { int Match = mat[j - 1][i - 1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3); int Delete = mat[j - 1][i] + (0 - 2); int Insert = mat[j][i - 1] + (0 - 2); mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete)); } } // 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点 int tempMax = 0; int temprow =0,tempcol = 0; for (int i =0;i 1 && j > 1) { if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1] + (B.charAt(j-1)==A.charAt(i-1)?3:-3)) { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; i = i - 1; j = j - 1; } else if (i == 1 || j == 1) { break; } else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) { AlignmentA = "-" + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; j = j - 1; } else { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = "-" + AlignmentB; i = i - 1; } } twoList.add(AlignmentA); twoList.add(AlignmentB); return twoList; }}
* Affine空白函数:
import java.util.LinkedList;import java.util.List;/** * Created by zoutai on 2017/10/2. * Implementation the classical affine-gap local alignment algorithm * * Affine罚分,Ws=2;Wd=10; */public class Alignment0302 { public static final int[][] PAM250 = { { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0}, {-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3}, {-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3}, {-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3}, { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1}, {-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2}, {-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2}, { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3}, {-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3}, {-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3}, {-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1}, {-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2}, {-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1}, {-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1}, {-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2}, { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2}, { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0}, {-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3}, {-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1}, { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}}; public static void main(String[] args) { String t1 = new String("TGTTACGG"); String t2 = new String("GGTTGACTA"); Listlist = new LinkedList (); list = localAlig(t1, t2); for (String str : list) { System.out.println(str.toString()); } } private static List localAlig(String A, String B) { List twoList = new LinkedList (); int allMax = 0; int col = A.length(); int row = B.length(); int[][] mat = new int[row + 1][col + 1]; for (int i = 0; i < col + 1; i++) { mat[0][i] = -10; } for (int j = 0; j < row + 1; j++) { mat[j][0] = -10; } for (int i = 1; i < col+1; i++) { for (int j = 1; j < row+1; j++) { int Match = mat[j - 1][i - 1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1)); int Delete = mat[j - 1][i] + (0 - 2); int Insert = mat[j][i - 1] + (0 - 2); mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete)); } } // 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点 int tempMax = 0; int temprow =0,tempcol = 0; for (int i =0;i 1 && j > 1) { if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1))) { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; i = i - 1; j = j - 1; } else if (i == 1 || j == 1) { break; } else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) { AlignmentA = "-" + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; j = j - 1; } else { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = "-" + AlignmentB; i = i - 1; } } twoList.add(AlignmentA); twoList.add(AlignmentB); return twoList; }}
* 其中给定罚分矩阵:
import java.util.HashMap;//This stores scoring resources, including matricespublic class Scoring { //matrix contents are from rosalind: public static final int[][] BLOSUM62 = { { 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2}, { 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2}, {-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3}, {-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2}, {-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3}, { 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3}, {-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2}, {-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1}, {-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2}, {-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1}, {-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1}, {-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2}, {-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3}, {-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1}, {-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2}, { 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2}, { 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2}, { 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1}, {-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2}, {-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7}}; //This following matrix is copied from rosalind: public static final int[][] PAM250 = { { 4,-1,-2,-2,0,-1,-1,0,-2,-1,-1,-1,-1,-2,-1,1,0,-3,-2,0}, {-1,5,0,-2,-3,1,0,-2,0,-3,-2,2,-1,-3,-2,-1,-1,-3,-2,-3}, {-2,0,6,1,-3,0,0,0,1,-3,-3,0,-2,-3,-2,1,0,-4,-2,-3}, {-2,-2,1,6,-3,0,2,-1,-1,-3,-4,-1,-3,-3,-1,0,-1,-4,-3,-3}, { 0,-3,-3,-3,9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1}, {-1,1,0,0,-3,5,2,-2,0,-3,-2,1,0,-3,-1,0,-1,-2,-1,-2}, {-1,0,0,2,-4,2,5,-2,0,-3,-3,1,-2,-3,-1,0,-1,-3,-2,-2}, { 0,-2,0,-1,-3,-2,-2,6,-2,-4,-4,-2,-3,-3,-2,0,-2,-2,-3,-3}, {-2,0,1,-1,-3,0,0,-2,8,-3,-3,-1,-2,-1,-2,-1,-2,-2,2,-3}, {-1,-3,-3,-3,-1,-3,-3,-4,-3,4,2,-3,1,0,-3,-2,-1,-3,-1,3}, {-1,-2,-3,-4,-1,-2,-3,-4,-3,2,4,-2,2,0,-3,-2,-1,-2,-1,1}, {-1,2,0,-1,-3,1,1,-2,-1,-3,-2,5,-1,-3,-1,0,-1,-3,-2,-2}, {-1,-1,-2,-3,-1,0,-2,-3,-2,1,2,-1,5,0,-2,-1,-1,-1,-1,1}, {-2,-3,-3,-3,-2,-3,-3,-3,-1,0,0,-3,0,6,-4,-2,-2,1,3,-1}, {-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4,7,-1,-1,-4,-3,-2}, { 1,-1,1,0,-1,0,0,0,-1,-2,-2,0,-1,-2,-1,4,1,-3,-2,-2}, { 0,-1,0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1,1,5,-2,-2,0}, {-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1,1,-4,-3,-2,11,2,-3}, {-2,-2,-2,-3,-2,-1,-2,-3,2,-1,-1,-2,-1,3,-3,-2,-2,2,7,-1}, { 0,-3,-3,-3,-1,-2,-2,-3,-3,3,1,-2,1,-1,-2,-2,0,-3,-1,4}}; public static HashMapmatrixIndex = new HashMap<>(); //initializing values for the matrices, as they are arranged here: static { matrixIndex.put('A', 0); matrixIndex.put('C', 1); matrixIndex.put('D', 2); matrixIndex.put('E', 3); matrixIndex.put('F', 4); matrixIndex.put('G', 5); matrixIndex.put('H', 6); matrixIndex.put('I', 7); matrixIndex.put('K', 8); matrixIndex.put('L', 9); matrixIndex.put('M', 10); matrixIndex.put('N', 11); matrixIndex.put('P', 12); matrixIndex.put('Q', 13); matrixIndex.put('R', 14); matrixIndex.put('S', 15); matrixIndex.put('T', 16); matrixIndex.put('V', 17); matrixIndex.put('W', 18); matrixIndex.put('Y', 19); } // returns the score of a match between the two provided amino acids, as scored by the specified matrix: public static int getScore(int[][] matrix, char reference, char search) { reference = Character.toUpperCase(reference); search = Character.toUpperCase(search); return matrix[matrixIndex.get(reference)][matrixIndex.get(search)]; }}
* 对文本进行处理测试:
import java.io.*;import java.util.LinkedList;import java.util.List;/** * Created by zoutai on 2017/10/2. */public class TestAlignment03 { public static final int[][] PAM250 = { { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0}, {-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3}, {-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3}, {-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3}, { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1}, {-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2}, {-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2}, { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3}, {-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3}, {-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3}, {-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1}, {-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2}, {-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1}, {-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1}, {-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2}, { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2}, { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0}, {-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3}, {-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1}, { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}}; public static void main(String[] args) throws FileNotFoundException,IOException{ File txtFile = new File("data_hm.txt"); BufferedReader br = new BufferedReader(new FileReader(txtFile)); int lineNumber = 0; String line = null; StringBuffer t1 = new StringBuffer(); StringBuffer t2 = new StringBuffer(); List strArr1 = new LinkedList(); List strArr2 = new LinkedList(); int[] resultArr = new int[25]; Boolean first = true; Boolean flag = true; int isget = 1; while ((line = br.readLine())!=null) { if(first) { first = false; continue; } else if(line.equals("")) { first = true; if(isget==2) { strArr1.add(t1); strArr2.add(t2); t1 = new StringBuffer(); t2 = new StringBuffer(); isget--; continue; } isget++; flag = !flag; } else { if(flag) { t1.append(line); } else { t2.append(line); } } } Listlist = new LinkedList (); System.out.println(strArr1.size()); for (int i =0;i<25;i++) { System.out.println(strArr1.get(i).toString()); System.out.println(strArr2.get(i).toString()); System.out.println(); resultArr[i] = localAlig(strArr1.get(i).toString(), strArr2.get(i).toString()); } System.out.println(resultArr); for (int score: resultArr) { System.out.println(score); } } private static int localAlig(String A, String B) { List twoList = new LinkedList (); int allMax = 0; int col = A.length(); int row = B.length(); int[][] mat = new int[row + 1][col + 1]; for (int i = 0; i < col + 1; i++) { mat[0][i] = -10; } for (int j = 0; j < row + 1; j++) { mat[j][0] = -10; } for (int i = 1; i < col+1; i++) { for (int j = 1; j < row+1; j++) { int Match = mat[j - 1][i - 1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1)); int Delete = mat[j - 1][i] + (0 - 2); int Insert = mat[j][i - 1] + (0 - 2); mat[j][i] = Math.max(Math.max(Match,0), Math.max(Insert, Delete)); } } // 再次遍历矩阵,获取矩阵最大值,用于查找序列初始点 int tempMax = 0; int temprow =0,tempcol = 0; for (int i =0;i 1 && j > 1) { if (i > 1 && j > 1 && mat[j][i] == mat[j-1][i-1] + Scoring.getScore(PAM250, A.charAt(i - 1), B.charAt(j - 1))) { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; i = i - 1; j = j - 1; } else if (i == 1 || j == 1) { break; } else if (j > 1 && mat[j][i] == mat[j - 1][i] + (0 - 2)) { AlignmentA = "-" + AlignmentA; AlignmentB = B.charAt(j - 1) + AlignmentB; j = j - 1; } else { AlignmentA = A.charAt(i - 1) + AlignmentA; AlignmentB = "-" + AlignmentB; i = i - 1; } } twoList.add(AlignmentA); twoList.add(AlignmentB); //return twoList; return tempMax; }}
第一题:查找最长子序列
第二题:序列比较算法,使用最普通的方式,Ws = 2, Wd = 0, 无罚分矩阵,默认(相等:不相等)=(+3,-1) 第三题:相对于第二题,添加了罚分矩阵PAM250,(20*20);同时simple local alignment algorithm使用线性罚分,Wd=0,Ws=2; affine-gap local alignment algorithm使用affine罚分,Wd=10,Ws=2。一题:求两个序列的最长公共子序列或子串(动态规划)
参考链接:第二题:
参考:第三题:
参考github用户:相关资料连接链接: 密码:gmyf