# Source code for resimpy.simulate.initiator.General

__version__ = "v1.0"
__copyright__ = "Copyright 2022"
__license__ = "MIT"
__lab__ = "Adam Cribbs lab"

import time
import numpy as np
from resimpy.util.random.Sampling import sampling as ranspl
from resimpy.util.random.Number import number as rannum
from resimpy.util.file.read.Reader import reader as pfreader
from resimpy.util.file.create.Folder import folder as crtfolder
from resimpy.util.sequence.symbol.Single import single as dnasgl
from resimpy.read.umi.Design import design as dumi
from resimpy.read.similarity.distance.Hamming import hamming
from resimpy.read.seq.Design import design as dseq


class general:
    """Generate the initial pool of simulated reads.

    Each read is the concatenation of the components named in ``condis``
    (``'umi'`` and/or ``'seq'``), in the order they are listed there.
    """

    def __init__(
            self,
            seq_num,
            is_seed=False,
            umi_unit_pattern=3,
            umi_unit_len=12,
            seq_len=100,
            is_sv_umi_lib=True,
            is_sv_seq_lib=True,
            umi_lib_fpn='./umi.txt',
            seq_lib_fpn='./seq.txt',
            working_dir='./simu/',
            condis=None,
            sim_thres=2,
            permutation=0,
    ):
        """Store the simulation settings and create the working directory.

        Parameters
        ----------
        seq_num
            Number of reads that ``pooling`` generates.
        is_seed
            Forwarded as ``use_seed`` to the random-number generator.
        umi_unit_pattern
            Forwarded to the UMI designer; also the multiplier in ``umi_len``.
        umi_unit_len
            Number of random draws requested for one UMI.
        seq_len
            Number of random draws requested for one sequence.
        is_sv_umi_lib, is_sv_seq_lib
            Forwarded as ``is_sv`` to the UMI / sequence designers
            (presumably "save to library file" -- confirm against resimpy).
        umi_lib_fpn, seq_lib_fpn
            Forwarded as ``lib_fpn`` to the UMI / sequence designers.
        working_dir
            Directory passed to ``crtfolder.osmkdir``.
        condis
            Ordered list of read components. Defaults to ``['umi']``.
        sim_thres
            A candidate UMI is rejected when its Hamming distance to any
            already accepted UMI is below this value.
        permutation
            Shifts every random seed by ``permutation * seq_num``.
        """
        self.pfreader = pfreader()
        self.ranspl = ranspl()
        self.rannum = rannum()
        self.dnasgl = dnasgl()
        self.crtfolder = crtfolder()
        # The designers are kept as classes; pooling() instantiates them per read.
        self.dumi = dumi
        self.dseq = dseq

        self.is_seed = is_seed
        self.is_sv_umi_lib = is_sv_umi_lib
        self.umi_lib_fpn = umi_lib_fpn
        self.is_sv_seq_lib = is_sv_seq_lib
        self.seq_lib_fpn = seq_lib_fpn
        self.seq_num = seq_num
        self.umi_unit_pattern = umi_unit_pattern
        self.umi_unit_len = umi_unit_len
        self.seq_len = seq_len
        # A None sentinel replaces the former mutable list default so that
        # instances never share one default list object.
        self.condis = ['umi'] if condis is None else condis
        self.sim_thres = sim_thres
        self.permutation = permutation

        self.dna_map = self.dnasgl.todict(
            nucleotides=self.dnasgl.get(universal=True),
            reverse=True,
        )
        self.crtfolder.osmkdir(working_dir)

    @property
    def umi_len(self):
        """Return ``umi_unit_pattern * umi_unit_len``."""
        return self.umi_unit_pattern * self.umi_unit_len

    def pooling(self):
        """Build the initial read pool.

        For every read index a UMI and/or a sequence is drawn, depending on
        ``self.condis``. UMI candidates are redrawn until none of the already
        accepted UMIs lies at a Hamming distance below ``self.sim_thres``.

        Returns
        -------
        list
            One ``[read, index, 'init']`` entry per read, where ``read`` is
            the concatenation of the components in ``self.condis`` order.

        Raises
        ------
        KeyError
            If ``self.condis`` names a component other than ``'umi'`` or
            ``'seq'``.
        """
        stime = time.time()
        seqs = []
        umi_pool = []
        # Number of rejected UMI candidates so far. It is never reset, so
        # every retry (for any read) uses a new seed.
        umi_cnt = 0
        for seq_id in np.arange(self.seq_num):
            seed_base = seq_id + self.permutation * self.seq_num
            read_struct_ref = {}
            if 'umi' in self.condis:
                while True:
                    umip = self.dumi(
                        dna_map=self.dna_map,
                        umi_unit_pattern=self.umi_unit_pattern,
                        pseudorandom_num=self.rannum.uniform(
                            low=0,
                            high=4,
                            num=self.umi_unit_len,
                            use_seed=self.is_seed,
                            seed=seed_base + umi_cnt,
                        ),
                    )
                    umi_i = umip.reoccur(is_sv=False)
                    # Stop at the first accepted UMI that is too similar;
                    # there is no need to measure the remaining ones.
                    too_similar = any(
                        hamming().general(umi_i, umi_j) < self.sim_thres
                        for umi_j in umi_pool
                    )
                    if not too_similar:
                        break
                    umi_cnt += 1
                umi_pool.append(umi_i)
                read_struct_ref['umi'] = umi_i
                umip.write(
                    res=umi_i,
                    lib_fpn=self.umi_lib_fpn,
                    is_sv=self.is_sv_umi_lib,
                )
            if 'seq' in self.condis:
                seq_i = self.dseq(
                    dna_map=self.dna_map,
                    pseudorandom_num=self.rannum.uniform(
                        low=0,
                        high=4,
                        num=self.seq_len,
                        use_seed=self.is_seed,
                        # Fixed offset keeps sequence seeds apart from UMI seeds.
                        seed=seed_base + 5000000,
                    ),
                ).general(lib_fpn=self.seq_lib_fpn, is_sv=self.is_sv_seq_lib)
                read_struct_ref['seq'] = seq_i
            # Re-key in condis order so the read is pasted in that order.
            read_struct_pfd_order = {
                condi: read_struct_ref[condi] for condi in self.condis
            }
            seqs.append([
                self.paste([*read_struct_pfd_order.values()]),
                seq_id,
                'init',
            ])
        etime = time.time()
        print("===>time for generating initial pool of sequences: {:.3f}s".format(etime-stime))
        return seqs

    def paste(self, read_struct=None):
        """Concatenate the string parts in ``read_struct``.

        Parameters
        ----------
        read_struct
            Iterable of strings; ``None`` (the default) is treated as empty.

        Returns
        -------
        str
            The parts joined without a separator.
        """
        if read_struct is None:
            read_struct = ()
        return ''.join(read_struct)
if __name__ == "__main__":
    # Demo run: build a pool of 50 reads, each a 10-nt UMI followed by a
    # 90-nt sequence, with seeded random draws. A UMI is rejected when its
    # Hamming distance to an accepted UMI is below 3.
    p = general(
        seq_num=50,
        umi_unit_pattern=1,
        umi_unit_len=10,
        seq_len=100 - 10,
        is_seed=True,
        is_sv_umi_lib=True,
        umi_lib_fpn='./umi3.txt',
        is_sv_seq_lib=True,
        seq_lib_fpn='./seq.txt',
        working_dir='./',
        condis=['umi', 'seq'],
        sim_thres=3,
        permutation=0,
    )
    res = p.pooling()
    print(res)