Source code for resimpy.simulate.dispatcher.batch.General

__version__ = "v1.0"
__copyright__ = "Copyright 2022"
__license__ = "MIT"
__lab__ = "Adam Cribbs lab"

import argparse
from resimpy.simulate.dispatcher.single.General import general as simugeneral


[docs]class general(object): def __init__(self, mode, permutation_num=10, umi_unit_pattern=1, umi_unit_len_fixed=10, seq_len=20, umi_num_fixed=50, pcr_num_fixed=10, pcr_err_fixed=1e-3, seq_err_fixed=1e-3, ampl_rate_fixed=0.85, sim_thres_fixed=3, seq_sub_spl_rate=1, ampl_rates=None, umi_unit_lens=None, pcr_nums=None, pcr_errs=None, seq_errs=None, sv_fp='./', ): if mode == 'internal': self.permutation_num = permutation_num self.seq_len = seq_len self.umi_unit_len_fixed = umi_unit_len_fixed self.umi_num_fixed = umi_num_fixed self.umi_unit_pattern = umi_unit_pattern self.pcr_num_fixed = pcr_num_fixed self.pcr_err_fixed = pcr_err_fixed self.seq_err_fixed = seq_err_fixed self.ampl_rate_fixed = ampl_rate_fixed self.sim_thres_fixed = sim_thres_fixed self.seq_sub_spl_rate = seq_sub_spl_rate self.ampl_rates = ampl_rates self.umi_unit_lens = umi_unit_lens self.pcr_nums = pcr_nums self.pcr_errs = pcr_errs self.seq_errs = seq_errs self.sv_fp = sv_fp print('run resimpy internally.') else: self.parser = argparse.ArgumentParser( description='Welcome to the resimpy_general module' ) self.parser.add_argument( "--recipe", "-r", metavar='recipe', dest='r', required=True, type=str, help='which condition among seq_errs, ampl_rates, pcr_errs, pcr_nums, and umi_lens is used', ) self.parser.add_argument( "--read_structure", "-rs", metavar='read_structure', dest='rs', required=True, type=str, help='read structure consisting of a UMI block (umi) and a sequence block (seq), e.g., umi or umi+seq', ) self.parser.add_argument( "--permutation_num", "-perm_num", metavar='permutation_num', dest='perm_num', required=True, type=int, help='permutation test number', ) self.parser.add_argument( "--umi_unit_pattern", "-umiup", metavar='umi_unit_pattern', dest='umiup', default=1, type=int, help='unit UMI pattern. This is to specify if UMIs consist of monomer, dimer, trimer, or other blocks', ) self.parser.add_argument( "--umi_unit_len_fixed", "-umiul", metavar='umi_unit_len_fixed', dest='umiul', default=10, type=int, help='unit UMI length fixed. This is to specify the length of a monomer UMI. The final UMI length = umi_unit_pattern * umi_unit_len_fixed', ) self.parser.add_argument( "--umi_num_fixed", "-umi_num", metavar='umi_num_fixed', dest='umi_num', default=50, type=int, help='UMI number', ) self.parser.add_argument( "--seq_length", "-seq_len", metavar='seq_length', dest='seq_len', default=20, type=int, help='genomic sequence length', ) self.parser.add_argument( "--sim_thres_fixed", "-sim_thres", metavar='sim_thres_fixed', dest='sim_thres', default=3, type=int, help='edit distance-measured similarities between UMIs', ) self.parser.add_argument( "--pcr_num_fixed", "-pcr_num", metavar='pcr_num_fixed', dest='pcr_num', default=8, type=int, help='Number of PCR cycles fixed', ) self.parser.add_argument( "--ampl_rate_fixed", "-ampl_rate", metavar='ampl_rate_fixed', dest='ampl_rate', default=0.85, type=float, help='PCR amplification rate fixed', ) self.parser.add_argument( "--seq_sub_spl_rate", "-spl_rate", metavar='seq_sub_spl_rate', dest='spl_rate', default=1.0, type=float, help='Subsampling rate for sequencing', ) self.parser.add_argument( "--pcr_err_fixed", "-pcr_err", metavar='pcr_err_fixed', dest='pcr_err', default=0.001, type=float, help='PCR error fixed', ) self.parser.add_argument( "--seq_err_fixed", "-seq_err", metavar='seq_err_fixed', dest='seq_err', default=0.001, type=float, help='Sequencing error fixed', ) self.parser.add_argument( "--ampl_set_rates", "-ampl_rates", metavar='ampl_set_rates', dest='ampl_rates', default='0.1;0.2;0.3;0.4;0.5;0.6;0.7;0.8;0.9;1.0', type=str, help='a semicolon-partitioned string of a set of amplification rates', ) self.parser.add_argument( "--umi_unit_set_lens", "-umi_lens", metavar='umi_unit_set_lens', dest='umi_lens', default='6;7;8;9;10;11;12', type=str, help='a semicolon-partitioned string of a set of unit UMI lens', ) self.parser.add_argument( "--pcr_set_nums", "-pcr_nums", metavar='pcr_set_nums', dest='pcr_nums', default='6;7;8;9;10;11;12;13;14', type=str, help='a semicolon-partitioned string of a set of PCR numbers', ) self.parser.add_argument( "--pcr_set_errs", "-pcr_errs", metavar='pcr_set_errs', dest='pcr_errs', default='1e-4;1e-3;1e-2;0.1', type=str, help='a semicolon-partitioned string of a set of PCR errors', ) self.parser.add_argument( "--seq_set_errs", "-seq_errs", metavar='seq_set_errs', dest='seq_errs', default='1e-3;1e-2;0.1', type=str, help='a semicolon-partitioned string of a set of sequencing errors', ) self.parser.add_argument( "--out_directory", "-out_dir", metavar='out_directory', dest='out_dir', default='./', type=str, help='output directory', ) args = self.parser.parse_args() self.recipe = args.r self.read_structure = args.rs self.permutation_num = args.perm_num self.umi_unit_pattern = args.umiup self.umi_unit_len_fixed = args.umiul self.umi_num_fixed = args.umi_num self.seq_len = args.seq_len self.pcr_num_fixed = args.pcr_num self.pcr_err_fixed = args.pcr_err self.seq_err_fixed = args.seq_err self.ampl_rate_fixed = args.ampl_rate self.sim_thres_fixed = args.sim_thres self.seq_sub_spl_rate = args.spl_rate self.ampl_rates = args.ampl_rates self.umi_unit_lens = args.umi_lens self.pcr_nums = args.pcr_nums self.pcr_errs = args.pcr_errs self.seq_errs = args.seq_errs self.sv_fp = args.out_dir self.seq_errs = [float(i) for i in self.seq_errs.split(';')] self.pcr_errs = [float(i) for i in self.pcr_errs.split(';')] self.pcr_nums = [int(i) for i in self.pcr_nums.split(';')] self.umi_unit_lens = [int(i) for i in self.umi_unit_lens.split(';')] self.ampl_rates = [float(i) for i in self.ampl_rates.split(';')] self.read_structure = [str(i) for i in self.read_structure.split('+')] print(self.seq_errs) print(self.pcr_errs) print(self.pcr_nums) print(self.umi_unit_lens) print(self.ampl_rates)
[docs] def pcrNums(self, ): for pn in range(self.permutation_num): simu_params = { 'init_seq_setting': { 'seq_num': self.umi_num_fixed, 'umi_unit_pattern': self.umi_unit_pattern, 'umi_unit_len': self.umi_unit_len_fixed, 'seq_len': self.seq_len, 'is_seed': True, 'working_dir': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_umi_lib': True, 'umi_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/umi.txt', 'is_sv_seq_lib': True if 'seq' in self.read_structure else False, 'seq_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/seq.txt', 'condis': self.read_structure, 'sim_thres': self.sim_thres_fixed, 'permutation': pn, }, 'ampl_rate': self.ampl_rate_fixed, 'pcr_nums': self.pcr_nums, 'err_num_met': 'nbinomial', 'pcr_error': self.pcr_err_fixed, 'seq_error': self.seq_err_fixed, 'seq_sub_spl_rate': self.seq_sub_spl_rate, 'use_seed': False, 'seed': None, 'write': { 'fastq_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'fastq_fn': '', } } p = simugeneral(simu_params) print(p.ondemandPCRNums()) return
[docs] def pcrErrs(self, ): for pn in range(self.permutation_num): simu_params = { 'init_seq_setting': { 'seq_num': self.umi_num_fixed, 'umi_unit_pattern': self.umi_unit_pattern, 'umi_unit_len': self.umi_unit_len_fixed, 'seq_len': self.seq_len, 'is_seed': True, # ### /*** block. general ***/ # 'working_dir': to('data/simu/monomer/general/1/pcr_err/permute_') + str(pn) + '/', # 'is_sv_umi_lib': True, # 'umi_lib_fpn': to('data/simu/monomer/general/1/pcr_err/permute_') + str(pn) + '/umi.txt', # # ### /*** block. dimer ***/ # 'working_dir': to('data/simu/dimer/pcr_err/permute_') + str(pn) + '/', # 'is_sv_umi_lib': True, # 'umi_lib_fpn': to('data/simu/dimer/pcr_err/permute_') + str(pn) + '/umi.txt', # ### /*** block. trimer ***/ 'working_dir': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_umi_lib': True, 'umi_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/umi.txt', 'is_sv_seq_lib': True if 'seq' in self.read_structure else False, 'seq_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/seq.txt', 'condis': self.read_structure, 'sim_thres': self.sim_thres_fixed, 'permutation': pn, }, 'ampl_rate': self.ampl_rate_fixed, 'pcr_num': self.pcr_num_fixed, 'err_num_met': 'nbinomial', 'pcr_errors': self.pcr_errs, 'seq_error': self.seq_err_fixed, 'seq_sub_spl_rate': self.seq_sub_spl_rate, 'use_seed': False, 'seed': None, 'write': { 'fastq_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'fastq_fn': '', } } p = simugeneral(simu_params) print(p.ondemandPCRErrs()) return
[docs] def seqErrs(self, ): for pn in range(self.permutation_num): simu_params = { 'init_seq_setting': { 'seq_num': self.umi_num_fixed, 'umi_unit_pattern': self.umi_unit_pattern, 'umi_unit_len': self.umi_unit_len_fixed, 'seq_len': self.seq_len, 'is_seed': True, # ### /*** block. general ***/ # 'working_dir': to('data/simu/monomer/general/1/seq_errs/permute_') + str(pn) + '/', # 'is_sv_umi_lib': True, # 'umi_lib_fpn': to('data/simu/monomer/general/1/seq_errs/permute_') + str(pn) + '/umi.txt', # ### /*** block. dimer ***/ 'working_dir': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_umi_lib': True, 'umi_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/umi.txt', # ### /*** block. trimer ***/ # 'working_dir': to('data/simu/trimer/pcr8/seq_errs/permute_') + str(pn) + '/', # 'is_sv_umi_lib': True, # 'umi_lib_fpn': to('data/simu/trimer/pcr8/seq_errs/permute_') + str(pn) + '/umi.txt', 'is_sv_seq_lib': True if 'seq' in self.read_structure else False, 'seq_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/seq.txt', 'condis': self.read_structure, 'sim_thres': self.sim_thres_fixed, 'permutation': pn, }, 'ampl_rate': self.ampl_rate_fixed, 'pcr_num': self.pcr_num_fixed, 'err_num_met': 'nbinomial', 'pcr_error': self.pcr_err_fixed, 'seq_errors': self.seq_errs, 'seq_sub_spl_rate': self.seq_sub_spl_rate, 'use_seed': False, 'seed': None, 'write': { 'fastq_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'fastq_fn': '', } } p = simugeneral(simu_params) print(p.ondemandSeqErrs()) return
[docs] def umiLens(self, ): for pn in range(self.permutation_num): simu_params = { 'init_seq_setting': { 'seq_num': self.umi_num_fixed, 'umi_unit_pattern': self.umi_unit_pattern, 'umi_unit_lens': self.umi_unit_lens, 'seq_len': self.seq_len, 'is_seed': True, 'working_dir': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_umi_lib': True, 'umi_lib_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_seq_lib': True if 'seq' in self.read_structure else False, 'seq_lib_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'condis': self.read_structure, 'sim_thres': self.sim_thres_fixed, 'permutation': pn, }, 'ampl_rate': self.ampl_rate_fixed, 'pcr_num': self.pcr_num_fixed, 'err_num_met': 'nbinomial', 'pcr_error': self.pcr_err_fixed, 'seq_error': self.seq_err_fixed, 'seq_sub_spl_rate': self.seq_sub_spl_rate, 'use_seed': False, 'seed': None, 'write': { 'fastq_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'fastq_fn': '', } } p = simugeneral(simu_params) print(p.ondemandUMILens()) return
[docs] def amplRates(self, ): for pn in range(self.permutation_num): simu_params = { 'init_seq_setting': { 'seq_num': self.umi_num_fixed, 'umi_unit_pattern': self.umi_unit_pattern, 'umi_unit_len': self.umi_unit_len_fixed, 'seq_len': self.seq_len, 'is_seed': True, 'working_dir': self.sv_fp + '/permute_' + str(pn) + '/', 'is_sv_umi_lib': True, 'umi_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/umi.txt', 'is_sv_seq_lib': True if 'seq' in self.read_structure else False, 'seq_lib_fpn': self.sv_fp + '/permute_' + str(pn) + '/seq.txt', 'condis': self.read_structure, 'sim_thres': self.sim_thres_fixed, 'permutation': pn, }, 'ampl_rates': self.ampl_rates, 'pcr_num': self.pcr_num_fixed, 'err_num_met': 'nbinomial', 'pcr_error': self.pcr_err_fixed, 'seq_error': self.seq_err_fixed, 'seq_sub_spl_rate': self.seq_sub_spl_rate, 'use_seed': False, 'seed': None, 'write': { 'fastq_fp': self.sv_fp + '/permute_' + str(pn) + '/', 'fastq_fn': '', } } p = simugeneral(simu_params) print(p.ondemandAmplRates()) return
[docs]def run(): from pyfiglet import Figlet vignette1 = Figlet(font='standard') print(vignette1.renderText('Resimpy Toolkit')) vignette2 = Figlet(font='doom') print(vignette2.renderText('Welcome')) p = general( mode='external', ) if p.recipe == 'seq_errs': p.seqErrs() elif p.recipe == 'pcr_errs': p.pcrErrs() elif p.recipe == 'pcr_nums': p.pcrNums() elif p.recipe == 'umi_lens': p.umiLens() elif p.recipe == 'ampl_rates': p.amplRates() return 'job is finished now'
if __name__ == "__main__": p = general( mode='internal', # mode='external', ampl_rates=None, umi_unit_lens=None, pcr_nums=None, pcr_errs=None, seq_errs='1e-3;1e-2;0.1', ) # print(p.pcrNums()) # print(p.pcrErrs()) print(p.seqErrs()) # print(p.umiLens()) # print(p.amplRates())