# This is a parameters file for the GADMA software.
# Lines starting with # are ignored.
# Comments at the end of a line are ignored too.
# Every line contains: Identifier of parameter : value.
#!!! marks primary parameters: pay attention to them.
#!!!
# Output directory where all GADMA output is written.
# It must be set to a missing or empty directory.
# If the run is resumed from another directory and the output
# directory isn't set, GADMA will add '_resumed' to the name of
# the previous output directory.
Output directory: my_example_run
#!!!
# Input data can be sfs file (should end with .fs) or
# file of SNP's in dadi format (should end with .txt) or
# VCF file + popmap file (sample population map).
Input data: tests/test_data/DATA/sfs/YRI_CEU.fs
# 'Population labels' is sequence of population names (the same
# names as in input file)
# If .fs file is in old format then it would rename population
# labels that are absent.
# It is necessary to put them in order from the most ancient to the
# most recent (in case of more than two populations).
# This is important, because the last of the formed populations takes
# part in the next split.
# For example, if we have YRI - African population,
# CEU - European population and CHB - Chinese population,
# then we can write YRI, CEU, CHB or YRI, CHB, CEU
# (YRI must be at the first place)
# Default: from input file
Population labels: [YRI, CEU]
# Also one can project the spectrum down to a smaller size.
# For example, if we have 80 individuals in each of three
# populations, then the spectrum will be 81x81x81 and one can
# project it to 21x21x21 by setting the 'Projections' parameter
# to 20, 20, 20.
# Default: from input file
Projections: [20, 20]
# To indicate if outgroup information is included in SFS data
# one can set Outgroup option.
# If outgroup is False then SFS will be folded.
# Default: from input file
Outgroup: True
# Effective length of sequence that was used to build SFS data.
# Should be used together with Mutation rate and can be replaced
# by the Theta0 setting.
# Default: None
Sequence length: 4040000
#!!!
# Are SNP's linked or unlinked?
# If they are linked, then Composite Likelihood Akaike
# Information Criterion (CLAIC) will be used to compare models.
# If they are unlinked, then usual Akaike Information Criterion
# (AIC) will be used.
# Default: True
Linked SNP's: True
#!!!
# If SNP's are linked, please set the directory with bootstrapped
# data; it is needed in order to calculate CLAIC.
# Bootstrap should be done over the regions of the genome.
# Default: None
Directory with bootstrap: Null
#!!!
# Now all main parameters:
#
# Engine for the demographic inference.
# Default: moments
Engine: moments
# If you choose to use dadi, please set the pts parameter - number
# of points in the grid. Otherwise these pts will be used only in
# the generated dadi code.
# Default: Let n = max number of individuals in one population,
# then pts = n, n+10, n+20
Pts: [20, 30, 40]
#!!!
# Print parameters of model in units of N_ref = N_A.
# N_A will be placed in brackets at the end of string.
# Default: False
Relative parameters: False
# Total mutation flux - theta.
# It is equal to:
# theta = 4 * μ * L
# where μ - mutation rate per site per generation and
# L - effective sequenced length, which accounts for losses
# in alignment and missed calls.
# Note: one should estimate μ based on generation time.
# Default: 1.0
Theta0: Null
# Instead of Theta0 mutation rate can be set independently.
# Should be used together with Sequence length option.
# Default: None
Mutation rate: 2.35e-08
# Recombination rate
# Default: None
Recombination rate: Null
# Time (years) for one generation. Can be float.
# It is important for drawing models. If one doesn't want to draw,
# one can skip it.
# Default: 1.0
Time for generation: Null
#!!!
# One should choose the demographic history to infer.
# It can be custom or set up with structure.
# 1. Using a custom demographic model.
# Please specify a file with a function named 'model_func' in it.
# So file should contain:
# def model_func(params, ns, pts) in case of dadi
# or
# def model_func(params, ns) in case of moments
# Default: None
Custom filename: Null
# Now one should specify either bounds or identifiers
# of the custom model's parameters. All values are in Nref units.
# Lower and upper bounds - lists of numbers.
# List of usual bounds:
# N: 1e-2 - 100
# T: 0 - 5
# m: 0 - 10
# s: 0 - 1
# These bounds will be taken automatically if identifiers are set.
# Default: None
Lower bound: Null
Upper bound: Null
# An identifier list:
# T - time
# N - size of population
# m - migration
# s - split event, proportion in which population size
# is divided to form two new populations.
# Default: None
Parameter identifiers: Null
# 2. Structure is for non-custom models!
# Structure of model for one population - number of time periods
# (e.g. 5).
# Structure of model for two populations - number of time periods
# before the split of the ancestral population and after it (e.g. 2,2).
# Structure of model for three populations - number of time periods
# before first split, between first and second splits and after
# second split (e.g. 2,1,2).
#
# Structure of initial model:
# Default: all is ones - 1 or 1,1 or 1,1,1
Initial structure: [1, 1]
# Structure of final model:
# Default: equals to initial structure
Final structure: [2, 1]
#!!!
# Additional settings for demographic model with structure.
#
# Use sudden changes of population sizes only. Decreases
# the number of parameters.
# Default: False
Only sudden: False
# The set of available size dynamics could be set to any subset.
# Sud is for sudden size change (constant during the next time epoch),
# Lin - linear size change,
# Exp - exponential size change.
# If `Only sudden` is True then this setting will be [Sud].
# Default: [Sud, Lin, Exp]
Dynamics: [Sud, Lin, Exp]
# Disable migrations in demographic models.
# Default: False
No migrations: False
# Makes all migrations symmetrical.
# Default: False
Symmetric migrations: False
# Enable/disable migrations selectively.
# Default: None
Migration masks: Null
# Enable/disable inference of selection coefficients.
# Is supported by moments and dadi engines only.
# Default: False
Selection: False
# Enables/disables inference of dominance coefficient.
# If True then setting `Selection` must also be set to True.
# Is supported by moments and dadi engines only.
# Kept at False here because `Selection` is False in this file;
# to infer dominance, set both `Selection` and `Dominance` to True.
# Default: False
Dominance: False
# Estimate fraction of ancestral population as parameter of split.
# If False then population splits and each of new populations
# has its own size as parameter of the model.
# Default: False
Split fractions: True
# Estimate inbreeding coefficients as model parameters.
# Can be used only for dadi engine.
# Default: False
Inbreeding: False
# If False then multinomial approach is used in dadi and moments.
# In multinomial approach ancestral size is inferred implicitly.
# Default: False
Ancestral size as parameter: False
# It is possible to limit the time of splits.
# Split 1 is the most ancient split.
# !Note that time is in genetic units (2 * time for 1 generation):
# e.g. we want to limit by 150 kya, time for one generation is
# 25 years, then bound will be 150000 / (2*25) = 3000.
#
# Upper bound for split 1 (in case of 2 or 3 populations).
# Default: None
Upper bound of first split: Null
# Upper bound for split 2 (in case of 3 populations).
# Default: None
Upper bound of second split: Null
#!!!
# Local optimization.
#
# Choice of local optimization, that is launched after
# each genetic algorithm.
# Choices:
#
# * optimize (BFGS method)
#
# * optimize_log (BFGS method)
#
# * optimize_powell (Powell’s conjugate direction method)
# (Note: it is implemented in moments: one needs to have moments
# installed.)
#
# (If optimizations are often hitting the parameter bounds,
# try using these methods:)
# * optimize_lbfgsb
# * optimize_log_lbfgsb
# (Note that it is probably best to start with the vanilla BFGS
# methods, because the L-BFGS-B methods will always try parameter
# values at the bounds during the search.
# This can dramatically slow model fitting.)
#
# * optimize_log_fmin (simplex (a.k.a. amoeba) method)
#
# * hill_climbing
#
# Default: optimize_powell
Local optimizer: BFGS_log
# Parameters of pipeline
#
# One can automatically generate dadi and moments code for models.
# If 0 then only current best model will be printed in GA's
# working directory.
# Also the result model will be saved there.
# If specified (not 0) then the model will be saved every N
# iterations in the python code folder.
# Default: 0
Print models' code every N iteration: 100
# Engine that will draw demographic model plots.
# Could be moments or demes.
# Default: moments
Model plot engine: moments
# One can automatically draw models every N iteration.
# If 0 then never.
# Pictures are saved in GA's directory in the picture folder.
# Default: 0
Draw models every N iteration: 100
# One can choose time units in models' plots: years or thousand
# years (kya, KYA). If time for one generation isn't specified
# then time is in genetic units.
# Default: years
Units of time in drawing: generations
# Minimum value that will be drawn in SFS plots.
# Default: 1
Vmin: 1
# No std output.
# Default: False
Silence: False
# Verbosity of optimizations output.
# Default: 1
Verbose: 1
# How many times to launch GADMA with these parameters.
# Default: 1
Number of repeats: 3
# How many processes to use for these repeats.
# Note that one repeat isn't parallelized, so increasing number
# of processes will not affect the time of one repeat.
# It is desirable that the number of repeats is a multiple of
# the number of processes.
# Default: 1
Number of processes: 3
# One can resume from some other launch of GADMA by setting
# output directory of it to 'Resume from' parameter.
# You can set again new parameters of resumed launch.
Resume from: Null
#
# If you want to take only models from previous run set this
# flag. Then iterations of GA will start from 0 and values of
# mutation rate and strength will be initial.
# Default: None
Only models: False