pax_global_header00006660000000000000000000000064127434707060014524gustar00rootroot0000000000000052 comment=5893b16ff78e6d1b1214472fe305a362b8d96a38 GenomicConsensus-master/000077500000000000000000000000001274347070600156435ustar00rootroot00000000000000GenomicConsensus-master/.gitignore000066400000000000000000000001611274347070600176310ustar00rootroot00000000000000*~ *.pyc *.pyo .project .pydevproject .idea *.egg-info doc/_build build/ dist/ TAGS evidence_dump/ nosetests.xml GenomicConsensus-master/CHANGELOG000066400000000000000000000044341274347070600170620ustar00rootroot00000000000000Version 2.1.0 * Major fixes for arrow * Documentation for arrow * Update for substantially-refactored ConsensusCore2 * --dumpEvidence support for Arrow Version 2.0.0 * Working support for Arrow and POA-only consensus models Version 1.1.0 * Working support for DataSet read and reference files Version 1.0.0 * Working support for BAM files adhering to our BAM spec (version 3.0b6) Version 0.9.2 (bugfix release, issued with SMRTanalysis 2.3.0p2) * Fix bug where output contained truncated contig names Version 0.9.1 (released with SMRTanalysis 2.3.0p1) * Preliminary support for BAM file in quiver Version 0.9.0 (released with SMRTanalysis 2.3) * Support for P6-C4 chemistry * Rate of MLE convergences failures reduced drastically * quiver will now abort if it is provided data lacking the full complement of QV tracks (except the MergeQV, which is allowed to be absent, as is the case in data from old basecaller versions) * Use the new chemistry information decoding spec---expects barcode information in the cmp.h5 but will fall back to the old "SequencingChemistry" tag if the barcodes are absent. 
Version 0.8.0 (released with SMRTanalysis 2.2) * Improved consensus calling at edges of contigs and amplicons * Fixes to reduce algorithmic convergence failures * Improved support for chemistry mixtures * Faster analysis of P5-C3 chemistry * Improved robustness using P5-C3 chemistry * Faster startup time for large references Version 0.7.0 (released with SMRTanalysis 2.1) * Support for diploid variant calling in plurality and quiver algorithms * Auto-windowing to skip coverage deserts, drastically improving user experience for amplicon workflows. * Command line support for operating on a chosen barcode * Fix bug in dinucleotide repeat refinement * Modification to variants.gff schema to support diploid variant reporting * Fix for memory leak affecting jobs with many reference contigs (large assemblies, for example) * Improved support for P5-C3 chemistry * Improved support for P4-C2 chemistry (was included in 2.0.1 release) Version 0.6.0 (released with SMRTanalysis 2.0) * Improved Quiver accuracy, reducing errors in dinucleotide repeat regions * Improved, extensible support for existing and forthcoming sequencing chemistries GenomicConsensus-master/GenomicConsensus/000077500000000000000000000000001274347070600211255ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/ResultCollector.py000066400000000000000000000164761274347070600246420ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander, Jim Drake import cProfile, logging, os.path, sys from multiprocessing import Process from threading import Thread from collections import OrderedDict, defaultdict from .options import options from GenomicConsensus import reference, consensus, utils, windows from .io.VariantsGffWriter import VariantsGffWriter from pbcore.io import FastaWriter, FastqWriter class ResultCollector(object): """ Gathers results and writes to a file. 
""" def __init__(self, resultsQueue, algorithmName, algorithmConfig): self._resultsQueue = resultsQueue self._algorithmName = algorithmName self._algorithmConfig = algorithmConfig def _run(self): self.onStart() sentinelsReceived = 0 while sentinelsReceived < options.numWorkers: result = self._resultsQueue.get() if result is None: sentinelsReceived += 1 else: self.onResult(result) self.onFinish() def run(self): if options.doProfiling: cProfile.runctx("self._run()", globals=globals(), locals=locals(), filename=os.path.join(options.temporaryDirectory, "profile-%s.out" % (self.name))) else: self._run() # ================================== # Overridable interface begins here. # def onStart(self): self.referenceBasesProcessedById = OrderedDict() for refId in reference.byName: self.referenceBasesProcessedById[refId] = 0 self.variantsByRefId = defaultdict(list) self.consensusChunksByRefId = defaultdict(list) # open file writers self.fastaWriter = self.fastqWriter = self.gffWriter = None if options.fastaOutputFilename: self.fastaWriter = FastaWriter(options.fastaOutputFilename) if options.fastqOutputFilename: self.fastqWriter = FastqWriter(options.fastqOutputFilename) if options.gffOutputFilename: self.gffWriter = VariantsGffWriter(options.gffOutputFilename, vars(options), reference.byName.values()) def onResult(self, result): window, cssAndVariants = result css, variants = cssAndVariants self._recordNewResults(window, css, variants) self._flushContigIfCompleted(window) def onFinish(self): logging.info("Analysis completed.") if self.fastaWriter: self.fastaWriter.close() if self.fastqWriter: self.fastqWriter.close() if self.gffWriter: self.gffWriter.close() logging.info("Output files completed.") def _recordNewResults(self, window, css, variants): refId, refStart, refEnd = window self.consensusChunksByRefId[refId].append(css) self.variantsByRefId[refId] += variants self.referenceBasesProcessedById[refId] += (refEnd - refStart) def _flushContigIfCompleted(self, window): 
refId, _, _ = window refEntry = reference.byName[refId] refName = refEntry.fullName basesProcessed = self.referenceBasesProcessedById[refId] requiredBases = reference.numReferenceBases(refId, options.referenceWindows) if basesProcessed == requiredBases: # This contig is done, so we can dump to file and delete # the data structures. if self.gffWriter: self.gffWriter.writeVariants(sorted(self.variantsByRefId[refId])) del self.variantsByRefId[refId] # # If the user asked to analyze a window or a set of # windows, we output a FAST[AQ] contig per analyzed # window. Otherwise we output a fasta contig per # reference contig. # # We try to be intelligent about naming the output # contigs, to include window information where applicable. # for span in reference.enumerateSpans(refId, options.referenceWindows): _, s, e = span if (s == 0) and (e == refEntry.length): spanName = refName else: spanName = refName + "_%d_%d" % (s, e) cssName = consensus.consensusContigName(spanName, self._algorithmName) # Gather just the chunks pertaining to this span chunksThisSpan = [ chunk for chunk in self.consensusChunksByRefId[refId] if windows.windowsIntersect(chunk.refWindow, span) ] css = consensus.join(chunksThisSpan) if self.fastaWriter: self.fastaWriter.writeRecord(cssName, css.sequence) if self.fastqWriter: self.fastqWriter.writeRecord(cssName, css.sequence, css.confidence) del self.consensusChunksByRefId[refId] class ResultCollectorProcess(ResultCollector, Process): def __init__(self, *args): Process.__init__(self) self.daemon = True super(ResultCollectorProcess,self).__init__(*args) class ResultCollectorThread(ResultCollector, Thread): def __init__(self, *args): Thread.__init__(self) self.daemon = True self.exitcode = 0 super(ResultCollectorThread,self).__init__(*args) GenomicConsensus-master/GenomicConsensus/Worker.py000066400000000000000000000116631274347070600227570ustar00rootroot00000000000000################################################################################# # 
Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
################################################################################# # Author: David Alexander, Jim Drake import cProfile, logging, os.path from multiprocessing import Process from threading import Thread from .options import options from .reference import windowToString from .io.utils import loadCmpH5, loadBam class Worker(object): """ Base class for compute worker that read reference coordinates from the task queue, perform variant calling, then push results back to another queue, to be written to a GFF file by a collector. All tasks that are O(genome length * coverage depth) should be distributed to compute workers, leaving the collector worker only O(genome length) work to do. """ def __init__(self, workQueue, resultsQueue, algorithmConfig): self._workQueue = workQueue self._resultsQueue = resultsQueue self._algorithmConfig = algorithmConfig def _run(self): if options.usingBam: self._inAlnFile = loadBam(options.inputFilename, options.referenceFilename) else: self._inAlnFile = loadCmpH5(options.inputFilename, options.referenceFilename, disableChunkCache=options.disableHdf5ChunkCache) self.onStart() while True: datum = self._workQueue.get() if datum is None: # Sentinel indicating end of input. Place a sentinel # on the results queue and end this worker process. 
self._resultsQueue.put(None) break else: if datum.hasCoverage: msg = "%s received work unit, coords=%s" else: msg = "%s received work unit, coords=%s (inadequate coverage)" logging.debug(msg % (self.name, windowToString(datum.window))) result = self.onChunk(datum) self._resultsQueue.put(result) self.onFinish() def run(self): if options.pdb: import ipdb with ipdb.launch_ipdb_on_exception(): self._run() elif options.doProfiling: cProfile.runctx("self._run()", globals=globals(), locals=locals(), filename=os.path.join(options.temporaryDirectory, "profile-%s.out" % (self.name))) else: self._run() #== # Begin overridable interface #== def onStart(self): pass def onChunk(self, workChunk): """ This function is the heart of the matter. workChunk -> result """ pass def onFinish(self): pass class WorkerProcess(Worker, Process): """Worker that executes as a process.""" def __init__(self, *args): Process.__init__(self) super(WorkerProcess,self).__init__(*args) self.daemon = True class WorkerThread(Worker, Thread): """Worker that executes as a thread (for debugging purposes only).""" def __init__(self, *args): Thread.__init__(self) super(WorkerThread,self).__init__(*args) self.daemon = True self.exitcode = 0 GenomicConsensus-master/GenomicConsensus/__init__.py000066400000000000000000000035271274347070600232450ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander __VERSION__ = "2.1.0" GenomicConsensus-master/GenomicConsensus/algorithmSelection.py000066400000000000000000000063131274347070600253360ustar00rootroot00000000000000#!/usr/bin/env python ################################################################################# # Copyright (c) 2011-2016, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from .utils import die def bestAlgorithm_(sequencingChemistries): """ Identify the (de novo) consensus algorithm we expect to deliver the best results, given the sequencing chemistries represented in an alignment file. We key off the sequencing chemistries as follows: - Just RS chemistry data? 
Then use quiver (at least for now, until we get arrow > quiver on P6-C4) - Else (either all Sequel data, or a mix of Sequel and RS data), use arrow. - Unknown chemistry found? Return None; we should abort if this is found Note that the handling/rejection of chemistry mixtures (including mixtures of Sequel and RS data) is left to the algorithm itself. """ if len(sequencingChemistries) == 0: raise ValueError("sequencingChemistries must be nonempty list or set") chems = set(sequencingChemistries) anyUnknown = "unknown" in chems allRS = all(not(chem.startswith("S/")) for chem in chems) and (not anyUnknown) if anyUnknown: return None elif allRS: return "quiver" else: return "arrow" def bestAlgorithm(sequencingChemistries): ba = bestAlgorithm_(sequencingChemistries) if ba is None: die("Unidentifiable sequencing chemistry present in dataset. " + "Check if your SMRTanalysis installation is out-of-date.") else: return ba GenomicConsensus-master/GenomicConsensus/arrow/000077500000000000000000000000001274347070600222575ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/arrow/__init__.py000077500000000000000000000035741274347070600244040ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Authors: David Alexander, Lance Hepler import utils import model # import evidence GenomicConsensus-master/GenomicConsensus/arrow/arrow.py000077500000000000000000000272251274347070600237760ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Authors: David Alexander, Lance Hepler import logging, os.path import ConsensusCore2 as cc, numpy as np from .. 
import reference from ..options import options from ..Worker import WorkerProcess, WorkerThread from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread from GenomicConsensus.consensus import Consensus, ArrowConsensus, join from GenomicConsensus.windows import kSpannedIntervals, holes, subWindow from GenomicConsensus.variants import filterVariants, annotateVariants from GenomicConsensus.arrow.evidence import ArrowEvidence from GenomicConsensus.arrow import diploid from GenomicConsensus.utils import die import GenomicConsensus.arrow.model as M import GenomicConsensus.arrow.utils as U def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig, depthLimit, arrowConfig): """ High-level routine for calling the consensus for a window of the genome given a cmp.h5. Identifies the coverage contours of the window in order to identify subintervals where a good consensus can be called. Creates the desired "no evidence consensus" where there is inadequate coverage. """ winId, winStart, winEnd = refWindow logging.info("Arrow operating on %s" % reference.windowToString(refWindow)) if options.fancyChunking: # 1) identify the intervals with adequate coverage for arrow # consensus; restrict to intervals of length > 10 alnHits = U.readsInWindow(alnFile, refWindow, depthLimit=20000, minMapQV=arrowConfig.minMapQV, strategy="long-and-strand-balanced", stratum=options.readStratum, barcode=options.barcode) starts = np.fromiter((hit.tStart for hit in alnHits), np.int) ends = np.fromiter((hit.tEnd for hit in alnHits), np.int) intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage, starts, ends, minLength=10) coverageGaps = holes(refWindow, intervals) allIntervals = sorted(intervals + coverageGaps) if len(allIntervals) > 1: logging.info("Usable coverage in %s: %r" % (reference.windowToString(refWindow), intervals)) else: allIntervals = [ (winStart, winEnd) ] # 2) pull out the reads we will use for each interval # 3) call consensusForAlignments on 
the interval subConsensi = [] variants = [] for interval in allIntervals: intStart, intEnd = interval intRefSeq = referenceContig[intStart:intEnd] subWin = subWindow(refWindow, interval) windowRefSeq = referenceContig[intStart:intEnd] alns = U.readsInWindow(alnFile, subWin, depthLimit=depthLimit, minMapQV=arrowConfig.minMapQV, strategy="long-and-strand-balanced", stratum=options.readStratum, barcode=options.barcode) clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ] clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig) if len([ a for a in clippedAlns if a.spansReferenceRange(*interval) ]) >= arrowConfig.minPoaCoverage: logging.debug("%s: Reads being used: %s" % (reference.windowToString(subWin), " ".join([str(hit.readName) for hit in alns]))) css = U.consensusForAlignments(subWin, intRefSeq, clippedAlns, arrowConfig) siteCoverage = U.coverageInWindow(subWin, alns) variants_ = U.variantsFromConsensus(subWin, windowRefSeq, css.sequence, css.confidence, siteCoverage, options.aligner, ai=None) filteredVars = filterVariants(options.minCoverage, options.minConfidence, variants_) # Annotate? if options.annotateGFF: annotateVariants(filteredVars, clippedAlns) variants += filteredVars # Dump? maybeDumpEvidence = \ ((options.dumpEvidence == "all") or (options.dumpEvidence == "outliers") or (options.dumpEvidence == "variants") and (len(variants) > 0)) if maybeDumpEvidence: refId, refStart, refEnd = subWin refName = reference.idToName(refId) windowDirectory = os.path.join( options.evidenceDirectory, refName, "%d-%d" % (refStart, refEnd)) ev = ArrowEvidence.fromConsensus(css) if options.dumpEvidence != "outliers": ev.save(windowDirectory) elif (np.max(np.abs(ev.delta)) > 20): # Mathematically I don't think we should be seeing # deltas > 6 in magnitude, but let's just restrict # attention to truly bonkers outliers. 
ev.save(windowDirectory) else: css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus, subWin, intRefSeq) subConsensi.append(css) # 4) glue the subwindow consensus objects together to form the # full window consensus css = join(subConsensi) # 5) Return return css, variants class ArrowWorker(object): @property def arrowConfig(self): return self._algorithmConfig def onChunk(self, workChunk): referenceWindow = workChunk.window refId, refStart, refEnd = referenceWindow refSeqInWindow = reference.sequenceInWindow(referenceWindow) # Quick cutout for no-coverage case if not workChunk.hasCoverage: noCallCss = ArrowConsensus.noCallConsensus(self.arrowConfig.noEvidenceConsensus, referenceWindow, refSeqInWindow) return (referenceWindow, (noCallCss, [])) # General case eWindow = reference.enlargedReferenceWindow(referenceWindow, options.referenceChunkOverlap) _, eStart, eEnd = eWindow # We call consensus on the enlarged window and then map back # to the reference and clip the consensus at the implied # bounds. This seems to be more reliable thank cutting the # consensus bluntly refContig = reference.byName[refId].sequence refSequenceInEnlargedWindow = refContig[eStart:eEnd] # # Get the consensus for the enlarged window. # css_, variants_ = \ consensusAndVariantsForWindow(self._inAlnFile, eWindow, refContig, options.coverage, self.arrowConfig) # # Restrict the consensus and variants to the reference window. 
# ga = cc.Align(refSequenceInEnlargedWindow, css_.sequence) targetPositions = cc.TargetToQueryPositions(ga) cssStart = targetPositions[refStart-eStart] cssEnd = targetPositions[refEnd-eStart] cssSequence = css_.sequence[cssStart:cssEnd] cssQv = css_.confidence[cssStart:cssEnd] variants = [ v for v in variants_ if refStart <= v.refStart < refEnd ] consensusObj = Consensus(referenceWindow, cssSequence, cssQv) return (referenceWindow, (consensusObj, variants)) # # Slave process/thread classes # class ArrowWorkerProcess(ArrowWorker, WorkerProcess): pass class ArrowWorkerThread(ArrowWorker, WorkerThread): pass # # Plugin API # __all__ = [ "name", "availability", "configure", "slaveFactories" ] name = "arrow" availability = (True, "OK") def configure(options, alnFile): if alnFile.readType != "standard": raise U.IncompatibleDataException( "The Arrow algorithm requires a BAM file containing standard (non-CCS) reads." ) if options.diploid: logging.warn("Diploid analysis not yet supported under Arrow model.") # test available chemistries supp = set(cc.SupportedChemistries()) logging.info("Found consensus models for: ({0})".format(", ".join(sorted(supp)))) used = set(alnFile.sequencingChemistry) if options.parametersSpec != "auto": used = set([options.parametersSpec]) unsupp = used - supp if unsupp: die("Arrow: unsupported chemistries found: ({0})".format(", ".join(sorted(unsupp)))) logging.info("Using consensus models for: ({0})".format(", ".join(sorted(used)))) return M.ArrowConfig(minMapQV=options.minMapQV, noEvidenceConsensus=options.noEvidenceConsensusCall, computeConfidence=(not options.fastMode), minReadScore=options.minReadScore, minHqRegionSnr=options.minHqRegionSnr, minZScore=options.minZScore, minAccuracy=options.minAccuracy, chemistryOverride=(None if options.parametersSpec == "auto" else options.parametersSpec)) def slaveFactories(threaded): # By default we use slave processes. The tuple ordering is important. 
if threaded: return (ArrowWorkerThread, ResultCollectorThread) else: return (ArrowWorkerProcess, ResultCollectorProcess) GenomicConsensus-master/GenomicConsensus/arrow/diploid.py000077500000000000000000000213121274347070600242570ustar00rootroot00000000000000# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
################################################################################# # Authors: David Alexander, Lance Hepler from GenomicConsensus.arrow.utils import allSingleBaseMutations from GenomicConsensus.variants import Variant import numpy as np import ConsensusCore2 as cc # IUPAC reference: # http://www.bioinformatics.org/sms/iupac.html _packIupac = { ("A", "G") : "R" , ("G", "A") : "R" , ("C", "T") : "Y" , ("T", "C") : "Y" , ("G", "C") : "S" , ("C", "G") : "S" , ("A", "T") : "W" , ("T", "A") : "W" , ("G", "T") : "K" , ("T", "G") : "K" , ("A", "C") : "M" , ("C", "A") : "M" } _unpackIupac = { "R" : ("A", "G") , "Y" : ("C", "T") , "S" : ("G", "C") , "W" : ("A", "T") , "K" : ("G", "T") , "M" : ("A", "C") } def packIUPAC(bases): return _packIupac[bases] def unpackIUPAC(iupacCode): return _unpackIupac[iupacCode] def isHeterozygote(base): return (base in _unpackIupac) def packMuts(cssBase, mut1, mut2): # Turn two muts (with same Start, End, LengthDiff) into a single mutation to # IUPAC. The no-op mutation is coded as None. # # Example1: (_, Subs A, Subs T) -> Subs W # Example2: (_, Ins A, Ins T) -> Ins W # Example3: (A, None, Subs T) -> Subs W # nonNullMut = mut1 or mut2 start = nonNullMut.Start() mutType = nonNullMut.Type newBase1 = mut1.Base if mut1 else cssBase newBase2 = mut2.Base if mut2 else cssBase newBasePacked = packIUPAC((newBase1, newBase2)) return cc.Mutation(mutType, start, newBasePacked) def scoresForPosition(ai, pos): muts = allSingleBaseMutations(str(ai), positions=[pos]) noMutScore = [0] * ai.NumReads() mutScores_ = [ ai.ReadLLs(mut) for mut in muts ] mutScores = np.column_stack([noMutScore] + mutScores_).astype(np.float32) return mutScores def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow, cssQvInWindow=None, siteCoverage=None, aligner="affine", ai=None): """ Compare the consensus and the reference in this window, returning a list of variants. Uses the integrator to identify heterozygous variants. 
""" assert (cssQvInWindow is None) == (siteCoverage is None) # Both or none refId, refStart, refEnd = refWindow if ai is not None: # # Hunting diploid variants: # 1. find confident heterozygous sites; # 2. build a "diploid consensus" using IUPAC encoding # for het sites; mark cssQv accordingly # 3. align diploid consensus to reference # 4. extract and decorate variants # assert str(ai) == cssSequenceInWindow iupacMutations = [] # List of (Mutation, confidence) for pos in xrange(0, ai.Length()): ds = cc.IsSiteHeterozygous(scoresForPosition(ai, pos), 40) if ds: muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow, positions=[pos])) mut0 = muts[ds.Allele0] mut1 = muts[ds.Allele1] cssBase = cssSequenceInWindow[pos] packedMut = packMuts(cssBase, mut0, mut1) iupacMutations.append((packedMut, 40)) # Create diploidCss by applying mutations, meanwhile updating the # confidence vector accordingly. diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations], cssSequenceInWindow) diploidQv = list(cssQvInWindow) if cssQvInWindow is not None else None runningLengthDiff = 0 for (mut, conf) in iupacMutations: start = mut.Start() + runningLengthDiff end = mut.End() + runningLengthDiff diploidQv[start:end] = [conf] assert len(diploidCss) == len(diploidQv) cssSequenceInWindow = diploidCss cssQvInWindow = diploidQv vars = variantsFromAlignment(refWindow, refSequenceInWindow, cssSequenceInWindow, cssQvInWindow, siteCoverage) return vars def variantsFromAlignment(refWindow, refSeq, cssSeq, cssQV=None, refCoverage=None): """ Extract the variants implied by a pairwise alignment of cssSeq to refSeq reference. If cssQV, refCoverage are provided, they will be used to decorate the variants with those attributes. Arguments: - cssQV: QV array, same length as css - refCoverage: coverage array, sample length as reference window This is trickier than in the haploid case. We have to break out diploid variants as single bases, in order to avoid implying phase. 
""" variants = [] refId, refStart, refEnd = refWindow aln = cc.AlignAffineIupac(refSeq, cssSeq); alnTarget = aln.Target() alnQuery = aln.Query() assert (cssQV is None) == (refCoverage is None) # Both or none assert len(refSeq) == refEnd - refStart assert cssQV is None or len(cssSeq) == len(cssQV) assert refCoverage is None or len(refSeq) == len(refCoverage) transcript = [ X if (Q != "N" and T != "N") else "N" for (X, T, Q) in zip(aln.Transcript(), alnTarget, alnQuery) ] variants = [] runStart = -1 runStartRefPos = None runX = None refPos = refStart for pos, (X, T, Q) in enumerate(zip(transcript, alnTarget, alnQuery)): if X != runX or isHeterozygote(Q): if runStart >= 0 and runX not in "MN": # Package up the run and dump a variant ref = alnTarget[runStart:pos].replace("-", "") read = alnQuery [runStart:pos].replace("-", "") if isHeterozygote(read): allele1, allele2 = unpackIUPAC(read) var = Variant(refId, runStartRefPos, refPos, ref, allele1, allele2) else: var = Variant(refId, runStartRefPos, refPos, ref, read) variants.append(var) runStart = pos runStartRefPos = refPos runX = X if T != "-": refPos += 1 # This might be better handled within the loop above, just keeping # track of Qpos, Tpos if cssQV is not None: cssPosition = cc.TargetToQueryPositions(aln) for v in variants: # HACK ALERT: we are not really handling the confidence or # coverage for variants at last position of the window # correctly here. refPos_ = min(v.refStart-refStart, len(refCoverage)-1) cssPos_ = min(cssPosition[v.refStart-refStart], len(cssQV)-1) if refCoverage is not None: v.coverage = refCoverage[refPos_] if cssQV is not None: v.confidence = cssQV[cssPos_] return variants GenomicConsensus-master/GenomicConsensus/arrow/evidence.py000077500000000000000000000163251274347070600244250ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Authors: David Alexander, Lance Hepler __all__ = [ "ArrowEvidence" ] import h5py, logging, os.path, numpy as np from collections import namedtuple from itertools import groupby from bisect import bisect_left, bisect_right from pbcore.io import FastaReader, FastaWriter from .utils import scoreMatrix from .. 
import reference class ArrowEvidence(object): Mutation = namedtuple("Mutation", ("Position", "Type", "FromBase", "ToBase")) @staticmethod def _parseMutName(mutName): fields = mutName.split(" ") pos = int(fields[0]) type, fromBase, _, toBase = fields[1:] return ArrowEvidence.Mutation(pos, type, fromBase, toBase) def __init__(self, refWindow, consensus, rowNames, colNames, baselineScores, scores): assert isinstance(consensus, str) self.refWindow = refWindow # tuple(str, int, int) self.consensus = consensus self.rowNames = rowNames self.colNames = colNames self.baselineScores = baselineScores self.scores = scores self.muts = map(ArrowEvidence._parseMutName, self.colNames) @staticmethod def fromConsensus(css): rowNames, colNames, baselineScores, scores = scoreMatrix(css.ai) return ArrowEvidence(css.refWindow, css.sequence, rowNames, colNames, baselineScores, scores) @property def refName(self): return self.refWindow[0] @property def refStart(self): return self.refWindow[1] @property def refEnd(self): return self.refWindow[2] @property def positions(self): return [ mut.Position for mut in self.muts ] @property def uniquePositions(self): return sorted(list(set(self.positions))) @property def delta(self): return self.scores - self.baselineScores[:, np.newaxis] @staticmethod def load(dir): """ Load an ArrowEvidence from a directory """ if dir.endswith("/"): dir = dir[:-1] refStart, refEnd = map(int, dir.split("/")[-1].split("-")) refName = dir.split("/")[-2] refWindow = (refName, refStart, refEnd) with FastaReader(dir + "/consensus.fa") as fr: consensus = next(iter(fr)).sequence with h5py.File(dir + "/arrow-scores.h5", "r") as f: scores = f["Scores"].value baselineScores = f["BaselineScores"].value colNames = f["ColumnNames"].value rowNames = f["RowNames"].value return ArrowEvidence(refWindow, consensus, rowNames, colNames, baselineScores, scores) def save(self, dir): """ Save this ArrowEvidence to a directory. The directory will be *created* by this method. 
Format of evidence dump: evidence_dump/ ref000001/ 0-1005/ consensus.fa arrow-scores.h5 995-2005/ ... """ logging.info("Dumping evidence to %s" % (dir,)) join = os.path.join if os.path.exists(dir): raise Exception, "Evidence dump does not expect directory %s to exist." % dir os.makedirs(dir) #refFasta = FastaWriter(join(dir, "reference.fa")) #readsFasta = FastaWriter(join(dir, "reads.fa")) consensusFasta = FastaWriter(join(dir, "consensus.fa")) windowName = self.refName + (":%d-%d" % (self.refStart, self.refEnd)) #refFasta.writeRecord(windowName, self.refSequence) #refFasta.close() consensusFasta.writeRecord(windowName + "|arrow", self.consensus) consensusFasta.close() arrowScoreFile = h5py.File(join(dir, "arrow-scores.h5")) arrowScoreFile.create_dataset("Scores", data=self.scores) vlen_str = h5py.special_dtype(vlen=str) arrowScoreFile.create_dataset("RowNames", data=self.rowNames, dtype=vlen_str) arrowScoreFile.create_dataset("ColumnNames", data=self.colNames, dtype=vlen_str) arrowScoreFile.create_dataset("BaselineScores", data=self.baselineScores) arrowScoreFile.close() # for aln in alns: # readsFasta.writeRecord(str(aln.rowNumber), # aln.read(orientation="genomic", aligned=False)) # readsFasta.close() def forPosition(self, pos): posStart = bisect_left(self.positions, pos) posEnd = bisect_right(self.positions, pos) return ArrowEvidence(self.refStart, self.consensus, self.rowNames, self.colNames[posStart:posEnd], self.baselineScores, self.scores[:, posStart:posEnd]) def justSubstitutions(self): colMask = np.array(map(lambda s: ("Sub" in s), self.colNames)) return ArrowEvidence(self.refStart, self.consensus, self.rowNames, self.colNames[colMask], self.baselineScores, self.scores[:, colMask]) def rowNumbers(self): # with FastaReader(self.dir + "/reads.fa") as fr: # return [ int(ctg.name) for ctg in fr ] raise NotImplementedError 
GenomicConsensus-master/GenomicConsensus/arrow/model.py000077500000000000000000000121621274347070600237360ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
################################################################################# # Authors: David Alexander, Lance Hepler import numpy as np, ConfigParser, collections, logging from glob import glob from os.path import join from pkg_resources import resource_filename, Requirement from GenomicConsensus.utils import die from GenomicConsensus.arrow.utils import fst, snd from pbcore.chemistry import ChemistryLookupError from pbcore.io import CmpH5Alignment import ConsensusCore2 as cc __all__ = [ "ArrowConfig" ] # # ArrowConfig: the kitchen sink class of arrow options # class ArrowConfig(object): """ Arrow configuration options """ def __init__(self, minMapQV=10, minPoaCoverage=3, maxPoaCoverage=11, mutationSeparation=10, mutationNeighborhood=20, maxIterations=40, noEvidenceConsensus="nocall", computeConfidence=True, readStumpinessThreshold=0.1, minReadScore=0.75, minHqRegionSnr=3.75, minZScore=-3.5, minAccuracy=0.82, chemistryOverride=None): self.minMapQV = minMapQV self.minPoaCoverage = minPoaCoverage self.maxPoaCoverage = maxPoaCoverage self.mutationSeparation = mutationSeparation self.mutationNeighborhood = mutationNeighborhood self.maxIterations = maxIterations self.noEvidenceConsensus = noEvidenceConsensus self.computeConfidence = computeConfidence self.readStumpinessThreshold = readStumpinessThreshold self.minReadScore = minReadScore self.minHqRegionSnr = minHqRegionSnr self.minZScore = minZScore self.minAccuracy = minAccuracy self.chemistryOverride = chemistryOverride def extractMappedRead(self, aln, windowStart): """ Given a clipped alignment, convert its coordinates into template space (starts with 0), bundle it up with its features as a MappedRead. 
""" if isinstance(aln, CmpH5Alignment): die("Arrow does not support CmpH5 files!") assert aln.referenceSpan > 0 def baseFeature(featureName): if aln.reader.hasBaseFeature(featureName): rawFeature = aln.baseFeature(featureName, aligned=False, orientation="native") return rawFeature.clip(0,255).astype(np.uint8) else: return np.zeros((aln.qLen,), dtype=np.uint8) name = aln.readName chemistry = aln.sequencingChemistry strand = cc.StrandType_REVERSE if aln.isReverseStrand else cc.StrandType_FORWARD read = cc.Read(name, aln.read(aligned=False, orientation="native"), cc.Uint8Vector(baseFeature("Ipd").tolist()), cc.Uint8Vector(baseFeature("PulseWidth").tolist()), cc.SNR(aln.hqRegionSnr), chemistry if self.chemistryOverride is None else self.chemistryOverride) return cc.MappedRead(read, strand, int(aln.referenceStart - windowStart), int(aln.referenceEnd - windowStart)) GenomicConsensus-master/GenomicConsensus/arrow/utils.py000077500000000000000000000375411274347070600240060ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. 
THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Authors: David Alexander, Lance Hepler import numpy as np, itertools, logging, re, sys from collections import Counter from traceback import format_exception from GenomicConsensus.variants import * from GenomicConsensus.utils import * from GenomicConsensus.consensus import ArrowConsensus from pbcore.io.rangeQueries import projectIntoRange import ConsensusCore2 as cc def uniqueSingleBaseMutations(templateSequence, positions=None): """ Return an iterator over all single-base mutations of a templateSequence that result in unique mutated sequences. """ allBases = "ACGT" positions = positions or xrange(0, len(templateSequence)) for tplStart in positions: tplBase = templateSequence[tplStart] prevTplBase = templateSequence[tplStart-1] if (tplStart > 0) else None # snvs for subsBase in allBases: if subsBase != tplBase: yield cc.Mutation(cc.MutationType_SUBSTITUTION, tplStart, subsBase) # Insertions---only allowing insertions that are not cognate # with the previous base. 
for insBase in allBases: if insBase != prevTplBase: yield cc.Mutation(cc.MutationType_INSERTION, tplStart, insBase) # Deletion--only allowed if refBase does not match previous tpl base if tplBase != prevTplBase: yield cc.Mutation(cc.MutationType_DELETION, tplStart) def allSingleBaseMutations(templateSequence, positions=None): """ Same as ``uniqueSingleBaseMutations``, but no filtering as to whether the mutated sequences are unique. """ allBases = "ACGT" positions = positions or xrange(0, len(templateSequence)) for tplStart in positions: tplBase = templateSequence[tplStart] # snvs for subsBase in allBases: if subsBase != tplBase: yield cc.Mutation(cc.MutationType_SUBSTITUTION, tplStart, subsBase) # Insertions for insBase in allBases: yield cc.Mutation(cc.MutationType_INSERTION, tplStart, insBase) # Deletion yield cc.Mutation(cc.MutationType_DELETION, tplStart) def nearbyMutations(mutations, tpl, neighborhoodSize): """ Return mutations nearby the previously-tried mutations """ mutationPositions = map(cc.Mutation.Start, mutations) nearbyPositions = set() for mp in mutationPositions: nearbyPositions.update(range(max(0, mp - neighborhoodSize), min(len(tpl), mp + neighborhoodSize))) return uniqueSingleBaseMutations(tpl, sorted(nearbyPositions)) def bestSubset(mutationsAndScores, separation): """ Given a list of (mutation, score) tuples, this utility method greedily chooses the highest scoring well-separated elements. We use this to avoid applying adjacent high scoring mutations, which are the rule, not the exception. We only apply the best scoring one in each neighborhood, and then revisit the neighborhoods after applying the mutations. 
""" input = mutationsAndScores[:] output = [] while input: best = max(input, key=snd) output.append(best) nStart = best[0].Start() - separation nEnd = best[0].Start() + separation for t in input[:]: if nStart <= t[0].Start() <= nEnd: input.remove(t) return output def refineConsensus(ai, arrowConfig): """ Given a MultiReadMutationScorer, identify and apply favorable template mutations. Return (consensus, didConverge) :: (str, bool) """ cfg = cc.PolishConfig(arrowConfig.maxIterations, arrowConfig.mutationSeparation, arrowConfig.mutationNeighborhood) polishResult = cc.Polish(ai, cfg) return str(ai), polishResult.hasConverged def consensusConfidence(ai, positions=None): """ Returns an array of QV values reflecting the consensus confidence at each position specified. If the `positions` argument is omitted, confidence values are returned for all positions in the consensus (str(ai)). """ return np.array(np.clip(cc.ConsensusQualities(ai), 0, 93), dtype=np.uint8) def variantsFromAlignment(a, refWindow, cssQvInWindow=None, siteCoverage=None): """ Extract the variants implied by a pairwise alignment to the reference. 
""" variants = [] refId, refStart, _ = refWindow refPos = refStart cssPos = 0 tbl = zip(a.Transcript(), a.Target(), a.Query()) # We don't call variants where either the reference or css is 'N' grouper = lambda row: "N" if (row[1]=="N" or row[2]=="N") else row[0] runs = itertools.groupby(tbl, grouper) for code, run in runs: assert code in "RIDMN" run = list(run) ref = "".join(map(snd, run)) refLen = len(ref) - Counter(ref)["-"] css = "".join(map(third, run)) cssLen = len(css) - Counter(css)["-"] variant = None if code == "M" or code == "N": pass elif code == "R": assert len(css)==len(ref) variant = Variant(refId, refPos, refPos+len(css), ref, css) elif code == "I": variant = Variant(refId, refPos, refPos, "", css) elif code == "D": variant = Variant(refId, refPos, refPos + len(ref), ref, "") if variant is not None: # HACK ALERT: variants at the first and last position # are not handled correctly if siteCoverage is not None and np.size(siteCoverage) > 0: refPos_ = min(refPos-refStart, len(siteCoverage)-1) variant.coverage = siteCoverage[refPos_] if cssQvInWindow is not None and np.size(cssQvInWindow) > 0: cssPos_ = min(cssPos, len(cssQvInWindow)-1) variant.confidence = cssQvInWindow[cssPos_] variants.append(variant) refPos += refLen cssPos += cssLen return variants def referenceSpanWithinWindow(referenceWindow, aln): """ Helper function for sorting reads by their reference span after restriction to a window. 
""" _, winStart, winEnd = referenceWindow return min(winEnd, aln.referenceEnd) - \ max(winStart, aln.referenceStart) def lifted(queryPositions, mappedRead): """ Lift a mappedRead into a new coordinate system by using the position translation table `queryPositions` """ newStart = queryPositions[mappedRead.TemplateStart] newEnd = queryPositions[mappedRead.TemplateEnd] copy = cc.MappedRead(mappedRead) copy.TemplateStart = newStart copy.TemplateEnd = newEnd return copy _typeMap = { cc.MutationType_INSERTION : "Ins", cc.MutationType_DELETION : "Del", cc.MutationType_SUBSTITUTION : "Sub" } def _shortMutationDescription(mut, tpl): """ More compact and uniform mutation description strings Examples: 201 Ins . > G 201 Sub C > T 201 Del C > . """ _type = _typeMap[mut.Type] _pos = mut.Start() _oldBase = "." if mut.Type == cc.MutationType_INSERTION \ else tpl[_pos] _newBase = "." if mut.Type == cc.MutationType_DELETION \ else mut.Base return "%d %s %s > %s" % (_pos, _type, _oldBase, _newBase) def scoreMatrix(ai): """ Returns (rowNames, columnNames, S) where: - S is a matrix where S_{ij} represents the score delta of mutation j against read i - rowNames[i] is an identifier name for the the read i---presently we use the the row number within the cmp.h5, encoded as a string - columnNames[j] is an identifier for mutation j, encoding the position, type, and base change """ css = str(ai) allMutations = sorted(allSingleBaseMutations(css)) readNames = list(ai.ReadNames()) numReads = len(readNames) shape = (numReads, len(allMutations)) scoreMatrix = np.zeros(shape) for j, mut in enumerate(allMutations): mutScores = ai.LLs(mut) scoreMatrix[:, j] = mutScores baselineScores = np.array(ai.LLs()) columnNames = [ _shortMutationDescription(mut, css) for mut in allMutations ] rowNames = readNames return (rowNames, columnNames, baselineScores, scoreMatrix) def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow, cssQvInWindow=None, siteCoverage=None, aligner="affine", 
ai=None): """ Compare the consensus and the reference in this window, returning a list of variants. """ refId, refStart, refEnd = refWindow if aligner == "affine": align = cc.AlignAffine else: align = cc.Align ga = align(refSequenceInWindow, cssSequenceInWindow) return variantsFromAlignment(ga, refWindow, cssQvInWindow, siteCoverage) def filterAlns(refWindow, alns, arrowConfig): """ Given alns (already clipped to the window bounds), filter out any that are incompatible with Arrow. By and large we avoid doing any filtering to avoid potential reference bias in variant calling. However at the moment the POA (and potentially other components) breaks when there is a read of zero length. So we throw away reads that are "stumpy", where the aligner has inserted a large gap, such that while the alignment technically spans the window, it may not have any read content therein: Ref ATGATCCAGTTACTCCGATAAA Read ATG---------------TA-A Win. [ ) """ return [ a for a in alns if a.readLength >= (arrowConfig.readStumpinessThreshold * a.referenceSpan) and min(a.hqRegionSnr) >= arrowConfig.minHqRegionSnr and a.readScore >= arrowConfig.minReadScore ] def sufficientlyAccurate(mappedRead, poaCss, minAccuracy): if minAccuracy <= 0.0: return True s, e = mappedRead.TemplateStart, mappedRead.TemplateEnd tpl = poaCss[s:e] if mappedRead.Strand == cc.StrandType_FORWARD: pass elif mappedRead.Strand == cc.StrandType_REVERSE: tpl = reverseComplement(tpl) else: return False aln = cc.AlignLinear(tpl, mappedRead.Seq) nErrors = sum(1 for t in aln.Transcript() if t != 'M') tlen = len(tpl) acc = 1.0 - 1.0 * min(nErrors, tlen) / tlen return acc >= minAccuracy def consensusForAlignments(refWindow, refSequence, alns, arrowConfig): """ Call consensus on this interval---without subdividing the interval further. Testable! Clipping has already been done! 
""" _, refStart, refEnd = refWindow # Compute the POA consensus, which is our initial guess, and # should typically be > 99.5% accurate fwdSequences = [ a.read(orientation="genomic", aligned=False) for a in alns if a.spansReferenceRange(refStart, refEnd) ] assert len(fwdSequences) >= arrowConfig.minPoaCoverage try: p = cc.PoaConsensus.FindConsensus(fwdSequences[:arrowConfig.maxPoaCoverage]) except: logging.info("%s: POA could not be generated" % (refWindow,)) return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus, refWindow, refSequence) ga = cc.Align(refSequence, p.Sequence) numPoaVariants = ga.Errors() poaCss = p.Sequence # Extract reads into ConsensusCore2-compatible objects, and map them into the # coordinates relative to the POA consensus mappedReads = [ arrowConfig.extractMappedRead(aln, refStart) for aln in alns ] queryPositions = cc.TargetToQueryPositions(ga) mappedReads = [ lifted(queryPositions, mr) for mr in mappedReads ] # Load the mapped reads into the mutation scorer, and iterate # until convergence. 
ai = cc.MultiMolecularIntegrator(poaCss, cc.IntegratorConfig(arrowConfig.minZScore)) coverage = 0 for mr in mappedReads: if (mr.TemplateEnd <= mr.TemplateStart or mr.TemplateEnd - mr.TemplateStart < 2 or mr.Length() < 2): continue if not sufficientlyAccurate(mr, poaCss, arrowConfig.minAccuracy): tpl = poaCss[mr.TemplateStart:mr.TemplateEnd] if mr.Strand == cc.StrandType_FORWARD: pass elif mr.Strand == cc.StrandType_REVERSE: tpl = reverseComplement(tpl) else: tpl = "INACTIVE/UNMAPPED" logging.debug("%s: skipping read '%s' due to insufficient accuracy, (poa, read): ('%s', '%s')" % (refWindow, mr.Name, tpl, mr.Seq)) continue if ai.AddRead(mr) == cc.State_VALID: coverage += 1 # Iterate until covergence if coverage < arrowConfig.minPoaCoverage: logging.info("%s: Inadequate coverage to call consensus" % (refWindow,)) return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus, refWindow, refSequence) _, converged = refineConsensus(ai, arrowConfig) if converged: arrowCss = str(ai) if arrowConfig.computeConfidence: confidence = consensusConfidence(ai) else: confidence = np.zeros(shape=len(arrowCss), dtype=int) return ArrowConsensus(refWindow, arrowCss, confidence, ai) else: logging.info("%s: Arrow did not converge to MLE" % (refWindow,)) return ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus, refWindow, refSequence) def coverageInWindow(refWin, hits): winId, winStart, winEnd = refWin a = np.array([(hit.referenceStart, hit.referenceEnd) for hit in hits if hit.referenceName == winId]) tStart = a[:,0] tEnd = a[:,1] cov = projectIntoRange(tStart, tEnd, winStart, winEnd) return cov GenomicConsensus-master/GenomicConsensus/consensus.py000066400000000000000000000140131274347070600235160ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
################################################################################# # Author: David Alexander import numpy as np __all__ = [ "Consensus", "QuiverConsensus", "ArrowConsensus", "totalLength", "areContiguous", "join" ] class Consensus(object): """ A multiple sequence consensus corresponding to a (reference/scaffold) coordinate region """ def __init__(self, refWindow, sequence, confidence): assert (len(sequence) == len(confidence)) self.refWindow = refWindow self.sequence = sequence self.confidence = confidence def __cmp__(self, other): return cmp(self.refWindow, other.refWindow) # # Functions for calling the consensus for regions of inadequate # coverage # @classmethod def nAsConsensus(cls, refWin, referenceSequence): length = len(referenceSequence) seq = np.empty(length, dtype="S1") seq.fill("N") conf = np.zeros(length, dtype=np.uint8) return cls(refWin, seq.tostring(), conf) @classmethod def referenceAsConsensus(cls, refWin, referenceSequence): conf = np.zeros(len(referenceSequence), dtype=np.uint8) return cls(refWin, referenceSequence, conf) @classmethod def lowercaseReferenceAsConsensus(cls, refWin, referenceSequence): conf = np.zeros(len(referenceSequence), dtype=np.uint8) return cls(refWin, referenceSequence.lower(), conf) @classmethod def noCallConsensus(cls, noCallStyle, refWin, refSequence): d = { "nocall" : cls.nAsConsensus, "reference" : cls.referenceAsConsensus, "lowercasereference" : cls.lowercaseReferenceAsConsensus} factory = d[noCallStyle] return factory(refWin, refSequence) class QuiverConsensus(Consensus): """ A QuiverConsensus object carries an additional field, `mms`, which is the ConsensusCore MultiReadMutationScorer object, which can be used to perform some post-hoc analyses (diploid, sample mixture, etc) """ def __init__(self, refWindow, sequence, confidence, mms=None): super(QuiverConsensus, self).__init__(refWindow, sequence, confidence) self.mms = mms class ArrowConsensus(Consensus): """ An ArrowConsensus object carries an 
additional field, `ai`, which is the ConsensusCore2 abstract integrator object, which can be used to perform some post-hoc analyses (diploid, sample mixture, etc) """ def __init__(self, refWindow, sequence, confidence, ai=None): super(ArrowConsensus, self).__init__(refWindow, sequence, confidence) self.ai = ai def totalLength(consensi): """ Total length of reference/scaffold coordinate windows """ return sum(cssChunk.refWindow[2] - cssChunk.refWindow[1] for cssChunk in consensi) def areContiguous(refWindows): """ Predicate that determines whether the reference/scaffold windows are contiguous. """ lastEnd = None lastId = None for refWin in refWindows: id, start, end = refWin if ((lastId is not None and id != lastId) or (lastEnd is not None and start != lastEnd)): return False lastEnd = end lastId = id return True def join(consensi): """ [Consensus] -> Consensus String together all the consensus objects into a single consensus. Will raise a ValueError if the reference windows are not contiguous. 
""" assert len(consensi) >= 1 sortedConsensi = sorted(consensi) if not areContiguous([cssChunk.refWindow for cssChunk in sortedConsensi]): raise ValueError, "Consensus chunks must be contiguous" joinedRefWindow = (sortedConsensi[0].refWindow[0], sortedConsensi[0].refWindow[1], sortedConsensi[-1].refWindow[2]) joinedSeq = "".join([cssChunk.sequence for cssChunk in sortedConsensi]) joinedConfidence = np.concatenate([cssChunk.confidence for cssChunk in sortedConsensi]) return Consensus(joinedRefWindow, joinedSeq, joinedConfidence) # # Naming convention for consensus contigs # def consensusContigName(referenceName, algorithmName): return "%s|%s" % (referenceName, algorithmName) GenomicConsensus-master/GenomicConsensus/io/000077500000000000000000000000001274347070600215345ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/io/VariantsGffWriter.py000066400000000000000000000102611274347070600255150ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. 
# Author: David Alexander

import time

from pbcore.io import GffWriter, Gff3Record
from GenomicConsensus import __VERSION__, reference


def gffVariantSeq(var):
    """Render the variant read sequence(s) for the GFF `variantSeq` field;
    "." stands in for an empty/absent sequence."""
    if var.isHeterozygous:
        return "%s/%s" % (var.readSeq1 or ".", var.readSeq2 or ".")
    else:
        return var.readSeq1 or "."


def gffVariantFrequency(var):
    """Render the variant frequency as a string, or None if unknown."""
    # Bugfix: identity comparison against None (`is`), not `==` (PEP 8;
    # `==` can be hijacked by operator overloading).
    if var.frequency1 is None:
        return None
    elif var.isHeterozygous:
        return "%d/%d" % (var.frequency1, var.frequency2)
    else:
        return str(var.frequency1)


def toGffRecord(var):
    """
    Convert a Variant object into a Gff3Record.

    GFF coordinates are 1-based inclusive; insertions (empty refSeq) are
    anchored at the preceding reference position.
    """
    varType  = var.variantType
    gffType  = varType.lower()
    gffStart = (var.refStart + 1) if (var.refSeq != "") else var.refStart
    gffEnd   = var.refEnd if (var.refSeq != "") else var.refStart
    gffFreq  = gffVariantFrequency(var)
    record = Gff3Record(reference.idToFullName(var.refId),
                        gffStart, gffEnd, gffType)
    record.reference  = var.refSeq or "."
    record.variantSeq = gffVariantSeq(var)
    if gffFreq:
        record.frequency = gffFreq
    record.coverage   = var.coverage
    record.confidence = var.confidence
    if var.annotations:
        for (k, v) in var.annotations:
            record.put(k, v)
    return record


class VariantsGffWriter(object):
    """Writes variant calls to a GFF3 file with pacbio-variant headers."""

    ONTOLOGY_URL = \
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12"

    def __init__(self, f, optionsDict, referenceEntries):
        self._gffWriter = GffWriter(f)
        self._gffWriter.writeHeader("##pacbio-variant-version 2.1")
        self._gffWriter.writeHeader("##date %s" % time.ctime())
        self._gffWriter.writeHeader("##feature-ontology %s" % self.ONTOLOGY_URL)
        self._gffWriter.writeHeader("##source GenomicConsensus %s" % __VERSION__)
        self._gffWriter.writeHeader("##source-commandline %s" % optionsDict["shellCommand"])
        self._gffWriter.writeHeader("##source-alignment-file %s" % optionsDict["inputFilename"])
        self._gffWriter.writeHeader("##source-reference-file %s" % optionsDict["referenceFilename"])
        # Reference groups.
        for entry in referenceEntries:
            self._gffWriter.writeHeader("##sequence-region %s 1 %d" \
                                        % (entry.name, entry.length))

    def writeVariants(self, variants):
        for var in variants:
            self._gffWriter.writeRecord(toGffRecord(var))

    def close(self):
        self._gffWriter.close()
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import from .VariantsGffWriter import VariantsGffWriter from .utils import * GenomicConsensus-master/GenomicConsensus/io/utils.py000066400000000000000000000045171274347070600232550ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# Author: David Alexander

__all__ = ["loadCmpH5", "loadBam"]

import h5py, os.path
from pbcore.io import AlignmentSet


def loadCmpH5(filename, referenceFname, disableChunkCache=False):
    """
    Get a CmpH5Reader object, disabling the chunk cache if requested.

    NOTE(review): `referenceFname` and `disableChunkCache` are accepted
    for API compatibility but are unused here -- the file is simply
    wrapped in an AlignmentSet.  Confirm whether chunk-cache control is
    still needed.
    """
    fullPath = os.path.abspath(os.path.expanduser(filename))
    return AlignmentSet(fullPath)


def loadBam(filename, referenceFname):
    """Open a BAM file, paired with its reference FASTA, as an AlignmentSet."""
    fullPath = os.path.abspath(os.path.expanduser(filename))
    return AlignmentSet(fullPath, referenceFastaFname=referenceFname)
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import import argparse, atexit, cProfile, gc, glob, h5py, logging, multiprocessing import os, pstats, random, shutil, tempfile, time, threading, Queue, traceback import functools import re import sys import pysam from pbcommand.utils import setup_log, Constants as LogFormats from pbcommand.cli import pbparser_runner from pbcore.io import AlignmentSet, ContigSet from GenomicConsensus import reference from GenomicConsensus.options import (options, Constants, get_parser, processOptions, resolveOptions, consensusCoreVersion, consensusCore2Version) from GenomicConsensus.utils import (IncompatibleDataException, datasetCountExceedsThreshold, die) class ToolRunner(object): """ The main driver class for the GenomicConsensus tool. It is assumed that arguments have already been parsed and used to populate the global 'options' namespace before instantiating this class. """ def __init__(self): self._inAlnFile = None self._resultsQueue = None self._workQueue = None self._slaves = None self._algorithm = None self._algorithmConfiguration = None self._aborting = False def _makeTemporaryDirectory(self): """ Make a temp dir where we can stash things if necessary. 
""" options.temporaryDirectory = tempfile.mkdtemp(prefix="GenomicConsensus-", dir="/tmp") logging.info("Created temporary directory %s" % (options.temporaryDirectory,) ) def _algorithmByName(self, name, peekFile): if name == "plurality": from GenomicConsensus.plurality import plurality algo = plurality elif name == "quiver": from GenomicConsensus.quiver import quiver algo = quiver elif name == "arrow": from GenomicConsensus.arrow import arrow algo = arrow # All arrow models require PW except P6 and the first S/P1-C1 if set(peekFile.sequencingChemistry) - set(["P6-C4", "S/P1-C1/beta"]): if (not peekFile.hasBaseFeature("Ipd") or not peekFile.hasBaseFeature("PulseWidth")): die("Model requires missing base feature: IPD or PulseWidth") elif name == "poa": from GenomicConsensus.poa import poa algo = poa elif name == "best": logging.info("Identifying best algorithm based on input data") from GenomicConsensus import algorithmSelection algoName = algorithmSelection.bestAlgorithm(peekFile.sequencingChemistry) return self._algorithmByName(algoName, peekFile) else: die("Failure: unrecognized algorithm %s" % name) isOK, msg = algo.availability if not isOK: die("Failure: %s" % msg) logging.info("Will use {a} algorithm".format(a=name)) return algo def _launchSlaves(self): """ Launch a group of worker processes (self._slaves), the queue (self._workQueue) that will be used to send them chunks of work, and the queue that will be used to receive back the results (self._resultsQueue). Additionally, launch the result collector process. """ availableCpus = multiprocessing.cpu_count() logging.info("Available CPUs: %d" % (availableCpus,)) logging.info("Requested workers: %d" % (options.numWorkers,)) logging.info("Parallel Mode: %s" % ("Threaded" if options.threaded else "Process",)) if (options.numWorkers > availableCpus): logging.warn("More workers requested (%d) than CPUs available (%d);" " may result in suboptimal performance." 
% (options.numWorkers, availableCpus)) self._initQueues() WorkerType, ResultCollectorType = self._algorithm.slaveFactories(options.threaded) self._slaves = [] for i in xrange(options.numWorkers): p = WorkerType(self._workQueue, self._resultsQueue, self._algorithmConfiguration) self._slaves.append(p) p.start() logging.info("Launched compute slaves.") rcp = ResultCollectorType(self._resultsQueue, self._algorithm.name, self._algorithmConfiguration) rcp.start() self._slaves.append(rcp) logging.info("Launched collector slave.") def _initQueues(self): if options.threaded: self._workQueue = Queue.Queue(options.queueSize) self._resultsQueue = Queue.Queue(options.queueSize) else: self._workQueue = multiprocessing.Queue(options.queueSize) self._resultsQueue = multiprocessing.Queue(options.queueSize) def _readAlignmentInput(self): """ Read the AlignmentSet input file and store it as self._inAlnFile. """ fname = options.inputFilename self._inAlnFile = AlignmentSet(fname) def _loadReference(self, alnFile): logging.info("Loading reference") err = reference.loadFromFile(options.referenceFilename, alnFile) if err: die("Error loading reference") # Grok the referenceWindow spec, if any. if options.referenceWindowsAsString is None: options.referenceWindows = () elif options.skipUnrecognizedContigs: # This is a workaround for smrtpipe scatter/gather. 
options.referenceWindows = [] for s in options.referenceWindowsAsString.split(","): try: win = reference.stringToWindow(s) options.referenceWindows.append(win) except: pass else: options.referenceWindows = map(reference.stringToWindow, options.referenceWindowsAsString.split(",")) if options.referenceWindowsFromAlignment: options.referenceWindows = alnFile.refWindows def _checkFileCompatibility(self, alnFile): if not alnFile.isSorted: die("Input Alignment file must be sorted.") if alnFile.isEmpty: die("Input Alignment file must be nonempty.") def _shouldDisableChunkCache(self, alnFile): #if isinstance(alnFile, CmpH5Reader): #if alnFile.isCmpH5: # threshold = options.autoDisableHdf5ChunkCache # return datasetCountExceedsThreshold(alnFile, threshold) #else: # return False return True def _configureAlgorithm(self, options, alnFile): assert self._algorithm != None try: self._algorithmConfiguration = self._algorithm.configure(options, alnFile) except IncompatibleDataException as e: die("Failure: %s" % e.message) def _mainLoop(self): # Split up reference genome into chunks and farm out the # a chunk as a unit of work. 
logging.debug("Starting main loop.") ids = reference.enumerateIds(options.referenceWindows) for _id in ids: if options.fancyChunking: chunks = reference.fancyEnumerateChunks(self._inAlnFile, _id, options.referenceChunkSize, options.minCoverage, options.minMapQV, options.referenceWindows) else: chunks = reference.enumerateChunks(_id, options.referenceChunkSize, options.referenceWindows) for chunk in chunks: if self._aborting: return self._workQueue.put(chunk) # Write sentinels ("end-of-work-stream") for i in xrange(options.numWorkers): self._workQueue.put(None) def _printProfiles(self): for profile in glob.glob(os.path.join(options.temporaryDirectory, "*")): pstats.Stats(profile).sort_stats("time").print_stats(20) def _cleanup(self): if options.doProfiling: logging.info("Removing %s" % options.temporaryDirectory) shutil.rmtree(options.temporaryDirectory, ignore_errors=True) def _setupEvidenceDumpDirectory(self, directoryName): if os.path.exists(directoryName): shutil.rmtree(directoryName) os.makedirs(directoryName) @property def aborting(self): return self._aborting def abortWork(self, why): """ Performs a shutdown of all the slave processes. Called by the monitoring thread when a child process exits with a non-zero, or when a keyboard interrupt (Ctrl-C) is given. Not called during normal shutdown. """ logging.error(why) self._aborting = True self._resultsQueue.close() self._workQueue.close() @property def slaves(self): return self._slaves def main(self): # This looks scary but it's not. Python uses reference # counting and has a secondary, optional garbage collector for # collecting garbage cycles. Unfortunately when a cyclic GC # happens when a thread is calling cPickle.dumps, the # interpreter crashes sometimes. See Bug 19704. Since we # don't leak garbage cycles, disabling the cyclic GC is # essentially harmless. 
gc.disable() random.seed(42) if options.pdb or options.pdbAtStartup: print >>sys.stderr, "Process ID: %d" % os.getpid() try: import ipdb except ImportError: die("Debugging options require 'ipdb' package installed.") if not options.threaded: die("Debugging only works with -T (threaded) mode") if options.pdbAtStartup: ipdb.set_trace() logging.info("h5py version: %s" % h5py.version.version) logging.info("hdf5 version: %s" % h5py.version.hdf5_version) logging.info("ConsensusCore version: %s" % (consensusCoreVersion() or "ConsensusCore unavailable")) logging.info("ConsensusCore2 version: %s" % (consensusCore2Version() or "ConsensusCore2 unavailable")) logging.info("Starting.") atexit.register(self._cleanup) if options.doProfiling: self._makeTemporaryDirectory() with AlignmentSet(options.inputFilename) as peekFile: if options.algorithm == "arrow" and peekFile.isCmpH5: die("Arrow does not support CmpH5 files") if not peekFile.isCmpH5 and not peekFile.hasPbi: die("Genomic Consensus only works with cmp.h5 files and BAM " "files with accompanying .pbi files") logging.info("Peeking at file %s" % options.inputFilename) logging.info("Input data: numAlnHits=%d" % len(peekFile)) resolveOptions(peekFile) self._loadReference(peekFile) self._checkFileCompatibility(peekFile) self._algorithm = self._algorithmByName(options.algorithm, peekFile) self._configureAlgorithm(options, peekFile) options.disableHdf5ChunkCache = True #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile) #if options.disableHdf5ChunkCache: # logging.info("Will disable HDF5 chunk cache (large number of datasets)") #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count()) if options.dumpEvidence: self._setupEvidenceDumpDirectory(options.evidenceDirectory) self._launchSlaves() self._readAlignmentInput() monitoringThread = threading.Thread(target=monitorSlaves, args=(self,)) monitoringThread.start() try: if options.doProfiling: cProfile.runctx("self._mainLoop()", 
globals=globals(), locals=locals(), filename=os.path.join(options.temporaryDirectory, "profile-main.out")) elif options.pdb: with ipdb.launch_ipdb_on_exception(): self._mainLoop() else: self._mainLoop() except: why = traceback.format_exc() self.abortWork(why) monitoringThread.join() if self._aborting: logging.error("Aborting") return -1 else: logging.info("Finished.") if options.doProfiling: self._printProfiles() # close h5 file. self._inAlnFile.close() return 0 def monitorSlaves(driver): """ Promptly aborts if a child is found to have exited with a nonzero exit code received; otherwise returns when all processes exit cleanly (0). This approach is portable--catching SIGCHLD doesn't work on Windows. """ while not driver.aborting: all_exited = all(not p.is_alive() for p in driver.slaves) nonzero_exits = [p.exitcode for p in driver.slaves if p.exitcode] if nonzero_exits: exitcode = nonzero_exits[0] driver.abortWork("Child process exited with exitcode=%d. Aborting." % exitcode) return exitcode elif all_exited: return 0 time.sleep(1) def args_runner(args): options.__dict__.update(args.__dict__) processOptions() tr = ToolRunner() return tr.main() def resolved_tool_contract_runner(resolved_contract): rc = resolved_contract alignment_path = rc.task.input_files[0] reference_path = rc.task.input_files[1] gff_path = rc.task.output_files[0] dataset_path = rc.task.output_files[1] fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path) fastq_path = rc.task.output_files[2] args = [ alignment_path, "--verbose", "--reference", reference_path, "--outputFilename", gff_path, "--outputFilename", fasta_path, "--outputFilename", fastq_path, "--numWorkers", str(rc.task.nproc), "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]), "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]), "--algorithm", rc.task.options[Constants.ALGORITHM_ID], "--alignmentSetRefWindows", ] if rc.task.options[Constants.DIPLOID_MODE_ID]: args.append("--diploid") args_ = 
get_parser().arg_parser.parser.parse_args(args) rc = args_runner(args_) if rc == 0: pysam.faidx(fasta_path) ds = ContigSet(fasta_path, strict=True) ds.write(dataset_path) return rc def main(argv=sys.argv): setup_log_ = functools.partial(setup_log, str_formatter=LogFormats.LOG_FMT_LVL) return pbparser_runner( argv=argv[1:], parser=get_parser(), args_runner_func=args_runner, contract_runner_func=resolved_tool_contract_runner, alog=logging.getLogger(), setup_log_func=setup_log_) if __name__ == "__main__": sys.exit(main(sys.argv)) GenomicConsensus-master/GenomicConsensus/options.py000066400000000000000000000466471274347070600232130ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander # # This module makes the options globally available to all processes. # # Presently it relies on the fact that multiprocessing on Linux/Unix uses fork(), # so the child processes inherit state from the main process. If we ever wanted # to port to Windows, we could support this module using the Manager protocol in # in multiprocessing. # # Usage: # In the main process, before forking: # > from options import parseOptions, options # > parseOptions() # ... # then in any subprocess you can say # > from options import options # and get the loaded options dictionary. # from __future__ import absolute_import import argparse, h5py, os, os.path, sys, json from pbcommand.models import FileTypes, SymbolTypes, get_pbparser from pbcommand.common_options import (add_resolved_tool_contract_option, add_debug_option) # FIXME add_subcomponent_versions_option) from .utils import fileFormat from . 
import __VERSION__ options = argparse.Namespace() def consensusCoreVersion(): try: import ConsensusCore return ConsensusCore.Version.VersionString() except: return None def consensusCore2Version(): try: import ConsensusCore2 return ConsensusCore2.__version__ except: return None class Constants(object): TOOL_ID = "genomic_consensus.tasks.variantcaller" DRIVER_EXE = "variantCaller --resolved-tool-contract " ALGORITHM_ID = "genomic_consensus.task_options.algorithm" MIN_CONFIDENCE_ID = "genomic_consensus.task_options.min_confidence" MIN_COVERAGE_ID = "genomic_consensus.task_options.min_coverage" DIPLOID_MODE_ID = "genomic_consensus.task_options.diploid" DEFAULT_ALGORITHM = "best" DEFAULT_MIN_CONFIDENCE = 40 DEFAULT_MIN_COVERAGE = 5 DEFAULT_MAX_COVERAGE = 100 DEFAULT_MIN_MAPQV = 10 DEFAULT_MIN_READSCORE = 0.65 DEFAULT_MIN_HQREGIONSNR = 3.75 DEFAULT_MIN_ZSCORE = -3.5 DEFAULT_MIN_ACCURACY = 0.82 def get_parser(): """ Construct a hybrid PbParser with most tool contract parameters defined separately from argparser parameters. 
""" p = get_pbparser( tool_id=Constants.TOOL_ID, version=__VERSION__, name="variantCaller", description="Compute genomic consensus and call variants relative to the reference.", driver_exe=Constants.DRIVER_EXE, nproc=SymbolTypes.MAX_NPROC, resource_types=(), default_level="WARN") tcp = p.tool_contract_parser tcp.add_input_file_type(FileTypes.DS_ALIGN, "infile", "Alignment DataSet", "BAM or Alignment DataSet") tcp.add_input_file_type(FileTypes.DS_REF, "reference", "Reference DataSet", "Fasta or Reference DataSet") tcp.add_output_file_type(FileTypes.GFF, "variants", name="Consensus GFF", description="Consensus GFF", default_name="variants") tcp.add_output_file_type(FileTypes.DS_CONTIG, "consensus", name="Consensus ContigSet", description="Consensus sequence in Fasta format", default_name="consensus") tcp.add_output_file_type(FileTypes.FASTQ, "consensus_fastq", name="Consensus fastq", description="Consensus fastq", default_name="consensus") tcp.add_str( option_id=Constants.ALGORITHM_ID, option_str="algorithm", default=Constants.DEFAULT_ALGORITHM, name="Algorithm", description="Variant calling algorithm") tcp.add_int( option_id=Constants.MIN_CONFIDENCE_ID, option_str="minConfidence", default=Constants.DEFAULT_MIN_CONFIDENCE, name="Minimum confidence", description="The minimum confidence for a variant call to be output "+\ "to variants.gff") tcp.add_int( option_id=Constants.MIN_COVERAGE_ID, option_str="minCoverage", default=Constants.DEFAULT_MIN_COVERAGE, name="Minimum coverage", description="The minimum site coverage that must be achieved for " +\ "variant calls and consensus to be calculated for a site.") tcp.add_boolean( option_id=Constants.DIPLOID_MODE_ID, option_str="diploid", default=False, name="Diploid mode (experimental)", description="Enable detection of heterozygous variants (experimental)") add_options_to_argument_parser(p.arg_parser.parser) return p def add_options_to_argument_parser(parser): def canonicalizedFilePath(path): return 
os.path.abspath(os.path.expanduser(path)) basics = parser.add_argument_group("Basic required options") basics.add_argument( "inputFilename", type=canonicalizedFilePath, help="The input cmp.h5 file") basics.add_argument( "--referenceFilename", "--reference", "-r", action="store", dest="referenceFilename", type=canonicalizedFilePath, required=True, help="The filename of the reference FASTA file") basics.add_argument( "-o", "--outputFilename", dest="outputFilenames", required=True, type=str, action="append", default=[], help="The output filename(s), as a comma-separated list." + \ "Valid output formats are .fa/.fasta, .fq/.fastq, .gff") parallelism = parser.add_argument_group("Parallelism") parallelism.add_argument( "-j", "--numWorkers", dest="numWorkers", type=int, default=1, help="The number of worker processes to be used") filtering = parser.add_argument_group("Output filtering") filtering.add_argument( "--minConfidence", "-q", action="store", dest="minConfidence", type=int, default=Constants.DEFAULT_MIN_CONFIDENCE, help="The minimum confidence for a variant call to be output to variants.gff") filtering.add_argument( "--minCoverage", "-x", action="store", dest="minCoverage", default=Constants.DEFAULT_MIN_COVERAGE, type=int, help="The minimum site coverage that must be achieved for variant calls and " + \ "consensus to be calculated for a site.") filtering.add_argument( "--noEvidenceConsensusCall", action="store", choices=["nocall", "reference", "lowercasereference"], default="lowercasereference", help="The consensus base that will be output for sites with no effective coverage.") readSelection = parser.add_argument_group("Read selection/filtering") readSelection.add_argument( "--coverage", "-X", action="store", dest="coverage", type=int, default=Constants.DEFAULT_MAX_COVERAGE, help="A designation of the maximum coverage level to be used for analysis." 
+ \ " Exact interpretation is algorithm-specific.") readSelection.add_argument( "--minMapQV", "-m", action="store", dest="minMapQV", type=float, default=Constants.DEFAULT_MIN_MAPQV, help="The minimum MapQV for reads that will be used for analysis.") # Since the reference isn't loaded at options processing time, we # can't grok the referenceWindow specified until later. We store # it as a string (referenceWindowsAsString) and it will later be # interpreted and stored as a proper window tuple (referenceWindow) readSelection.add_argument( "--referenceWindow", "--referenceWindows", "-w", action="store", dest="referenceWindowsAsString", type=str, help="The window (or multiple comma-delimited windows) of the reference to " + \ "be processed, in the format refGroup:refStart-refEnd " + \ "(default: entire reference).", default=None) readSelection.add_argument( "--alignmentSetRefWindows", action="store_true", dest="referenceWindowsFromAlignment", help="The window (or multiple comma-delimited windows) of the reference to " + \ "be processed, in the format refGroup:refStart-refEnd " + \ "will be pulled from the alignment file.", default=False) def slurpWindowFile(fname): return ",".join(map(str.strip, open(fname).readlines())) readSelection.add_argument( "--referenceWindowsFile", "-W", action="store", dest="referenceWindowsAsString", type=slurpWindowFile, help="A file containing reference window designations, one per line", default=None) readSelection.add_argument( "--barcode", type=str, dest="_barcode", help="Only process reads with the given barcode name.") def parseReadStratum(s): rs = map(int, s.split("/")) assert len(rs) == 2 assert rs[0] < rs[1] return rs readSelection.add_argument( "--readStratum", help="A string of the form 'n/N', where n, and N are integers, 0 <= n < N, designating" \ " that the reads are to be deterministically split into N strata of roughly even" \ " size, and stratum n is to be used for variant and consensus calling. 
This is" \ " mostly useful for Quiver development.", dest="readStratum", default=None, type=parseReadStratum) readSelection.add_argument( "--minReadScore", action="store", dest="minReadScore", type=float, default=Constants.DEFAULT_MIN_READSCORE, help="The minimum ReadScore for reads that will be used for analysis (arrow-only).") readSelection.add_argument( "--minSnr", action="store", dest="minHqRegionSnr", type=float, default=Constants.DEFAULT_MIN_HQREGIONSNR, help="The minimum acceptable signal-to-noise over all channels for reads that will be used for analysis (arrow-only).") readSelection.add_argument( "--minZScore", action="store", dest="minZScore", type=float, default=Constants.DEFAULT_MIN_ZSCORE, help="The minimum acceptable z-score for reads that will be used for analysis (arrow-only).") readSelection.add_argument( "--minAccuracy", action="store", dest="minAccuracy", type=float, default=Constants.DEFAULT_MIN_ACCURACY, help="The minimum acceptable window-global alignment accuracy for reads that will be used for the analysis (arrow-only).") algorithm = parser.add_argument_group("Algorithm and parameter settings") algorithm.add_argument( "--algorithm", action="store", dest="algorithm", type=str, choices=["quiver", "arrow", "plurality", "poa", "best"], default="best") algorithm.add_argument( "--parametersFile", "-P", dest="parametersFile", type=str, default=None, help="Parameter set filename (QuiverParameters.ini), or directory D " + \ "such that either D/*/GenomicConsensus/QuiverParameters.ini, " + \ "or D/GenomicConsensus/QuiverParameters.ini, is found. In the " + \ "former case, the lexically largest path is chosen.") algorithm.add_argument( "--parametersSpec", "-p", action="store", dest="parametersSpec", type=str, default="auto", help="Name of parameter set (chemistry.model) to select from the " + \ "parameters file, or just the name of the chemistry, in which " + \ "case the best available model is chosen. 
Default is 'auto', " + \ "which selects the best parameter set from the cmp.h5") debugging = parser.add_argument_group("Verbosity and debugging/profiling") add_debug_option(debugging) debugging.add_argument( "--pdbAtStartup", action="store_true", dest="pdbAtStartup", default=False, help="Drop into Python debugger at startup (requires ipdb)") debugging.add_argument( "--profile", action="store_true", dest="doProfiling", default=False, help="Enable Python-level profiling (using cProfile).") debugging.add_argument( "--dumpEvidence", "-d", dest="dumpEvidence", nargs="?", default=None, const="variants", choices=["variants", "all", "outliers"]) debugging.add_argument( "--evidenceDirectory", default="evidence_dump") debugging.add_argument( "--annotateGFF", action="store_true", help="Augment GFF variant records with additional information") advanced = parser.add_argument_group("Advanced configuration options") advanced.add_argument( "--diploid", action="store_true", help="Enable detection of heterozygous variants (experimental)") advanced.add_argument( "--queueSize", "-Q", action="store", dest="queueSize", type=int, default=200) advanced.add_argument( "--threaded", "-T", action="store_true", dest="threaded", default=False, help="Run threads instead of processes (for debugging purposes only)") advanced.add_argument( "--referenceChunkSize", "-C", action="store", dest="referenceChunkSize", type=int, default=500) advanced.add_argument( "--fancyChunking", default=True, action="store_true", help="Adaptive reference chunking designed to handle coverage cutouts better") advanced.add_argument( "--simpleChunking", dest="fancyChunking", action="store_false", help="Disable adaptive reference chunking") advanced.add_argument( "--referenceChunkOverlap", action="store", dest="referenceChunkOverlap", type=int, default=5) advanced.add_argument( "--autoDisableHdf5ChunkCache", action="store", type=int, default=500, help="Disable the HDF5 chunk cache when the number of datasets in the cmp.h5 " 
+ \ "exceeds the given threshold") advanced.add_argument( "--aligner", "-a", action="store", choices=["affine", "simple"], default="affine", help="The pairwise alignment algorithm that will be used to produce variant calls" \ " from the consensus (Quiver only).") advanced.add_argument( "--refineDinucleotideRepeats", dest="refineDinucleotideRepeats", action="store_true", help="Require quiver maximum likelihood search to try one less/more repeat copy in" \ " dinucleotide repeats, which seem to be the most frequent cause of suboptimal" \ " convergence (getting trapped in local optimum) (Quiver only)") advanced.add_argument( "--noRefineDinucleotideRepeats", dest="refineDinucleotideRepeats", action="store_false", help="Disable dinucleotide refinement") advanced.set_defaults(refineDinucleotideRepeats=True) advanced.add_argument( "--fast", dest="fastMode", action="store_true", help="Cut some corners to run faster. Unsupported!") advanced.add_argument( "--skipUnrecognizedContigs", action="store_true", help="Do not abort when told to process a reference window (via -w/--referenceWindow[s]) " \ "that has no aligned coverage. Outputs emptyish files if there are no remaining " \ "non-degenerate windows. Only intended for use by smrtpipe scatter/gather.") return parser def processOptions(): """ Various additions to the global 'options' object, assuming that the command-line arguments have already been processed. """ parser = get_parser().arg_parser.parser def checkInputFile(path): if not os.path.isfile(path): parser.error("Input file %s not found." % (path,)) def checkOutputFile(path): try: f = open(path, "a") f.close() except: parser.error("Output file %s cannot be written." 
% (path,)) options.gffOutputFilename = None options.fastaOutputFilename = None options.fastqOutputFilename = None options.csvOutputFilename = None for outputFilename in options.outputFilenames: fmt = fileFormat(outputFilename) if fmt == "GFF": options.gffOutputFilename = outputFilename elif fmt == "FASTA": options.fastaOutputFilename = outputFilename elif fmt == "FASTQ": options.fastqOutputFilename = outputFilename elif fmt == "CSV": options.csvOutputFilename = outputFilename if options.inputFilename.endswith(".bam"): options.usingBam, options.usingCmpH5 = True, False else: options.usingBam, options.usingCmpH5 = False, True for path in (options.inputFilename, options.referenceFilename): if path != None: checkInputFile(path) for path in options.outputFilenames: if path != None: checkOutputFile(path) options.shellCommand = " ".join(sys.argv) def resolveOptions(alnFile): """ Some of the options are provided as strings by the user, but need to be translated into internal identifiers. These options are encoded as options._optionName; here we lookup the ID and store it as options.optionName. This is essentially just an order-of-initialization issue. """ if options._barcode != None: if not alnFile.isBarcoded: raise Exception("input file is not barcoded!") if options._barcode not in alnFile.barcode: raise Exception("Barcode with given name not present in input file!") options.barcode = alnFile.barcode[options._barcode] else: options.barcode = None GenomicConsensus-master/GenomicConsensus/plurality/000077500000000000000000000000001274347070600231525ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/plurality/__init__.py000066400000000000000000000035011274347070600252620ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander GenomicConsensus-master/GenomicConsensus/plurality/plurality.py000066400000000000000000000410321274347070600255510ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import import math, logging, numpy as np, random from itertools import izip from collections import Counter from ..utils import * from .. 
import reference from ..options import options from ..Worker import WorkerProcess, WorkerThread from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread from ..consensus import * from ..variants import * # # --------------- Configuration ---------------------- # class PluralityConfig(object): """ Plurality configuration options """ def __init__(self, minMapQV=10, minCoverage=3, minConfidence=40, diploid=False, noEvidenceConsensus="nocall"): self.minMapQV = minMapQV self.minCoverage = minCoverage self.minConfidence = minConfidence self.noEvidenceConsensus = noEvidenceConsensus self.diploid = diploid self.realignHomopolymers = False # not available yet # # ----------- The actual algorithm code ------------- # def pluralityConsensusAndVariants(refWindow, referenceSequenceInWindow, alns, pluralityConfig): """ Compute (Consensus, [Variant]) for this window, using the given `alns`, by applying a straightforward column-oriented consensus calling algorithm. If the consensus cannot be called for a base, "N" will be placed in the consensus sequence for that position. If `realignHomopolymers` is True, alignment gaps will be shuffled in homopolymer regions in an attempt to maximize variant detection sensitivity (not yet implemented, and may never be). """ _, refStart, refEnd = refWindow windowSize = refEnd - refStart assert len(referenceSequenceInWindow) == windowSize # # Build up these arrays in reference coordinates. 
# consensusSequence_ = [] consensusFrequency_ = [] consensusConfidence_ = [] effectiveCoverage_ = [] alternateAllele_ = [] # DIPLOID ONLY alternateFrequency_ = [] # " heterozygousConfidence_ = [] # " noCallCss = Consensus.noCallConsensus(pluralityConfig.noEvidenceConsensus, refWindow, referenceSequenceInWindow) baseCallsMatrix = tabulateBaseCalls(refWindow, alns) for j in xrange(0, windowSize): counter = Counter(baseCallsMatrix[:, j]) if "" in counter: counter.pop("") siteEffectiveCoverage = sum(counter.itervalues()) if ((siteEffectiveCoverage == 0) or (siteEffectiveCoverage < pluralityConfig.minCoverage)): siteConsensusFrequency = siteEffectiveCoverage siteConsensusSequence = noCallCss.sequence[j] top2 = None else: # Not for production code: top2 = counter.most_common(2) siteConsensusSequence, siteConsensusFrequency = top2[0] # Replace explicit gaps with empty string if siteConsensusSequence == "-": siteConsensusSequence = "" consensusSequence_.append(siteConsensusSequence) consensusFrequency_.append(siteConsensusFrequency) effectiveCoverage_.append(siteEffectiveCoverage) if pluralityConfig.diploid: if top2 and len(top2) > 1: siteAlternateAllele, siteAlternateFrequency = top2[1] else: siteAlternateAllele = "N" siteAlternateFrequency = 0 if siteAlternateAllele == "-": siteAlternateAllele = "" alternateAllele_.append(siteAlternateAllele) alternateFrequency_.append(siteAlternateFrequency) else: siteAlternateAllele = "N" siteAlternateFrequency = 0 siteConsensusConfidence, siteHeterozygousConfidence = \ posteriorConfidences(siteEffectiveCoverage, siteConsensusFrequency, siteAlternateFrequency, diploid=pluralityConfig.diploid) consensusConfidence_.append(siteConsensusConfidence) if pluralityConfig.diploid: heterozygousConfidence_.append(siteHeterozygousConfidence) # # Derive variants from reference-coordinates consensus # variants = _computeVariants(pluralityConfig, refWindow, referenceSequenceInWindow, effectiveCoverage_, consensusSequence_, consensusFrequency_, 
consensusConfidence_, alternateAllele_, alternateFrequency_, heterozygousConfidence_) # # Now we need to put everything in consensus coordinates # consensusLens = map(len, consensusSequence_) consensusSequence = "".join(consensusSequence_) consensusConfidence = np.repeat(consensusConfidence_, consensusLens) css = Consensus(refWindow, consensusSequence, consensusConfidence) return (css, variants) def varsFromRefAndRead(refId, refPos, refBase, readSeq, **kwargs): """ Compute the haploid/heterozygous Variant[s] corresponding to a readSeq aligned against refSeq. Two variant scenario: REF: G READ: AC => insertion(A), substitution(G->C) Required: refBase != readSeq Returned: List of Variant objects (length one or two) """ assert refBase != readSeq vars = [] readBefore, readAt = readSeq[:-1], readSeq[-1:] if readBefore: # Insertion vars.append(Variant(refId, refPos, refPos, "", readBefore, **kwargs)) if readAt != refBase: vars.append(Variant(refId, refPos, refPos+1, refBase, readAt, **kwargs)) return vars def varsFromRefAndReads(refId, refPos, refBase, readSeq1, readSeq2, **kwargs): """ Heterozygous extension of the above """ assert (refBase != readSeq1) or (refBase != readSeq2) vars = [] readBefore1, readAt1 = readSeq1[:-1], readSeq1[-1:] readBefore2, readAt2 = readSeq2[:-1], readSeq2[-1:] if readBefore1 or readBefore2: vars.append(Variant(refId, refPos, refPos, "", readBefore1, readBefore2, **kwargs)) if readAt1 != refBase or readAt2 != refBase: vars.append(Variant(refId, refPos, refPos+1, refBase, readAt1, readAt2, **kwargs)) return vars def _isMixedLengthVariant(v): return (v.isHeterozygous and len(v.readSeq1) != len(v.readSeq2)) def _isSameLengthVariant(v): return not _isMixedLengthVariant(v) def _computeVariants(config, refWindow, refSequenceInWindow, coverageArray, consensusArray, consensusFrequencyArray, consensusConfidenceArray, alternateAlleleArray=None, alternateAlleleFrequency=None, heterozygousConfidence=None): refId, refStart, refEnd = refWindow windowSize = 
refEnd - refStart assert len(refSequenceInWindow) == windowSize assert len(consensusArray) == windowSize if config.diploid: assert len(alternateAlleleArray) == windowSize assert len(alternateAlleleFrequency) == windowSize vars = [] for j in xrange(windowSize): refPos = j + refStart refBase = refSequenceInWindow[j] cov = coverageArray[j] cssBases = consensusArray[j] conf = consensusConfidenceArray[j] cssFreq = consensusFrequencyArray[j] if config.diploid: altBases = alternateAlleleArray[j] altFreq = alternateAlleleFrequency[j] hetConf = heterozygousConfidence[j] else: altBases = "N" altFreq = 0 if cov < config.minCoverage: continue if (config.diploid and hetConf > conf): # # Diploid variant[s]? # if (hetConf >= config.minConfidence) and (refBase != "N"): vs = varsFromRefAndReads(refId, refPos, refBase, cssBases, altBases, confidence=hetConf, coverage=cov, frequency1=cssFreq, frequency2=altFreq) vars = vars + vs else: # # Haploid variant[s]? # if (conf >= config.minConfidence) and \ (refBase != cssBases) and \ (refBase != "N") and \ (cssBases != "N") and \ (cssBases == "" or cssBases.isupper()): vs = varsFromRefAndRead(refId, refPos, refBase, cssBases, confidence=conf, coverage=cov, frequency1=cssFreq) vars = vars + vs if config.diploid: vars = filter(_isSameLengthVariant, vars) return sorted(vars) def tabulateBaseCalls(refWindow, alns, realignHomopolymers=False): """ Go through the reads and build up the structured baseCallsMatrix table, which tabulates the read bases occurring at each reference coordinate in each read. This code is somewhat tricky, read carefully. 
""" _, refStart, refEnd = refWindow windowSize = refEnd - refStart baseCallsMatrix = np.zeros(shape=(len(alns), windowSize), dtype="S8") for i, aln in enumerate(alns): aln = aln.clippedTo(refStart, refEnd) alnRef = aln.reference(orientation="genomic") alnRead = aln.read(orientation="genomic") if realignHomopolymers: alnRef, alnRead = normalizeHomopolymerGaps(alnRef, alnRead) # Idea: scan through the ref, read; for each non-gap character # in ref, record all non-gap characters seen in read since # last ref gap. readBases = [] accum = [] for (refBase, readBase) in izip(alnRef, alnRead): if readBase != "-": readBases.append(readBase) if refBase != "-": basesForRefPos = "".join(readBases) if readBases else "-" accum.append(basesForRefPos) readBases = [] s, e = (aln.referenceStart - refStart, aln.referenceEnd - refStart) baseCallsMatrix[i, s:e] = accum return baseCallsMatrix # # ------ HACKISH POSTERIOR PROBABILITY CALCULATION ---------- # EPS = 0.05 LOGEPS = np.log(EPS) LOG_O_M_EPS = np.log(1-EPS) LOG_O_M_EPS_2 = np.log((1-EPS)/2) def posteriorConfidences(depth, cssFreq, altFreq, diploid=False, cap=40): """ Return crude approximations to the posterior probabilities of the genotypes s_1 and s_1/s_2, where s_1 and s_2 are the observed consensus and alternate allele. The assumption here is that the probability of the genotype being anything other that s_1, s_2, or s_1/s_2 is vanishingly small. Not really a very good assumption, but plurality is not our real algorithm anyway. 
""" cssFreq = cssFreq+1 altFreq = altFreq+1 depth = depth + 2 cssLL_ = cssFreq*LOG_O_M_EPS + (depth-cssFreq)*LOGEPS altLL_ = altFreq*LOG_O_M_EPS + (depth-altFreq)*LOGEPS cssL_ = np.exp(cssLL_) altL_ = np.exp(altLL_) if diploid: hetLL_ = (cssFreq+altFreq)*LOG_O_M_EPS_2 + (depth-cssFreq-altFreq)*LOGEPS hetL_ = np.exp(hetLL_) total = cssL_ + altL_ + hetL_ hetProb = hetL_/total hetConf = -10*np.log10(1.-hetProb) if (hetProb < 1) else cap else: total = cssL_ + altL_ hetConf = 0 cssProb = cssL_/total cssConf = -10*np.log10(1.-cssProb) if (cssProb < 1) else cap return int(min(cap, cssConf)), int(min(cap, hetConf)) # # -------------- Plurality Worker class -------------------- # class PluralityWorker(object): @property def pluralityConfig(self): return self._algorithmConfig def onStart(self): random.seed(42) def onChunk(self, workChunk): referenceWindow = workChunk.window refSeqInWindow = reference.sequenceInWindow(referenceWindow) logging.info("Plurality operating on %s" % reference.windowToString(referenceWindow)) if not workChunk.hasCoverage: noCallCss = Consensus.noCallConsensus(options.noEvidenceConsensusCall, referenceWindow, refSeqInWindow) return (referenceWindow, (noCallCss, [])) alnHits = readsInWindow(self._inAlnFile, referenceWindow, depthLimit=options.coverage, minMapQV=options.minMapQV, strategy="long-and-strand-balanced", stratum=options.readStratum, barcode=options.barcode) return (referenceWindow, pluralityConsensusAndVariants(referenceWindow, refSeqInWindow, alnHits, self.pluralityConfig)) # define both process and thread-based plurality callers class PluralityWorkerProcess(PluralityWorker, WorkerProcess): pass class PluralityWorkerThread(PluralityWorker, WorkerThread): pass # # --------------------- Plugin API -------------------------------- # # Pluggable module API for algorithms: # - Algorithm lives in a package # - Package must never fail to import, even if some of # its dependencies are not installed. 
# - Package must provide a main module exposing these top level # variables/methods: # - name = str # - availability = (bool, str) # - configure -> options -> cmph5 -> algorithm specific config object; # (can raise IncompatibleDataException) # - slaveFactories -> bool -> (class, class) __all__ = [ "name", "availability", "configure", "slaveFactories" ] name = "plurality" availability = (True, "OK") def slaveFactories(threaded): if threaded: return (PluralityWorkerThread, ResultCollectorThread) else: return (PluralityWorkerProcess, ResultCollectorProcess) def configure(options, cmpH5): pluralityConfig = PluralityConfig(minMapQV=options.minMapQV, minCoverage=options.minCoverage, minConfidence=options.minConfidence, diploid=options.diploid, noEvidenceConsensus=options.noEvidenceConsensusCall) return pluralityConfig GenomicConsensus-master/GenomicConsensus/poa/000077500000000000000000000000001274347070600217045ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/poa/__init__.py000066400000000000000000000035011274347070600240140ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander GenomicConsensus-master/GenomicConsensus/poa/poa.py000066400000000000000000000367251274347070600230520ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import import itertools, logging, math, random from collections import Counter import ConsensusCore as cc, numpy as np from ..utils import readsInWindow, snd, third from .. 
import reference from ..options import options from ..consensus import Consensus, join from ..windows import kSpannedIntervals, holes, subWindow from ..variants import Variant, filterVariants, annotateVariants from ..Worker import WorkerProcess, WorkerThread from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread # # --------------- Configuration ---------------------- # class PoaConfig(object): """ Poa configuration options """ def __init__(self, aligner="affine", minMapQV=10, minPoaCoverage=3, maxPoaCoverage=100, noEvidenceConsensus="nocall", readStumpinessThreshold=0.1, minReadScore=0.75, minHqRegionSnr=3.75): self.aligner = aligner self.minMapQV = minMapQV self.minPoaCoverage = minPoaCoverage self.maxPoaCoverage = maxPoaCoverage self.noEvidenceConsensus = noEvidenceConsensus self.readStumpinessThreshold = readStumpinessThreshold self.minReadScore = minReadScore self.minHqRegionSnr = minHqRegionSnr # # ----------- The actual algorithm code ------------- # def filterAlns(alns, poaConfig): """ Given alns (already clipped to the window bounds), filter out any that are deemed insufficiently high-quality for POA. By and large we avoid doing any filtering to avoid potential reference bias in variant calling. However at the moment the POA (and potentially other components) breaks when there is a read of zero length. So we throw away reads that are "stumpy", where the aligner has inserted a large gap, such that while the alignment technically spans the window, it may not have any read content therein: Ref ATGATCCAGTTACTCCGATAAA Read ATG---------------TA-A Win. [ ) """ return [ a for a in alns if a.readLength >= (poaConfig.readStumpinessThreshold * a.referenceSpan) and min(a.hqRegionSnr) >= poaConfig.minHqRegionSnr and a.readScore >= poaConfig.minReadScore ] def variantsFromAlignment(a, refWindow, cssQvInWindow=None, siteCoverage=None): """ Extract the variants implied by a pairwise alignment to the reference. 
""" variants = [] refId, refStart, _ = refWindow refPos = refStart cssPos = 0 tbl = zip(a.Transcript(), a.Target(), a.Query()) # We don't call variants where either the reference or css is 'N' grouper = lambda row: "N" if (row[1]=="N" or row[2]=="N") else row[0] runs = itertools.groupby(tbl, grouper) for code, run in runs: assert code in "RIDMN" run = list(run) ref = "".join(map(snd, run)) refLen = len(ref) - Counter(ref)["-"] css = "".join(map(third, run)) cssLen = len(css) - Counter(css)["-"] variant = None if code == "M" or code == "N": pass elif code == "R": assert len(css)==len(ref) variant = Variant(refId, refPos, refPos+len(css), ref, css) elif code == "I": variant = Variant(refId, refPos, refPos, "", css) elif code == "D": variant = Variant(refId, refPos, refPos + len(ref), ref, "") if variant is not None: # HACK ALERT: variants at the first and last position # are not handled correctly if siteCoverage is not None and np.size(siteCoverage) > 0: refPos_ = min(refPos-refStart, len(siteCoverage)-1) variant.coverage = siteCoverage[refPos_] if cssQvInWindow is not None and np.size(cssQvInWindow) > 0: cssPos_ = min(cssPos, len(cssQvInWindow)-1) variant.confidence = cssQvInWindow[cssPos_] variants.append(variant) refPos += refLen cssPos += cssLen return variants def variantsAndConfidence(refWindow, refSequence, cssSequence, aligner="affine"): """ Compute the confidence for each position, and compare the consensus and reference in this window, returning a list of variants """ refId, refStart, refEnd = refWindow if aligner == "affine": align = cc.AlignAffine else: align = cc.Align ga = align(refSequence, cssSequence) confidence = np.ones((len(cssSequence),), dtype=np.uint8) * 20 variants = variantsFromAlignment(ga, refWindow, confidence) return (confidence, variants) def consensusAndVariantsForAlignments(refWindow, refSequence, alns, poaConfig): """ Call consensus on this interval---without subdividing the interval further. Testable! Clipping has already been done! 
""" _, refStart, refEnd = refWindow # Compute the POA consensus, which is our initial guess, and # should typically be > 99.5% accurate fwdSequences = [ a.read(orientation="genomic", aligned=False) for a in alns if a.spansReferenceRange(refStart, refEnd) ] try: assert len(fwdSequences) >= poaConfig.minPoaCoverage p = cc.PoaConsensus.FindConsensus(fwdSequences[:poaConfig.maxPoaCoverage]) except: logging.info("%s: POA could not be generated" % (refWindow,)) css = Consensus.noCallConsensus(poaConfig.noEvidenceConsensus, refWindow, refSequence) return (css, []) poaCss = p.Sequence confidence, variants = \ variantsAndConfidence(refWindow, refSequence, poaCss, poaConfig.aligner) css = Consensus(refWindow, poaCss, confidence) return (css, variants) def poaConsensusAndVariants(alnFile, refWindow, referenceContig, depthLimit, poaConfig): """ High-level routine for calling the consensus for a window of the genome given an alignment. Identifies the coverage contours of the window in order to identify subintervals where a good consensus can be called. Creates the desired "no evidence consensus" where there is inadequate coverage. 
""" winId, winStart, winEnd = refWindow logging.info("POA operating on %s" % reference.windowToString(refWindow)) if options.fancyChunking: # 1) identify the intervals with adequate coverage for poa # consensus; restrict to intervals of length > 10 alnHits = readsInWindow(alnFile, refWindow, depthLimit=20000, minMapQV=poaConfig.minMapQV, strategy="longest", stratum=options.readStratum, barcode=options.barcode) starts = np.fromiter((hit.tStart for hit in alnHits), np.int) ends = np.fromiter((hit.tEnd for hit in alnHits), np.int) intervals = kSpannedIntervals(refWindow, poaConfig.minPoaCoverage, starts, ends, minLength=10) coverageGaps = holes(refWindow, intervals) allIntervals = sorted(intervals + coverageGaps) if len(allIntervals) > 1: logging.info("Usable coverage in %s: %r" % (reference.windowToString(refWindow), intervals)) else: allIntervals = [ (winStart, winEnd) ] # 2) pull out the reads we will use for each interval # 3) call consensusForAlignments on the interval subConsensi = [] variants = [] for interval in allIntervals: intStart, intEnd = interval intRefSeq = referenceContig[intStart:intEnd] subWin = subWindow(refWindow, interval) windowRefSeq = referenceContig[intStart:intEnd] alns = readsInWindow(alnFile, subWin, depthLimit=depthLimit, minMapQV=poaConfig.minMapQV, strategy="longest", stratum=options.readStratum, barcode=options.barcode) clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ] clippedAlns = filterAlns(clippedAlns_, poaConfig) if len([ a for a in clippedAlns if a.spansReferenceRange(*interval) ]) >= poaConfig.minPoaCoverage: logging.debug("%s: Reads being used: %s" % (reference.windowToString(subWin), " ".join([str(hit.readName) for hit in alns]))) css, variants_ = \ consensusAndVariantsForAlignments(subWin, intRefSeq, clippedAlns, poaConfig) filteredVars = filterVariants(options.minCoverage, options.minConfidence, variants_) # Annotate? 
if options.annotateGFF: annotateVariants(filteredVars, clippedAlns) variants += filteredVars # Dump? shouldDumpEvidence = \ ((options.dumpEvidence == "all") or (options.dumpEvidence == "variants") and (len(variants) > 0)) if shouldDumpEvidence: logging.info("POA does not yet support --dumpEvidence") # dumpEvidence(options.evidenceDirectory, # subWin, windowRefSeq, # clippedAlns, css) else: css = Consensus.noCallConsensus(poaConfig.noEvidenceConsensus, subWin, intRefSeq) subConsensi.append(css) # 4) glue the subwindow consensus objects together to form the # full window consensus css = join(subConsensi) # 5) Return return css, variants # # -------------- Poa Worker class -------------------- # class PoaWorker(object): @property def poaConfig(self): return self._algorithmConfig def onStart(self): random.seed(42) def onChunk(self, workChunk): referenceWindow = workChunk.window refId, refStart, refEnd = referenceWindow refSeqInWindow = reference.sequenceInWindow(referenceWindow) # Quick cutout for no-coverage case if not workChunk.hasCoverage: noCallCss = Consensus.noCallConsensus(self.poaConfig.noEvidenceConsensus, referenceWindow, refSeqInWindow) return (referenceWindow, (noCallCss, [])) # General case eWindow = reference.enlargedReferenceWindow(referenceWindow, options.referenceChunkOverlap) _, eStart, eEnd = eWindow # We call consensus on the enlarged window and then map back # to the reference and clip the consensus at the implied # bounds. This seems to be more reliable thank cutting the # consensus bluntly refContig = reference.byName[refId].sequence refSequenceInEnlargedWindow = refContig[eStart:eEnd] # # Get the consensus for the enlarged window. # css_, variants_ = \ poaConsensusAndVariants(self._inAlnFile, eWindow, refContig, options.coverage, self.poaConfig) # # Restrict the consensus and variants to the reference window. 
# ga = cc.Align(refSequenceInEnlargedWindow, css_.sequence) targetPositions = cc.TargetToQueryPositions(ga) cssStart = targetPositions[refStart-eStart] cssEnd = targetPositions[refEnd-eStart] cssSequence = css_.sequence[cssStart:cssEnd] cssQv = css_.confidence[cssStart:cssEnd] variants = [ v for v in variants_ if refStart <= v.refStart < refEnd ] consensusObj = Consensus(referenceWindow, cssSequence, cssQv) return (referenceWindow, (consensusObj, variants)) # define both process and thread-based plurality callers class PoaWorkerProcess(PoaWorker, WorkerProcess): pass class PoaWorkerThread(PoaWorker, WorkerThread): pass # # --------------------- Plugin API -------------------------------- # # Pluggable module API for algorithms: # - Algorithm lives in a package # - Package must never fail to import, even if some of # its dependencies are not installed. # - Package must provide a main module exposing these top level # variables/methods: # - name = str # - availability = (bool, str) # - configure -> options -> cmph5 -> algorithm specific config object; # (can raise IncompatibleDataException) # - slaveFactories -> bool -> (class, class) __all__ = [ "name", "availability", "configure", "slaveFactories" ] name = "poa" availability = (True, "OK") def slaveFactories(threaded): if threaded: return (PoaWorkerThread, ResultCollectorThread) else: return (PoaWorkerProcess, ResultCollectorProcess) def configure(options, _): poaConfig = PoaConfig(aligner=options.aligner, minMapQV=options.minMapQV, noEvidenceConsensus=options.noEvidenceConsensusCall, minReadScore=options.minReadScore, minHqRegionSnr=options.minHqRegionSnr) return poaConfig 
GenomicConsensus-master/GenomicConsensus/quiver/000077500000000000000000000000001274347070600224405ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/__init__.py000066400000000000000000000035531274347070600245570ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander import utils import model import evidence GenomicConsensus-master/GenomicConsensus/quiver/diploid.py000066400000000000000000000214101274347070600244340ustar00rootroot00000000000000# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from GenomicConsensus.quiver.utils import allSingleBaseMutations from GenomicConsensus.variants import Variant import numpy as np import ConsensusCore as cc # IUPAC reference: # http://www.bioinformatics.org/sms/iupac.html _packIupac = { ("A", "G") : "R" , ("G", "A") : "R" , ("C", "T") : "Y" , ("T", "C") : "Y" , ("G", "C") : "S" , ("C", "G") : "S" , ("A", "T") : "W" , ("T", "A") : "W" , ("G", "T") : "K" , ("T", "G") : "K" , ("A", "C") : "M" , ("C", "A") : "M" } _unpackIupac = { "R" : ("A", "G") , "Y" : ("C", "T") , "S" : ("G", "C") , "W" : ("A", "T") , "K" : ("G", "T") , "M" : ("A", "C") } def packIUPAC(bases): return _packIupac[bases] def unpackIUPAC(iupacCode): return _unpackIupac[iupacCode] def isHeterozygote(base): return (base in _unpackIupac) def packMuts(cssBase, mut1, mut2): # Turn two muts (with same Start, End, LengthDiff) into a single mutation to # IUPAC. The no-op mutation is coded as None. 
# # Example1: (_, Subs A, Subs T) -> Subs W # Example2: (_, Ins A, Ins T) -> Ins W # Example3: (A, None, Subs T) -> Subs W # nonNullMut = mut1 or mut2 start = nonNullMut.Start() end = nonNullMut.End() mutType = nonNullMut.Type() newBases1 = mut1.NewBases() if mut1 else cssBase newBases2 = mut2.NewBases() if mut2 else cssBase newBasesPacked = packIUPAC((newBases1, newBases2)) return cc.Mutation(mutType, start, end, newBasesPacked) def scoresForPosition(mms, pos): muts = allSingleBaseMutations(mms.Template(), positions=[pos]) noMutScore = [0] * mms.NumReads() mutScores_ = [ mms.Scores(mut) for mut in muts ] mutScores = np.column_stack([noMutScore] + mutScores_).astype(np.float32) return mutScores def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow, cssQvInWindow=None, siteCoverage=None, aligner="affine", mms=None): """ Compare the consensus and the reference in this window, returning a list of variants. Uses the mms to identify heterozygous variants. """ assert (cssQvInWindow is None) == (siteCoverage is None) # Both or none refId, refStart, refEnd = refWindow if mms is not None: # # Hunting diploid variants: # 1. find confident heterozygous sites; # 2. build a "diploid consensus" using IUPAC encoding # for het sites; mark cssQv accordingly # 3. align diploid consensus to reference # 4. extract and decorate variants # assert mms.Template() == cssSequenceInWindow iupacMutations = [] # List of (Mutation, confidence) for pos in xrange(0, mms.TemplateLength()): ds = cc.IsSiteHeterozygous(scoresForPosition(mms, pos), 40) if ds: muts = [None] + list(allSingleBaseMutations(cssSequenceInWindow, positions=[pos])) mut0 = muts[ds.Allele0] mut1 = muts[ds.Allele1] cssBase = cssSequenceInWindow[pos] packedMut = packMuts(cssBase, mut0, mut1) iupacMutations.append((packedMut, 40)) # Create diploidCss by applying mutations, meanwhile updating the # confidence vector accordingly. 
diploidCss = cc.ApplyMutations([pair[0] for pair in iupacMutations], cssSequenceInWindow) diploidQv = list(cssQvInWindow) if cssQvInWindow is not None else None runningLengthDiff = 0 for (mut, conf) in iupacMutations: start = mut.Start() + runningLengthDiff end = mut.End() + runningLengthDiff diploidQv[start:end] = [conf] assert len(diploidCss) == len(diploidQv) cssSequenceInWindow = diploidCss cssQvInWindow = diploidQv vars = variantsFromAlignment(refWindow, refSequenceInWindow, cssSequenceInWindow, cssQvInWindow, siteCoverage) return vars def variantsFromAlignment(refWindow, refSeq, cssSeq, cssQV=None, refCoverage=None): """ Extract the variants implied by a pairwise alignment of cssSeq to refSeq reference. If cssQV, refCoverage are provided, they will be used to decorate the variants with those attributes. Arguments: - cssQV: QV array, same length as css - refCoverage: coverage array, sample length as reference window This is trickier than in the haploid case. We have to break out diploid variants as single bases, in order to avoid implying phase. 
""" variants = [] refId, refStart, refEnd = refWindow aln = cc.AlignAffineIupac(refSeq, cssSeq); alnTarget = aln.Target() alnQuery = aln.Query() assert (cssQV is None) == (refCoverage is None) # Both or none assert len(refSeq) == refEnd - refStart assert cssQV is None or len(cssSeq) == len(cssQV) assert refCoverage is None or len(refSeq) == len(refCoverage) transcript = [ X if (Q != "N" and T != "N") else "N" for (X, T, Q) in zip(aln.Transcript(), alnTarget, alnQuery) ] variants = [] runStart = -1 runStartRefPos = None runX = None refPos = refStart for pos, (X, T, Q) in enumerate(zip(transcript, alnTarget, alnQuery)): if X != runX or isHeterozygote(Q): if runStart >= 0 and runX not in "MN": # Package up the run and dump a variant ref = alnTarget[runStart:pos].replace("-", "") read = alnQuery [runStart:pos].replace("-", "") if isHeterozygote(read): allele1, allele2 = unpackIUPAC(read) var = Variant(refId, runStartRefPos, refPos, ref, allele1, allele2) else: var = Variant(refId, runStartRefPos, refPos, ref, read) variants.append(var) runStart = pos runStartRefPos = refPos runX = X if T != "-": refPos += 1 # This might be better handled within the loop above, just keeping # track of Qpos, Tpos if cssQV is not None: cssPosition = cc.TargetToQueryPositions(aln) for v in variants: # HACK ALERT: we are not really handling the confidence or # coverage for variants at last position of the window # correctly here. refPos_ = min(v.refStart-refStart, len(refCoverage)-1) cssPos_ = min(cssPosition[v.refStart-refStart], len(cssQV)-1) if refCoverage is not None: v.coverage = refCoverage[refPos_] if cssQV is not None: v.confidence = cssQV[cssPos_] return variants GenomicConsensus-master/GenomicConsensus/quiver/evidence.py000066400000000000000000000157371274347070600246110ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander __all__ = [ "dumpEvidence", "QuiverEvidence" ] import h5py, logging, os.path, numpy as np from collections import namedtuple from itertools import groupby from bisect import bisect_left, bisect_right from pbcore.io import FastaReader, FastaWriter from .utils import scoreMatrix from .. 
import reference def dumpEvidence(evidenceDumpBaseDirectory, refWindow, refSequence, alns, quiverConsensus): # Format of evidence dump: # evidence_dump/ # ref000001/ # 0-1005/ # reference.fa # reads.fa # consensus.fa # quiver-scores.h5 # 995-2005/ # ... join = os.path.join refId, refStart, refEnd = refWindow refName = reference.idToName(refId) windowDirectory = join(evidenceDumpBaseDirectory, refName, "%d-%d" % (refStart, refEnd)) logging.info("Dumping evidence to %s" % (windowDirectory,)) if os.path.exists(windowDirectory): raise Exception, "Evidence dump does not expect directory %s to exist." % windowDirectory os.makedirs(windowDirectory) refFasta = FastaWriter(join(windowDirectory, "reference.fa")) readsFasta = FastaWriter(join(windowDirectory, "reads.fa")) consensusFasta = FastaWriter(join(windowDirectory, "consensus.fa")) windowName = refName + (":%d-%d" % (refStart, refEnd)) refFasta.writeRecord(windowName, refSequence) refFasta.close() consensusFasta.writeRecord(windowName + "|quiver", quiverConsensus.sequence) consensusFasta.close() rowNames, columnNames, baselineScores, scores = scoreMatrix(quiverConsensus.mms) quiverScoreFile = h5py.File(join(windowDirectory, "quiver-scores.h5")) quiverScoreFile.create_dataset("Scores", data=scores) vlen_str = h5py.special_dtype(vlen=str) quiverScoreFile.create_dataset("RowNames", data=rowNames, dtype=vlen_str) quiverScoreFile.create_dataset("ColumnNames", data=columnNames, dtype=vlen_str) quiverScoreFile.create_dataset("BaselineScores", data=baselineScores) quiverScoreFile.close() for aln in alns: readsFasta.writeRecord(str(aln.rowNumber), aln.read(orientation="genomic", aligned=False)) readsFasta.close() class QuiverEvidence(object): """ An experimental reader class for quiver evidence dumps produced by quiver --dumpEvidence """ Mutation = namedtuple("Mutation", ("Position", "Type", "FromBase", "ToBase")) @staticmethod def _parseMutName(mutName): fields = mutName.split(" ") pos = int(fields[0]) type, fromBase, _, 
toBase = fields[1:] return QuiverEvidence.Mutation(pos, type, fromBase, toBase) def __init__(self, path, refStart, consensus, rowNames, colNames, baselineScores, scores): self.path = path self.refStart = refStart self.consensus = consensus self.rowNames = rowNames self.colNames = colNames self.baselineScores = baselineScores self.scores = scores self.muts = map(QuiverEvidence._parseMutName, self.colNames) @property def positions(self): return [ mut.Position for mut in self.muts ] @property def uniquePositions(self): return sorted(list(set(self.positions))) @property def totalScores(self): return self.baselineScores[:, np.newaxis] + self.scores @staticmethod def load(path): if path.endswith("/"): path = path[:-1] refWin_ = path.split("/")[-1].split("-") refStart = int(refWin_[0]) with FastaReader(path + "/consensus.fa") as fr: consensus = next(iter(fr)).sequence with h5py.File(path + "/quiver-scores.h5", "r") as f: scores = f["Scores"].value baselineScores = f["BaselineScores"].value colNames = f["ColumnNames"].value rowNames = f["RowNames"].value return QuiverEvidence(path, refStart, consensus, rowNames, colNames, baselineScores, scores) def forPosition(self, pos): posStart = bisect_left(self.positions, pos) posEnd = bisect_right(self.positions, pos) return QuiverEvidence(self.path, self.refStart, self.consensus, self.rowNames, self.colNames[posStart:posEnd], self.baselineScores, self.scores[:, posStart:posEnd]) def justSubstitutions(self): colMask = np.array(map(lambda s: ("Sub" in s), self.colNames)) return QuiverEvidence(self.path, self.refStart, self.consensus, self.rowNames, self.colNames[colMask], self.baselineScores, self.scores[:, colMask]) def rowNumbers(self): with FastaReader(self.path + "/reads.fa") as fr: return [ int(ctg.name) for ctg in fr ] 
GenomicConsensus-master/GenomicConsensus/quiver/model.py000066400000000000000000000400071274347070600241130ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
################################################################################# # Author: David Alexander import numpy as np, ConfigParser, collections, logging from glob import glob from os.path import join from pkg_resources import resource_filename, Requirement from GenomicConsensus.utils import die from GenomicConsensus.quiver.utils import asFloatFeature, fst, snd from pbcore.chemistry import ChemistryLookupError from pbcore.io import CmpH5Alignment import ConsensusCore as cc __all__ = [ "ParameterSet", "AllQVsModel", "NoMergeQVModel", "NoQVsModel", "InDelQVsModel", "AllQVsMergingByChannelModel", "NoQVsMergingByChannelModel", "QuiverConfig", "allQVsLoaded", "loadParameterSet", "loadQuiverConfig" ] _basicParameterNames = \ [ "Match" , "Mismatch" , "MismatchS", "Branch" , "BranchS", "DeletionN" , "DeletionWithTag" , "DeletionWithTagS", "Nce" , "NceS", "Merge" , "MergeS" ] _mergeByChannelParameterNames = \ [ "Match" , "Mismatch" , "MismatchS", "Branch" , "BranchS", "DeletionN" , "DeletionWithTag" , "DeletionWithTagS", "Nce" , "NceS", "Merge_A" , "Merge_C", "Merge_G" , "Merge_T", "MergeS_A" , "MergeS_C", "MergeS_G" , "MergeS_T" ] ALL_FEATURES = [ "InsertionQV" , "SubstitutionQV" , "DeletionQV" , "DeletionTag" , "MergeQV" ] # # Model classes # class Model(object): requiredFeatures = set([]) parameterNames = [] @classmethod def isCompatibleWithCmpH5(cls, cmpH5): return all(cmpH5.hasBaseFeature(feature) for feature in cls.requiredFeatures) @classmethod def extractFeatures(cls, aln): """ Extract the data in a cmp.h5 alignment record into a ConsensusCore-friendly `QvSequenceFeatures` object. Will extract only the features relevant to this Model, zero-filling the other features arrays. """ if isinstance(aln, CmpH5Alignment): # # For cmp.h5 input, we have to use the AlnArray to see where the # gaps are (see bug 20752), in order to support old files. 
# alnRead = np.fromstring(aln.read(), dtype=np.int8) gapMask = alnRead == ord("-") _args = [ alnRead[~gapMask].tostring() ] for feature in ALL_FEATURES: if feature in cls.requiredFeatures: _args.append(asFloatFeature(aln.baseFeature(feature)[~gapMask])) else: _args.append(cc.FloatFeature(int(aln.readLength))) return cc.QvSequenceFeatures(*_args) else: _args = [ aln.read(aligned=False, orientation="native") ] for feature in ALL_FEATURES: if feature in cls.requiredFeatures: _args.append(asFloatFeature(aln.baseFeature(feature, aligned=False))) else: _args.append(cc.FloatFeature(int(aln.readLength))) return cc.QvSequenceFeatures(*_args) @classmethod def extractMappedRead(cls, aln, windowStart): """ Given a clipped alignment, convert its coordinates into template space (starts with 0), bundle it up with its features as a MappedRead. """ assert aln.referenceSpan > 0 name = aln.readName chemistry = chemOrUnknown(aln) read = cc.Read(cls.extractFeatures(aln), name, chemistry) return cc.MappedRead(read, int(aln.isReverseStrand), int(aln.referenceStart - windowStart), int(aln.referenceEnd - windowStart)) class AllQVsModel(Model): name = "AllQVsModel" rank = 3 requiredFeatures = { "InsertionQV", "SubstitutionQV", "DeletionQV" , "DeletionTag" , "MergeQV" } parameterNames = _basicParameterNames class NoMergeQVModel(Model): name = "NoMergeQVModel" rank = 2 requiredFeatures = { "InsertionQV", "SubstitutionQV", "DeletionQV" , "DeletionTag" } parameterNames = _basicParameterNames class NoQVsModel(Model): name = "NoQVsModel" rank = 1 requiredFeatures = set([]) parameterNames = _basicParameterNames class AllQVsMergingByChannelModel(Model): name = "AllQVsMergingByChannelModel" rank = 4 requiredFeatures = AllQVsModel.requiredFeatures parameterNames = _mergeByChannelParameterNames class NoQVsMergingByChannelModel(Model): name = "NoQVsMergingByChannelModel" rank = -1 requiredFeatures = set([]) parameterNames = _mergeByChannelParameterNames class InDelQVsModel(Model): name = 
"InDelQVsModel" rank = -1 requiredFeatures = { "InsertionQV", "DeletionQV", "DeletionTag" } parameterNames = _mergeByChannelParameterNames # # Code for accessing the ConsensusCore quiver parameter sets # from the .ini config file. # class ParameterSet(object): def __init__(self, name, model, chemistry, ccQuiverConfig): self.name = name self.chemistry = chemistry self.model = model self.ccQuiverConfig = ccQuiverConfig def _getResourcesDirectory(): return resource_filename(Requirement.parse("GenomicConsensus"), "GenomicConsensus/quiver/resources") def chemOrUnknown(aln): """ Chemistry if it's loaded, otherwise "unknown" (If chemistry wasn't loaded, user must have manually selected parameter set) """ try: chemistry = aln.sequencingChemistry except ChemistryLookupError: chemistry = "unknown" return chemistry def _isChemistryMixSupported(allChems): return len(allChems) == 1 or set(allChems).issubset(set(["C2", "P4-C2", "P5-C3", "P6-C4"])) def _findParametersFile(filenameOrDirectory=None): if filenameOrDirectory is None: filenameOrDirectory = _getResourcesDirectory() # Given a full path to an .ini file, return the path if filenameOrDirectory.endswith(".ini"): return filenameOrDirectory # Given a path to a bundle (the directory with a date as its # name), return the path to the .ini file within foundInThisBundle = glob(join(filenameOrDirectory, "GenomicConsensus/QuiverParameters.ini")) if foundInThisBundle: return foundInThisBundle[0] # Given a directory containing bundles, return the path to the # .ini file within the lexically largest bundle subdirectory foundInBundlesBelow = glob(join(filenameOrDirectory, "*/GenomicConsensus/QuiverParameters.ini")) if foundInBundlesBelow: return sorted(foundInBundlesBelow)[-1] raise ValueError("Unable to find parameter set file (QuiverParameters.ini)") def _buildParameterSet(parameterSetName, nameValuePairs): chem, modelName = parameterSetName.split(".")[:2] if modelName == "AllQVsModel": model = AllQVsModel elif modelName == 
"NoMergeQVModel": model = NoMergeQVModel elif modelName == "NoQVsModel": model = NoQVsModel elif modelName == "AllQVsMergingByChannelModel": model = AllQVsMergingByChannelModel elif modelName == "NoQVsMergingByChannelModel": model = NoQVsMergingByChannelModel else: logging.error("Found parameter set for unrecognized model: %s" % modelName) return None if map(fst, nameValuePairs) != model.parameterNames: die("Malformed parameter set file") qvModelParams = cc.QvModelParams(chem, modelName, *[ float(snd(pair)) for pair in nameValuePairs ]) # # Dirty hack for --diploid support, diploid model is scaled # differently. Needs further work. # if parameterSetName == "unknown.NoQVsModel": bandingOptions = cc.BandingOptions(4, 24) fastScoreThreshold = -50 else: bandingOptions = cc.BandingOptions(4, 6) fastScoreThreshold = -12.5 quiverConfig = cc.QuiverConfig(qvModelParams, cc.ALL_MOVES, bandingOptions, fastScoreThreshold) return ParameterSet(parameterSetName, model, chem, quiverConfig) def _loadParameterSets(iniFilename): # returns dict: name -> ParameterSet cp = ConfigParser.ConfigParser() cp.optionxform=str cp.read([iniFilename]) sections = cp.sections() parameterSets = {} for sectionName in sections: parameterSet = _buildParameterSet(sectionName, cp.items(sectionName)) if parameterSet: parameterSets[sectionName] = parameterSet return parameterSets def _bestParameterSet(parameterSets, chemistry, qvsAvailable): fallbackParameterSets = \ [ paramSet for paramSet in parameterSets.itervalues() if paramSet.chemistry == "unknown" if paramSet.model.requiredFeatures.issubset(qvsAvailable) ] perChemistryParameterSets = \ [ paramSet for paramSet in parameterSets.itervalues() if paramSet.chemistry == chemistry if paramSet.model.requiredFeatures.issubset(qvsAvailable) ] # Find the best one, under the assumption that a chemistry-trained # parameter set is always better than the "unknown" chemistry set. 
if perChemistryParameterSets: return max(perChemistryParameterSets, key=lambda ps: ps.model.rank) elif fallbackParameterSets: return max(fallbackParameterSets, key=lambda ps: ps.model.rank) else: raise Exception("Quiver: No applicable parameter set found!") # # QuiverConfig: the kitchen sink class of quiver options # class QuiverConfig(object): """ Quiver configuration options """ def __init__(self, parameterSets, minMapQV=10, minPoaCoverage=3, maxPoaCoverage=11, mutationSeparation=10, mutationNeighborhood=20, maxIterations=40, refineDinucleotideRepeats=True, noEvidenceConsensus="nocall", computeConfidence=True, readStumpinessThreshold=0.1): self.minMapQV = minMapQV self.minPoaCoverage = minPoaCoverage self.maxPoaCoverage = maxPoaCoverage self.mutationSeparation = mutationSeparation self.mutationNeighborhood = mutationNeighborhood self.maxIterations = maxIterations self.refineDinucleotideRepeats = refineDinucleotideRepeats self.noEvidenceConsensus = noEvidenceConsensus self.computeConfidence = computeConfidence self.readStumpinessThreshold = readStumpinessThreshold self.parameterSets = parameterSets qct = cc.QuiverConfigTable() for (chem, pset) in self.parameterSets.items(): if chem == "*": qct.InsertDefault(pset.ccQuiverConfig) else: qct.InsertAs(chem, pset.ccQuiverConfig) self.ccQuiverConfigTbl = qct @staticmethod def _defaultQuiverParameters(): return loadQuiverConfig("unknown.NoQVsModel") def extractMappedRead(self, aln, windowStart): pset = self.parameterSets.get(chemOrUnknown(aln)) or \ self.parameterSets.get("*") model = pset.model return model.extractMappedRead(aln, windowStart) # # Convenience functions # def allQVsLoaded(cmpH5): """ Does this cmp.h5 file have the complete set of QV features? """ return AllQVsModel.isCompatibleWithCmpH5(cmpH5) def enoughQVsLoaded(cmpH5): """ If lacking QVs other than possibly the Merge QV, we should abort. This is the check. 
""" return NoMergeQVModel.isCompatibleWithCmpH5(cmpH5) def loadParameterSets(parametersFile=None, spec=None, cmpH5=None): """ spec is either: - chemName.modelName (complete spec), - chemName - None If the spec is incomplete, cmpH5 is required to determine the best available option. Returned value is a dict of completeSpec -> QuiverConfig """ if spec is None: chemistryName, modelName = None, None elif "." in spec: chemistryName, modelName = spec.split(".") else: chemistryName, modelName = spec, None assert (cmpH5 is not None) or (chemistryName and modelName) parametersFile = _findParametersFile(parametersFile) logging.info("Using Quiver parameters file %s" % parametersFile) sets = _loadParameterSets(parametersFile) if chemistryName and modelName: try: p = sets[spec] params = { "*" : p } except: die("Quiver: no available parameter set named %s" % \ spec) elif chemistryName: qvsAvailable = cmpH5.baseFeaturesAvailable() p = _bestParameterSet(sets, chemistryName, qvsAvailable) if p.chemistry != chemistryName: die("Quiver: no parameter set available compatible with this " + \ "cmp.h5 for chemistry \"%s\" " % chemistryName) params = { "*" : p } else: chemistryNames = list(set(cmpH5.sequencingChemistry)) # uniquify if "unknown" in chemistryNames: die("\"unknown\" chemistry in alignment file: either an unsupported chemistry " + "has been used, the alignment file has been improperly constructed, or " + "this version of SMRTanalysis is too old to recognize a new chemistry.") if not _isChemistryMixSupported(chemistryNames): die("Unsupported chemistry mix, cannot proceed.") qvsAvailable = cmpH5.baseFeaturesAvailable() bestParams = [ _bestParameterSet(sets, chemistryName, qvsAvailable) for chemistryName in chemistryNames ] params = dict(zip(chemistryNames, bestParams)) return params def loadQuiverConfig(spec=None, cmpH5=None, parametersFile=None, **quiverConfigOpts): params = loadParameterSets(parametersFile, spec, cmpH5) return QuiverConfig(parameterSets=params, 
**quiverConfigOpts) GenomicConsensus-master/GenomicConsensus/quiver/quiver.py000066400000000000000000000312601274347070600243270ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# ---------------------------------------------------------------------------
# GenomicConsensus/quiver/quiver.py: the "quiver" consensus plugin.
# (Original BSD license header for this file unchanged, as in the archive.)
# Author: David Alexander
# ---------------------------------------------------------------------------
import logging
import ConsensusCore as cc, numpy as np

from .. import reference
from ..options import options
from ..Worker import WorkerProcess, WorkerThread
from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread
from GenomicConsensus.consensus import Consensus, QuiverConsensus, join
from GenomicConsensus.windows import kSpannedIntervals, holes, subWindow
from GenomicConsensus.variants import filterVariants, annotateVariants
from GenomicConsensus.quiver.evidence import dumpEvidence
from GenomicConsensus.quiver import diploid

import GenomicConsensus.quiver.model as M
import GenomicConsensus.quiver.utils as U


def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
                                  depthLimit, quiverConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given an alignment file.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Returns a (consensus, [Variant]) pair for the window.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Quiver operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for quiver
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(cmpH5, refWindow,
                                  depthLimit=20000,
                                  minMapQV=quiverConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # FIX: `np.int` was removed in NumPy 1.24; the builtin int is the
        # documented equivalent dtype argument for fromiter.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends   = np.fromiter((hit.tEnd   for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow, quiverConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))
    else:
        allIntervals = [ (winStart, winEnd) ]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)
        # FIX: windowRefSeq was recomputed as the identical slice
        # referenceContig[intStart:intEnd]; alias it instead.
        windowRefSeq = intRefSeq

        alns = U.readsInWindow(cmpH5, subWin,
                               depthLimit=depthLimit,
                               minMapQV=quiverConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, quiverConfig)

        if len([ a for a in clippedAlns
                 if a.spansReferenceRange(*interval) ]) >= quiverConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           quiverConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            if options.diploid:
                variants_ = diploid.variantsFromConsensus(subWin, windowRefSeq,
                                                          css.sequence, css.confidence,
                                                          siteCoverage,
                                                          options.aligner,
                                                          css.mms)
            else:
                variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                    css.sequence, css.confidence,
                                                    siteCoverage,
                                                    options.aligner,
                                                    mms=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?  (Parenthesized to make the precedence explicit:
            # "all" always dumps; "variants" dumps only when variants exist.)
            shouldDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 ((options.dumpEvidence == "variants") and (len(variants) > 0)))
            if shouldDumpEvidence:
                dumpEvidence(options.evidenceDirectory,
                             subWin, windowRefSeq,
                             clippedAlns, css)
        else:
            css = QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
                                                  subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants


class QuiverWorker(object):
    """Mixin providing the per-chunk Quiver consensus work."""

    @property
    def quiverConfig(self):
        return self._algorithmConfig

    def onChunk(self, workChunk):
        """
        Compute consensus (and variants) for one work chunk, returning
        (referenceWindow, (Consensus, [Variant])).
        """
        referenceWindow = workChunk.window
        refId, refStart, refEnd = referenceWindow

        refSeqInWindow = reference.sequenceInWindow(referenceWindow)

        # Quick cutout for no-coverage case
        if not workChunk.hasCoverage:
            noCallCss = QuiverConsensus.noCallConsensus(self.quiverConfig.noEvidenceConsensus,
                                                        referenceWindow, refSeqInWindow)
            return (referenceWindow, (noCallCss, []))

        # General case
        eWindow = reference.enlargedReferenceWindow(referenceWindow,
                                                    options.referenceChunkOverlap)
        _, eStart, eEnd = eWindow

        # We call consensus on the enlarged window and then map back
        # to the reference and clip the consensus at the implied
        # bounds.  This seems to be more reliable than cutting the
        # consensus bluntly
        refContig = reference.byName[refId].sequence
        refSequenceInEnlargedWindow = refContig[eStart:eEnd]

        #
        # Get the consensus for the enlarged window.
        #
        css_, variants_ = \
            consensusAndVariantsForWindow(self._inAlnFile, eWindow,
                                          refContig, options.coverage,
                                          self.quiverConfig)

        #
        # Restrict the consensus and variants to the reference window.
        #
        ga = cc.Align(refSequenceInEnlargedWindow, css_.sequence)
        targetPositions = cc.TargetToQueryPositions(ga)
        cssStart = targetPositions[refStart-eStart]
        cssEnd   = targetPositions[refEnd-eStart]

        cssSequence = css_.sequence[cssStart:cssEnd]
        cssQv       = css_.confidence[cssStart:cssEnd]
        variants    = [ v for v in variants_
                        if refStart <= v.refStart < refEnd ]

        consensusObj = Consensus(referenceWindow,
                                 cssSequence,
                                 cssQv)
        return (referenceWindow, (consensusObj, variants))


#
# Slave process/thread classes
#
class QuiverWorkerProcess(QuiverWorker, WorkerProcess): pass
class QuiverWorkerThread(QuiverWorker, WorkerThread): pass


#
# Plugin API
#
__all__ = [ "name",
            "availability",
            "configure",
            "slaveFactories" ]

name = "quiver"
availability = (True, "OK")

def configure(options, cmpH5):
    """
    Validate the input data against Quiver's requirements and build the
    QuiverConfig used by the workers.  Raises IncompatibleDataException
    on CCS reads, Sequel chemistries, or missing QV tracks.
    """
    if options.verbosity > 1:
        cc.Logging.EnableDiagnosticLogging()

    if cmpH5.readType != "standard":
        raise U.IncompatibleDataException(
            "The Quiver algorithm requires an alignment file containing standard (non-CCS) reads." )

    if options.parametersSpec == "auto":
        # Reject Sequel chemistries explicitly---there are no Quiver
        # trainings for Sequel.  Arrow should be used.
        # (Note that power-users can bypass this requirement using an
        # explicit parameter set)
        for chem in cmpH5.sequencingChemistry:
            if chem.startswith("S/"):
                raise U.IncompatibleDataException(
                    "The Quiver algorithm is not trained for Sequel data. " +
                    "Please use the Arrow algorithm instead.")
        if options.diploid:
            logging.info("Diploid analysis--resorting to unknown.NoQVsModel until other " +
                         "parameter sets can be recalibrated.")
            params = M.loadParameterSets(options.parametersFile,
                                         spec="unknown.NoQVsModel")
        else:
            params = M.loadParameterSets(options.parametersFile,
                                         cmpH5=cmpH5)
            # FIX: message previously read "alignment file file" (doubled word).
            qvMsg = "This alignment file lacks some of the QV data tracks that are required " + \
                    "for optimal performance of the Quiver algorithm.  For optimal results" + \
                    " use the ResequencingQVs workflow in SMRTPortal with bas.h5 files " + \
                    "from an instrument using software version 1.3.1 or later, or the " + \
                    "--forQuiver option to pbalign."
            if not M.enoughQVsLoaded(cmpH5):
                raise U.IncompatibleDataException(qvMsg)
            elif not M.allQVsLoaded(cmpH5):
                logging.warn(qvMsg)
    else:
        params = M.loadParameterSets(options.parametersFile,
                                     spec=options.parametersSpec,
                                     cmpH5=cmpH5)
        if not all(ps.model.isCompatibleWithCmpH5(cmpH5)
                   for ps in params.values()):
            raise U.IncompatibleDataException(
                "Selected Quiver parameter set is incompatible with this alignment file " +
                "due to missing data tracks.")

    logging.info("Using Quiver parameter set(s): %s" %
                 (", ".join(ps.name for ps in params.values())))
    return M.QuiverConfig(minMapQV=options.minMapQV,
                          noEvidenceConsensus=options.noEvidenceConsensusCall,
                          refineDinucleotideRepeats=(not options.fastMode) and
                                                    options.refineDinucleotideRepeats,
                          computeConfidence=(not options.fastMode),
                          parameterSets=params)

def slaveFactories(threaded):
    # By default we use slave processes. The tuple ordering is important.
    if threaded:
        return (QuiverWorkerThread,  ResultCollectorThread)
    else:
        return (QuiverWorkerProcess, ResultCollectorProcess)
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # # Parameters for Quiver for different modeling and chemistry / # upstream analysis conditions. # # Author: David Alexander # Date: 2/12/2013 # # # C2 chemistry # [C2.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [C2.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [C2.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 # # These are the models used when the chemistry is not recognized. For # now, we fall back to the C2 parameters. 
# [unknown.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [unknown.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [unknown.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-05/000077500000000000000000000000001274347070600252615ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-05/GenomicConsensus/000077500000000000000000000000001274347070600305435ustar00rootroot00000000000000QuiverParameters.ini000066400000000000000000000120661274347070600344710ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-05/GenomicConsensus################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # # Parameters for Quiver for different modeling and chemistry / # upstream analysis conditions. 
# # Author: David Alexander # Date: 2/12/2013 # # # C2 chemistry # [C2.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [C2.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [C2.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 # # P4-C2 chemistry # [P4-C2.AllQVsMergingByChannelModel] Match = 0.266887694127 Mismatch = -1.54460829977 MismatchS = -0.0316527466982 Branch = -0.578688017857 BranchS = -0.0481504371325 DeletionN = -1.30511780446 DeletionWithTag = 0.023543148334 DeletionWithTagS = -0.044525216376 Nce = -0.11981002833 NceS = -0.094850201311 Merge_A = -0.719390215369 Merge_C = 0.0637508742437 Merge_G = 0.244651896573 Merge_T = -0.627109496237 MergeS_A = -0.0168917301257 MergeS_C = -0.0769984152015 MergeS_G = -0.0862808988248 MergeS_T = -0.137076261104 # # These are the models used when the chemistry is not recognized. For # now, we fall back to the C2 parameters. 
# [unknown.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [unknown.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [unknown.NoQVsModel] Match = 0.0 Mismatch = -4.6 MismatchS = 0.0 Branch = -2.4 BranchS = 0.0 DeletionN = -3.25 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -2.45 NceS = 0.0 Merge = -3.2 MergeS = 0.0 GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-09/000077500000000000000000000000001274347070600252655ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-09/GenomicConsensus/000077500000000000000000000000001274347070600305475ustar00rootroot00000000000000QuiverParameters.ini000066400000000000000000000133701274347070600344740ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2013-09/GenomicConsensus################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # # Parameters for Quiver for different modeling and chemistry / # upstream analysis conditions. 
# # Author: David Alexander # Date: 2/12/2013 # # # C2 chemistry # [C2.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [C2.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [C2.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 # # P4-C2 chemistry # [P4-C2.AllQVsMergingByChannelModel] Match = 0.266887694127 Mismatch = -1.54460829977 MismatchS = -0.0316527466982 Branch = -0.578688017857 BranchS = -0.0481504371325 DeletionN = -1.30511780446 DeletionWithTag = 0.023543148334 DeletionWithTagS = -0.044525216376 Nce = -0.11981002833 NceS = -0.094850201311 Merge_A = -0.719390215369 Merge_C = 0.0637508742437 Merge_G = 0.244651896573 Merge_T = -0.627109496237 MergeS_A = -0.0168917301257 MergeS_C = -0.0769984152015 MergeS_G = -0.0862808988248 MergeS_T = -0.137076261104 # # P5-C3 chemistry # [P5-C3.AllQVsMergingByChannelModel] Match = 0.622352656797 Mismatch = -0.562488732923 MismatchS = -0.00841054902241 Branch = -0.43638090639 BranchS = -0.0561792950471 DeletionN = -1.26498969947 DeletionWithTag = 0.0821496590925 DeletionWithTagS = -0.0264743524097 Nce = -0.00141566220304 NceS = -0.0658611326794 Merge_A = 1.63279281062 Merge_C = 1.31706088385 Merge_G = 1.30632789651 Merge_T = -0.253111709614 MergeS_A = -0.245574925502 MergeS_C = -0.112808623474 MergeS_G = -0.107257049139 MergeS_T = 
-0.0194831966664 # # These are the models used when the chemistry is not recognized. For # now, we fall back to the C2 parameters. # [unknown.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [unknown.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [unknown.NoQVsModel] Match = 0.0 Mismatch = -4.6 MismatchS = 0.0 Branch = -2.4 BranchS = 0.0 DeletionN = -3.25 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -2.45 NceS = 0.0 Merge = -3.2 MergeS = 0.0 GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-03/000077500000000000000000000000001274347070600252605ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-03/GenomicConsensus/000077500000000000000000000000001274347070600305425ustar00rootroot00000000000000QuiverParameters.ini000066400000000000000000000134211274347070600344640ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-03/GenomicConsensus################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # # Parameters for Quiver for different modeling and chemistry / # upstream analysis conditions. 
# # Author: David Alexander # Date: 2/12/2013 # # # C2 chemistry # [C2.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [C2.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [C2.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 # # P4-C2 chemistry # [P4-C2.AllQVsMergingByChannelModel] Match = 0.266887694127 Mismatch = -1.54460829977 MismatchS = -0.0316527466982 Branch = -0.578688017857 BranchS = -0.0481504371325 DeletionN = -1.30511780446 DeletionWithTag = 0.023543148334 DeletionWithTagS = -0.044525216376 Nce = -0.11981002833 NceS = -0.094850201311 Merge_A = -0.719390215369 Merge_C = 0.0637508742437 Merge_G = 0.244651896573 Merge_T = -0.627109496237 MergeS_A = -0.0168917301257 MergeS_C = -0.0769984152015 MergeS_G = -0.0862808988248 MergeS_T = -0.137076261104 # # P5-C3 chemistry # [P5-C3.AllQVsMergingByChannelModel] Match = 0.184656435394 Mismatch = -0.380508126527 MismatchS = -0.0519773778309 Branch = -0.0178687456208 BranchS = -0.0865415022309 DeletionN = -0.928673177809 DeletionWithTag = -0.255381037375 DeletionWithTagS = 0.0173271990056 Nce = 0.303359662376 NceS = -0.0980869366241 Merge_A = -0.0402618414395 Merge_C = 0.445432915183 Merge_G = 0.256569746054 Merge_T = 0.353800996389 MergeS_A = -0.118145654186 MergeS_C = -0.0471922787923 MergeS_G = -0.032653869882 MergeS_T 
= -0.0596571606945 # # These are the models used when the chemistry is not recognized. For # now, we fall back to the C2 parameters. # [unknown.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [unknown.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [unknown.NoQVsModel] Match = 0.0 Mismatch = -4.6 MismatchS = 0.0 Branch = -2.4 BranchS = 0.0 DeletionN = -3.25 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -2.45 NceS = 0.0 Merge = -3.2 MergeS = 0.0 GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-09/000077500000000000000000000000001274347070600252665ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-09/GenomicConsensus/000077500000000000000000000000001274347070600305505ustar00rootroot00000000000000QuiverParameters.ini000066400000000000000000000143071274347070600344760ustar00rootroot00000000000000GenomicConsensus-master/GenomicConsensus/quiver/resources/2014-09/GenomicConsensus################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # # Parameters for Quiver for different modeling and chemistry / # upstream analysis conditions. 
# # Author: David Alexander # Date: 9/18/2014 # # # C2 chemistry # [C2.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [C2.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [C2.NoQVsModel] Match = 0.0 Mismatch = -1.21730327606 MismatchS = 0.0 Branch = -0.371355384588 BranchS = 0.0 DeletionN = -0.250208973885 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -0.250370770693 NceS = 0.0 Merge = -0.371355384588 MergeS = 0.0 # # P4-C2 chemistry # [P4-C2.AllQVsMergingByChannelModel] Match = 0.266887694127 Mismatch = -1.54460829977 MismatchS = -0.0316527466982 Branch = -0.578688017857 BranchS = -0.0481504371325 DeletionN = -1.30511780446 DeletionWithTag = 0.023543148334 DeletionWithTagS = -0.044525216376 Nce = -0.11981002833 NceS = -0.094850201311 Merge_A = -0.719390215369 Merge_C = 0.0637508742437 Merge_G = 0.244651896573 Merge_T = -0.627109496237 MergeS_A = -0.0168917301257 MergeS_C = -0.0769984152015 MergeS_G = -0.0862808988248 MergeS_T = -0.137076261104 # # P5-C3 chemistry # [P5-C3.AllQVsMergingByChannelModel] Match = 0.184656435394 Mismatch = -0.380508126527 MismatchS = -0.0519773778309 Branch = -0.0178687456208 BranchS = -0.0865415022309 DeletionN = -0.928673177809 DeletionWithTag = -0.255381037375 DeletionWithTagS = 0.0173271990056 Nce = 0.303359662376 NceS = -0.0980869366241 Merge_A = -0.0402618414395 Merge_C = 0.445432915183 Merge_G = 0.256569746054 Merge_T = 0.353800996389 MergeS_A = -0.118145654186 MergeS_C = -0.0471922787923 MergeS_G = -0.032653869882 MergeS_T 
= -0.0596571606945 # # P6-C4 chemistry # [P6-C4.AllQVsMergingByChannelModel] Match = 0.262756 Mismatch = -1.71623 MismatchS = -0.00961684 Branch = -0.400811 BranchS = -0.0577744 DeletionN = -1.39515 DeletionWithTag = -0.232547 DeletionWithTagS = -0.0235445 Nce = -0.237657 NceS = -0.0459215 Merge_A = -1.13237 Merge_C = 1.08894 Merge_G = 0.570111 Merge_T = -0.570049 MergeS_A = -4.03641e-05 MergeS_C = -0.107432 MergeS_G = -0.0801512 MergeS_T = -0.058112 # # These are the models used when the chemistry is not recognized. For # now, we fall back to the C2 parameters. # [unknown.AllQVsModel] Match = 0.2627555 Mismatch = -1.09688872 MismatchS = -0.01637988 Branch = -0.60275947 BranchS = -0.02682689 DeletionN = -1.00012494 DeletionWithTag = 0.06000148 DeletionWithTagS = -0.02579358 Nce = -0.15864559 NceS = -0.04403654 Merge = -1.02398814 MergeS = -0.12135255 [unknown.NoMergeQVModel] Match = -0.032017275750000004 Mismatch = -0.9773427825000001 MismatchS = -0.01119015225 Branch = -0.630141005 BranchS = -0.0347192135 DeletionN = -0.7697154425 DeletionWithTag = -0.0003786080875 DeletionWithTagS = -0.02546157775 Nce = -0.21589032625 NceS = -0.04661514775 Merge = -1.0336790425 MergeS = 0.0 [unknown.NoQVsModel] Match = 0.0 Mismatch = -4.6 MismatchS = 0.0 Branch = -2.4 BranchS = 0.0 DeletionN = -3.25 DeletionWithTag = 0.0 DeletionWithTagS = 0.0 Nce = -2.45 NceS = 0.0 Merge = -3.2 MergeS = 0.0 GenomicConsensus-master/GenomicConsensus/quiver/utils.py000066400000000000000000000361121274347070600241550ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Pacific Biosciences nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#################################################################################

# Author: David Alexander

import numpy as np, itertools, logging, re
from collections import Counter

from GenomicConsensus.variants import *
from GenomicConsensus.utils import *
from GenomicConsensus.consensus import QuiverConsensus
from pbcore.io.rangeQueries import projectIntoRange
import ConsensusCore as cc


def uniqueSingleBaseMutations(templateSequence, positions=None):
    """
    Return an iterator over all single-base mutations of a
    templateSequence that result in unique mutated sequences.

    Mutations are filtered so that applying any one of them yields a
    sequence distinct from the one obtained by any other yielded
    mutation (e.g. inserting a base identical to the previous template
    base is suppressed, since the cognate insertion one position back
    produces the same string).
    """
    allBases = "ACGT"
    positions = positions or xrange(0, len(templateSequence))
    for tplStart in positions:
        tplBase = templateSequence[tplStart]
        prevTplBase = templateSequence[tplStart-1] if (tplStart > 0) else None
        # snvs
        for subsBase in allBases:
            if subsBase != tplBase:
                yield cc.Mutation(cc.SUBSTITUTION, tplStart, subsBase)
        # Insertions---only allowing insertions that are not cognate
        # with the previous base.
        for insBase in allBases:
            if insBase != prevTplBase:
                yield cc.Mutation(cc.INSERTION, tplStart, insBase)
        # Deletion--only allowed if refBase does not match previous tpl base
        if tplBase != prevTplBase:
            yield cc.Mutation(cc.DELETION, tplStart, "-")


def allSingleBaseMutations(templateSequence, positions=None):
    """
    Same as ``uniqueSingleBaseMutations``, but no filtering as to
    whether the mutated sequences are unique.
    """
    allBases = "ACGT"
    positions = positions or xrange(0, len(templateSequence))
    for tplStart in positions:
        tplBase = templateSequence[tplStart]
        # snvs
        for subsBase in allBases:
            if subsBase != tplBase:
                yield cc.Mutation(cc.SUBSTITUTION, tplStart, subsBase)
        # Insertions
        for insBase in allBases:
            yield cc.Mutation(cc.INSERTION, tplStart, insBase)
        # Deletion
        yield cc.Mutation(cc.DELETION, tplStart, "-")


def nearbyMutations(mutations, tpl, neighborhoodSize):
    """
    Return mutations nearby the previously-tried mutations.

    "Nearby" means within ``neighborhoodSize`` template positions of
    any mutation start in ``mutations`` (clamped to the template
    bounds); the returned iterator is the unique-mutation enumeration
    over those positions.
    """
    mutationPositions = map(cc.Mutation.Start, mutations)
    nearbyPositions = set()
    for mp in mutationPositions:
        nearbyPositions.update(range(max(0, mp - neighborhoodSize),
                                     min(len(tpl), mp + neighborhoodSize)))
    return uniqueSingleBaseMutations(tpl, sorted(nearbyPositions))


def asFloatFeature(arr):
    # Wrap an array-like as a ConsensusCore FloatFeature (float32).
    return cc.FloatFeature(np.array(arr, dtype=np.float32))


def bestSubset(mutationsAndScores, separation):
    """
    Given a list of (mutation, score) tuples, this utility method
    greedily chooses the highest scoring well-separated elements.  We
    use this to avoid applying adjacent high scoring mutations, which
    are the rule, not the exception.  We only apply the best scoring one
    in each neighborhood, and then revisit the neighborhoods after
    applying the mutations.
    """
    # NOTE: operates on a copy; repeatedly takes the max-scoring entry
    # and evicts everything within +/- `separation` of its start.
    input = mutationsAndScores[:]
    output = []
    while input:
        best = max(input, key=snd)
        output.append(best)
        nStart = best[0].Start() - separation
        nEnd = best[0].Start() + separation
        for t in input[:]:
            if nStart <= t[0].Start() <= nEnd:
                input.remove(t)
    return output


def refineConsensus(mms, quiverConfig):
    """
    Given a MultiReadMutationScorer, identify and apply favorable
    template mutations.  Return (consensus, didConverge) :: (str, bool)
    """
    # quiverConfig is currently unused here; refinement is delegated
    # entirely to ConsensusCore.
    isConverged = cc.RefineConsensus(mms)
    return mms.Template(), isConverged


def _buildDinucleotideRepeatPattern(minRepeatCount):
    # Build an alternation regex matching any dinucleotide (AB, A != B)
    # repeated at least `minRepeatCount` times.
    allDinucs = [ a + b for a in "ACGT" for b in "ACGT" if a != b ]
    pattern = "(" + "|".join(["(?:%s){%d,}" % (dinuc, minRepeatCount) for dinuc in allDinucs]) + ")"
    return pattern

# Module-level compiled-once pattern source: dinucleotide runs of 3+ repeats.
dinucleotideRepeatPattern = _buildDinucleotideRepeatPattern(3)


def findDinucleotideRepeats(s):
    """
    string -> list( (start_position, end_position), length-2 string )

    List is sorted, and [start_position, end_position) intervals are
    disjoint
    """
    repeatsFound = [ (m.span(), s[m.start():m.start()+2])
                     for m in re.finditer(dinucleotideRepeatPattern, s) ]
    return sorted(repeatsFound)


def refineDinucleotideRepeats(mms):
    """
    We have observed a couple instances where we call the consensus to
    be off the truth by +/- 1 dinucleotide repeat---we are getting
    trapped in an inferior local optimum, like so:

                                  likelihood
         truth       ATATATAT        100
         quiver      AT--ATAT         90
         quiver+A    ATA-ATAT         85
         quiver+T    AT-TATAT         85

    To resolve this issue, we need to explore the likelihood change for
    wobbling on every dinucleotide repeat in the window.
    """
    return cc.RefineDinucleotideRepeats(mms)


def consensusConfidence(mms, positions=None):
    """
    Returns an array of QV values reflecting the consensus confidence
    at each position specified.  If the `positions` argument is
    omitted, confidence values are returned for all positions in the
    consensus (mms.Template()).
    """
    # NOTE(review): `positions` is accepted but not honored --- the full
    # per-position QV array is always returned; confirm callers expect this.
    return np.array(cc.ConsensusQVs(mms), dtype=np.uint8)


def variantsFromAlignment(a, refWindow, cssQvInWindow=None, siteCoverage=None):
    """
    Extract the variants implied by a pairwise alignment to the
    reference.

    `a` is a ConsensusCore pairwise alignment (Target = reference,
    Query = consensus); variants are annotated with coverage and
    confidence when the optional per-site arrays are provided.
    """
    variants = []
    refId, refStart, _ = refWindow
    refPos = refStart
    cssPos = 0
    tbl = zip(a.Transcript(),
              a.Target(),
              a.Query())

    # We don't call variants where either the reference or css is 'N'
    grouper = lambda row: "N" if (row[1]=="N" or row[2]=="N") else row[0]
    runs = itertools.groupby(tbl, grouper)

    for code, run in runs:
        assert code in "RIDMN"
        run = list(run)
        ref = "".join(map(snd, run))
        refLen = len(ref) - Counter(ref)["-"]         # gapless reference length
        css = "".join(map(third, run))
        cssLen = len(css) - Counter(css)["-"]         # gapless consensus length
        variant = None

        if code == "M" or code == "N":
            pass
        elif code == "R":
            assert len(css)==len(ref)
            variant = Variant(refId, refPos, refPos+len(css), ref, css)
        elif code == "I":
            variant = Variant(refId, refPos, refPos, "", css)
        elif code == "D":
            variant = Variant(refId, refPos, refPos + len(ref), ref, "")

        if variant is not None:
            # HACK ALERT: variants at the first and last position
            # are not handled correctly
            if siteCoverage is not None and np.size(siteCoverage) > 0:
                refPos_ = min(refPos-refStart, len(siteCoverage)-1)
                variant.coverage = siteCoverage[refPos_]
            if cssQvInWindow is not None and np.size(cssQvInWindow) > 0:
                cssPos_ = min(cssPos, len(cssQvInWindow)-1)
                variant.confidence = cssQvInWindow[cssPos_]
            variants.append(variant)

        refPos += refLen
        cssPos += cssLen

    return variants


def referenceSpanWithinWindow(referenceWindow, aln):
    """
    Helper function for sorting reads by their reference span
    after restriction to a window.
    """
    _, winStart, winEnd = referenceWindow
    return min(winEnd, aln.referenceEnd) - \
        max(winStart, aln.referenceStart)


def lifted(queryPositions, mappedRead):
    """
    Lift a mappedRead into a new coordinate system by using the
    position translation table `queryPositions`
    """
    newStart = queryPositions[mappedRead.TemplateStart]
    newEnd = queryPositions[mappedRead.TemplateEnd]
    copy = cc.MappedRead(mappedRead)
    copy.TemplateStart = newStart
    copy.TemplateEnd = newEnd
    return copy


_typeMap = { cc.INSERTION    : "Ins",
             cc.DELETION     : "Del",
             cc.SUBSTITUTION : "Sub" }

def _shortMutationDescription(mut, tpl):
    """
    More compact and uniform mutation description strings
    Examples:

        201 Ins . > G
        201 Sub C > T
        201 Del C > .
    """
    _type = _typeMap[mut.Type()]
    _pos = mut.Start()
    _oldBase = "." if mut.Type() == cc.INSERTION \
               else tpl[_pos]
    _newBase = "." if mut.Type() == cc.DELETION \
               else mut.NewBases()
    return "%d %s %s > %s" % (_pos, _type, _oldBase, _newBase)


def scoreMatrix(mms):
    """
    Returns (rowNames, columnNames, baselineScores, S)

    where:
        - S is a matrix where S_{ij} represents the score delta
          of mutation j against read i
        - rowNames[i] is an identifier name for the read i---presently
          we use the row number within the cmp.h5, encoded as a string
        - columnNames[j] is an identifier for mutation j, encoding the
          position, type, and base change
    """
    css = mms.Template()
    allMutations = sorted(allSingleBaseMutations(css))
    shape = (mms.NumReads(), len(allMutations))
    scoreMatrix = np.zeros(shape)

    for j, mut in enumerate(allMutations):
        mutScores = mms.Scores(mut)
        scoreMatrix[:, j] = mutScores

    baselineScores = np.array(mms.BaselineScores())
    rowNames = [ mms.Read(i).Name
                 for i in xrange(mms.NumReads()) ]
    columnNames = [ _shortMutationDescription(mut, css)
                    for mut in allMutations ]
    return (rowNames, columnNames, baselineScores, scoreMatrix)


def variantsFromConsensus(refWindow, refSequenceInWindow, cssSequenceInWindow,
                          cssQvInWindow=None, siteCoverage=None,
                          aligner="affine", mms=None):
    """
    Compare the consensus and the reference in this window, returning
    a list of variants.

    `aligner` selects affine-gap vs. standard alignment; `mms` is
    accepted for interface compatibility but unused here.
    """
    refId, refStart, refEnd = refWindow

    if aligner == "affine":
        align = cc.AlignAffine
    else:
        align = cc.Align

    ga = align(refSequenceInWindow, cssSequenceInWindow)

    return variantsFromAlignment(ga, refWindow, cssQvInWindow, siteCoverage)


def filterAlns(refWindow, alns, quiverConfig):
    """
    Given alns (already clipped to the window bounds), filter out any
    that are incompatible with Quiver.

    By and large we avoid doing any filtering to avoid potential
    reference bias in variant calling.

    However at the moment the POA (and potentially other components)
    breaks when there is a read of zero length.  So we throw away
    reads that are "stumpy", where the aligner has inserted a large
    gap, such that while the alignment technically spans the window,
    it may not have any read content therein:

          Ref   ATGATCCAGTTACTCCGATAAA
          Read  ATG---------------TA-A
          Win.     [              )
    """
    return [ a for a in alns
             if a.readLength >= (quiverConfig.readStumpinessThreshold *
                                 a.referenceSpan) ]


def consensusForAlignments(refWindow, refSequence, alns, quiverConfig):
    """
    Call consensus on this interval---without subdividing the interval
    further.

    Testable!

    Clipping has already been done!
    """
    _, refStart, refEnd = refWindow

    # Compute the POA consensus, which is our initial guess, and
    # should typically be > 99.5% accurate
    fwdSequences = [ a.read(orientation="genomic", aligned=False)
                     for a in alns
                     if a.spansReferenceRange(refStart, refEnd) ]
    assert len(fwdSequences) >= quiverConfig.minPoaCoverage

    try:
        p = cc.PoaConsensus.FindConsensus(fwdSequences[:quiverConfig.maxPoaCoverage])
    # NOTE(review): bare except swallows *all* exceptions (including
    # KeyboardInterrupt/SystemExit); consider narrowing to Exception.
    except:
        logging.info("%s: POA could not be generated" % (refWindow,))
        return QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
                                               refWindow, refSequence)
    ga = cc.Align(refSequence, p.Sequence)
    numPoaVariants = ga.Errors()
    poaCss = p.Sequence

    # Extract reads into ConsensusCore-compatible objects, and map them into the
    # coordinates relative to the POA consensus
    mappedReads = [ quiverConfig.extractMappedRead(aln, refStart)
                    for aln in alns ]
    queryPositions = cc.TargetToQueryPositions(ga)
    mappedReads = [ lifted(queryPositions, mr) for mr in mappedReads ]

    # Load the mapped reads into the mutation scorer, and iterate
    # until convergence.
    configTbl = quiverConfig.ccQuiverConfigTbl
    mms = cc.SparseSseQvMultiReadMutationScorer(configTbl, poaCss)
    for mr in mappedReads:
        mms.AddRead(mr)

    # Iterate until convergence
    _, quiverConverged = refineConsensus(mms, quiverConfig)
    if quiverConverged:
        if quiverConfig.refineDinucleotideRepeats:
            refineDinucleotideRepeats(mms)
        quiverCss = mms.Template()
        if quiverConfig.computeConfidence:
            confidence = consensusConfidence(mms)
        else:
            confidence = np.zeros(shape=len(quiverCss), dtype=int)
        return QuiverConsensus(refWindow,
                               quiverCss,
                               confidence,
                               mms)
    else:
        logging.info("%s: Quiver did not converge to MLE" % (refWindow,))
        return QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
                                               refWindow, refSequence)


def coverageInWindow(refWin, hits):
    """
    Project the aligned spans of `hits` (restricted to this contig)
    into the window, returning the per-position coverage array.
    """
    winId, winStart, winEnd = refWin
    a = np.array([(hit.referenceStart, hit.referenceEnd)
                  for hit in hits
                  if hit.referenceName == winId])
    tStart = a[:,0]
    tEnd = a[:,1]
    cov = projectIntoRange(tStart, tEnd, winStart, winEnd)
    return cov
#################################################################################
# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Pacific Biosciences nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#################################################################################

# Author: David Alexander
from __future__ import absolute_import

import logging, re, numpy as np
from collections import OrderedDict
from pbcore.io import ReferenceSet

from .windows import holes, kCoveredIntervals, enumerateIntervals
from .utils import die, nub


class WorkChunk(object):
    """ A chunk of the reference """
    def __init__(self, window, hasCoverage):
        self.window = window            # (refId, start, end)
        self.hasCoverage = hasCoverage  # False for coverage "deserts"


class UppercasingMmappedFastaSequence(object):
    # Thin wrapper that upper-cases any slice taken from the underlying
    # memory-mapped FASTA sequence.
    def __init__(self, mmappedFastaSequence):
        self.other = mmappedFastaSequence

    def __getitem__(self, spec):
        snip = self.other.__getitem__(spec)
        return snip.upper()


class ReferenceContig(object):
    """
    A contig from a reference (i.e. FASTA) file.
    """
    def __init__(self, id, name, fullName, sequence, length):
        self.id = id              # CmpH5-local id
        self.name = name          # Prefix of FASTA header
        self.fullName = fullName
        self.sequence = UppercasingMmappedFastaSequence(sequence)
        self.length = length

# Module-level lookup tables, populated by loadFromFile().
byName       = OrderedDict()   # Fasta header (string e.g. "chr1") -> FastaRecord
byId         = OrderedDict()   # CmpH5 local id (integer) -> FastaRecord
byPacBioName = OrderedDict()   # pacbio name ("ref000001") -> FastaRecord

def idToName(_id):
    # At this point ids should always be names
    return byName[_id].name

def idToFullName(_id):
    # At this point ids should always be names
    return byName[_id].fullName

# Interpret a string key (one of name, or id (as string))
# and find the associated id.  Only to be used in interpretation of
# command-line input!
def anyKeyToId(stringKey):
    assert isLoaded()
    if stringKey in byName:
        return byName[stringKey].name
    elif stringKey in byPacBioName:
        return byPacBioName[stringKey].name
    elif stringKey.isdigit():
        refId = int(stringKey)
        # at this point, refId can still be the old numeric identifier
        return byId[refId].name
    else:
        raise Exception, "Unknown reference name: %s" % stringKey

def sequenceInWindow(window):
    # Return the (upper-cased) reference sequence within the window.
    refId, refStart, refEnd = window
    return byName[refId].sequence[refStart:refEnd]

# Path of the loaded reference FASTA; None until loadFromFile() succeeds.
filename = None

def isLoaded():
    return filename != None

def loadFromFile(filename_, alnFile):
    """
    Reads reference from FASTA file, loading
    lookup tables that can be used any time later.
    """
    # Contigs in FASTA may disagree with those in cmp.h5 ref info
    # table, for instance if the FASTA has been edited.  Here's how we
    # handle things:
    #
    #  |fastaContigs \ cmpContigs|  > 0 : OK, extra FASTA contigs just ignored
    #  |cmpContigs \ fastaContigs|  > 0 : Not necessarily OK---a warning should be
    #                                     issued.  We then proceed to operate on
    #                                     the contigs that are in both.
    #  |cmpContigs ^ fastaContigs| == 0 : Nothing to work with.  This is an error.
    #
    # While we formerly used MD5s to vouch for the identity of a
    # contig, we now use the name.  This is an inferior approach but
    # is necessary, in using the FastaTable.

    # Load contigs
    assert not isLoaded()
    try:
        f = ReferenceSet(filename_)
        f.assertIndexed()
    except IOError as e:
        die(e)
    cmpContigNames = set(alnFile.refNames)

    for fastaRecord in f.contigs:
        refName = fastaRecord.id
        if refName in cmpContigNames:
            refEntry = alnFile.referenceInfo(refName)
            refId = refEntry.ID
            pacBioName = refEntry.Name
            refFullName = refEntry.FullName
            sequence = UppercasingMmappedFastaSequence(fastaRecord.sequence)
            length = len(fastaRecord.sequence)
            contig = ReferenceContig(refId, refName, refFullName, sequence, length)
            byId[refId] = contig
            byName[refName] = contig
            byPacBioName[pacBioName] = contig
    loadedFastaContigNames = set(byName.keys())
    # NOTE(review): both counts come from byName, so this always logs
    # "Loaded N of N"; the second figure was presumably meant to be the
    # total number of FASTA contigs --- confirm intent.
    logging.info("Loaded %d of %d reference groups from %s " %
                 (len(byName), len(loadedFastaContigNames), filename_))

    if len(byName) == 0:
        die("No reference groups in the FASTA file were aligned against.  " \
            "Did you select the wrong reference FASTA file?")
    elif (cmpContigNames - loadedFastaContigNames):
        logging.warn(
            "Some reference contigs aligned against are not found in " \
            "the reference FASTA.  Will process only those contigs " \
            "supported by the reference FASTA.")

    global filename
    filename = filename_
    assert isLoaded()

def stringToWindow(s):
    # Parse "contig:start-end" or bare "contig" into a window tuple;
    # the end coordinate is clamped to the contig length.
    assert isLoaded()
    if s is None:
        return None
    m = re.match("(.*):(.*)-(.*)", s)
    if m:
        refId = anyKeyToId(m.group(1))
        refStart = int(m.group(2))
        refEnd = min(int(m.group(3)), byName[refId].length)
    else:
        refId = anyKeyToId(s)
        refStart = 0
        refEnd = byName[refId].length
    return (refId, refStart, refEnd)

def windowToString(referenceWindow):
    # Inverse of stringToWindow: format a window as "name:start-end".
    assert isLoaded()
    refId, refStart, refEnd = referenceWindow
    return "%s:%d-%d" % (idToName(refId),
                         refStart,
                         refEnd)

def enumerateSpans(refId, referenceWindows=()):
    """
    Enumerate the contiguous spans along this reference contig that
    are to be analyzed.
    """
    assert isLoaded()
    referenceEntry = byName[refId]
    referenceEntrySpan = (refId, 0, referenceEntry.length)
    for refWin in (referenceWindows or [referenceEntrySpan]):
        refWinId, start, end = refWin
        if refWinId == refId:
            yield (refId, start, end)

def enumerateChunks(refId, referenceStride, referenceWindows=()):
    """
    Enumerate all work chunks on this reference contig (restricted to
    the windows, if provided).
    """
    for span in enumerateSpans(refId, referenceWindows):
        for (s, e) in enumerateIntervals(span[1:], referenceStride):
            yield WorkChunk((refId, s, e), True)

def fancyEnumerateChunks(alnFile, refId, referenceStride,
                         minCoverage, minMapQV, referenceWindows=()):
    """
    Enumerate chunks, creating chunks with hasCoverage=False for
    coverage cutouts.
    """
    # Pull out rows with this refId and good enough MapQV
    rows = alnFile.index[
        ((alnFile.tId == alnFile.referenceInfo(refId).ID) &
         (alnFile.mapQV >= minMapQV))]

    unsorted_tStart = rows.tStart
    unsorted_tEnd = rows.tEnd

    # Sort (expected by CoveredIntervals)
    sort_order = np.lexsort((unsorted_tEnd, unsorted_tStart))
    tStart = unsorted_tStart[sort_order].tolist()
    tEnd = unsorted_tEnd[sort_order].tolist()

    for span in enumerateSpans(refId, referenceWindows):
        _, spanStart, spanEnd = span
        coveredIntervals = kCoveredIntervals(minCoverage, tStart, tEnd,
                                             spanStart, spanEnd)
        unCoveredIntervals = holes(span, coveredIntervals)
        # Interleave covered and uncovered intervals in genomic order;
        # only covered ones are subdivided by the stride.
        for (s, e) in sorted(list(coveredIntervals) + unCoveredIntervals):
            win = (refId, s, e)
            if (s, e) in coveredIntervals:
                for chunk in enumerateChunks(refId, referenceStride,
                                             [(refId, s, e)]):
                    yield chunk
            else:
                yield WorkChunk(win, False)

def numReferenceBases(refId, referenceWindows=()):
    """
    Termination is determined to be when the result collector has
    built consensus corresponding to the exact number of reference
    bases in the window under consideration.
    """
    return sum((end - start)
               for (_, start, end) in enumerateSpans(refId, referenceWindows))

def enumerateIds(referenceWindows=()):
    """
    Enumerate all refIds (subject to the referenceWindows restriction,
    if provided).
    """
    assert isLoaded()
    if referenceWindows == ():
        for refId in byName:
            yield refId
    else:
        for refId in nub(refId for (refId, _, _) in referenceWindows):
            yield refId

def enlargedReferenceWindow(refWin, overlap):
    # Grow the window by `overlap` on each side, clamped to the contig.
    assert isLoaded()
    refId, refStart, refEnd = refWin
    contigLength = byName[refId].length
    return (refId,
            max(0, refStart - overlap),
            min(refEnd + overlap, contigLength))
#################################################################################
# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Pacific Biosciences nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import import ast import math, numpy as np, os.path, sys, itertools def die(msg): print >>sys.stderr, msg sys.exit(-1) class CommonEqualityMixin(object): def __eq__(self, other): return (isinstance(other, self.__class__) and self.__dict__ == other.__dict__) def __ne__(self, other): return not self.__eq__(other) # An exception for incompatible cmp.h5 files class IncompatibleDataException(Exception): pass # We truncate QVs at 93 because the FASTQ format downstream can only # support QVs in the range [0, 93] without lossage. def error_probability_to_qv(error_probability, cap=93): """ Convert an error probability to a phred-scaled QV. 
""" if error_probability==0: return cap else: return min(cap, int(round(-10*math.log10(error_probability)))) _complement = { "A" : "T", "C" : "G", "G" : "C", "T" : "A", "-" : "-" } def complement(s): cStr = "".join(_complement[c] for c in s) if type(s) == str: return cStr else: return np.fromstring(cStr, "S1") def reverseComplement(s): return complement(s)[::-1] def fileFormat(filename): if filename.endswith(".gz"): ext = os.path.splitext(filename[:-3])[1] else: ext = os.path.splitext(filename)[1] ext = ext.lower() if ext in [".fa", ".fasta"]: return "FASTA" elif ext in [".fq", ".fastq"]: return "FASTQ" elif ext in [".gff" ]: return "GFF" elif ext in [".csv" ]: return "CSV" else: raise Exception, "Unrecognized file format" def rowNumberIsInReadStratum(readStratum, rowNumber): n, N = readStratum return (rowNumber % N) == n def readsInWindow(alnFile, window, depthLimit=None, minMapQV=0, strategy="fileorder", stratum=None, barcode=None): """ Return up to `depthLimit` reads (as row numbers integers) where the mapped reference intersects the window. If depthLimit is None, return all the reads meeting the criteria. 
`strategy` can be: - "longest" --- get the reads with the longest length in the window - "spanning" --- get only the reads spanning the window - "fileorder" --- get the reads in file order """ assert strategy in {"longest", "spanning", "fileorder", "long-and-strand-balanced"} if stratum is not None: raise ValueError, "stratum needs to be reimplemented" def depthCap(iter): if depthLimit is not None: return alnFile[list(itertools.islice(iter, 0, depthLimit))] else: return alnFile[list(iter)] def lengthInWindow(hit): return (min(alnFile.index.tEnd[hit], winEnd) - max(alnFile.index.tStart[hit], winStart)) winId, winStart, winEnd = window alnHits = np.array(list(alnFile.readsInRange(winId, winStart, winEnd, justIndices=True))) if len(alnHits) == 0: return [] if barcode == None: alnHits = alnHits[alnFile.mapQV[alnHits] >= minMapQV] else: # this wont work with CmpH5 (no bc in index): barcode = ast.literal_eval(barcode) alnHits = alnHits[(alnFile.mapQV[alnHits] >= minMapQV) & (alnFile.index.bcLeft[alnHits] == barcode[0]) & (alnFile.index.bcRight[alnHits] == barcode[1])] if strategy == "fileorder": return depthCap(alnHits) elif strategy == "spanning": winLen = winEnd - winStart return depthCap( hit for hit in alnHits if lengthInWindow(hit) == winLen ) elif strategy == "longest": return depthCap(sorted(alnHits, key=lengthInWindow, reverse=True)) elif strategy == "long-and-strand-balanced": # Longest (in window) is great, but bam sorts by tStart then strand. # With high coverage, this bias resulted in variants. Here we lexsort # by tStart and tEnd. Longest in window is the final criteria in # either case. 
# lexical sort: ends = alnFile.index.tEnd[alnHits] starts = alnFile.index.tStart[alnHits] lex_sort = np.lexsort((ends, starts)) # reorder based on sort: sorted_ends = ends[lex_sort] sorted_starts = starts[lex_sort] sorted_alnHits = alnHits[lex_sort] # get lengths in window: post = sorted_ends > winEnd sorted_ends[post] = winEnd pre = sorted_starts < winStart sorted_starts[pre] = winStart lens = sorted_ends - sorted_starts # coerce a descending sort: win_sort = ((winEnd - winStart) - lens).argsort(kind="mergesort") return depthCap(sorted_alnHits[win_sort]) def datasetCountExceedsThreshold(alnFile, threshold): """ Does the file contain more than `threshold` datasets? This impacts whether or not we should disable the chunk cache. """ total = 0 for i in np.unique(alnFile.AlnGroupID): total += len(alnFile._alignmentGroup(i)) if total > threshold: return True return False # # Some lisp functions we want # fst = lambda t: t[0] snd = lambda t: t[1] third = lambda t: t[2] def nub(it): """ Unique entries in an iterable, preserving order """ seen = set() for x in it: if x not in seen: yield(x) seen.add(x) GenomicConsensus-master/GenomicConsensus/variants.py000066400000000000000000000114171274347070600233320ustar00rootroot00000000000000################################################################################# # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # Author: David Alexander from __future__ import absolute_import from .utils import CommonEqualityMixin __all__ = [ "Variant" ] class Variant(CommonEqualityMixin): """ Variant objects represent homozygous/haploid OR heterozygous variants corresponding to a fixed window of a reference genome Internally we use Python-style half-open intervals zero-based [start, end) to delineate reference ranges. An insertion has start==end, a SNP has start+1==end, etc. GFF files use 1-based indexing and open intervals [start, end). In a GFF both insertions and SNPs have start==end, which doesn't make too much sense to me, but so be it. 
VCF files use 1-based indexing as well, but do not record the "end" """ def __init__(self, refId, refStart, refEnd, refSeq, readSeq1, readSeq2=None, confidence=None, coverage=None, frequency1=None, frequency2=None, annotations=None): self.refId = refId self.refStart = refStart self.refEnd = refEnd self.refSeq = refSeq self.readSeq1 = readSeq1 self.readSeq2 = readSeq2 self.confidence = confidence self.coverage = coverage self.frequency1 = frequency1 self.frequency2 = frequency2 self.annotations = annotations @property def isHeterozygous(self): return (self.readSeq2 != None) @property def variantType(self): lr = len(self.refSeq) l1 = len(self.readSeq1) l2 = len(self.readSeq2) if self.readSeq2 else None if lr == 0: return "Insertion" elif l1==0 or l2==0: return "Deletion" elif (l1==lr) and (l2==None or l2==lr): return "Substitution" else: return "Variant" def __str__(self): refSeq_ = self.refSeq or "." if self.isHeterozygous: readAlleles = "%s/%s" % (self.readSeq1 or ".", self.readSeq2 or ".") else: readAlleles = "%s" % (self.readSeq1 or ".") return "%s@%s:%d-%d %s -> %s" % \ (self.variantType, self.refId, self.refStart, self.refEnd, refSeq_, readAlleles) def __repr__(self): return str(self) def __lt__(self, other): return ((self.refId, self.refStart, self.refEnd, self.readSeq1) < (other.refId, other.refStart, other.refEnd, other.readSeq1)) def annotate(self, key, value): if self.annotations == None: self.annotations = [] self.annotations.append((key, value)) def filterVariants(minCoverage, minConfidence, variants): return [ v for v in variants if ((v.coverage >= minCoverage) and (v.confidence >= minConfidence)) ] def annotateVariants(variants, alns): # Operates in place for v in variants: v.annotate("rows", ",".join(str(a.rowNumber) for a in alns)) GenomicConsensus-master/GenomicConsensus/windows.py000066400000000000000000000151401274347070600231720ustar00rootroot00000000000000################################################################################# # 
Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Pacific Biosciences nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY # THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER # IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ################################################################################# # windows.py: logic for windows/intervals of the genome # # NOTE that by convention: # (start, end) is an /interval/ # (refId, start, end) is a /window/. 
# All windows/intervals use 0-based indexing and are half-open # (includes start, not end) # # Author: David Alexander import numpy as np, math from pbcore.io.rangeQueries import projectIntoRange from ConsensusCore import CoveredIntervals # TODO(lhepler): replace the above with the following: # from ConsensusCore2 import CoveredIntervals def intervalToPair(v): return (v.Begin, v.End) def kCoveredIntervals(k, tStart, tEnd, winStart, winEnd): return map(intervalToPair, CoveredIntervals(k, tStart, tEnd, int(winStart), int(winEnd-winStart))) def kSpannedIntervals(refWindow, k, start, end, minLength=0): """ Find intervals in the window that are k-spanned by the reads. Given: `refWindow`: the window under consideration `k`: the number of reads that must span intervals to be returned `start`, `end`: numpy arrays of start and end coordinates for reads, where the extent of each read is [start, end). Must be ordered so that `start` is sorted in ascending order. Find a maximal set of maximal disjoint intervals within refWindow such that each interval is spanned by at least k reads. Intervals are returned in sorted order, as a list of (start, end) tuples. Note that this is a greedy search procedure and may not always return the optimal solution, in some sense. However it will always return the optimal solutions in the most common cases. """ assert k >= 1 winId, winStart_, winEnd_ = refWindow # Truncate to bounds implied by refWindow start = np.clip(start, winStart_, winEnd_) end = np.clip(end, winStart_, winEnd_) # Translate the start, end to coordinate system where # refWindow.start is 0. 
start = start - winStart_ end = end - winStart_ winStart = 0 winEnd = winEnd_ - winStart_ positions = np.arange(winEnd - winStart, dtype=int) coverage = projectIntoRange(start, end, winStart, winEnd) x = -1 y = 0 intervalsFound = [] while y < winEnd: # Step 1: let x be the first pos >= y that is k-covered eligible = np.flatnonzero((positions >= y) & (coverage >= k)) if len(eligible) > 0: x = eligible[0] else: break # Step 2: extend the window [x, y) until [x, y) is no longer # k-spanned. Do this by setting y to the k-th largest `end` # among reads covering x eligible = end[(start <= x)] eligible.sort() if len(eligible) >= k: y = eligible[-k] else: break intervalsFound.append((x, y)) # Translate intervals back return [ (s + winStart_, e + winStart_) for (s, e) in intervalsFound if e - s >= minLength ] def abut(intervals): """ Abut adjacent intervals. Useful for debugging... """ output = [] lastS = None lastE = None for (s, e) in intervals: if s == lastE: lastS, lastE = lastS, e else: if lastS is not None: output.append((lastS, lastE)) lastS, lastE = s, e output.append((lastS, lastE)) return output def holes(refWindow, intervals): """ Given a window and a set of disjoint subintervals, return the "holes", which are the intervals of the refWindow not covered by the given subintervals. 
""" winId, winStart, winEnd = refWindow output = [] intervals = sorted(intervals) lastE = winStart for (s, e) in intervals: if s > lastE: output.append((lastE, s)) lastE = e if lastE < winEnd: output.append((lastE, winEnd)) return output def intersection(int1, int2): s1, e1 = int1 s2, e2 = int2 si, ei = max(s1, s2), min(e1, e2) if si < ei: return (si, ei) else: return None def windowsIntersect(w1, w2): i1, s1, e1 = w1 i2, s2, e2 = w2 return (i1 == i2) and (e1 > s2) and (e2 > s1) def subWindow(refWindow, subinterval): winId, winStart, winEnd = refWindow intS, intE = subinterval assert intS >= winStart assert intE <= winEnd return winId, intS, intE def enumerateIntervals(bounds, stride): """ Enumerate windows of size "stride", attempting to align window boundaries on multiple of stride. """ def alignDown(chunk, x): return (x/chunk)*chunk def alignUp(chunk, x): return int(math.ceil(float(x)/chunk)*chunk) start, end = bounds roundStart = alignDown(stride, start) roundEnd = alignUp (stride, end) for s in xrange(roundStart, roundEnd, stride): roundWin = (s, s + stride) yield intersection(bounds, roundWin) GenomicConsensus-master/LICENSES000066400000000000000000000031211274347070600167700ustar00rootroot00000000000000Copyright (c) 2011-2013, Pacific Biosciences of California, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Pacific Biosciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. GenomicConsensus-master/Makefile000066400000000000000000000027141274347070600173070ustar00rootroot00000000000000SHELL = /bin/bash -e INTERNAL_UTILS_PATH = /mnt/secondary/Share/Quiver/Tools bdist: python setup.py build --executable="/usr/bin/env python" python setup.py bdist --formats=egg install: python setup.py install develop: python setup.py develop tests: # Unit tests nosetests --with-xunit tests/unit # End-to-end tests PATH=`pwd`:$(PATH) cram tests/cram/*.t extra-tests: # Tests that need to be run by Jenkins but are slowing # down the development cycle, so aren't run by "tests" # target. PATH=`pwd`:$(PATH) cram tests/cram/extra/*.t internal-tests: # Long running tests that depend on files located on PacBio internal NFS # servers, including some utilities (exonerate suite, MuMMer) (. /mnt/software/Modules/current/init/bash && \ module add mummer/3.23 && \ module add exonerate/2.0.0 && \ module add blasr/2.3.0 && \ module add gfftools/dalexander && \ cram tests/cram/internal/*.t) doc: cd doc; make html clean: -rm -rf dist/ build/ *.egg-info -rm -rf doc/_build -rm -f nosetests.xml -find . 
-name "*.pyc" | xargs rm -f tags: find GenomicConsensus -name "*.py" | xargs etags pip-install: @which pip > /dev/null @pip freeze|grep 'GenomicConsensus=='>/dev/null \ && pip uninstall -y GenomicConsensus \ || true @pip install --no-index \ --install-option="--install-scripts=$(PREFIX)/bin" \ ./ # Aliases docs: doc check: tests test: tests .PHONY: check test tests doc docs clean tags GenomicConsensus-master/README.md000066400000000000000000000047161274347070600171320ustar00rootroot00000000000000GenomicConsensus (quiver, arrow) [![Circle CI](https://circleci.com/gh/PacificBiosciences/GenomicConsensus.svg?style=svg)](https://circleci.com/gh/PacificBiosciences/GenomicConsensus) ------------------------- The ``GenomicConsensus`` package provides the ``variantCaller`` tool, which allows you to apply the Quiver or Arrow algorithm to mapped PacBio reads to get consensus and variant calls. Background on Quiver and Arrow ------------------------------ *Quiver* is the legacy consensus model based on a conditional random field approach. Quiver enables consensus accuracies on genome assemblies at accuracies approaching or even exceeding Q60 (one error per million bases). If you use the HGAP assembly protocol in SMRTportal 2.0 or later, Quiver runs automatically as the final "assembly polishing" step. Over the years Quiver has proven difficult to train and develop, so we are phasing it out in favor of the new model, Arrow. *Arrow* is an improved consensus model based on a more straightforward hidden Markov model approach. Quiver is supported for PacBio RS data. Arrow is supported for PacBio Sequel data and RS data with the P6-C4 chemistry. Getting GenomicConsensus ------------------------ Casual users should get ``GenomicConsensus`` from the [SMRTanalysis software bundle](http://www.pacb.com/support/software-downloads/). 
Running ------- Basic usage is as follows: ```sh % quiver aligned_reads{.cmp.h5, .bam, .fofn, or .xml} \ > -r reference{.fasta or .xml} -o variants.gff \ > -o consensus.fasta -o consensus.fastq ``` ``quiver`` is a shortcut for ``variantCaller --algorithm=quiver``. Naturally, to use arrow you could use the ``arrow`` shortcut or ``variantCaller --algorithm=arrow``. in this example we perform haploid consensus and variant calling on the mapped reads in the ``aligned_reads.bam`` which was aligned to ``reference.fasta``. The ``reference.fasta`` is only used for designating variant calls, not for computing the consensus. The consensus quality score for every position can be found in the output FASTQ file. *Note that 2.3 SMRTanalysis does not support "dataset" input (FOFN or XML files); those who need this feature should wait for the forthcoming release of SMRTanalysis 3.0 or build from GitHub sources.* More documentation ------------------ - [More detailed installation and running instructions](./doc/HowTo.rst) - [FAQ](./doc/FAQ.rst) - [variants.gff spec](./doc/VariantsGffSpecification.rst) - [CHANGELOG](./CHANGELOG) GenomicConsensus-master/bin/000077500000000000000000000000001274347070600164135ustar00rootroot00000000000000GenomicConsensus-master/bin/arrow000077500000000000000000000000551274347070600174730ustar00rootroot00000000000000#!/bin/sh variantCaller --algorithm=arrow $* GenomicConsensus-master/bin/gffToBed000066400000000000000000000153361274347070600200260ustar00rootroot00000000000000#!/usr/bin/env python """ Convert .gff to .bed format. 
""" import sys import os import time import json import logging import argparse import traceback from pbcommand.models import FileTypes, get_pbparser from pbcommand.cli import pbparser_runner from pbcommand.utils import setup_log from pbcore.io import GffReader, WriterBase __version__ = "3.0" log = logging.getLogger(__name__) class Constants(object): TASK_ID = "genomic_consensus.tasks.gff2bed" PURPOSE_ID = "genomic_consensus.task_options.gff2bed_purpose" TRACK_NAME_ID = "genomic_consensus.task_options.track_name" DESCRIPTION_ID = 'genomic_consensus.task_options.track_description' USE_SCORE_ID = "genomic_consensus.task_options.use_score" DRIVER_EXE = "gffToBed --resolved-tool-contract " # # (Ported from pbpy) # class BedRecord: """Models a record in a BED file format""" def __init__(self): self.chrom='' self.chromStart = 0 self.chromEnd = 0 self.name = '' self.score = -1.00 self.strand = '+' def __str__(self): return '%s\t%d\t%d\t%s\t%.3f\t%s' % \ (self.chrom, self.chromStart, self.chromEnd, self.name, \ self.score, self.strand) class CoverageBedRecord(BedRecord): @staticmethod def fromAlignmentSummaryGffRecord(gff): bed = CoverageBedRecord() bed.chrom = gff.seqid bed.chromStart = gff.start - 1 bed.chromEnd = gff.end bed.name = 'meanCov' bed.score = float(gff.cov2.split(',')[0]) bed.strand = gff.strand return bed class VariantsBedRecord(BedRecord): @staticmethod def fromVariantGffRecord(gff): bed = VariantsBedRecord() bed.chrom = gff.seqid bed.chromStart = gff.start - 1 bed.score = float(gff.confidence) bed.strand = gff.strand feature = gff.type #GFF3 coordinates are 1-based and inclusive #BED coordinates are 0-based and exclusive if feature == 'insertion': bed.chromEnd = bed.chromStart + 1 bed.name = '%d_%dins%s' % (bed.chromStart + 1, bed.chromEnd + 1, gff.variantSeq) elif feature == 'deletion': featureLen = len(gff.reference) bed.chromEnd = bed.chromStart + featureLen if featureLen == 1: bed.name = "%ddel" % (bed.chromStart + 1) else: bed.name = '%d_%ddel' % 
(bed.chromStart + 1, bed.chromEnd) elif feature == 'substitution': bed.chromEnd = bed.chromStart + 1 bed.name = '%d%s>%s' % (bed.chromStart + 1, gff.reference, gff.variantSeq) else: print >> sys.stderr, 'Unsupported feature %s found in GFF3 file.' % feature return bed class BedWriter(WriterBase): """Outputs BED annotation track file""" def __init__(self, outfile): self._outfile = outfile def close(self): self._outfile.close() def flush(self): self._outfile.flush() def writeHeader(self, name, description, useScore): print >> self._outfile, 'track name=%s description="%s" useScore=%d' \ % (name, description, useScore) def writeRecord(self, record): print >> self._outfile, str(record) class GffToBed: """ Utility for converting GFF3 to BED format. Currently supports regional coverage or variant .bed output. """ def __init__(self, args): self.purpose = args.purpose self.gffFile = args.gff self.args = args if self.purpose not in [ "variants", "coverage" ]: raise ValueError( "Purpose %s not supported. 
Must be one of: [variants|coverage]" % (self.purpose)) def run(self, out=sys.stdout): with GffReader(self.gffFile) as reader, \ BedWriter(out) as writer: writer.writeHeader(self.args.name, self.args.description, self.args.useScore) for gff in reader: if self.purpose == 'coverage': bedRecord = CoverageBedRecord.fromAlignmentSummaryGffRecord(gff) else: bedRecord = VariantsBedRecord.fromVariantGffRecord(gff) writer.writeRecord(bedRecord) return 0 def args_runner(args, out=sys.stdout): return GffToBed(args).run(out=out) def resolved_tool_contract_runner(resolved_tool_contract): rtc = resolved_tool_contract assert rtc.task.options[Constants.PURPOSE_ID] in ["coverage", "variants"] args = [ rtc.task.options[Constants.PURPOSE_ID], rtc.task.input_files[0], "--useScore", str(rtc.task.options[Constants.USE_SCORE_ID]), # "--name", str(rtc.task.options[Constants.TRACK_NAME_ID]), # "--description", str(rtc.task.options[Constants.DESCRIPTION_ID]), ] # XXX HACK args_ = get_contract_parser().arg_parser.parser.parse_args(args) with open(rtc.task.output_files[0], "w") as f: return args_runner(args_, out=f) def get_contract_parser(): p = get_pbparser( tool_id=Constants.TASK_ID, version=__version__, name="gffToBed", description=__doc__, driver_exe=Constants.DRIVER_EXE, default_level="ERROR") ap = p.arg_parser.parser tcp = p.tool_contract_parser ap.add_argument("purpose", choices=["variants","coverage"], help="Run purpose") p.add_input_file_type(FileTypes.GFF, "gff", "GFF file", "GFF file") tcp.add_output_file_type(FileTypes.BED, "bed", "BED file", "BED file", "output") tcp.add_str(Constants.PURPOSE_ID, "purpose", default="variants", name="Purpose", description="Run mode ('variants' or 'coverage')") p.add_str(Constants.TRACK_NAME_ID, "name", default="variants", name="Track name", description="track name to display in header") p.add_str(Constants.DESCRIPTION_ID, 'description', default="PacBio: snps, insertions, and deletions derived from consensus calls against reference", name="Track 
description", description="track description to display in header") p.add_int(Constants.USE_SCORE_ID, "useScore", default=0, name="Use score", description="whether or not to use score for feature display") return p def main(argv=sys.argv): mp = get_contract_parser() return pbparser_runner( argv=argv[1:], parser=mp, args_runner_func=args_runner, contract_runner_func=resolved_tool_contract_runner, alog=log, setup_log_func=setup_log) if __name__ == '__main__': sys.exit(main()) GenomicConsensus-master/bin/gffToVcf000077500000000000000000000125071274347070600200520ustar00rootroot00000000000000#!/usr/bin/env python """Utility for converting variant GFF3 files to 1000 Genomes VCF""" import sys import os import time import json import logging import argparse import traceback from pbcommand.models import FileTypes, get_pbparser from pbcommand.cli import pbparser_runner from pbcommand.utils import setup_log from pbcore.io import GffReader, WriterBase # # (Ported from pbpy) # __version__ = "3.0" log = logging.getLogger(__name__) class Constants(object): TASK_ID = "genomic_consensus.tasks.gff2vcf" DRIVER_EXE = "gffToVcf --resolved-tool-contract " GLOBAL_REFERENCE_ID = "genomic_consensus.task_options.global_reference" class VcfRecord: """Models a record in a VCF3.3 file.""" def __init__(self): self.chrom = '' self.pos = 1 self.id = '.' self.ref = '' self.alt = '' self.qual = -1.00 self.filter = '0' self.info = {} @staticmethod def fromVariantGffRecord(gff): vcf = VcfRecord() vcf.chrom = gff.seqid vcf.id = '.' ref = gff.reference if ref is None: vcf.ref = "N" else: vcf.ref = ref vcf.qual = float(gff.confidence) vcf.put('NS', 1) vcf.put('DP', gff.coverage) feature = gff.type vcf.pos = gff.start if feature == 'insertion': vcf.alt = 'I%s' % gff.variantSeq.upper() elif feature == 'deletion': vcf.alt = 'D%s' % len(gff.reference) elif feature == 'substitution': vcf.alt = gff.variantSeq.upper() else: print >> sys.stderr, 'Unsupported feature %s found in GFF3 file.' 
% feature return vcf def put(self, key, value): self.info[key] = value @staticmethod def getHeader(): return 'CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO' def _getInfoString(self): return ';'.join(['%s=%s' % (k,v) \ for k,v in self.info.iteritems()]) def __str__(self): return '%s\t%d\t%s\t%s\t%s\t%.2f\t%s\t%s' % \ (self.chrom, self.pos, self.id, self.ref, \ self.alt, self.qual, self.filter, self._getInfoString()) class VcfWriter(WriterBase): """Outputs VCF (1000 Genomes Variant Call Format) 3.3 files""" def __init__(self, outfile): self._outfile = outfile self._start() def close(self): self._outfile.close() def flush(self): self._outfile.flush() def _start(self): self.writeMetaData('fileformat', 'VCFv3.3') def writeHeader(self): print >> self._outfile, '#%s' % VcfRecord.getHeader() def writeMetaData(self, key, value): print >> self._outfile, '##%s=%s' % (key, value) def writeRecord( self, record ): print >> self._outfile, str(record) class GffToVcf(object): """Utility for converting variant GFF3 files to 1000 Genomes VCF""" def __init__(self, gffFile, globalReference=None): self.gffFile = gffFile self.globalReference = globalReference def _writeMetaData(self, writer): currentTime = time.localtime() cmdLine = os.path.basename(sys.argv[0]) + ' ' + ' '.join(sys.argv[1:]) writer.writeMetaData('fileDate', '%d%d%d' % \ (currentTime[0], currentTime[1], currentTime[2])) writer.writeMetaData('source', cmdLine) if self.globalReference is not None: writer.writeMetaData('reference', self.globalReference) writer.writeMetaData('INFO', 'NS,1,Integer,"Number of Samples with Data"') writer.writeMetaData('INFO', 'DP,1,Integer,"Total Depth of Coverage"') writer.writeHeader() def run(self, out=sys.stdout): with GffReader(self.gffFile) as reader, \ VcfWriter(out) as writer: self._writeMetaData(writer) for gff in reader: vcf = VcfRecord.fromVariantGffRecord(gff) writer.writeRecord(vcf) return 0 def args_runner(args, out=sys.stdout): return GffToVcf( gffFile=args.gffFile, 
globalReference=args.globalReference).run(out=out) def resolved_tool_contract_runner(resolved_tool_contract): rtc = resolved_tool_contract with open(rtc.task.output_files[0], "w") as f: gr = None #rtc.task.options[Constants.GLOBAL_REFERENCE_ID] return GffToVcf( gffFile=rtc.task.input_files[0], globalReference=gr).run(out=f) def get_contract_parser(): p = get_pbparser( tool_id=Constants.TASK_ID, version=__version__, name="gffToVcf", description=__doc__, driver_exe=Constants.DRIVER_EXE, default_level="ERROR") ap = p.arg_parser.parser tcp = p.tool_contract_parser p.add_input_file_type(FileTypes.GFF, "gffFile", "GFF file", "GFF file") tcp.add_output_file_type(FileTypes.VCF, "vcf", "VCF file", "VCF file", default_name="output") ap.add_argument("--globalReference", action="store", default=None, help="Name of global reference to put in Meta field") return p def main(argv=sys.argv): return pbparser_runner( argv=argv[1:], parser=get_contract_parser(), args_runner_func=args_runner, contract_runner_func=resolved_tool_contract_runner, alog=log, setup_log_func=setup_log) if __name__ == '__main__': sys.exit(main()) GenomicConsensus-master/bin/plurality000077500000000000000000000000611274347070600203630ustar00rootroot00000000000000#!/bin/sh variantCaller --algorithm=plurality $* GenomicConsensus-master/bin/poa000077500000000000000000000000531274347070600171160ustar00rootroot00000000000000#!/bin/sh variantCaller --algorithm=poa $* GenomicConsensus-master/bin/quiver000077500000000000000000000000561274347070600176550ustar00rootroot00000000000000#!/bin/sh variantCaller --algorithm=quiver $* GenomicConsensus-master/bin/summarizeConsensus000077500000000000000000000147011274347070600222610ustar00rootroot00000000000000#!/usr/bin/env python """ Augment the alignment_summary.gff file with consensus and variants information. 
""" from collections import namedtuple, defaultdict import argparse import logging import bisect import json import gzip import sys import numpy as np from pbcommand.utils import setup_log from pbcommand.cli import pbparser_runner, get_default_argparser from pbcommand.models import FileTypes, get_pbparser from pbcommand.common_options import add_resolved_tool_contract_option from pbcore.io import GffReader, GffWriter, Gff3Record from GenomicConsensus.utils import error_probability_to_qv from GenomicConsensus import __VERSION__ # # Note: GFF-style coordinates # Region = namedtuple("Region", ("seqid", "start", "end")) log = logging.getLogger(__name__) class Constants(object): TOOL_ID = "genomic_consensus.tasks.summarize_consensus" DRIVER_EXE = "summarizeConsensus --resolved-tool-contract " def get_contract_parser(): p = get_pbparser( Constants.TOOL_ID, __VERSION__, "Summarize Consensus", __doc__, Constants.DRIVER_EXE, default_level="ERROR") p.add_input_file_type(FileTypes.GFF, "alignment_summary", "Alignment summary GFF", "Alignment summary GFF file") p.tool_contract_parser.add_input_file_type(FileTypes.GFF, "variants", "Variants GFF", "Variants GFF file") p.arg_parser.parser.add_argument("--variantsGff", type=str, help="Input variants.gff or variants.gff.gz filename", required=True) p.tool_contract_parser.add_output_file_type(FileTypes.GFF, "output", name="Output GFF file", description="New alignment summary GFF file", default_name="alignment_summary_variants") p.arg_parser.parser.add_argument("-o", "--output", type=str, help="Output alignment_summary.gff filename") return p def get_args_from_resolved_tool_contract(resolved_tool_contract): rtc = resolved_tool_contract p = get_contract_parser().arg_parser.parser args = [ rtc.task.input_files[0], "--variantsGff", rtc.task.input_files[1], "--output", rtc.task.output_files[0], ] return p.parse_args(args) def run(options): headers = [ ("source", "GenomicConsensus %s" % __VERSION__), ("pacbio-alignment-summary-version", 
"0.6"), ("source-commandline", " ".join(sys.argv)), ] inputVariantsGff = GffReader(options.variantsGff) inputAlignmentSummaryGff = GffReader(options.alignment_summary) summaries = {} for gffRecord in inputAlignmentSummaryGff: region = Region(gffRecord.seqid, gffRecord.start, gffRecord.end) summaries[region] = { "ins" : 0, "del" : 0, "sub" : 0, # TODO: base consensusQV on effective coverage "cQv" : (20, 20, 20) } inputAlignmentSummaryGff.close() counterNames = { "insertion" : "ins", "deletion" : "del", "substitution" : "sub" } regions_by_contig = defaultdict(list) for region in summaries: regions_by_contig[region.seqid].append(region) for seqid in regions_by_contig.keys(): r = regions_by_contig[seqid] regions_by_contig[seqid] = sorted(r, lambda a,b: cmp(a.start, b.start)) logging.info("Processing variant records") i = 0 have_contigs = set(regions_by_contig.keys()) for variantGffRecord in inputVariantsGff: if not variantGffRecord.seqid in have_contigs: raise KeyError( "Can't find alignment summary for contig '{s}".format( s=variantGffRecord.seqid)) positions = [r.start for r in regions_by_contig[variantGffRecord.seqid]] idx = bisect.bisect_right(positions, variantGffRecord.start) - 1 # XXX we have to be a little careful here - an insertion at the start # of a contig will have start=0 versus start=1 for the first region if idx < 0: idx = 0 region = regions_by_contig[variantGffRecord.seqid][idx] assert ((region.start <= variantGffRecord.start <= region.end) or (region.start == 1 and variantGffRecord.start == 0 and variantGffRecord.type == "insertion")), \ (variantGffRecord.seqid, region.start, variantGffRecord.start, region.end, variantGffRecord.type, idx) summary = summaries[region] counterName = counterNames[variantGffRecord.type] variantLength = max(len(variantGffRecord.reference), len(variantGffRecord.variantSeq)) summary[counterName] += variantLength i += 1 if i % 1000 == 0: logging.info("{i} records...".format(i=i)) inputAlignmentSummaryGff = 
open(options.alignment_summary) outputAlignmentSummaryGff = open(options.output, "w") inHeader = True for line in inputAlignmentSummaryGff: line = line.rstrip() # Pass any metadata line straight through if line[0] == "#": print >>outputAlignmentSummaryGff, line.strip() continue if inHeader: # We are at the end of the header -- write the tool-specific headers for k, v in headers: print >>outputAlignmentSummaryGff, ("##%s %s" % (k, v)) inHeader = False # Parse the line rec = Gff3Record.fromString(line) if rec.type == "region": summary = summaries[(rec.seqid, rec.start, rec.end)] if "cQv" in summary: cQvTuple = summary["cQv"] line += ";%s=%s" % ("cQv", ",".join(str(int(f)) for f in cQvTuple)) for counterName in counterNames.values(): if counterName in summary: line += ";%s=%d" % (counterName, summary[counterName]) print >>outputAlignmentSummaryGff, line return 0 def args_runner(args): return run(options=args) def resolved_tool_contract_runner(resolved_tool_contract): args = get_args_from_resolved_tool_contract(resolved_tool_contract) return run(options=args) def main(argv=sys.argv): return pbparser_runner(argv[1:], get_contract_parser(), args_runner, resolved_tool_contract_runner, log, setup_log) if __name__ == "__main__": sys.exit(main()) GenomicConsensus-master/bin/variantCaller000077500000000000000000000001701274347070600211260ustar00rootroot00000000000000#!/usr/bin/env python import sys from GenomicConsensus.main import main if __name__ == '__main__': sys.exit(main()) GenomicConsensus-master/circle.yml000066400000000000000000000046711274347070600176370ustar00rootroot00000000000000dependencies: cache_directories: - "_deps/cmake-3.3.0-Linux-x86_64" - "_deps/boost_1_58_0" - "_deps/swig-3.0.8" pre: - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get update - sudo apt-get install g++-4.8 - curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash - sudo apt-get install git-lfs=1.1.0 - if [ ! 
-d _deps ] ; then mkdir _deps ; fi # Create a directory for dependencies, These are static, cache them. - pushd _deps ; if [ ! -d cmake-3.3.0-Linux-x86_64 ] ; then wget --no-check-certificate http://www.cmake.org/files/v3.3/cmake-3.3.0-Linux-x86_64.tar.gz ; tar xzf cmake-3.3.0-Linux-x86_64.tar.gz ; fi - pushd _deps ; if [ ! -d boost_1_58_0 ] ; then wget http://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.tar.bz2 ; tar xjf boost_1_58_0.tar.bz2 ; fi - pushd _deps ; if [ ! -f swig-3.0.8/bin/swig ] ; then rm -fr swig-3.0.8* ; mkdir dl ; pushd dl ; wget http://downloads.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz ; tar xzf swig-3.0.8.tar.gz ; pushd swig-3.0.8 ; ./configure --prefix $(readlink -f ../../swig-3.0.8) ; make ; make install ; fi - pushd _deps ; git clone https://github.com/PacificBiosciences/ConsensusCore.git - pushd _deps ; git clone https://github.com/PacificBiosciences/ConsensusCore2.git - pushd _deps ; git clone https://github.com/PacificBiosciences/PacBioTestData.git - pip install --upgrade pip - pip install numpy - pip install cython - pip install h5py - pip install pysam - pip install --upgrade --no-deps git+https://github.com/PacificBiosciences/pbcommand.git - pip install --upgrade --no-deps git+https://github.com/PacificBiosciences/pbcore.git - pip install cram nose override: - pushd _deps/ConsensusCore ; python setup.py install --boost=$(readlink -f ../boost_1_58_0) --swig=$(readlink -f ../swig-3.0.8/bin/swig) - pushd _deps/ConsensusCore2 ; CC=gcc-4.8 CXX=g++-4.8 CMAKE_COMMAND=$(readlink -f ../cmake-3.3.0-Linux-x86_64/bin/cmake) Boost_INCLUDE_DIRS=$(readlink -f ../boost_1_58_0) SWIG_COMMAND=$(readlink -f ../swig-3.0.8/bin/swig) pip install --verbose --upgrade --no-deps . - pushd _deps/PacBioTestData ; git lfs pull && make python test: pre: - pip install --verbose . 
override: - make check GenomicConsensus-master/doc/000077500000000000000000000000001274347070600164105ustar00rootroot00000000000000GenomicConsensus-master/doc/FAQ.rst000066400000000000000000000424201274347070600175530ustar00rootroot00000000000000Quiver FAQ ========== What are EviCons? GenomicConsensus? Quiver? Plurality? ------------------------------------------------------------ **GenomicConsensus** is the current PacBio consensus and variant calling suite. It contains a main driver program, ``variantCaller``, which provides two consensus/variant calling algorithms: **Arrow** and **Quiver**. These algorithms can be run by calling ``variantCaller.py --algorithm=[arrow|quiver|plurality]`` or by going through the convenience wrapper scripts ``quiver`` and ``arrow``. **EviCons** was the previous generation PacBio variant caller (removed in software release v1.3.1). Separate packages called **ConsensusCore** and **ConsensusCore2** are C++ libraries where all the computation behind Quiver and Arrow are done, respectively. This is transparent to the user after installation. What is Plurality? ------------------ **Plurality** is a very simple variant calling algorithm: it stacks up the aligned reads (alignment as produced by BLASR, or alternate mapping tool), and for each column under a reference base, calls the most abundant (i.e., the plurality) read base (or bases, or deletion) as the consensus at that reference position. Why is Plurality a weak algorithm? ---------------------------------- Plurality does not perform any local realignment. This means it is heavily biased by the alignment produced by the mapper (BLASR, typically). It also means that it is insensitive at detecting indels. 
Consider this example:: Reference AAAA ---- Aligned A-AA reads AA-A -AAA ---- Plurality AAAA consensus Note here that every read has a deletion and the correct consensus call would be "AAA", but due to the mapper's freedom in gap-placement at the single-read level, the plurality sequence is "AAAA"---so the deletion is missed. Local realignment, which plurality does not do, but which could be considered as implicit in the Quiver algorithm, essentially pushes the gaps here to the same column, thus identifying the deletion. While plurality could be adjusted to use a simple "gap normalizing" realignment, in practice noncognate extras (spurious non-homopolymer base calls) in the midst of homopolymer runs pose challenges. What is Quiver? --------------- **Quiver** is a more sophisticated algorithm that finds the maximum quasi-likelihood template sequence given PacBio reads of the template. PacBio reads are modeled using a conditional random field approach that scores the quasi-likelihood of a read given a template sequence. In addition to the base sequence of each read, Quiver uses several additional *QV* covariates that the basecaller provides. Using these covariates provides additional information about each read, allowing more accurate consensus calls. Quiver does not use the alignment provided by the mapper (BLASR, typically), except for determining how to group reads together at a macro level. It implicitly performs its own realignment, so it is highly sensitive to all variant types, including indels---for example, it resolves the example above with ease. The name **Quiver** reflects a consensus-calling algorithm that is `QV-aware`. We use the lowercase "quiver" to denote the quiver *tool* in GenomicConsensus, which applies the Quiver algorithm to mapped reads to derive sequence consensus and variants. Quiver is described in detail in the supplementary material to the `HGAP paper`_. What is Arrow? 
-------------- Arrow is a newer model intended to supercede Quiver in the near future. The key differences from Quiver are that it uses an HMM model instead of a CRF, it computes true likelihoods, and it uses a smaller set of covariates. We expect a whitepaper on Arrow to be available soon. We use the lowercase "arrow" to denote the arrow *tool*, which applies the Arrow algorithm to mapped reads to derive sequence consensus and variants. How do I run `quiver`/`arrow`? ------------------------------ For general instructions on installing and running, see the HowTo_ document. What is the output from `quiver`/`arrow`? ----------------------------------------- There are three output files from the GenomicConsensus tools: 1. A consensus *FASTA* file containing the consensus sequence 2. A consensus *FASTQ* file containing the consensus sequence with quality annotations 3. A variants *GFF* file containing a filtered, annotated list of variants identified It is important to note that the variants included in the output variants GFF file are *filtered* by coverage and quality, so not all variants that are apparent in comparing the reference to the consensus FASTA output will correspond to variants in the output variants GFF file. To enable all output files, the following can be run (for example):: % quiver -j16 aligned_reads.cmp.h5 -r ref.fa \ -o consensus.fa \ -o consensus.fq \ -o variants.gff The extension is used to determine the output file format. What does it mean that `quiver` consensus is *de novo*? ------------------------------------------------------- Quiver's consensus is *de novo* in the sense that the reference and the reference alignment are not used to inform the consensus output. Only the reads factor into the determination of the consensus. 
The only time the reference sequence is used to make consensus calls - when the ``--noEvidenceConsensusCall`` flag is set to ``reference`` or ``lowercasereference`` (the default)- is when there is no effective coverage in a genomic window, so Quiver has no evidence for computing consensus. One can set ``--noEvidenceConsensusCall=nocall`` to avoid using the reference even in zero coverage regions. What is the expected `quiver` accuracy? --------------------------------------- Quiver's expected accuracy is a function of coverage and chemistry. The C2 chemistry (no longer available), P6-C4 and P4-C2 chemistries provide the most accuracy. Nominal consensus accuracy levels are as follows: +----------+-------------------------------+ |Coverage |Expected consensus accuracy | | +------------------+------------+ | | C2, P4-C2, P6-C4 | P5-C3 | +==========+==================+============+ |10x | > Q30 | > Q30 | +----------+------------------+------------+ |20x | > Q40 | > Q40 | +----------+------------------+------------+ |40x | > Q50 | > Q45 | +----------+------------------+------------+ |60-80x | ~ Q60 | > Q55 | +----------+------------------+------------+ The "Q" values referred to are Phred-scaled quality values: .. math:: q = -10 \log_{10} p_{error} for instance, Q50 corresponds to a p_error of 0.00001---an accuracy of 99.999%. These accuracy expectations are based on routine validations performed on multiple bacterial genomes before each chemistry release. What is the expected accuracy from `arrow` ------------------------------------------ `arrow` achieves similar accuracy to `quiver`. Numbers will be published soon. What are the residual errors after applying `quiver`? ----------------------------------------------------- If there are errors remaining applying Quiver, they will almost invariably be homopolymer run-length errors (insertions or deletions). Does `quiver`/`arrow` need to know what sequencing chemistry was used? 
---------------------------------------------------------------------- At present, the Quiver model is trained per-chemistry, so it is very important that Quiver knows the sequencing chemistries used. If SMRT Analysis software was used to build the `cmp.h5` or BAM input file, the `cmp.h5` will be loaded with information about the sequencing chemistry used for each SMRT Cell, and GenomicConsensus will automatically identify the right parameters to use. If custom software was used to build the `cmp.h5`, or an override of Quiver's autodetection is desired, then the chemistry or model must be explicity entered. For example:: % quiver -p P4-C2 ... % quiver -p P4-C2.AllQVsMergingByChannelModel ... Can a mix of chemistries be used in a cmp.h5 file for quiver/arrow? ------------------------------------------------------------------- Yes! GenomicConsensus tools automatically see the chemistry *per-SMRT Cell*, so it can figure out the right parameters for each read and model them appropriately. What chemistries and chemistry mixes are supported? --------------------------------------------------- For Quiver: all PacBio RS chemistries are supported. Chemistry mixtures of P6-C4, P4-C2, P5-C3, and C2 are supported. For Arrow: the RS chemistry P6-C4, and all PacBio Sequel chemistries are supported. Mixes of these chemistries are supported. What are the QVs that the Quiver model uses? -------------------------------------------- Quiver uses additional QV tracks provided by the basecaller. These QVs may be looked at as little breadcrumbs that are left behind by the basecaller to help identify positions where it was likely that errors of a given type occurred. 
Formally, the QVs for a given read are vectors of the same length as the number of bases called; the QVs used are as follows: - DeletionQV - InsertionQV - MergeQV - SubstitutionQV - DeletionTag To find out if your cmp.h5 file is loaded with these QV tracks, run the command :: % h5ls -rv aligned_reads.cmp.h5 and look for the QV track names in the output. If your cmp.h5 file is lacking some of these tracks, Quiver will still run, though it will issue a warning that its performance will be suboptimal. Why is `quiver`/`arrow` making errors in some region? ----------------------------------------------------- The most likely cause for *true* errors made by these tools is that the coverage in the region was low. If there is 5x coverage over a 1000-base region, then 10 errors in that region can be expected. It is important to understand that the effective coverage available to `quiver`/`arrow` is not the full coverage apparent in plots---the tools filter out ambiguously mapped reads by default. The remaining coverage after filtering is called the /effective coverage/. See the next section for discussion of `MapQV`. If you have verified that there is high effective coverage in the region in question, it is highly possible---given the high accuracy quiver and arrow can achieve---that the apparent errors actually reflect true sequence variants. Inspect the FASTQ output file to ensure that the region was called at high confidence; if an erroneous sequence variant is being called at high confidence, please report a bug to us. What does Quiver do for genomic regions with no effective coverage? ------------------------------------------------------------------- For regions with no effective coverage, no variants are outputted, and the FASTQ confidence is 0. The output in the FASTA and FASTQ consensus sequence tracks is dependent on the setting of the ``--noEvidenceConsensusCall`` flag. 
Assuming the reference in the window is "ACGT", the options are: +---------------------------------------------+---------+ |``--noEvidenceConsensusCall=...`` |Consensus| | |output | +=============================================+=========+ |``nocall`` (default in 1.4) |NNNN | +---------------------------------------------+---------+ |``reference`` |ACGT | +---------------------------------------------+---------+ |``lowercasereference`` (new post 1.4, and the| | |default) |acgt | +---------------------------------------------+---------+ What is `MapQV` and why is it important? ---------------------------------------- `MapQV` is a single scalar Phred-scaled QV per aligned read that reflects the mapper's degree of certainty that the read aligned to *this* part of the reference and not some other. Unambigously mapped reads will have a high `MapQV` (typically 255), while a read that was equally likely to have come from two parts of the reference would have a `MapQV` of 3. `MapQV` is pretty important when you want highly accurate variant calls. Quiver and Plurality both filter out aligned reads with a MapQV below 20 (by default), so as not to call a variant using data of uncertain genomic origin. This can be problematic if using quiver/arrow to get a consensus sequence. If the genome of interest contains long (relative to the library insert size) highly-similar repeats, the effective coverage (after `MapQV` filtering) may be reduced in the repeat regions---this is termed these `MapQV` dropouts. If the coverage is sufficiently reduced in these regions, quiver/arrow will not call consensus in these regions---see `What do quiver/arrow do for genomic regions with no effective coverage?`_. If you want to use ambiguously mapped reads in computing a consensus for a denovo assembly, the `MapQV` filter can be turned off entirely. 
In this case, the consensus for each instance of a genomic repeat will be calculated using reads that may actually be from other instances of the repeat, so the exact trustworthiness of the consensus in that region may be suspect. The next section describes how to disable the `MapQV` filter. How can the `MapQV` filter be turned off and when should it be? -------------------------------------------------------------- The `MapQV` filter can be disabled using the flag ``--mapQvThreshold=0`` (shorthand: ``-m=0``). If running a quiver/arrow job via SMRT Portal, this can be done by unchecking the "Use only unambiguously mapped reads" option. Consider this in de novo assembly projects, but it is not recommended for variant calling applications. How can variant calls made by quiver/arrow be inspected or validated? --------------------------------------------------------------------- When in doubt, it is easiest to inspect the region in a tool like SMRT View, which enables you to view the reads aligned to the region. Deletions and substitutions should be fairly easy to spot; to view insertions, right-click on the reference base and select "View Insertions Before...". What are the filtering parameters that quiver/arrow use? -------------------------------------------------------- The available options limit read coverage, filters reads by `MapQV`, and filters variants by quality and coverage. - The overall read coverage used to call consensus in every window is 100x by default, but can be changed using ``-X=value``. - The `MapQV` filter, by default, removes reads with MapQV < 20. This is configured using ``--mapQvThreshold=value`` / ``-m=value`` - Variants are only called if the read coverage of the site exceeds 5x, by default---this is configurable using ``-x=value``. Further, they will not be called if the confidence (Phred-scaled) does not exceed 40---configurable using ``-q=value``. What happens when the sample is a mixture, or diploid? 
----------------------------------------------------- At present, quiver/arrow assume a haploid sample, and the behavior of on sample mixtures or diploid/polyploid samples is *undefined*. The program will not crash, but the output results are not guaranteed to accord with any one of the haplotypes in the sample, as opposed to a potential patchwork. Why would I want to *iterate* the mapping+(quiver/arrow) process? ----------------------------------------------------------------- Some customers using quiver for polishing highly repetitive genomes have found that if they take the consensus FASTA output of quiver, use it as a new reference, and then perform mapping and Quiver again to get a new consensus, they get improved results from the second round of quiver. This can be explained by noting that the output of the first round of quiver is more accurate than the initial draft consensus output by the assembler, so the second round's mapping to the quiver consensus can be more sensitive in mapping reads from repetitive regions. This can then result in improved consensus in those repetitive regions, because the reads have been assigned more correctly to their true genomic loci. However there is also a possibility that the potential shifting of reads around from one rounds' mapping to the next might alter borderline (low confidence) consensus calls even away from repetitive regions. We recommend the (mapping+quiver) iteration for customers polishing repetitive genomes, and it could also prove useful for resequencing applications. However we caution that this is very much an *exploratory* procedure and we make no guarantees about its performance. In particular, borderline consensus calls can change when the procedure is iterated, and the procedure is *not* guaranteed to be convergent. Is iterating the (mapping+quiver/arrow) process a convergent procedure? 
----------------------------------------------------------------------- We have seen many examples where (mapping+quiver), repeated many times, is evidently *not* a convergent procedure. For example, a variant call may be present in iteration n, absent in n+1, and then present again in n+2. It is possible for subtle changes in mapping to change the set of reads examined upon inspecting a genomic window, and therefore result in a different consensus sequence there. We expect this to be the case primarily for "borderline" (low confidence) base calls. .. _HowTo: ./HowTo.rst .. _`HGAP paper`: http://www.nature.com/nmeth/journal/v10/n6/full/nmeth.2474.html GenomicConsensus-master/doc/HowTo.rst000066400000000000000000000125431274347070600202070ustar00rootroot00000000000000 How to install and use GenomicConsensus ======================================= **We recommend that you obtain GenomicConsensus by installing the most recent version of SMRTanalysis. Other means of installation are not officially supported.** Basic running instructions -------------------------- Basic usage---using 8 CPUs to compute consensus of mapped reads and variants relative to a reference---is as follows:: % quiver -j8 aligned_reads{.cmp.h5, .bam, .fofn, or .xml} \ > -r reference{.fasta or .xml} -o variants.gff \ > -o consensus.fasta -o consensus.fastq ``quiver`` is a shortcut for ``variantCaller --algorithm=quiver``. Naturally, to use arrow you could use the ``arrow`` shortcut or ``variantCaller --algorithm=arrow``. in this example we perform haploid consensus and variant calling on the mapped reads in the ``aligned_reads.bam`` which was aligned to ``reference.fasta``. The ``reference.fasta`` is only used for designating variant calls, not for computing the consensus. The consensus quality score for every position can be found in the output FASTQ file. 
*Note that 2.3 SMRTanalysis does not support "dataset" input (FOFN or XML files); those who need this feature should wait for the forthcoming release of SMRTanalysis 3.0 or build from GitHub sources.* Running a large-scale resequencing/polishing job in SMRTanalysis 2.3 -------------------------------------------------------------------- We do not recommend attempting to construct a single giant cmp.h5 file and then processing it on a single node. This is inefficient and users attempting to do this have run into many problems with the instability of the HDF5 library (which PacBio is moving away from, in favor of BAM_.) To run a large-scale resequencing job (>50 megabase genome @ 50x coverage,nominally), you want to spread the computation load across multiple nodes in your computing cluster. The `smrtpipe` workflow engine in SMRTanalysis 2.3 provides a convenient workflow automating this---it will automatically spread the load for both mapping and quiver jobs among your available cluster nodes. This is accessible via the SMRTportal UI; the simplest way to set up and run thse workflows is via tha UI. Nonetheless, we include command-line instructions for completeness. If you have to run the `smrtpipe` workflow manually from the command line, a recipe is as folows:: 1. Make sure the reference you will align and compare against is present in a SMRTportal "reference repository". Even if you don't want to use SMRTportal, you need to build/import the reference appropriately, and the simplest way to do that is via SMRTportal. If you don't have a SMRTportal instance, you can use the ``referenceUploader`` command to prepare your reference repository. 2. Prepare an "input.fofn" file listing, one-per-line, each "bax.h5" file in your input data set. 3. Convert the "input.fofn" to an "input.xml" file that SMRTpipe can understand:: $ fofnToSmrtpipeInput.py input.fofn > input.xml 4. Prepare your "params.xml" file. 
Here is a `params.xml template`_ you can use; you should just need to edit the reference path. 5. Activate your SMRTanalysis environment, and invoke smrtpipe:: $ source /etc/setup.sh $ smrtpipe.py --distribute --params=params.xml xml:input.xml 6. After successful execution is complete, the results should be available as `data/consensus.fast[aq].gz` and `data/variants.gff.gz`, etc. Please consult the `SMRTpipe reference manual`_ for further information. *Note that resequencing (mapping reads against a reference genome and then calling consensus and identifying variants) and polishing (mapping reads against a draft assembly and then taking the consensus output as the final, polished, assembly) are the same algorithmic operation, the only effective difference is that the "variants.gff" output is not biologically meaningful in the polishing case---it just records the edits that were made to the draft to produce the polished assembly.* Running a large-scale quiver/arrow job in SMRTanalysis 3.0+ ----------------------------------------------------------- (Forthcoming) Building bleeding-edge code (unsupported) ---------------------------------------- If you need to access the the latest code for some reason, a convenient way to build it is to use PacBio's pitchfork_ build system, which will take care of all third party dependencies for you. Here's a recipe:: git clone git@github.com:PacificBiosciences/pitchfork.git cd pitchfork make GenomicConsensus # may take some time, as it builds dependencies... Now, with GenomicConsensus built, you can use it via:: bash --init-file deployment/setup-env.sh # Puts you in a subshell where your build is available quiver --help # now you have quiver, arrow, etc. available If you encounter build issues using `pitchfork`, please report the issues there. Note that you can deploy PacBio software to a location of your choice using pitchfork. Further questions? ------------------ Please consult the `FAQ document`_. .. 
_`FAQ document`: ./FAQ.rst .. _pitchfork : https://github.com/PacificBiosciences/pitchfork .. _`params.xml template`: ./params-template.xml .. _`SMRTpipe reference manual`: http://www.pacb.com/wp-content/uploads/2015/09/SMRT-Pipe-Reference-Guide.pdf .. _`BAM`: http://pacbiofileformats.readthedocs.io/en/3.0/BAM.html GenomicConsensus-master/doc/Makefile000066400000000000000000000127441274347070600200600ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all 
external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GenomicConsensus.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GenomicConsensus.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/GenomicConsensus" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GenomicConsensus" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
@echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." 
GenomicConsensus-master/doc/VariantCallerFunctionalSpecification.rst000066400000000000000000000161001274347070600264130ustar00rootroot00000000000000 Variant Caller Functional Specification ======================================= Version 2.2 Introduction ------------ This document describes the interface, input/output, and performance characteristics of ``variantCaller.py``, a variant calling tool provided by the ``GenomicConsensus`` package. Software Overview ----------------- The ``GenomicConsensus`` package provides a command-line tool, ``variantCaller.py``, which provides several variant-calling algorithms for PacBio sequencing data. ``variantCaller.py`` replaces ``EviCons`` and ``SmrtBayes``, the previous (haploid, diploid---respectively) variant callers at PacBio. Functional Requirements ----------------------- Command-line interface `````````````````````` ``variantCaller.py`` is invoked from the command line. For example, a simple invocation is:: variantCaller.py -j8 --algorithm=quiver \ -r lambdaNEB.fa \ -o variants.gff \ aligned_reads.cmp.h5 which requests that variant calling proceed, - using 8 worker processes, - employing the **quiver** algorithm, - taking input from the file ``aligned_reads.cmp.h5``, - using the FASTA file ``lambdaNEB.fa`` as the reference, - and writing output to ``variants.gff``. A particularly useful option is ``--referenceWindow/-w``: this option allows the user to direct the tool to perform variant calling exclusively on a *window* of the reference genome, where the Invoking :: variantCaller.py --help will provide a help message explaining all available options; they will be documented here shortly. Input and output ```````````````` ``variantCaller.py`` requires two input files: - A file of reference-aligned reads in PacBio's standard cmp.h5 format; - A FASTA file that has been processed by ReferenceUploader. The tool's output is formatted in the GFF format, as described in (how to link to other file?). 
External tools can be used to convert the GFF file to a VCF or BED file---two other standard interchange formats for variant calling. .. note:: **Input cmp.h5 file requirements** ``variantCaller.py`` requires its input cmp.h5 file to be be sorted. An unsorted file can be sorting using the tool ``cmpH5Sort.py``. The *quiver* algorithm in ``variantCaller.py`` requires its input cmp.h5 file to have the following *pulse features*: - ``InsQV``, - ``SubsQV``, - ``DelQV``, - ``DelTag``, - ``MergeQV``. The *plurality* algorithm can be run on cmp.h5 files that lack these features. The input file is the main argument to ``variantCaller.py``, while the output file is provided as an argument to the ``-o`` flag. For example, :: variantCaller.py aligned_reads.cmp.h5 -r lambda.fa -o variants.gff will read input from ``aligned_reads.cmp.h5``, using the reference ``lambda.fa``, and send output to the file ``variants.gff``. The extension of the filename provided to the ``-o`` flag is meaningful, as it determines the output file format. The file formats presently supported, by extension, are ``.gff`` GFFv3 format ``.txt`` a simplified human readable format used primarily by the developers If the ``-o`` flag is not provided, the default behavior is to output to a ``variants.gff`` in the current directory. .. note:: ``variantCaller.py`` does **not** modify its input cmp.h5 file in any way. This is in contrast to previous variant callers in use at PacBio, which would write a *consensus* dataset to the input cmp.h5 file. Available algorithms ```````````````````` At this time there are two algorithms available for variant calling: **plurality** and **quiver**. **Plurality** is a simple and very fast procedure that merely tallies the most frequent read base or bases found in alignment with each reference base, and reports deviations from the reference as potential variants. **Quiver** is a more complex procedure based on algorithms originally developed for CCS. 
Quiver leverages the quality values (QVs) provided by upstream processing tools, which provide insight into whether insertions/deletions/substitutions were deemed likely at a given read position. Use of **quiver** requires the ``ConsensusCore`` and ``ConsensusCore2`` libraries as well as trained parameter set, which will be loaded from a standard location (TBD). Arrow and Quiver can be thought of as local-realignment procedures (QV-aware in the case of Quiver). Both algorithms are expected to converge to *zero* errors (miscalled variants) as coverage increases; however **quiver** should converge much faster (i.e., fewer errors at low coverage), and should provide greater variant detection power at a given error level. Software interfaces ``````````````````` The ``GenomicConsensus`` module has two essential dependencies: 1. **pbcore**, the PacBio Python bioinformatics library 2. **ConsensusCore**, a C++ library with SWIG bindings that provides access to the same algorithms used in circular consensus sequencing. 3. **ConsensusCore2**, a C++ library with SWIG bindings that provides access to the same algorithms used in circular consensus sequencing. Both of these modules are easily installed using their ``setup.py`` scripts, which is the canonical means of installing Python packages. Confidence values ----------------- Both *quiver* and *plurality* make a confidence metric available for every position of the consensus sequence. The confidence should be interpreted as a phred-transformed posterior probability that the consensus call is incorrect; i.e. .. math:: QV = -10 \log_{10}(p_{err}) ``variantCaller.py`` clips reported QV values at 93---larger values cannot be encoded in a standard FASTQ file. Chemistry specificity --------------------- The Quiver algorithm parameters are trained per-chemistry. SMRTanalysis software loads metadata into the `cmp.h5` to indicate the chemistry used per movie. 
Quiver sees this table and automatically chooses the appropriate parameter set to use. This selection can be overriden by a command line flag. When multiple chemistries are represented in the reads in a `cmp.h5`, Quiver will model each read appropriately using the parameter set for its chemistry, thus yielding optimal results. Performance Requirements ------------------------ ``variantCaller.py`` performs variant calling in parallel using multiple processes. Work splitting and inter-process communication are handled using the Python ``multiprocessing`` module. Work can be split among an arbitrary number of processes (using the ``-j`` command-line flag), but for best performance one should use no more worker processes than there are CPUs in the host computer. The running time of the *plurality* algorithm should not exceed the runtime of the BLASR process that produced the cmp.h5. The running time of the *quiver* algorithm should not exceed 4x the runtime of BLASR. The amount of core memory (RAM) used among all the python processes launched by a ``variantCaller.py`` run should not exceed the size of the uncompressed input ``.cmp.h5`` file. GenomicConsensus-master/doc/VariantCallerKnownIssues.rst000066400000000000000000000006111274347070600241000ustar00rootroot00000000000000 Known Issues ============ Python 2.6 multiprocessing is susceptible to a bug where exceptions are occasionally thrown at shutdown because the daemon processes are allowed to continue executing while the interpreter is shutting down. (See: http://bugs.python.org/issue4106, http://bugs.python.org/issue9207) The bug is fixed in 2.7 but not in 2.6. I haven't been able to find a workaround. GenomicConsensus-master/doc/VariantsGffSpecification.rst000066400000000000000000000176111274347070600240630ustar00rootroot00000000000000 ``variants.gff`` File Format (Version 2.1) ============================================ As of this version, ``variants.gff`` is our primary variant call file format. 
The ``variants.gff`` file is based on the `GFFv3 standard`_. The GFFv3 standard describes a tab-delimited plain-text file meta-format for describing genomic "features." Each gff file consists of some initial "header" lines supplying metadata, and then a number of "feature" lines providing information about each identified variant. The GFF Coordinate System ------------------------- All coordinates in GFF files are 1-based, and all intervals ``start, end`` are understood as including both endpoints. Headers ------- The ``variants.gff`` file begins with a block of metadata headers, which looks like the following: :: ##gff-version 3 ##pacbio-variant-version 2.1 ##date Tue Feb 28 17:44:18 2012 ##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12 ##source GenomicConsensus v0.1.0 ##source-commandline callVariants.py --algorithm=plurality aligned_reads.cmp.h5 -r spinach.fasta -o variants.gff ##source-alignment-file /home/popeye/data/aligned_reads.cmp.h5 ##source-reference-file /home/popeye/data/spinach.fasta ##sequence-region EGFR_Exon_23 1 189 ##sequence-header EGFR_Exon_24 1 200 The ``source`` and ``source-commandline`` describe the name and version of the software generating the file. ``pacbio-variant-version`` reflects the specification version that the file contents should adhere to. The ``sequence-region`` headers describe the names and extents of the reference groups (i.e. reference contigs) that will be refered to in the file. The names are the same as the full FASTA header. ``source-alignment-file`` and ``source-reference-file`` record absolute paths to the primary input files. Feature lines ------------- After the headers, each line in the file describes a genomic *feature*; in this file, all the features are potential variants flagged by the variant caller. 
The general format of a variant line is a 9-column (tab-delimited) record, where the first 8 columns correspond to fixed, predefined entities in the GFF standard, while the 9th column is a flexible semicolon-delimited list of mappings ``key=value``. The 8 predefined columns are as follows: +------+-------+--------------------------------+------------------+ |Column|Name |Description |Example | |Number| | | | +------+-------+--------------------------------+------------------+ |1 |seqId |The full FASTA header for the |``lambda_NEB3011``| | | |reference contig. | | | | | | | +------+-------+--------------------------------+------------------+ |2 |source |(unused; always populated with |``.`` | | | |``.``) | | +------+-------+--------------------------------+------------------+ |3 |type |the type of variant. One of |``substitution`` | | | |``insertion``, ``deletion``, or | | | | |``substitution``. | | | | | | | +------+-------+--------------------------------+------------------+ |4 |start |1-based start coordinate for the|200 | | | |variant. | | +------+-------+--------------------------------+------------------+ |5 |end |1-based end coordinate for the |215 | | | |variant. start<=end always | | | | |obtains, regardless of strand. 
| | +------+-------+--------------------------------+------------------+ |6 |score |unused; populated with ``.`` |``.`` | +------+-------+--------------------------------+------------------+ |7 |strand |unused; populated with ``.`` |``.`` | | | | | | +------+-------+--------------------------------+------------------+ |8 |phase |unused; populated with ``.`` |``.`` | +------+-------+--------------------------------+------------------+ The attributes in the 9th (final) column are as follows: +--------------+----------------------------+-----------------+ |Key |Description |Example | | | |value | +--------------+----------------------------+-----------------+ |``coverage`` |the read coverage of the |``42`` | | |variant site (not the | | | |variant itself) | | +--------------+----------------------------+-----------------+ |``confidence``|the phred-scaled probability|``37`` | | |that the variant is real, | | | |rounded to the nearest | | | |integer and truncated at 93 | | +--------------+----------------------------+-----------------+ |``reference`` |the reference base or bases |``T``, ``.`` | | |for the variant site. May | | | |be ``.`` to represent a | | | |zero-length substring (for | | | |insertion events) | | +--------------+----------------------------+-----------------+ |``variantSeq``|the read base or bases |``T`` | | |corresponding to the | (haploid); | | |variant. ``.`` encodes a |``T/C``, ``T/.`` | | |zer-length string, as for a | (heterozygous) | | |deletion. | | +--------------+----------------------------+-----------------+ |``frequency`` |the read coverage of the |``13`` | | |variant itself; for | (haploid) | | |heterozygous variants, the | | | |frequency of both observed |``15/12`` | | |alleles. This is an | (heterozygous) | | |optional field. | | +--------------+----------------------------+-----------------+ The attributes may be present in any order. The four types of variant we support are as follows. 
*(Recall that the field separator is a tab, not a space.)* 1. Insertion. Examples:: ref00001 . insertion 8 8 . . . reference=.;variantSeq=G;confidence=22;coverage=18;frequency=10 ref00001 . insertion 19 19 . . . reference=.;variantSeq=G/.;confidence=22;coverage=18;frequency=7/5 For insertions, start==end, and the insertion event is understood as taking place *following* the reference position `start`. 2. Deletion. Examples:: ref00001 . deletion 348 349 . . . reference=G;variantSeq=.;confidence=39;coverage=25;frequency=20 ref00001 . deletion 441 443 . . . reference=GG;variantSeq=GG/.;confidence=39;coverage=25;frequency=8/8 3. Substitution. Examples:: ref000001 . substitution 100 102 . . . reference=GGG;variantSeq=CCC;confidence=50;coverage=20;frequency=16 ref000001 . substitution 200 201 . . . reference=G;variantSeq=G/C;confidence=50;coverage=20;frequency=10/6 Compression ----------- The gff metaformat is verbose, so for practical purposes we will gzip encode ``variants.gff`` files as ``variants.gff.gz``. Consumers of the variant file should be able to read it in either form. Other file formats ------------------ The VCF and BED standards describe variant-call specific file formats. We can currently translate `variants.gff` files to these formats, but they are not the primary output of the variant callers. .. _GFFv3 standard: http://www.sequenceontology.org/gff3.shtml GenomicConsensus-master/doc/conf.py000077500000000000000000000176651274347070600177310ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # GenomicConsensus documentation build configuration file, created by # sphinx-quickstart on Sat Jan 28 18:28:19 2012. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. 
import sys, os from os.path import dirname, join globals = {} execfile("../GenomicConsensus/__init__.py", globals) __VERSION__ = globals["__VERSION__"] # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.mathjax'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'GenomicConsensus' copyright = u'2012-2013, Pacific Biosciences' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = __VERSION__ # The full version, including alpha/beta/rc tags. release = __VERSION__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. 
#today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'GenomicConsensusdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. 
List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'GenomicConsensus.tex', u'GenomicConsensus Documentation', u'David Alexander', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'genomicconsensus', u'GenomicConsensus Documentation', [u'David Alexander'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'GenomicConsensus', u'GenomicConsensus Documentation', u'David Alexander', 'GenomicConsensus', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' GenomicConsensus-master/doc/index.rst000066400000000000000000000006531274347070600202550ustar00rootroot00000000000000.. 
GenomicConsensus documentation master file, created by sphinx-quickstart on Sat Jan 28 18:28:19 2012. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. GenomicConsensus ================ Contents: .. toctree:: :maxdepth: 2 VariantCallerFunctionalSpecification VariantsGffSpecification VariantCallerKnownIssues HowToQuiver QuiverFAQ GenomicConsensus-master/doc/internal/000077500000000000000000000000001274347070600202245ustar00rootroot00000000000000GenomicConsensus-master/doc/internal/1_3_3_Enhancements.rst000066400000000000000000000053241274347070600242560ustar00rootroot000000000000001.3.3 Enhancements ================== Bug 20100 --------- **Genomic Consensus to support rare variant calling** Adds the ability to to call rare variants. Rare variants are defined here as mutations detected at a 1% < frequency < 50%. There is an initial minimum coverage requirement set at 500x. The information provided by this feature will be limited to deviations from the reference. This will limited to SNPs only. Indels will be ignored. Codon-aware filtering could easily be applied to the output as a post-processing step. It could also be used *in situ* as an additional filtering mechanism to reduce potentially noisy output. We can start with the post-processing option (easy) then evolve towards being codon aware as necessary. This functionality will be optional. *Inputs*: A column-oriented set of short (~1 - 3 bases) sequence calls and their corresponding frequencies. *Outputs*: A GFF-style record per rare variant call. See VariantsGffSpecification for standard format. This feature will augment the standard record with a new key/value pair indicating frequency. Example: freq=0.10. There may be more than one variant per reference position. Please note that no consensus file(s) will be generated for rare variants, though enough information is provided to build one in a separate tools/module. 
Bug 20628 --------- **Add support for detecting and reporting correlated mutations** Provides support for determing whether or not sets of mutations are co-located. This only includes SNPs, not indels. Correlations may only be established using overrlapping reads, i.e., gaps not supportable. Correlations will have some confidence metric associated with them (TBD). This functionality may also be combined with rare variant calling output. The guiding use-case for this feature is the BCR-ABL project, which targeted an 863 bp region of the human genome (11,000x coverage). `BCR-ABL Project Details`_. This functionality will be optional. *Inputs*: CCS-based variant calls at each position including read information: ID, start, stop. 'start' and 'stop' are in (+) strand genomic coordinates. *Outputs*: A table (possibly) of correlated mutations that could look like: ===== ======= ===== =================== Freq # Reads Conf Mutations ===== ======= ===== =================== 40.4% 4,321 40 123a, 140c 30.3% 3,210 30 50t, 350a 20.2% 2,500 20 1400g, 1500c, 1550t ===== ======= ===== =================== We may also choose to include an output of read IDs associated with reported sets of co-located mutations. Formats TBD. .. _BCR-ABL Project Details: http://web/~mbrown/workspace2011Q4/bcrAblASHRuns/ GenomicConsensus-master/doc/internal/VariantCallerValidation.rst000066400000000000000000000151311274347070600255210ustar00rootroot00000000000000Variant Caller Validation Specification ======================================= Created: 2012/02/20 Author: jdrake Synopsis -------- There are several algorithms implemented for detecting SNPs in the data using alignments against a known reference. These include Evicons (PacBio), GATK (Broad) and, most recently, GenomicConsensus (PacBio). The first was built back in the days of 70% accurate reads and is quickly becoming deprecated, though currently used only as our haploid caller (though it does have diploid calling functionality). 
The second is part of a comprehensive tool kit that provides better diploid calling and is currently used as such in the secondary pipeline. The third is the most recent incarnation and the heir apparent going forward. There are no metrics built to measure, for example, the sensitivy and specificity of these algorithms, and thus are difficult to evaluate against eachother. Ostensibly, since we're closer to the data, we should be able to better tune an algorithm to maximize true +/- variant calls. This exercise will create datasets to generate ROC curves and potentially other user metrics to properly evaluate algorithms. Workflow -------- A dataset will be generated using a set of mutated reference genomes to align real reads against. Each mutated reference will have a list of 'ground-truth' mutations associated with them. The alignments will then be processed by each of the candidate variant caller algorithms and their results evaluated against the ground truth for true +/-. 1. Generate mutated reference(s) 2. Align reads to each mutated reference 3. Run variant callers using alignments 4. Evaluate calls vs ground truth 5. Generate metrics 6. Repeat steps 3 - 5 as necessary *NOTE*: The mutated references could be generated on the fly using the mutation information, thus, obviating the need to save all the mutated references. The automation of the workflow should eventually be packaged and deployed into the Millhouse framework. Initially, it can configured to run from the command line on the secondary cluster and the smrtpipe infrastructure. Mutation Sets ------------- Starting with lambda (well represented amongst currently available runs), generate a set, M, of mutated lambda references with n randomly generated point mutations within each m mutated genome. Each point mutation p will be one of P = {Substitution(s), Insertion(i), Deletion(d)}. Locations will be associated with each mutation as a 1-based offset using the wild-type genome (w) coordinates. 
Offsets are stored in 0-based coordinates, but displayed and manipulated using a 1-based coordinate system because that's what GFF, SAM and Tablet uses. Mutation sets will be stored in a file per reference mutated. The file will be used as input to mutate genomes on the fly just prior to alignment as well as input to the validation procedure. GFF files could be generated from them fairly easily if, though probably not, necessary. Multiple versions of this file could co-exist for the same reference. The format of the mutations file is extremely simple and compact making it suitable for source control. For simplicity, we'll use the python pickle protocol vers 2. mutation = { offs, # offset in the wild-type genome (1-based) typ, # type of mutation wild, # base(s), wild-type strain mut # base(s), the mutated strain } Comparison ~~~~~~~~~~ Alignments play a big role in this. Homopolymer regions are treated slightly differently when QV's are involved. Without them, affine gaps are used which push gaps to the left, e.g., GAATGAAGCCG GAATG-AGCCG These degenerate cases will be handled by collapsing the homopolymer aligment gaps to the left before comparing. See BLASR subsection for more detail. Some alignments against a substitution generate this type of call (some field removed for brevity): deletion 10201 length=1;confidence=22;coverage=16;reference=G insertion 10201 length=1;variantSeq=T;confidence=23;coverage=16 This happens when the substitution forms a homopolymer. These will be collapsed and labelled substitutions during comparisons. Does it use CCS reads by default, QV values? The production version does not use CCS reads but does use QV values. Data Sets --------- Key things to pay attention to in datasets: - coverage level - quality - location of mutation (e.g., homopolymer) - nature of mutation (e.g., insertion followed by deletion) Start with a positive control using an unmutated reference. Zero mutations should be found. 
Metrics ------- Confusion matrix ROC Curves (using quality scores) QQ Plot Notes ----- http://www.sequenceontology.org/gff3.shtml http://smrtwiki/wiki/SMRTpipe http://web/~mhsieh/dag/index.html Evicons ~~~~~~~ Top level module src: //depot/software/assembly/pbpy/pbpy/smrtpipe/modules Evicons smrtpipe modules: P_Consensus, P_ConsensusAlgorithm wraps runChunkConnectPost.py (same dir) which ... wraps eviConsWrapper.py (../../../bin/) which ... wraps jConnectPost[.sh] (../../../../seymour/dist2/analysis/bin/) which ... wraps a call an evicons jar file *Un*-wrapping this may be more cumbersome than generating the appropriate inputs to the module. GATK ~~~~ P_GATKVC Uses the UnifiedGenotyper, TableRecalibration, CountCovariates components Uses BAM inputs, generated after alignment (blasr) //depot/software/bioinformatics/tools/pbsamtools BLASR ~~~~~ Running blasr to get a cmp.h5 file (super basic, with crappy alignments):: > compareSequences.py --algorithm=blasr --h5fn=aligned.cmp.h5 input.fofn refdir More productiony way:: > compareSequences.py --info --useGuidedAlign --algorithm=blasr --nproc=6 --noXML --h5mode=w \ --h5fn=control.cmp.h5 --minAccuracy=0.75 --minLength=50 -x -minMatch 12 -x -bestn 1 -x -minPctIdentity 70.0 \ --regionTable=/mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/jobs/037/037285/data/filtered_regions.chunk001of002.fofn \ input.fofn /mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/references/lambda `refdir` is a directory containing a set of information related to a reference sequence. The key files appear to be .fa and reference.info.xml. It can work with just a fasta file, but will produce a cmp.h5 that breaks evicons (reference length is 0). 
There is a utility to generate these ref dirs: /mnt/secondary/Smrtpipe/builds/Assembly_Mainline_Nightly_LastSuccessfulBuild/analysis/bin/referenceUploader Validation tests could be source controlled under the siv tree, given they're likely to transition into that group eventually (//depot/software/assembly/siv-test/...) Using what we've already got: //depot/software/assembly/siv-test/module-test/bin/ - mutateRef.py (?) - evalVariantCalls.py (?) GenomicConsensus-master/doc/params-template.xml000066400000000000000000000041471274347070600222340ustar00rootroot00000000000000 /mnt/secondary-siv/references/ecoli_split_1000 50 50 0.75 10 30 12 True True True --seed=1 --minAccuracy=0.75 --minLength=50 --algorithmOptions='-useQuality' DeletionQV,IPD,InsertionQV,PulseWidth,QualityValue,MergeQV,SubstitutionQV,DeletionTag byread quiver True True True True GenomicConsensus-master/setup.py000077500000000000000000000022731274347070600173640ustar00rootroot00000000000000from setuptools import setup, find_packages from os.path import join, dirname # Load __VERSION__ from the GenomicConsensus package that is under # this directory---do NOT import GenomicConsensus, as importing # GenomicConsensus may fail if it has not actually been installed yet. 
globals = {} execfile("GenomicConsensus/__init__.py", globals) __VERSION__ = globals["__VERSION__"] setup( name = 'GenomicConsensus', version=__VERSION__, author='Pacific Biosciences', author_email='devnet@pacificbiosciences.com', license=open('LICENSES').read(), scripts = ['bin/variantCaller', 'bin/summarizeConsensus', 'bin/gffToVcf', 'bin/gffToBed', 'bin/plurality', 'bin/poa', 'bin/quiver', 'bin/arrow'], packages = find_packages(), package_data={'GenomicConsensus.quiver': ['resources/*/GenomicConsensus/*.ini']}, include_package_data=True, zip_safe = False, install_requires=[ 'pbcore >= 1.2.9', 'pbcommand >= 0.3.20', 'numpy >= 1.6.0', 'h5py >= 2.0.1', 'ConsensusCore >= 1.0.1' # , 'ConsensusCore2 >= 0.9', ] ) GenomicConsensus-master/tests/000077500000000000000000000000001274347070600170055ustar00rootroot00000000000000GenomicConsensus-master/tests/cram/000077500000000000000000000000001274347070600177275ustar00rootroot00000000000000GenomicConsensus-master/tests/cram/arrow-all4mer.t000066400000000000000000000023751274347070600226130ustar00rootroot00000000000000Bite-sized quiver test using an All4Mers template! $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run arrow. $ arrow $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq No variants! 
$ egrep -v '^#' v.gff | cat Perfect consensus, no no-calls $ cat css.fa >All4mer.V2.01_Insert|arrow CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC $ fold -60 css.fq @All4mer.V2.01_Insert|arrow CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~ GenomicConsensus-master/tests/cram/bad_input.t000066400000000000000000000007211274347070600220610ustar00rootroot00000000000000 Test for sane behavior in the presence of bogus arguments. $ variantCaller fake.alignmentset.xml -r fake.referenceset.xml -o test.fasta 2>&1 | tail -1 variantCaller: error: Input file */fake.alignmentset.xml not found. (glob) Test that it doesn't crash when one BAM file in an otherwise valid AlignmentSet is empty. $ DATA=$TESTDIR/../data/sanity $ REF="`pbdata get lambda-fasta`" $ arrow --reference $REF -o contig.fasta $DATA/mixed.alignmentset.xml GenomicConsensus-master/tests/cram/best-all4mer.t000066400000000000000000000012571274347070600224140ustar00rootroot00000000000000Bite-sized quiver test using an All4Mers template! $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run arrow. $ variantCaller --algorithm=best $INPUT -r $REFERENCE -o v.gff -o css.fa No variants! 
$ egrep -v '^#' v.gff | cat Perfect consensus, no no-calls $ cat css.fa >All4mer.V2.01_Insert|quiver CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC GenomicConsensus-master/tests/cram/extra/000077500000000000000000000000001274347070600210525ustar00rootroot00000000000000GenomicConsensus-master/tests/cram/extra/arrow-evidence.t000066400000000000000000000016241274347070600241540ustar00rootroot00000000000000Get the arrow evidence dump and make sure it can be grokked. $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run arrow w/ evidence dump $ arrow --dumpEvidence=all $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq Inspect the output... $ find evidence_dump | sort evidence_dump evidence_dump/All4mer.V2.01_Insert evidence_dump/All4mer.V2.01_Insert/0-260 evidence_dump/All4mer.V2.01_Insert/0-260/arrow-scores.h5 evidence_dump/All4mer.V2.01_Insert/0-260/consensus.fa Try to load it up using the API... $ python << EOF > from GenomicConsensus.arrow.evidence import ArrowEvidence > ev = ArrowEvidence.load("evidence_dump/All4mer.V2.01_Insert/0-260") > assert 8*len(ev.consensus)==len(ev.colNames)==2080 > assert ev.delta.shape == (len(ev.rowNames), len(ev.colNames))==(95,2080) > EOF GenomicConsensus-master/tests/cram/extra/convert-to-bed.t000066400000000000000000000037701274347070600240760ustar00rootroot00000000000000 Test conversion variants GFF -> BED. $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/converters/variants.gff.gz $ gffToBed --name=variants \ > --description="PacBio variant calls" \ > variants $INPUT track name=variants description="PacBio variant calls" useScore=0 gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. 
aureus USA300_TCH1516, complete genome\t701414\t701415\t701415del\t46.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t970969\t970970\t970970del\t48.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1065967\t1065968\t1065968del\t49.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1081287\t1081288\t1081288del\t40.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1315974\t1315975\t1315975del\t41.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1342769\t1342770\t1342770_1342771insA\t49.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1439018\t1439019\t1439019del\t49.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1456849\t1456850\t1456850del\t48.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1623534\t1623535\t1623535del\t47.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1998594\t1998595\t1998595del\t47.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t2002375\t2002376\t2002376del\t48.000\t. (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t2179434\t2179435\t2179435del\t48.000\t. 
(esc) GenomicConsensus-master/tests/cram/extra/convert-to-vcf.t000066400000000000000000000042301274347070600241120ustar00rootroot00000000000000 Test conversion GFF -> VCF $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/converters/variants.gff.gz $ gffToVcf --globalReference=Staphylococcus_aureus_USA300_TCH1516 $INPUT ##fileformat=VCFv3.3 ##fileDate=* (glob) ##source=* (glob) ##reference=Staphylococcus_aureus_USA300_TCH1516 ##INFO=NS,1,Integer,"Number of Samples with Data" ##INFO=DP,1,Integer,"Total Depth of Coverage" #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t701415\t.\tG\tD1\t46.00\t0\tNS=1;DP=97 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t970970\t.\tG\tD1\t48.00\t0\tNS=1;DP=97 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1065968\t.\tG\tD1\t49.00\t0\tNS=1;DP=87 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1081288\t.\tG\tD1\t40.00\t0\tNS=1;DP=81 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1315975\t.\tC\tD1\t41.00\t0\tNS=1;DP=100 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1342770\t.\t.\tIA\t49.00\t0\tNS=1;DP=27 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1439019\t.\tC\tD1\t49.00\t0\tNS=1;DP=88 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1456850\t.\tC\tD1\t48.00\t0\tNS=1;DP=84 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t1623535\t.\tC\tD1\t47.00\t0\tNS=1;DP=99 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. 
aureus USA300_TCH1516, complete genome\t1998595\t.\tG\tD1\t47.00\t0\tNS=1;DP=75 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t2002376\t.\tC\tD1\t48.00\t0\tNS=1;DP=73 (esc) gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t2179435\t.\tC\tD1\t48.00\t0\tNS=1;DP=52 (esc) GenomicConsensus-master/tests/cram/extra/coverage-bed.t000066400000000000000000000023011274347070600235560ustar00rootroot00000000000000Test conversion of alignment summary GFF to coverage BED. $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/fluidigm_amplicons/alignment_summary.gff $ gffToBed --name=coverage \ > --description="PacBio coverage" \ > coverage $INPUT > coverage.bed $ head -20 coverage.bed track name=coverage description="PacBio coverage" useScore=0 ref000001\t0\t1\tmeanCov\t27.000\t+ (esc) ref000001\t1\t2\tmeanCov\t27.000\t+ (esc) ref000001\t2\t3\tmeanCov\t27.000\t+ (esc) ref000001\t3\t4\tmeanCov\t27.000\t+ (esc) ref000001\t4\t5\tmeanCov\t27.000\t+ (esc) ref000001\t5\t6\tmeanCov\t27.000\t+ (esc) ref000001\t6\t7\tmeanCov\t27.000\t+ (esc) ref000001\t7\t8\tmeanCov\t27.000\t+ (esc) ref000001\t8\t9\tmeanCov\t27.000\t+ (esc) ref000001\t9\t10\tmeanCov\t27.000\t+ (esc) ref000001\t10\t11\tmeanCov\t27.000\t+ (esc) ref000001\t11\t12\tmeanCov\t27.000\t+ (esc) ref000001\t12\t13\tmeanCov\t27.000\t+ (esc) ref000001\t13\t14\tmeanCov\t27.000\t+ (esc) ref000001\t14\t15\tmeanCov\t27.000\t+ (esc) ref000001\t15\t16\tmeanCov\t27.000\t+ (esc) ref000001\t16\t17\tmeanCov\t27.000\t+ (esc) ref000001\t17\t18\tmeanCov\t27.000\t+ (esc) ref000001\t18\t19\tmeanCov\t27.000\t+ (esc) GenomicConsensus-master/tests/cram/extra/plurality-compressed.t000066400000000000000000000044051274347070600254310ustar00rootroot00000000000000Run plurality on the small example file, and make sure the compressed output files are created correctly. 
$ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/hcv/aligned_reads.cmp.h5 $ export REFERENCE=$DATA/hcv/HCV_Ref_For_187140.fasta $ variantCaller --algorithm=plurality -q 10 -r $REFERENCE -o variants.gff.gz -o consensus.fq.gz $INPUT I like to show the head of the output files inline here so that glaringly obvious changes will pop right out, but I verify that the files are exactly correct by looking at the md5 sums. First, the variants.gff: $ gunzip variants.gff.gz $ cat variants.gff ##gff-version 3 ##pacbio-variant-version 2.1 ##date * (glob) ##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12 ##source GenomicConsensus * (glob) ##source-commandline * (glob) ##source-alignment-file * (glob) ##source-reference-file * (glob) ##sequence-region 5primeEnd 1 156 ##sequence-region 3primeEnd 1 386 Examine consensus output. This is identical to the reference $ gunzip consensus.fq.gz $ fold -60 consensus.fq @5primeEnd|plurality GGAACCGGTGAGTACACCGGAATTGCCAGGACGACCGGGTCCTTTCGTGGATAAACCCGC TCAATGCCTGGAGATTTGGGCGTGCCCCCGCAAGACTGCTAGCCGAGTAGTGTTGGGTCG CGAAAGGCCTTGTGGTACTGCCTGATAGGGTGCTTG + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII @3primeEnd|plurality TACCTGGTCATAGCCTCCGTGAAGGCTCTCAGGCTCGCTGCATCCTCCGGGACTCCCTGA CTTTCACAGATAACGACTAAGTCGTCGCCACACACGAGCATGGTGCAGTCCTGGAGCCCA GCGGCTCGACAGGCTGCTTTGGCCTTGATGTAGCAGGTGAGGGTGTTACCACAGCTGGTC GTCAGTACGCCGCTCGCGCGGCACCTGCGATAGCCGCAGTTTTCCCCCCTTGAATTAGTA AGAGGGCCCCCGACATAGAGCCTCTCGGTGAGGGACTTGATGGCCACGCGGGCTTGGGGG TCCAGGTCACAACATTGGTAAATTGCCTCCTCTGTACGGATATCGCTCTCAGTGACTGTG GAGTCAAAGCAGCGGGTATCATACGA + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIII GenomicConsensus-master/tests/cram/extra/plurality-fluidigm.t000066400000000000000000000013101274347070600250550ustar00rootroot00000000000000 Some tests of a "fluidigm amplicons" dataset $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/fluidigm_amplicons/040500.cmp.h5 $ export REFERENCE=$DATA/fluidigm_amplicons/Fluidigm_human_amplicons.fasta Set the QV threshold to 10. $ variantCaller --algorithm=plurality -r $REFERENCE -q 10 -o variants.gff -o consensus.csv -o consensus.fastq $INPUT There are two true SNVs (and one diploid SNV that we miss right now). $ grep insertion variants.gff | wc | awk '{print $1}' 0 $ grep deletion variants.gff | wc | awk '{print $1}' 0 $ grep substitution variants.gff EGFR_Exon_23\t.\tsubstitution\t48\t48\t.\t.\t.\treference=T;variantSeq=C;frequency=97;coverage=100;confidence=40 (esc) GenomicConsensus-master/tests/cram/extra/reference-mismatch.t000066400000000000000000000022231274347070600247770ustar00rootroot00000000000000 Test a few scenarios where the reference FASTA disagrees slightly from the contigs aligned against in the cmp.h5, and make sure things behave sanely. $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/hcv/aligned_reads.cmp.h5 $ export WRONG_REFERENCE=$DATA/fluidigm_amplicons/Fluidigm_human_amplicons.fasta $ export REFERENCE_SUBSET=$DATA/hcv/5primeEnd.fa $ export REFERENCE_NO_FAI=$DATA/hcv/3primeEnd.fa No .fai file: $ quiver -p unknown $INPUT -r $REFERENCE_NO_FAI -o variants.gff -o consensus.fastq Companion FASTA index (.fai) file not found or malformatted! Use 'samtools faidx' to generate FASTA index. [255] Wrong reference: $ quiver -p unknown $INPUT -r $WRONG_REFERENCE -o variants.gff -o consensus.fastq No reference groups in the FASTA file were aligned against. Did you select the wrong reference FASTA file? 
[255] Reference containing a subset of the reference that was aligned to: $ quiver -p unknown $INPUT -r $REFERENCE_SUBSET -o variants.gff -o consensus.fastq [WARNING] Some reference contigs aligned against are not found in the reference FASTA. Will process only those contigs supported by the reference FASTA. GenomicConsensus-master/tests/cram/internal/000077500000000000000000000000001274347070600215435ustar00rootroot00000000000000GenomicConsensus-master/tests/cram/internal/alignment_summary.t000066400000000000000000000051061274347070600254650ustar00rootroot00000000000000 Test the (augmentation) of the alignment_summary.gff file by summarizeConsensus $ export DATA=/mnt/secondary/Share/Quiver/TestData/tinyLambda/ $ export PATH=$TESTDIR/..:$PATH $ export VARIANTSGFF=$DATA/variants.gff.gz $ export ALIGNMENTSUMMARYGFF=$DATA/alignment_summary.gff $ summarizeConsensus \ > --variantsGff $VARIANTSGFF \ > $ALIGNMENTSUMMARYGFF \ > -o alignment_summary.out.gff $ head -20 alignment_summary.out.gff ##gff-version 3 ##date Thu, 03-Feb-2011 14:54:12 ##source PACBIO_AlignmentSummary 1.0 ##source ConsensusStats v0.1 ##source-commandline summarizeCoverage.py --reference /mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/references/lambda --numRegions=500 /mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/jobs/016/016789/data/aligned_reads.cmp.h5 ##source-commandline mono ConsensusStats.exe /mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/jobs/016/016789/data/variants.gff /mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/jobs/016/016789/data/aligned_reads.cmp.h5 ##sequence-region lambda_NEB3011 1 48502 ##source GenomicConsensus * (glob) ##pacbio-alignment-summary-version 0.6 ##source-commandline * (glob) lambda_NEB3011\t.\tregion\t1\t100\t0.00\t+\t.\tcov2=150.440,26.772;gaps=0,0;cov=51,160,171;cQv=20,20,20;del=0;ins=19;sub=0 (esc) lambda_NEB3011\t.\tregion\t101\t200\t0.00\t+\t.\tcov2=168.700,1.780;gaps=0,0;cov=166,168,173;cQv=20,20,20;del=0;ins=16;sub=0 (esc) 
lambda_NEB3011\t.\tregion\t201\t300\t0.00\t+\t.\tcov2=167.860,1.732;gaps=0,0;cov=165,168,171;cQv=20,20,20;del=1;ins=17;sub=1 (esc) lambda_NEB3011\t.\tregion\t301\t400\t0.00\t+\t.\tcov2=177.690,2.587;gaps=0,0;cov=168,179,181;cQv=20,20,20;del=2;ins=8;sub=0 (esc) lambda_NEB3011\t.\tregion\t401\t500\t0.00\t+\t.\tcov2=179.730,1.248;gaps=0,0;cov=177,180,182;cQv=20,20,20;del=0;ins=0;sub=0 (esc) lambda_NEB3011\t.\tregion\t501\t600\t0.00\t+\t.\tcov2=186.670,4.907;gaps=0,0;cov=177,188,195;cQv=20,20,20;del=0;ins=0;sub=0 (esc) lambda_NEB3011\t.\tregion\t601\t700\t0.00\t+\t.\tcov2=200.160,4.051;gaps=0,0;cov=192,200,206;cQv=20,20,20;del=0;ins=0;sub=0 (esc) lambda_NEB3011\t.\tregion\t701\t800\t0.00\t+\t.\tcov2=213.630,7.634;gaps=0,0;cov=200,215,226;cQv=20,20,20;del=0;ins=0;sub=0 (esc) lambda_NEB3011\t.\tregion\t801\t900\t0.00\t+\t.\tcov2=244.290,12.954;gaps=0,0;cov=224,243,262;cQv=20,20,20;del=0;ins=0;sub=0 (esc) lambda_NEB3011\t.\tregion\t901\t1000\t0.00\t+\t.\tcov2=267.070,3.724;gaps=0,0;cov=259,266,274;cQv=20,20,20;del=0;ins=0;sub=0 (esc) $ grep -v '\#.*' alignment_summary.out.gff | md5sum 08f89b262b159671cdcdd8bdc8331461 - GenomicConsensus-master/tests/cram/internal/alignment_summary_scaling.t000066400000000000000000000020561274347070600271660ustar00rootroot00000000000000 Test performance of summarizeConsensus with large numbers of variants. $ TESTDATA="/pbi/dept/secondary/siv/testdata/pbreports-unittest/data/summarizeConsensus" First test has 48000 regions total but hundreds of small contigs, with more than 5 million variants. This should not take more than 5 minutes or so unless an O(N^2) loop is used. $ VARIANTS=$TESTDATA/variants.gff $ SUMMARY=$TESTDATA/alignment_summary.gff $ GFFREF=$TESTDATA/alignment_summary_variants.gff $ OUTPUT=alignment_summary_variants_test.gff $ summarizeConsensus --variantsGff $VARIANTS --output $OUTPUT $SUMMARY $ diff -I "##source" $OUTPUT $GFFREF Second test has 20000 regions in a single contig, and 10000 variants. 
This will also take several minutes. $ VARIANTS=$TESTDATA/variants_big_chr.gff $ SUMMARY=$TESTDATA/alignment_summary_big_chr.gff $ GFFREF=$TESTDATA/alignment_summary_variants_big_chr.gff $ OUTPUT=alignment_summary_variants_big_chr_test.gff $ summarizeConsensus --variantsGff $VARIANTS --output $OUTPUT $SUMMARY $ diff -I "##source" $OUTPUT $GFFREF GenomicConsensus-master/tests/cram/internal/arrow-staph.t000066400000000000000000000016471274347070600242070ustar00rootroot00000000000000Compare quiver vs. arrow on a high SNR Staph job. $ export INPUT=/mnt/secondary/Share/Quiver/TestData/staphHighSnr/aligned_subreads.fofn $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/staph/S_aureus_USA300_TCH1516.fasta $ export MASK=/mnt/secondary/Share/Quiver/GenomeMasks/S_aureus_USA300_TCH1516-mask.gff $ quiver -j${JOBS-8} $INPUT -r $REFERENCE -o quiver-variants.gff -o quiver-css.fasta $ arrow -j${JOBS-8} $INPUT -r $REFERENCE -o arrow-variants.gff -o arrow-css.fasta Quiver does a good job here---no errors. $ gffsubtract.pl quiver-variants.gff $MASK | grep -v "#" | sed 's/\t/ /g' $ fastacomposition quiver-css.fasta quiver-css.fasta A 960233 C 470725 G 470271 T 971458 Arrow, since the SNR capping fix, also gets no errors. $ gffsubtract.pl arrow-variants.gff $MASK | grep -v "#" | sed 's/\t/ /g' $ fastacomposition arrow-css.fasta arrow-css.fasta A 960232 C 470725 G 470271 T 971457 GenomicConsensus-master/tests/cram/internal/plurality-diploid-lambda.t000066400000000000000000000036561274347070600266270ustar00rootroot00000000000000 Reads are from a simulated diploid lambda, where there is a SNP at each position 250 + 500k, and the SNP is a substitution "ACGT" -> "CGTA". How well do we pick up these SNPs? 
$ alias untabify="sed 's/\t/ /g'" $ export INPUT=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/aln.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/lambda.fasta $ export EXPECTED_VARIANTS=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/v-expected.gff Run haploid analysis, make sure we don't make too many miscalls! $ plurality $INPUT -r $REFERENCE \ > -o variants-haploid.gff -o css-haploid.fa Now run under diploid mode $ plurality --diploid $INPUT -r $REFERENCE \ > -o variants.gff -o css.fasta Consensus outputs should be identical, because detection of diploid variants doesn't change the cconsensus calls. $ diff css.fasta css-haploid.fa Take a look at the variants... $ grep -v "#" variants.gff | head -3 | untabify lambda_NEB3011 . substitution 250 250 . . . reference=A;variantSeq=C/A;frequency=45/43;coverage=100;confidence=40 lambda_NEB3011 . substitution 750 750 . . . reference=T;variantSeq=T/A;frequency=60/27;coverage=100;confidence=40 lambda_NEB3011 . substitution 1250 1250 . . . reference=G;variantSeq=G/T;frequency=56/21;coverage=100;confidence=40 Use gffsubtract.pl to compare variants to expected. Note that the gffsubtract tool just looks at the coordinates, not the actual content of the event, so it's not going to see if we called G/C as G/T, for example. Would be good to either write a better tool or make an easy way to do this in Python. False negatives: $ gffsubtract.pl $EXPECTED_VARIANTS variants.gff | grep -v '#' | untabify lambda_NEB3011 . substitution 1750 1750 . . . reference=A;variantSeq=A/C; lambda_NEB3011 . substitution 22750 22750 . . . 
reference=T;variantSeq=T/A; False positives: $ gffsubtract.pl variants.gff $EXPECTED_VARIANTS | grep -v '#' | untabify GenomicConsensus-master/tests/cram/internal/plurality-lambda.t000066400000000000000000000006111274347070600251710ustar00rootroot00000000000000 $ export INPUT=/mnt/secondary/Share/Quiver/TestData/lambda/job_038537.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/lambda/lambda.fasta $ plurality -j${JOBS-8} --noEvidenceConsensusCall=nocall $INPUT -r $REFERENCE \ > -o variants.gff -o css.fasta.gz -o css.fq.gz $ grep -v "##" variants.gff [1] $ gunzip css.fasta.gz $ fastadiff -c FALSE css.fasta $REFERENCE GenomicConsensus-master/tests/cram/internal/quiver-compatibility.t000066400000000000000000000032011274347070600261060ustar00rootroot00000000000000 Quiver should abort if the cmp.h5 is not suitable. Let's make sure it does the right thing. First make sure we abort once we recognize the tiny fluidigm file is CCS data. $ export DATA=$TESTDIR/../../data $ export INPUT=$DATA/fluidigm_amplicons/040500.cmp.h5 $ export REFERENCE=$DATA/fluidigm_amplicons/Fluidigm_human_amplicons.fasta $ quiver -r $REFERENCE -o variants.gff $INPUT 2>1 [255] Tiny lambda file. Make sure it recognizes this cmp.h5 has an imcomplete set of QVs. $ export INPUT=/mnt/secondary/Share/Quiver/TestData/tinyLambda/aligned_reads_1.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/tinyLambda/lambdaNEB.fa $ quiver -p C2.AllQVsModel -r $REFERENCE -o variants.gff $INPUT Failure: Selected Quiver parameter set is incompatible with this alignment file due to missing data tracks. [255] It should handle the request of a parameter set by complete name: $ quiver --verbose -p C2.NoQVsModel -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set" [INFO] Using Quiver parameter set(s): C2.NoQVsModel ... 
or by chemistry name: $ quiver --verbose -p C2 -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set" [INFO] Using Quiver parameter set(s): C2.NoQVsModel ... and should fail informatively when we ask for an unrecognized parameter set or chemistry: $ quiver -p SuperChem.Model -r $REFERENCE -o variants.gff $INPUT Quiver: no available parameter set named SuperChem.Model [255] $ quiver -p SuperChem -r $REFERENCE -o variants.gff $INPUT Quiver: no parameter set available compatible with this cmp.h5 for chemistry "SuperChem" [255] GenomicConsensus-master/tests/cram/internal/quiver-diploid-lambda.t000066400000000000000000000026531274347070600261110ustar00rootroot00000000000000 Reads are from a simulated diploid lambda, where there is a SNP at each position 250 + 500k, and the SNP is a substitution "ACGT" -> "CGTA". How well do we pick up these SNPs? $ alias untabify="sed 's/\t/ /g'" $ export INPUT=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/aln.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/lambda.fasta $ export EXPECTED_VARIANTS=/mnt/secondary/Share/Quiver/TestData/lambdaDiploid/v-expected.gff $ quiver -p unknown.NoQVsModel --diploid $INPUT -r $REFERENCE \ > -o variants.gff -o css.fasta Take a look at the variants $ grep -v "#" variants.gff | head -3 | untabify lambda_NEB3011 . substitution 250 250 . . . reference=A;variantSeq=A/C;coverage=100;confidence=40 lambda_NEB3011 . substitution 750 750 . . . reference=T;variantSeq=A/T;coverage=100;confidence=40 lambda_NEB3011 . substitution 1250 1250 . . . reference=G;variantSeq=G/T;coverage=100;confidence=40 Use gffsubtract.pl to compare variants to expected. Note that the gffsubtract tool just looks at the coordinates, not the actual content of the event, so it's not going to see if we called G/C as G/T, for example. Would be good to either write a better tool or make an easy way to do this in Python. 
False negatives: $ gffsubtract.pl $EXPECTED_VARIANTS variants.gff | grep -v '#' | untabify False positives: $ gffsubtract.pl variants.gff $EXPECTED_VARIANTS | grep -v '#' | untabify GenomicConsensus-master/tests/cram/internal/quiver-ecoli.t000066400000000000000000000040611274347070600243350ustar00rootroot00000000000000 Run quiver on a large-insert C2 E. coli job. $ export INPUT=/mnt/secondary/Share/Quiver/TestData/ecoli/job_059531.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/ecoli/ecoliK12_pbi_March2013.fasta For some reason, this old cmp.h5 file lacks proper chemistry information. Quiver should reject it. $ quiver -j${JOBS-8} $INPUT -r $REFERENCE -o variants.gff -o css.fasta "unknown" chemistry in alignment file: either an unsupported chemistry has been used, the alignment file has been improperly constructed, or this version of SMRTanalysis is too old to recognize a new chemistry. [255] Well, we know it was a C2 job, so let's force the issue $ quiver -p C2 -j${JOBS-8} $INPUT -r $REFERENCE -o variants.gff -o css.fasta Inspect the variants list. A few mutations seem to have crept in since I built the new reference. $ sed 's/\t/ /g' variants.gff ##gff-version 3 ##pacbio-variant-version 2.1 ##date * (glob) ##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12 ##source GenomicConsensus * (glob) ##source-commandline * (glob) ##source-alignment-file * (glob) ##source-reference-file * (glob) ##sequence-region ecoliK12_pbi_March2013 1 4642522 ecoliK12_pbi_March2013 . deletion 85 85 . . . reference=G;variantSeq=.;coverage=53;confidence=48 ecoliK12_pbi_March2013 . deletion 219 219 . . . reference=A;variantSeq=.;coverage=58;confidence=47 ecoliK12_pbi_March2013 . insertion 1536 1536 . . . reference=.;variantSeq=C;coverage=91;confidence=47 No no-call windows. $ fastacomposition css.fasta css.fasta A 1141540 C 1177642 G 1180362 T 1142977 MuMMer analysis. 
No structural diffs $ nucmer -mum $REFERENCE css.fasta 2>/dev/null $ show-diff -H out.delta SNPs same as variants $ show-snps -C -H out.delta | sed 's/\s\+/ /g' 85 G . 84 | 85 84 | 1 1 ecoliK12_pbi_March2013 ecoliK12_pbi_March2013|quiver 220 A . 218 | 135 218 | 1 1 ecoliK12_pbi_March2013 ecoliK12_pbi_March2013|quiver 1536 . C 1535 | 1316 1535 | 1 1 ecoliK12_pbi_March2013 ecoliK12_pbi_March2013|quiver GenomicConsensus-master/tests/cram/internal/quiver-eichler-bac.t000066400000000000000000000066171274347070600254110ustar00rootroot00000000000000 $ export INPUT=/mnt/secondary/Share/Quiver/TestData/eichler/053727.cmp.h5 $ export SANGER_REFERENCE=/mnt/secondary/Share/Quiver/TestData/eichler/CH17-157L1.finished.fa $ export ASSEMBLY_REFERENCE=/mnt/secondary/Share/Quiver/TestData/eichler/CH17_157L1_quiver_fasta.fasta The QVs warning gets printed to stderr N times ... ignore it for now. $ quiver -p C2 --noEvidenceConsensusCall=nocall \ > -j${JOBS-8} $INPUT -r $ASSEMBLY_REFERENCE -o variants.gff -o css.fasta 2>/dev/null Variant scores are currently miscalibrated (need to fix the NoMergeQVModel; bug 22255). Note that these variants listed below are reckoned compared to the assembly reference, so they are not really variants so much as errors in the assembly. Variants assessed using MuMMer at the end are compared to the Sanger reference. $ sed 's/\t/ /g' variants.gff | grep -v '#' CH17-157L1 . deletion 141 142 . . . reference=AC;variantSeq=.;coverage=100;confidence=47 CH17-157L1 . deletion 797 797 . . . reference=G;variantSeq=.;coverage=100;confidence=48 CH17-157L1 . deletion 805 805 . . . reference=T;variantSeq=.;coverage=100;confidence=47 CH17-157L1 . deletion 26174 26175 . . . reference=AC;variantSeq=.;coverage=100;confidence=48 CH17-157L1 . deletion 93356 93357 . . . reference=CG;variantSeq=.;coverage=100;confidence=49 CH17-157L1 . insertion 230679 230679 . . . reference=.;variantSeq=A;coverage=100;confidence=48 CH17-157L1 . insertion 230681 230681 . . . 
reference=.;variantSeq=CA;coverage=100;confidence=48 CH17-157L1 . insertion 230684 230684 . . . reference=.;variantSeq=C;coverage=100;confidence=48 $ fastacomposition css.fasta css.fasta A 65735 C 51391 G 50341 N 28 T 63420 Use the MuMMer suite to look at the differences from the reference. $ nucmer -mum $SANGER_REFERENCE css.fasta 2>/dev/null First: no structural differences. $ show-diff -H -q out.delta | sed 's/\t/ /g' CH17-157L1|quiver BRK 1 30 30 CH17-157L1|quiver BRK 230896 230915 20 Next, the SNPs. $ show-snps -H -C -x10 out.delta 24558 . A 24583 | 24552 24558 | AAAAAAAAAA.AGCCTGGATG AAAAAAAAAAAAGCCTGGATG | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 51215 C . 51239 | 1765 51215 | GGCCCGCCCCCCGGGCAGCCA GGCCCGCCCC.CGGGCAGCCA | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 52980 . A 53005 | 1765 52980 | AAAAAAAAAA.ACAACAAACA AAAAAAAAAAAACAACAAACA | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 64634 C . 64658 | 11654 64634 | GACCCCCCCCCCACCGGTCAG GACCCCCCCC.CACCGGTCAG | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 85478 . T 85503 | 8834 85478 | TTTTTTTTTT.TACTAACCAG TTTTTTTTTTTTACTAACCAG | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 94312 . T 94338 | 8834 94312 | TTTTTTTTTT.TAGACAGAGT TTTTTTTTTTTTAGACAGAGT | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 106985 . T 107012 | 0 106985 | TTTTTTTTTT.TCCTGAGCAG TTTTTTTTTTTTTCCTGAGCA | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 106985 . T 107013 | 0 106985 | TTTTTTTTTT.TCCTGAGCAG TTTTTTTTTTTTCCTGAGCAG | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 182920 . A 182949 | 564 47946 | AAAAAAAAAA.ATGTGGTCTC AAAAAAAAAAAATGTGGTCTC | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) 183484 . 
A 183514 | 564 47382 | AAAAAAAAAA.ATAGATGAAC AAAAAAAAAAAATAGATGAAC | 1 1 CH17-157L1\tCH17-157L1|quiver (esc) GenomicConsensus-master/tests/cram/internal/quiver-fluidigm-amplicons.t000066400000000000000000000011631274347070600270250ustar00rootroot00000000000000 $ export INPUT=/mnt/secondary/Share/Quiver/TestData/fluidigmAmplicons/aligned_reads.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/fluidigmAmplicons/MET_EGFR_Full_Genes.fasta $ quiver --noEvidenceConsensusCall=nocall -j${JOBS-8} $INPUT -r $REFERENCE \ > -o variants.gff -o css.fasta [WARNING] This alignment file file lacks some of the QV data tracks that are required for optimal performance of the Quiver algorithm. For optimal results use the ResequencingQVs workflow in SMRTPortal with bas.h5 files from an instrument using software version 1.3.1 or later, or the --forQuiver option to pbalign. GenomicConsensus-master/tests/cram/internal/quiver-lambda.t000066400000000000000000000016471274347070600244710ustar00rootroot00000000000000Small lambda phage job, should be no errors. $ export CMPH5=/mnt/secondary/Share/Quiver/TestData/lambda.P4-C2/082796.cmp.h5 $ export BAM=/mnt/secondary/Share/Quiver/TestData/lambda.P4-C2/082796.bam $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/lambda.P4-C2/lambdaNEB.fa First try the cmp.h5: $ mkdir CmpH5; cd CmpH5 $ quiver -j${JOBS-8} --noEvidenceConsensusCall=nocall $CMPH5 -r $REFERENCE \ > -o variants.gff -o css.fasta.gz -o css.fq.gz $ grep -v "##" variants.gff [1] $ gunzip css.fasta.gz $ fastadiff -c FALSE css.fasta $REFERENCE $ cd .. 
#Now run on the BAM: # # $ mkdir BAM; cd BAM # $ quiver -j${JOBS-8} --noEvidenceConsensusCall=nocall $BAM -r $REFERENCE \ # > -o variants.gff -o css.fasta.gz -o css.fq.gz # [WARNING] 'fancyChunking' not yet available for BAM, disabling # # $ grep -v "##" variants.gff # [1] # # $ gunzip css.fasta.gz # $ fastadiff -c FALSE css.fasta $REFERENCE GenomicConsensus-master/tests/cram/internal/quiver-mruber.t000066400000000000000000000101731274347070600245370ustar00rootroot00000000000000 $ export INPUT=/mnt/secondary/Share/Quiver/TestData/mruber/aligned_reads.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/mruber/Mruber_DSM_1279.fasta $ quiver -p C2 -j${JOBS-8} $INPUT -r $REFERENCE -o variants.gff -o css.fasta Inspect the variant calls. $ grep -v "#" variants.gff | sed 's/\t/ /g' M.ruber . substitution 357364 357364 . . . reference=C;variantSeq=T;coverage=100;confidence=47 M.ruber . insertion 640716 640716 . . . reference=.;variantSeq=C;coverage=100;confidence=48 M.ruber . insertion 1320669 1320669 . . . reference=.;variantSeq=C;coverage=100;confidence=48 M.ruber . deletion 1878514 1878953 . . . reference=AGGGCGTACTTCTTTTCGGGTGCAGATGCGTAGGCATCGTAGTTGAACAGGGTTTTGACCGCCATTGAGCACTCCTTTTACGGTTCCACAATGAGTTTGCTGATCATGTTGGCGTGGCCGATGCCGCAGTATTCGTTGCAGATGATGGGATACTCACCGGGTTTGCTGAAGGTGTAGCTGACCTTGGCAATTTCCCCCGGTATCACCTGTACGTTGATGTTGGTGTTGTGTACGTGGAAGCTGTGCTGCACATCGGGTGAGGTGATATAGAAGGTTACCTTCCTGCCCACCTTGAACCGCATCTCCGCTGGCAGGTAGCCAAAGGCAAAGGCCTGCACATAGGCCACGTACTCGTTGCCGACCTGCTCAACCCGTGGGTTGGCAAAGTCTCCCTCGGTGCGCACCTTGGTGGCGTCGATGCGGCCTGCCCCCACCGGGTT;variantSeq=.;coverage=100;confidence=50 M.ruber . insertion 1987969 1987969 . . . reference=.;variantSeq=G;coverage=100;confidence=48 M.ruber . insertion 2010700 2010700 . . . reference=.;variantSeq=T;coverage=100;confidence=47 M.ruber . insertion 2070035 2070035 . . . reference=.;variantSeq=A;coverage=100;confidence=47 M.ruber . insertion 2827713 2827713 . . . reference=.;variantSeq=T;coverage=100;confidence=48 M.ruber . 
deletion 2841287 2841301 . . . reference=AAGCACGCCGAGGGA;variantSeq=.;coverage=100;confidence=49 The variant calls have all been Sanger validated! | | Confirmed | Confirmed | | Variant call | by eye? | by Sanger? | |------------------+-----------+------------| | 357364 C>T | YES | YES | | 640716 InsC | YES | YES | | 1320669 InsC | YES | YES | | 1878514 Del440bp | YES | YES | | 1987969 InsG | YES | YES | | 2010700 InsT | YES | YES | | 2070035 InsA | YES | YES | | 2827713 InsT | YES | YES | | 2841287 Del15bp | YES | YES | Look at the consensus output. First, there are no no-calls, which is nice. $ fastacomposition css.fasta css.fasta A 566308 C 979601 G 983450 T 567651 There are two gaps corresponding to the structural deletions: $ nucmer -mum $REFERENCE css.fasta 2>/dev/null $ show-diff -H out.delta | sed 's/\t/ /g' M.ruber GAP 1878514 1878953 440 0 440 M.ruber GAP 2851299 2831302 -19996 -19981 -15 ... and there are some SNPS. Five of them are at the coverage desert before the large deletion, seven are accounted for in the variants.gff, and the remaining ones are low-confidence miscalls. $ show-snps -H -C out.delta 233298 C . 233297 | 124066 233297 | 1 1 M.ruber\tM.ruber|quiver (esc) 357364 C T 357363 | 124066 357363 | 1 1 M.ruber\tM.ruber|quiver (esc) 640719 . C 640719 | 283355 640719 | 1 1 M.ruber\tM.ruber|quiver (esc) 1320671 . C 1320672 | 299698 1320671 | 1 1 M.ruber\tM.ruber|quiver (esc) 1620369 C . 1620369 | 252295 1476642 | 1 1 M.ruber\tM.ruber|quiver (esc) 1872664 G . 1872663 | 5836 1224348 | 1 1 M.ruber\tM.ruber|quiver (esc) 1878500 . C 1878500 | 0 1218511 | 1 1 M.ruber\tM.ruber|quiver (esc) 1878500 . C 1878501 | 0 1218510 | 1 1 M.ruber\tM.ruber|quiver (esc) 1878500 . C 1878502 | 0 1218509 | 1 1 M.ruber\tM.ruber|quiver (esc) 1878500 . A 1878503 | 0 1218508 | 1 1 M.ruber\tM.ruber|quiver (esc) 1878500 . G 1878504 | 0 1218507 | 1 1 M.ruber\tM.ruber|quiver (esc) 1987973 . G 1987538 | 22731 1109473 | 1 1 M.ruber\tM.ruber|quiver (esc) 2010704 . 
T 2010270 | 22731 1086741 | 1 1 M.ruber\tM.ruber|quiver (esc) 2070035 . A 2069602 | 59331 1027409 | 1 1 M.ruber\tM.ruber|quiver (esc) 2827716 . T 2827284 | 13583 269727 | 1 1 M.ruber\tM.ruber|quiver (esc) GenomicConsensus-master/tests/cram/internal/quiver-staph.t000066400000000000000000000025771274347070600243730ustar00rootroot00000000000000This input data is taken from the output of the mapping job in Pysiv: pysiv_jobs/jobs/BAMMapping/saureus_p6c4 # FIXME this file needs updating to the new BAM spec -Nat 2015-09-30 # $ export BAM=/mnt/secondary/Share/Quiver/TestData/staph/m140911_084715_42139_c100702390480000001823141103261514_s1_p0.aligned_subreads.bam $ export BAM=/pbi/dept/secondary/siv/testdata/genomic_consensus-unittest/Quiver/staph/m140911_084715_42139_c100702390480000001823141103261514_s1_p0.aligned_subreads.bam $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/staph/S_aureus_USA300_TCH1516.fasta $ export MASK=/mnt/secondary/Share/Quiver/GenomeMasks/S_aureus_USA300_TCH1516-mask.gff $ quiver -j${JOBS-8} $BAM -r $REFERENCE -o variants.gff -o css.fasta -o css.fastq Inspect the variant calls. The first variant call might be an error (follow up on this) but the latter is an error in the reference, it seems. $ gffsubtract.pl variants.gff $MASK | grep -v "#" | sed 's/\t/ /g' One window is nocalled. Follow up on this. 
$ fastacomposition css.fasta css.fasta A 960306 C 470724 G 470270 T 971459 One gap $ nucmer -mum $REFERENCE css.fasta 2>/dev/null $ show-diff -H out.delta | sed 's/\t/ /g' Staphylococcus_aureus_subsp_aureus_USA300_TCH1516 GAP 2149110 2149328 219 67 152 (Not showing the SNPs here as they just correspond to the masked-out region that we know doesn't match the reference) GenomicConsensus-master/tests/cram/internal/quiver-stumpy-read.t.off000066400000000000000000000022521274347070600262650ustar00rootroot00000000000000 We occasionally come across a "stumpy" read, where there is a large gap in the read, due to a single molecule event that is as-yet not well understood. At the moment if these guys aren't identified and filtered out the POA and probably the rest of Quiver will crash and burn. This is a simple test to make sure we get it right. The file contains coverage restricted to a small ~10KB window containing the stumpy read. $ export INPUT=/mnt/secondary/Share/Quiver/TestData/stumpyReadInEcoli/out.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/stumpyReadInEcoli/ecoli_mutated.fasta $ quiver --noEvidenceConsensusCall=nocall -j${JOBS-8} $INPUT -r $REFERENCE \ > -o variants.gff -o css.fasta Now compare back to the reference. Coverage island results in consensus sequence only in the window [2734568,2745424] (GFF convention). $ nucmer -mum $REFERENCE css.fasta 2>/dev/null $ show-coords -H out.delta 2734568 2745424 | 2734568 2745424 | 10857 10857 | 99.96 | ecoliK12_mutated\tecoliK12_mutated|quiver (esc) No confident variants. The 4 SNPs in the alignment are at the fringes, where there is low coverage. $ grep -v "#" variants.gff [1] GenomicConsensus-master/tests/cram/internal/quiver-tinyLambda-coverage-islands.t000066400000000000000000000146271274347070600305630ustar00rootroot00000000000000Here's a test of the new amplicons support in Quiver. Previously Quiver would not see coverage spanning its fixed 500bp windows, and would no-call the entire genome. 
Now the windows extents are determined adaptively based on where the coverage actually is. $ export INPUT=/mnt/secondary/Share/Quiver/TestData/tinyLambda/aligned_reads_1.cmp.h5 $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/tinyLambda/lambdaNEB.fa $ quiver -p unknown --quiet -j${JOBS-8} --noEvidenceConsensusCall=nocall \ > $INPUT -r $REFERENCE \ > -o variants.gff -o css.fa -o css.fq These variant calls actually look reasonable given the reads, but the confidences are too high. Fix this. $ grep -v '#' variants.gff lambda_NEB3011\t.\tdeletion\t24878\t24878\t.\t.\t.\treference=A;variantSeq=.;coverage=16;confidence=43 (esc) $ fastacomposition css.fa css.fa A 282 C 266 G 305 N 47361 T 281 $ nucmer -mum $REFERENCE css.fa 2>/dev/null $ show-aligns out.delta lambda_NEB3011 'lambda_NEB3011|quiver' * (glob) ============================================================ -- Alignments between lambda_NEB3011 and lambda_NEB3011|quiver -- BEGIN alignment [ +1 6531 - 6718 | +1 6531 - 6718 ] 6531 ctgccgtgcttaagggcaaatacaccatgaccggtgaagccttcgatcc 6531 ctgccgtgcttaagggcaaatacaccatgaccggtgaagccttcgatcc 6580 ggttgaggtggatatgggccgcagtgaggagaataacatcacgcagtcc 6580 ggttgaggtggatatgggccgcagtgaggagaataacatcacgcagtcc 6629 ggcggcacggagtggagcaagcgtgacaagtccacgtatgacccgaccg 6629 ggcggcacggagtggagcaagcgtgacaagtccacgtatgacccgaccg 6678 acgatatcgaagcctacgcgctgaacgccagcggtgtggtg 6678 acgatatcgaagcctacgcgctgaacgccagcggtgtggtg -- END alignment [ +1 6531 - 6718 | +1 6531 - 6718 ] -- BEGIN alignment [ +1 7266 - 7562 | +1 7266 - 7561 ] 7266 cctgacggggacgaaagaagaactggcgctccgtgtggcagagctgaaa 7266 cctgacggggacgaaagaagaactggcgctccgtgtggcagagctgaaa 7315 gaggagcttgatgacacggatgaaactgccggtcaggacacccctctca 7315 gaggagcttgatgacacggatgaaactgccggtcaggacacccctctca 7364 gccgggaaaatgtgctgaccggacatgaaaatgaggtgggatcagcgca 7364 gccgggaaaatgtgctgaccggacatgaaaatga.gtgggatcagcgca ^ 7413 gccggataccgtgattctggatacgtctgaactggtcacggtcgtggca 7412 gccggataccgtgattctggatacgtctgaactggtcacggtcgtggca 7462 
ctggtgaagctgcatactgatgcacttcacgccacgcgggatgaacctg 7461 ctggtgaagctgcatactgatgcacttcacgccacgcgggatgaacctg 7511 tggcatttgtgctgccgggaacggcgtttcgtgtctctgccggtgtggc 7510 tggcatttgtgctgccgggaacggcgtttcgtgtctctgccggtgtggc 7560 agc 7559 agc -- END alignment [ +1 7266 - 7562 | +1 7266 - 7561 ] -- BEGIN alignment [ +1 24760 - 25167 | +1 24759 - 25166 ] 24760 tgaaatgatgaagagctctgtgtt.tgtcttcctgcctccagttcgccg 24759 tgaaatgatgaagagctctgtgttttgtcttcctgcctccagttcgccg ^ 24808 ggcattcaacataaaaactgatagcacccggagttccggaaacgaaatt 24808 ggcattcaacataaaaactgatagcacccggagttccggaaacgaaatt 24857 tgcatatacccattgctcacgaaaaaaaatgtccttgtcgatataggga 24857 tgcatatacccattgctcacgaaaaaa.atgtccttgtcgatataggga ^ 24906 tgaatcgcttggtgtacctcatctactgcgaaaacttgacctttctctc 24905 tgaatcgcttggtgtacctcatctactgcgaaaacttgacctttctctc 24955 ccatattgcagtcgcggcacgatggaactaaattaataggcatcaccga 24954 ccatattgcagtcgcggcacgatggaactaaattaataggcatcaccga 25004 aaattcaggataatgtgcaataggaagaaaatgatctatattttttgtc 25003 aaattcaggataatgtgcaataggaagaaaatgatctatattttttgtc 25053 tgtcctatatcaccacaaaatggacatttttcacctgatgaaacaagca 25052 tgtcctatatcaccacaaaatggacatttttcacctgatgaaacaagca 25102 tgtcatcgtaatatgttctagcgggtttgtttttatctcggagattatt 25101 tgtcatcgtaatatgttctagcgggtttgtttttatctcggagattatt 25151 ttcataaagcttttcta 25150 ttcataaagcttttcta -- END alignment [ +1 24760 - 25167 | +1 24759 - 25166 ] -- BEGIN alignment [ +1 30837 - 30950 | +1 30835 - 30945 ] 30837 ttttatccggaaactgctgtctggctttttttgatttcagaattag.cc 30835 ttttatccggaaactgctgtctggcttttt.tgatttcagaa.tagccc ^ ^ ^ 30885 tgacgggcaatgctgcgaagggcgttttcctgctgaggtgtcattgaac 30882 tgacgcg.gatgctgcgaagggcgttttcctgctgagg.gtcattgaac ^ ^^ ^ 30934 aagtcccatgtcggcaa 30929 aagtcccatgtcggcaa -- END alignment [ +1 30837 - 30950 | +1 30835 - 30945 ] -- BEGIN alignment [ +1 43908 - 44037 | +1 43902 - 44030 ] 43908 aatttcattcgccaaaaagcccgatgatgagcgactcaccacgggccac 43902 aatttcattcgccaaaaagc.cgatgatgagcgactcaccacgggccac ^ 43957 ggcttctgactctctttccggtactgatgtgatggctgctatggggatg 43950 
ggcttctgactctctttccggtactgatgtgatggctgctatggggatg 44006 gcgcaatcacaagccggattcggtatggctgc 43999 gcgcaatcacaagccggattcggtatggctgc -- END alignment [ +1 43908 - 44037 | +1 43902 - 44030 ] ============================================================ GenomicConsensus-master/tests/cram/plurality-all4mer.t000066400000000000000000000024121274347070600234760ustar00rootroot00000000000000Bite-sized quiver test using an All4Mers template! $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run quiver. $ plurality $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq No variants! $ egrep -v '^#' v.gff | cat Perfect consensus, no no-calls. $ cat css.fa >All4mer.V2.01_Insert|plurality CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC $ fold -60 css.fq @All4mer.V2.01_Insert|plurality CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIII GenomicConsensus-master/tests/cram/poa-all4mer.t000066400000000000000000000023701274347070600222330ustar00rootroot00000000000000Bite-sized quiver test using an All4Mers template! $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run quiver. 
$ poa $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq No variants! $ egrep -v '^#' v.gff | cat Perfect consensus, no no-calls. $ cat css.fa >All4mer.V2.01_Insert|poa CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC $ fold -60 css.fq @All4mer.V2.01_Insert|poa CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC + 555555555555555555555555555555555555555555555555555555555555 555555555555555555555555555555555555555555555555555555555555 555555555555555555555555555555555555555555555555555555555555 555555555555555555555555555555555555555555555555555555555555 55555555555555555555 GenomicConsensus-master/tests/cram/quiver-all4mer.t000066400000000000000000000024011274347070600227620ustar00rootroot00000000000000Bite-sized quiver test using an All4Mers template! $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/all4mer/out.aligned_subreads.bam $ export REFERENCE=$DATA/all4mer/All4mer.V2.01_Insert.fa Run quiver. $ quiver $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq No variants! $ egrep -v '^#' v.gff | cat Perfect consensus, no no-calls. 
$ cat css.fa >All4mer.V2.01_Insert|quiver CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC $ fold -60 css.fq @All4mer.V2.01_Insert|quiver CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTT TCCTGTCCACCTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACA AAATGGAAGCCGGGGCCTTAAACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAAC TCTCCCAGTCAATATACCAACACATCGTGGGACGGATTGCAGAGCGAATCTATCCGCGCT CGCATAATTTAGTGTTGATC + "PPQQPQQPRPQRSRQRRRRPQQQRSPRRRQQRPRQQPRRSRPSQQRQQQTQRRPQPQQS SRQQRRQTRQRRSRRQQRRTSRRRRRQQSQQQPRTRRQQRQQRSRRRSPSQSSRQQQPRR SRRRRSRSSRSRRTRRSSRPQTQQRQQQRRRSQUQQQRQQQURTSQRQQPRTQSRQQQQR RQSRTRPPRQQRPRQRRTRRQSRQQRQQPSQQQPRRRTRQQPRQQRSRRRSQQQQRQRQQ RPRQQRRSQRQRQQSSSQQQ GenomicConsensus-master/tests/cram/quiver-noqvs-test.t000066400000000000000000000054241274347070600235550ustar00rootroot00000000000000 Identical to "small-quiver-test.t" but using the NoQVs model. $ export DATA=$TESTDIR/../data $ export INPUT=$DATA/hcv/aligned_reads.cmp.h5 $ export REFERENCE=$DATA/hcv/HCV_Ref_For_187140.fasta Quiver actually makes one error here, which is kind of disappointing, but this data is from a really ancient instrument-software version, so I'm not all that surprised. 
$ quiver -pC2.NoQVsModel -x0 -q0 $INPUT -r $REFERENCE -o v.gff -o css.fa -o css.fq $ cat v.gff ##gff-version 3 ##pacbio-variant-version 2.1 ##date * (glob) ##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12 ##source GenomicConsensus * (glob) ##source-commandline * (glob) ##source-alignment-file * (glob) ##source-reference-file * (glob) ##sequence-region 5primeEnd 1 156 ##sequence-region 3primeEnd 1 386 3primeEnd\t.\tdeletion\t296\t296\t.\t.\t.\treference=G;variantSeq=.;coverage=92;confidence=4 (esc) 3primeEnd\t.\tdeletion\t369\t369\t.\t.\t.\treference=G;variantSeq=.;coverage=83;confidence=6 (esc) $ cat css.fa >5primeEnd|quiver GGAACCGGTGAGTACACCGGAATTGCCAGGACGACCGGGTCCTTTCGTGGATAAACCCGC TCAATGCCTGGAGATTTGGGCGTGCCCCCGCAAGACTGCTAGCCGAGTAGTGTTGGGTCG CGAAAGGCCTTGTGGTACTGCCTGATAGGGTGCTTG >3primeEnd|quiver TACCTGGTCATAGCCTCCGTGAAGGCTCTCAGGCTCGCTGCATCCTCCGGGACTCCCTGA CTTTCACAGATAACGACTAAGTCGTCGCCACACACGAGCATGGTGCAGTCCTGGAGCCCA GCGGCTCGACAGGCTGCTTTGGCCTTGATGTAGCAGGTGAGGGTGTTACCACAGCTGGTC GTCAGTACGCCGCTCGCGCGGCACCTGCGATAGCCGCAGTTTTCCCCCCTTGAATTAGTA AGAGGGCCCCCGACATAGAGCCTCTCGGTGAGGGACTTGATGGCCACGCGGGCTTGGGGT CCAGGTCACAACATTGGTAAATTGCCTCCTCTGTACGGATATCGCTCTCAGTGACTGTGG AGTCAAACAGCGGGTATCATACGA $ cat css.fq @5primeEnd|quiver GGAACCGGTGAGTACACCGGAATTGCCAGGACGACCGGGTCCTTTCGTGGATAAACCCGCTCAATGCCTGGAGATTTGGGCGTGCCCCCGCAAGACTGCTAGCCGAGTAGTGTTGGGTCGCGAAAGGCCTTGTGGTACTGCCTGATAGGGTGCTTG + "POPOPOPOOOOOOOOOPLPKPOPOOPOOPOOOOKPOPPOLPOPPOOOOPOOOPPOPPOOOOOPOOOPOOPOOOOPPFPPOOOO5PPPPOOOPOOOOOOOOOOPOOOOOOOOOPOPPOOOOOOPPOPOPOPOOOPOOOOOOPOOOOOOPPOOOOPO @3primeEnd|quiver 
TACCTGGTCATAGCCTCCGTGAAGGCTCTCAGGCTCGCTGCATCCTCCGGGACTCCCTGACTTTCACAGATAACGACTAAGTCGTCGCCACACACGAGCATGGTGCAGTCCTGGAGCCCAGCGGCTCGACAGGCTGCTTTGGCCTTGATGTAGCAGGTGAGGGTGTTACCACAGCTGGTCGTCAGTACGCCGCTCGCGCGGCACCTGCGATAGCCGCAGTTTTCCCCCCTTGAATTAGTAAGAGGGCCCCCGACATAGAGCCTCTCGGTGAGGGACTTGATGGCCACGCGGGCTTGGGGTCCAGGTCACAACATTGGTAAATTGCCTCCTCTGTACGGATATCGCTCTCAGTGACTGTGGAGTCAAACAGCGGGTATCATACGA + "8<=A:@NNOOOOOPOKPOOOOPFPOIOOOOKPOOOOOOOOOOKPOFP>PPOOO5PPOOOOJPPOOOOOOONPOOOOOOPOOOOOOOOPOOOOOOOOOOOONPOOOOOONPOOPOO>PPOOOKPOOOOOOOOPOOOOOPPNPNPOPOONOOOOOOOPOOOJPPOOOPOOPOOOOOOOPOOOOOOOOOOOOPOOOOOOOOOPOOOPOOOOOOOONPOOOOEPPP(PPPPPOPOOPOPOOOOPOOOPP0PPPPOOOOOOOOOOPOOOOOPOOOLPPOOOPOOONPOPOOOONPPOOP%PPPOKPOOPOOOOOPOOOPOPOOPPOPOOPONPOOOOOOOOPOOOOOOOOOOOOOOOOOOOOMPOOOO*PP'OOOGPPO??OOOOKF@ GenomicConsensus-master/tests/cram/version.t000066400000000000000000000003211274347070600215750ustar00rootroot00000000000000This actually failed once because of a missing import, so we might as well test it. $ variantCaller --version 2.1.0 This will break if the parser setup is messed up. 
$ variantCaller --help >/dev/null GenomicConsensus-master/tests/data/000077500000000000000000000000001274347070600177165ustar00rootroot00000000000000GenomicConsensus-master/tests/data/all4mer/000077500000000000000000000000001274347070600212565ustar00rootroot00000000000000GenomicConsensus-master/tests/data/all4mer/All4mer.V2.01_Insert.fa000066400000000000000000000004361274347070600251220ustar00rootroot00000000000000>All4mer.V2.01_Insert CATCAGGTAAGAAAGTACGATGCTACAGCTTGTGACTGGTGCGGCACTTTTGGCTGAGTTTCCTGTCCAC CTCATGTATTCTGCCCTAACGTCGGTCTTCACGCCATTACTAGACCGACAAAATGGAAGCCGGGGCCTTA AACCCCGTTCGAGGCGTAGCAAGGAGATAGGGTTATGAACTCTCCCAGTCAATATACCAACACATCGTGG GACGGATTGCAGAGCGAATCTATCCGCGCTCGCATAATTTAGTGTTGATC GenomicConsensus-master/tests/data/all4mer/All4mer.V2.01_Insert.fa.fai000066400000000000000000000000421274347070600256510ustar00rootroot00000000000000All4mer.V2.01_Insert 260 22 70 71 GenomicConsensus-master/tests/data/all4mer/README000066400000000000000000000006101274347070600221330ustar00rootroot00000000000000How I made this lil' file (next time write a Makefile!) $ baxSieve --whitelist hole-numbers.txt /mnt/secondary/Share/VariantCalling/CCS/P6-C4.EnzymologyTemplates/2770691/0001/Analysis_Results/m141008_060349_42194_c100704972550000001823137703241586_s1_p0.1.bax.h5 out.bas.h5 $ bax2bam out.bas.h5 -o out $ pbalign out.subreads.bam All4mer.V2.01_Insert.fa out.aligned_subreads.bam GenomicConsensus-master/tests/data/all4mer/hole-numbers.txt000066400000000000000000000000521274347070600244140ustar00rootroot0000000000000014 28 29 32 47 50 56 59 60 63 72 88 90 92 GenomicConsensus-master/tests/data/all4mer/out.aligned_subreads.bam000066400000000000000000021044411274347070600260460ustar00rootroot00000000000000BCSs:vgz!u; 36MڄB"6_I;7==f߮v?lvu6ylC(ocuf+jUB WQq4^K;вT(cN`[?v׏ph{~P9wQXѥqb92M\$evkx&E(=_ӆ*~V|#f4/J*נ|g}`|,/lry= h|*l>ݕmp ΏQ<ĭ2 ,׻K5}kꯇ}dV4?JCնz$)ݿ]G%M(Z׍`9y%]Zʇ6vEVS>P;i}w&]~e+]@ɳw̝_;dF/ˮ[Y{*dn̳3]d7~]\l 3tqwvWClN. 
FdX*>C5*/ ,4DW\P^+ޣ[enWY#I=?|X`&M&%#C1EO\|(5$F)K,_fizpm8)Xk>玌u i@QD\OJ e˭v*&@+C Ga~J@G?JQKb~) RemM Ea3d$*=:]JprNL4̳~e]_Ɋӳtm`5k83]-+ϓc;Cta uSвuöTkWT֠"Ia؆9蛆хoZKs2zḿ]؃tҏϹ9S4EX dCyh23ira|"V4 >S~t^vՁИ?X+h3,G#'kتE8A 1u'yBU!跾58?c7U}?#? 8}HY,A44p {&~Llor +IRg5#]ۜ#clO'z$Fs;REȼ8Q|DR$đ:(2E.NEMx6 NQ`/dNKù6z:27_0  $i 4/VfAc׆ |~OG׾/_|g`[~c}_KW)H?KSE[Uߟ3ٗ?G2p}sчV9yghkNjmjl+>ڨf5cv|gz,?XV#|{}/n-IƚWfWőtuyL~|H:j^iw/^ jɎ5qw P>;>[Rc3y;k%DnGGon־n7i%\.ҽ`eNװ۲uJH7:/NlMZ ٲ˳ugOߟ<-o߼i}̯l%krn!>lx/rbͭ{ޫR?CM,Vktg9P^BEP^ER "NGTe_.ב R\XBTsVe=PƀeE=mҠ+Dٓ&gJG1 : O q5ѰR^:s+SAGOJ, tc ~Z-=,O Y. TEZ6 u)J"[`UaC|v } ԝ~O5W PJ(! p ?_ӖV,Sz(PTSWU5e kفuIS5pVV}xt ])c0]@ޡda)# 2t0rw( AY@iCώ pR;I>a{T69U!'[=24ȫc5}OMɴB:0}/}9#2]8qb88$Vz,s([иcv0||>{8 "o /7O?~҂m4h#GKF~9/#~3r|9.Jx6ڦ׵=P6hw,ߥ8l5i9XO/kz&QɸkBEvI}xj̲K5u|~|||8>n7W`0smsvDE]$"U8/ofn5墹h_.yvnح%rܦmwI}u/]|+<՗:[Ql]a| .9|["^]wWۊI_0\s[;I99.I7Eγ-B}n7K2^/嵬͉6>_vpS7o odA/n8K賏9V]ŶL9sUT ]+]l=:ShY- G7($ TC5iܜZr+{j"ntyA2 phv'OZb"{hoZRdӽ2T U`SwQBH-* C - c{{cHi'OXTaJ;T&WxX#XP  04S.ѢK=sbUO*bX~=yRV2zX+} mʏM8A<(V=Vl5q|.plE,}SmtO(N.YTY%걝jςrUkc Mn=wD!j_z jOj5e7d薦R `|/~Dd2Jke- ^hl='~Nɔug' Gi98\z uN(o$-iCFYp,iI4 g^>vЙ͵1x1 ǝA6sGJgxclICϏ\n#xyXxE6Gx:!)#'cDs 6'0st#Cab-B=0v ωK46(pX9é'|N؄^D{;FSĎFA;VU==p-VypnqlaF# N<zt}*^`t{wgG?_ @MvޱCdz⿽Wi|Mkz]n7e}7 Pv'r?n٤];Z_[Q[Ij(=k7Bj6fW˥$qh]Y;ߴ4o\6޾4_ݜWhv1y9tI7p=[,~K$`.{26b[Poc o¼>nt!y[o ֭H_ԗ6G#mcX疒}lG绐~]y^;{U~U{F'g[*n?'r=Eg +J6/qMC Oe[DUt2Yw_( } ܰ\Λ5F,r j?+[ʳl(#gh AgL U(3ӊ 4YtY)b+4\ߢK?4|Ls$Ń~qH?-?4Q[dXJEF䫕 (JFL?Y߫^pj ZbGDzj"62QX}"P P2G$H!nWl@pȶb.%bő,Y:ulۇmkp9?њ|CIM5ap]eȶiP_;y u 6vE^'q@AkhW&\bh>Wjmbn&pzfZ/lGau: M<K`<qhXŁ?4N OvA(L`'c)7!_g~JKJ&)y[Srd;P Iߣ+z-|O4mU;ʛ4oƕUg@wgK_,7v,V{hgk7 iſ _M ki{t`JR7 `ek"ZRYo oVnsûik73n%Omaylq?S{o"׻=ܝnd7Y[֓4K."ԙ=X>j%2ECWzy 0 /̲gl*Q) R!Uj|HWJp,ͰOiU@i<}V)TC{ePGZ¡xPّDamEDkQ<9V{R~K=aj<"Q |KS?c*j%!T{Ɍ=*=2ӬR|9(3$=4EsX9C75) ]U;a}5a`fK&v*gؚh?DS5L!fGX08,C0qV1rd J  Ul4Ԉ4kK0H K}ڰNNN6MVbJ&.dR-ts /yFp_ʾC^^oض6i!h#fQG:{86~O^8 ߓ:Gxq瞕D#~d9V}uG%U'I@L&ďT8y^5{Os&֊xF EQ4󒗾vpB5$`8z6£d& $5Ib%Ibha{^ꦁ%Q}v؎'a:~<צpA߮t?lT%ʰ7 
4y~NA7>Mvt>}w&nFdԍzS]kyXyk{66rUVZ俬͎_bFo]i.ѐS6fߛD&%>qcvk?~L+R!ۯlۀCr^]~w6umwVŻ6nw{V ye]:]Ŋ.v0lލ{ìY^m~13l#;x=pVk3*&P-Uoxe嫼Ƴ4Ub\yK83hT+kl|\; O*<]C= :22UX[|]&,G7'[y\8xz0<SJxʔgL8B9b+ez:!B3eYT6z@|@Շ;^ίq-V$j]Mu-;HÔܠYӱs`<\7y?Vt/L;m:iV-u2U%5ZˏQdÀěXþaKIS@7%9kRC$,$~祅N5 U|ΕT1T{F$[zܣNpa a}Fģhvpx8H ύ1꾜xUfĚ81F4^8x_@c'Nlo\֛g~t41q%C qCN(kiǫXo7vt;xgO+n@_Σf;mWt^q70Z##]ۋmn)fڳ[.6 +/amx}vGĶw6(f>E^*͘$CĔ+5=`DF1~X|Z d2M6 ?lQϘr(C@BS*3|"ztjM0cZ9 0 $˅=uqNUb :xH1Tk:ƹ# TA*tV*֘7Zpȋ cJ Ë<) 5}{5BG@)U"cZ9UŇ%nQ\C_[Ob_849T7/sմ70u mΊGm]W=󘥒ԹJhOvLU(95{PUxK7$M%Oja!,Pyh 4vTؑ:܇ZHDw%dbhl-W%8)iq&.׌ H%%Z\8%g 'O5c S>U-P6H HF9&،q ELfs3K3 .$Fágc2[ega^81v\/YSNjGNS$V:K=$:vpj$=v>?xuv̶&ЍP6N?p8c]9n0H~d8(p◀v8;iO}(<"s8Ŏ5=wԧ8-<a#4c\6NQJGU$r3{G&<E #} TQVGh qJ.UD;.\*TCVYV(sPCAOd6kS*2b , =S(P9T`D  5N#֯>Âfѿ({ Bq2B9ee"sP~YP[+" \͔iƹw=D=;bXΡR~3/E?Ph:Ŵt&3NGWAGAR %1A_j01t9jZkM~g؇~mـ>uPtL:6ԡ_T~$L U4U!ZykB1z kZ2b 1A;Ht[7-2k$lcf~N-57Iyh$ ?i@=sN,CLB 8C+t6ķƀY6$ t H Zu{R?vJQNh(ԵǁcS=l?M灖|9 (L<_yϰjgtx-dxi؋ՙ7c[f>q N=o6¾#Zغ$`+8`@ 3;L^FBkB\"Yxs؍F ؾїD[oY~Ҵ͛"GSvHnlm+_wm*x,cC˿{gꓒux3K&|~|h.u)Z76zPi ]go1qo:Ymn9"4~>BX+4[]*~2bHUyRrbCh. x2(mTQ"~LsO*(gK,t_.6hlZ*Z *:hA U0_f,sxb"(pqzƔT~Z,E**=T|hST4xV蔸 M3ϸLs- @G!+mFl ̜JpV(djXC`Y1,]ˣ.q @i0˒ŪD,@Zih@NW!wJ,+]Ƕlr.v,hl+*C2KQ BApBD0t$k1/p DVWn02# qðwma⹠l^#4@TiGqb'0Q0L1rgZz|O^8|wfZc$Z35ClGX#ڧݕ>~%\d Kv7gA蟮7$F5rCqqC\IouZl^uA47qtq;zXcxD\I?KVyJ;ܻ$uM$ VV0ovv?fiUڲ2lUCmV ۝@،Qvo%&[_uVk +"ͳwG7VOƯ};Hٵ7oH5Vqys& K 0M7Y04h#/ "h?m" \i1) Zg9x1jl 泺]AK\v3! (Dg%<88dj \|j1ƒ:Af!ZIqhDb OGe9 x1|1*?*{%,Obp\EWCv9xHlW#z!j/4WSLE 4 $T7#Ȝ.'$nF'ɪ@ kkJ_2 C$ʆ'ȚeMIJ9L{؊) 5a>/z΀`HuBDe|QAb=) FW) ۨ jDD \iXT=8J>3ng;thf Sd-0&Dg.퓕)h)]K)hEv9nu\a[BOurv`M HI3I@>k=Ut`ZSH^U|ց$5!o52 2ɒw=Uؖt "ZK$N])&QtX*@xgy<Y(K0k$4 X>lߵH =Q0Nϒi'cz8`<Aj<nY?S+wqI3wf'fF p0AٳId` JQ?y~gL&Èk8N|ODs/\L%x{33o؎2ۉ'qxB5%Ipm@잖C?R>* +;< ~JWJ ;B/?y`5ar,4|^b_ARb>F'tW]sX ߥ촳6kKlJ]4f5.;k Kw.5[ׂ/{3"fO{mmѮ$u8BK--ћRm,2O#k^*;-a (ͬv6Yt{7 f һYNj)̺۶ -5G̗FgHݚ=b;} 6kiM@X)+RTM. 
T*1*)p1GW(W??6A&ˋU}Ш0ASt$VZBPϹ/X6!_yd]ch+QxėP,0*Kq EU":fQ~O3CWڔ2z(-Re*N* mZS?Zv6t=pF"M\'JS jnU|A&iA` N (xcc:t6FhyI-1Ncf0cSaOƞ5qp$$Td4=$x LG l=3&>x١ā=NEiq28Ʀa`͊GNFs74EE))>Eʕ#~nOvߺ ɎL4yL=+\=tYb]G]篧 Ѝ?nYS˯cȹHkXv|<'uk_L|eYޞ_v͍ 87V*q#W7V8$J6\?~XqfK݂K=$moE LM|Byjs?VGAV?Gmn]E)ȶx~LwCJg|J&&{(Xۈh- -4b9^Xʣ'!kJA:@T#4x)ł/o43 3Qi)^xQ.DE8-F tבO0ZtMTT*=)F zLBp }%54OUʈLԀ#ZoHd." 땓h?@,Kv;9 |KsY6UE%F _a]4z$!tUS%:ڮcwɏwmk Ht 2',+J6{$: @8d$ٺv, ېmYzUX-ɔX$hE3 +ۖN"INp@]e9s!gVnЗˬxm`Ul0~ 7'aypx1Nnb7IN(H. ^(&<kYLBJ4f>I+=cQY t`Hd]ߓ؋ $Ī$t\F̟ėŖQ: ]5 |ù셚zL6#dbxOcsBlvIZ1$ś4$g/ӯ}')tx {3 Z;7at]'^tބd]xNxH?[CԶwsCk﻽R|f<>2}w5 Fܾo̮CD4t^7Zg^ҮԻ4T[Ƿ!O9ݳntVZfw# ?{;ݡ$̵l:`w.v:u&ζ6,z@6~pcc$>`OQheCWy Ԝ+ $!aEb $4@#fK M3eFYGX@.)f<,+oC9)0,W9/efJɷAA,S:[g(>B  ؒWڣk5㰈"CF(r+DY=Bu$$0⹪Plm ՠOK*/Ҕ_@LYzaWyPTSM g [шsȤ+ :$]2L'arRՈÁɜcjfPyU*. jkNJj@ݾfeKrX ] ^9ںA&I9GVn_'K˓luL1rJnrP:8g$j_"v0;K k]/ Gg-s<,> C3'ǣ6 #yհ;<9"`Z,ȟh// sכ&S=:di*9T ,N:|zdp4=<ad%dGAt#g0j;L/|G >M1pz0lj9 y`` vKItz*"M]X{QNq8=ssҍM(hϜhcޜ׷ôZ2>;g 0d7t-(Xv~t,cXs{+Ajhw%u#.ڳ7׳׎@ QhrjnAcwD)G}gV:N>՛HNf9O;y6j۲-\@l z[ /mζ86.>CbUpg^ t˖*Sf!sR+ ZnPFP pquFr}"nd`퐩[P -PYjayZBlqTC+LRG-Mָ`Qp*p-q-/"ÕڣJE6>,h 'Ej~jeD_Vݦخ<Q1% קTC\ ,O lϋT lq E5zY'@Ic.l.YV6.y4KO-[fD`%Vy?Bz,gF>{'l._{ $D%MtP#TK7tP @b(Ce(2td)Fi\a,\FV"4R9MP]zm=]{Vа5!qCMfBv:cٽd͕n4m5*殅'wXg\7ke\lgޟy~sV]r׈/q" RJ29[hzqG΃2)M1JԳ\ E@Q4beƱ4jܠS(S5h8(-B4 ŵP"/P=2Uz`=kCgQQV^"O-R,բt[tQ[_QSJ|P"9 +Obyh)ňtVhF|^hZ%"1=yT?BCVvYj"d뒤5t@AM%X3ɲ Gs(PQXu|]',u)IăwJft %Cf^fY}Z@%=hLb?wGڽ.RȲn{ >>'wXd|@c&2$6zjnbG-_LcunpY6y8,Dڲ'3t'_M˕{q`fqxIJl(L`Q8 ix` (&3OKiž>N'?wc;n6!!` 'hY`ȋ뽑b75vvlq&zG~)ְx cЬhaDFq G/O XWQ;)NL G2^Jз03;=u} ;5hb80Gqmy3V9r~uvvtsٺz޵hd3g֤\j.is+Ilη݌t{t`q my~"kk݅;Hwޮo v]k3VvؾݪoҲ\}]C }qFэJߖ5z2UQiZiם* mv2ĺh^p J8'Rr1m2&MTqզ [ip _E O%C"`+Ub(QxEDmT:,K MTөNy]dm$9! 
E*4* #6Z44 Y/ϚEɠDq!b,4˨Ѵ`HrtCU>TA=<:<E*PKLW,jRirPS[ @@0ȶܕ˟YZΉĽrȺ"9M]45ќ|t JD"f.&O\;ꃜZ~NzK~dK~$XC{%X;ZVhFriWΣ}xVH-6{Ync u|ިIֻօH1 $}lw6^ϲZ{=z۸|cl5WFtқV{6#Úk|Gn8k-޾$Nr~mǗ9NStNE0ES@ KM&dDh;Q PE*$$3HR QxxBZ۷mǙΠAEUiڞ8}7# QL0n^ͫ]Nn~"GxO[kƘӺe˙ͮmJs &&KָJ )-cWs̫x|$oCY!.V9#)rXna)@Jpy .<K{9^Ǒ:P_?;vf@T${V>D8&3mmk *y(G#V FGz@Bz* 0ҁ0 Q(}2/{Ak d]CΗE: 4Un; D"!xj  F2㪮mx568 ?Qbo_~fIbW -c9&q(öyUmyU,V[rtCֶ  -av-Q[?rį\Zgs(7 &p4-7ژi2t}k0nC I$T'Nk/~7>v}!D+z_ x>c~Cm 1/'x/]lYy-o>Jux /V3!չ$dPoq7׃& `2ud;Hh`ey㆚$ Z J19_^!)5-A F\uuK[(xh6\y9IVajy:MmYC/XT(I6n[I2VQ_o(!Mޱt,-~p+̿aV]cfk̜`- (@}@ERZ+q4_X`m̿*[dO~|ՆC$o2tȐbp</Ƴf8 OV^{fc</C~Y*o8 F}UMg .PѵeIOîq uM8Aqn=y嗷rEr^R7r_)wsg]4j 7[VLiQ'7_+~w>xSڶ) l>]0NaB5_{Y&~*?fY٪gq}#^,͍w򩯖޸h~,zJ0,=I}Sdix?b^!UNUvOѿGY|>Y9{'gI/ib؄(jԗlCׯ/Ehgt|E~9zi8DFw,M?P*Y tQ>]9ʬ׵0bS v 8 ⹦)r/p MmE^dU箍-yV9v$ch:_tU5QAuٵ,Cd^tEqͱ $Rd(6Ʈ#4zյxVB3ҬG2,PmdM0AQxM" |4MP4YKp$؆ Ohm8JEF(Hn1߆O% A-(\q" FRHFcBlaf>FRG2!Y#X@^Svy0ul`?KEo Bz(m u |dO7m%7plZ |ɲM6d Et a@ 8U|G]ˊ*p@sQ4 $͖ w48@ 8u-W32G }x 9/`C(ӧQf$Jby9PjucgYlpBzcj^ [lw/Q#cDh[BtZp7x+k3#7fy/&&v>vƅk%0Z}1_%)⼜@-{%v~ԇ3 :!)(9ULfogp6{諃s)ArXT* GFz{ 2V.sʋx^\ufWNCxrHt>׳)@16OͽB=eգbfwYҤV(T:Cd݋7oUtd^*[koX(_)VTK9#Jm|jd.jO6v9)/+eAVłm^cxa$}օXVy$ XBc^EA5A+)  H *@@y]4FEGl!aLPnJ88^ב3Τ5ʹ,߆h3ohK25_7xUYIn皈L7)$ב[264ތbՅ1T )"p*Ud{:Mɐ E 0-7>hG]mxjE+8s`IЅs&xŀwtEthtB䚖69bkU(&0=O=VzK@t@B%`c3rk{(`̈N ~dzHup@ B+0 3*Ё>F@١S}@t]1kk&&o]!%Fb7T|%q/ְ=xiBX(8ZOﯔ[i33+ĝkole̟N(xkd{2 %؊m,^zjggڞo&^'-)y?z[M';tyH6 %>6%~N?nnH7/u5B3h i㚣ZW'qaAu}٧?gɨY &@Gc 8< Ǡ#C@م8,P$Ws=O5l~䨦Ju{ r=[GMr\{)}r\^,Eq-!yc  <º;||/ȒڋD;o[/N4!˛`n?Y^{ u[sN9g69'kRK3 ։|u򂨋u8oalwù9_]41#O,&}8s7ckNz/%c1v'Xjh] 37Hy=TJe)GɊI?Xy(C88䚍Թ98"Zi^F"lNb*ɏ<5oa:-_B>l*/e',sewt/,#~mc M/G;1o765'C2,WR)2[ %, >S<=NiH!rΤO"jD~\@<󥊝"*Ƀ:&+GX*^L9kV9ͧldCiY։F,l7]Ȥ-lZZMQR*DVi('(ӗӔЯ/%Fh)ͨӲ$֑*45+Fv h +[Ci=m'Ac^jEv[Xi1SQ"JtWU -Ⱥ1FQ㒂J%.J),lŲ<*)YB8D"vӜ6ecZ,Pݩ5ƌKpH`q%u5h,ĸkáQ,|w Y]ٷ kd}`t@8.dǁ3+;]ʞ TA ;:alˌzQWJEri7AO{i_GQvbpfwЍԎFrr#0!"d 
zgR\Eb܈YnO3ߴُcCtPt{./v<+$TLG[_}/姉4.^mO׻"=\B'9&Zz{[۟O{mn?m?u\^ۋh7WZϵ啺h^__-uX;֖],/j}ݼX4Ѿ:eqQbyh,[UC:q㸭^-R/׭qK\͋>.o|?}KE~kjRNy27dYS=Zy`i 5Wo5rpwjuaA:mGydÛ+Z5̇U5*_ϿxoUβ߫{/[_rgn]?]|~ޝݗKzŋW9_Gft~׋uWgy;jݗ[dTlJVF"|I湶"{e`R,}! Rc$"~үR,I _Чґ-R$ؖ?]h|osK?BS_UC݇gCAJ'ko;{HVȖrcAt$b 'H(<>PvusUޡ]|{Oj?*^$ 59apP;R_(H\'6 ?:ջͮ~ bOSaHDM&(O_MzCxvU߷5 l۱˲8u۳5l>ZO6&/ϵlV4~Ouf*`Kj66{ewuC38M`(WtMb/36 lx% z2 @P۷LlaC'0em:{ƠiZo {@&p-.S؆]c=.؀J6ix8x.;=Ekկ}~Lq؞A3Zt;Ȉjn&)0#G~'yDmq:I8C@OdFnJ2}tJG ".B7i:x4YIH>OIrz4w37Pb2S{12#oN)h:MuOf>QJNgGO&ԁ&x{F'>MHg: ғ.<'1upIgДz#|c<ãgo|^k?W׿VK}[w_ο{鿷Wt|w{[BktQ ?կBd7YWϋ%OQS}Z_ԖiPUw$p#Rك$⸵5Wϔ ˏduG@:% Yoa CŅs]o)Gb%>Pl-|^~(}Ra}ㄿT=bU+,}(J=MjfWx bt&Cvu ``>Pe3xR7-VCx- @}rGj}!&r1Ƃq!tjlkppߐF<rG\q:0)Uw"YD씋 ScJ\@[!g:r=ƷY5&…n`Ū`eC>/;bf`ǞwN e< 4O8tjg3&3+ R'z#=ƾc3B!|%< Qq0}<:d$ CVS;b[DR'tM _/X:.6]vm ~t5@lx1ؾA@y8zK̦4ħ2΀.Yl6p{|J"So'33xy~0+p'A2wphL,8C242OɜzXQ/.SnCԋ,cwEz@>@XE: o:Tã> g h:^|MOGęnOg893Jg$%`o: Y49FTbơyJ񐞸狹OFczaj06ńD<^#b{̠>43HE?z-+fd'=}$anY$A.WeK9IGcYholq| zuv" /݄w:ګ{EzI21 (* e] ,7Q߽K[:@/pX1=:~/r|mn)@Wɾz';PV/xNV|w{?򝒡/1_ZkbqG+£NeW@#])B~kIniZs*?/>T$bQ '2/|2<@UGwuB~]*r}N?D)rkծBX}I7VSBIy~V?]B OQX,TN=#>)+}qP9|{eeFe;ؕPTOx,%^g Tԅ vUWޑJ)E ^`pȾ :jXd`Za0sB=Pa 50[h0Ĥ:`$6w]֒NfM<gj,3:ulC;pc!C0m̚xr!*-bcv=Nԃ<3 b@vh\ 2ai?bF6xp43>"L ?8DiIx:2'3=7ǟVpp%44S݈mq%Ϧ( I,'pLp^HGAJ%g4L#P%<{@Os'ȥn~3!%{asZi3tFƋq2s, ;3яP~}xKEMv8-e;zX1m _nsl`h&\.& ziۮWA+Ss-f̳sh2lct`ĪvZN8r3{VeًUXl%1/[jݥ X9A6n\} i{oX ]+o1{czSkq~W*/ߝͺvNo%i+˭zs`6TѨ_* );*T$cRIi$*EGRZqsǧWb7YkT_Մ*]+=O*GKa+-FbO{<|RI?C){u$pbɡ_9P] [VJJ+Zp.3hwi]2'sIǷJʮ~FtC4q8Y<"VBȷ*{TRK?qh 1v$ 5Ilw*X95|zs?sO=)ri)ɕ"= ϴ )𙙹aLQeB$YXګ}PʹI`2cWXD+{}KL^;:h= mp`怅3V^b[U<v:-)S{ r-7<Apq?b;FBOfK-oˆ*z§A3WHYX|V׷ށzYz~^77|gk?E3ƾVssGg-(9[+^[6Ya: 獦l6Vsɴ"p_\M ۍ;_pKvgLqVϡjq#ҴpZ #+ފWk dǷJrIN鈯sU- QKV *X1vR*~1駀JS2xEREI0U/ _ةH2|uO#jH6z=guz=ud`.J꒮= P% g)ssi$i4S45|0?hQcAeyi /! 
^4 !3+$ZQ"zΉ` I@{>4 n@i[m6+N玌j43B[^-׹VNӾ΄LHn:V㖜WYk@k[ut䏧%N~ F6^adxmo:5_vTfCC%{Uݍk}=7uA>{,nXIHwVGr@/[6ڮUb{@n"'X).=v0%3=gg` 5|>`s"`a;fOP%QJH~2G'uO:M40L\LC=%35N`eEGx4n44C'xd'h6>dJhjU5!#瘮4Qb%AHi$j:΀(NM]'uPyMӾCoDz>r}g%I 7(6iҦ eYt`o 7/V% t8΅Z`,9ǽpnq֜:ʵ9w[9:psǸ{Y ѭ|[ӻYoF k* }ʊHmN6$q2R|nKd 9~y&t9|65M|I!6qk f"*3Dh}Ų$kv2__vܤ0[_7Β=o,[;c'Z<ZUb~>Z?L" o J7ޛ`rBbdl5pz-&F<~ޖn~kගu[l;poηPe0a㷉p_Q^a~#]r7#dvV*e+up+TD;;r8)1r]zV|XnR*):.\UnZVEzCyKGxɊj], v bCxx?wK (U{GeY,xBEܫɉ%780{\DGlTNkaxm?߅z*DA, 8(GQy} "z$끞ȋLcĶox<#M}fYR zJv6{1; amP_kdTRqF̕n4{tˌYb 7H =y0o_Ik Usw]̀I̐n۞ ƦS[ An ?xyQNneevi.+"f~Їmqޏ|LЋ "g} i6-|u+026~Fm&7(8Wɐ73#dJfi h'mmܬ'n7/37KikZm4+7[6[ByI}u#vhʯt; [o .~xU\WWbGhH^tafU3Zv- ) ? /X98n) UqƣBIOh$x/3';r8bE,Cre42:c=z'KLR,;3MU]Mklg Ϥd[!n3Fg6x7ڤ->sVت屙0G- ['bĦI:;&''o(>1>(}z.\5@$ <05 bkxr*wޔr7h I(㿵j`V[8 afexgbnd!& mރ6~F1$+ WVo}Ajl+mQ791;n[LEܼ9b6DMC~.fo72nfp۱8~7y4,ƕ8^\]%n3>BHrv=,3DIۚ/F!b9_e8Z^`` Jy[3@a^x?cV />˜wB|ؒ.>ĝ rUL|%WKB#W;(XycIzEIwėCQYIC uT8[,Z%̋  rirH,R} k:%31^y)ٙdx0!j>&X`t'Ķ9 ռYk2Fa@Wwu0VȘ-: 2HRIhk!k>>B1ux'4hc0O 1zdQ^D=ch1M?u셝l]%h6mo- cKx^M 3]SS?vu( SNW%)|(cSz'c2p `3SV{뤅mvsWp#Sm- bokyi57 +;mH989Pu~ֲoQ+ QfYU?oE!+~qMvu؜]tq>GvX4;Yy/VѺ\YR⪱ryd%Iβj dڍ#lg)yXlZx/Դ9yQuߘ*{1fGʯ\)7$;ᛔw5{ٵ]a}b$e}5_#VԲ{B:VT A-U%4Z QP4* U$PPBSzdҠp vJs?DH*B,"T9eW5[Tx">*?@{௰S/r sW<>(֑pa>'< m"Jv+v|pnP۫>Ke!j0&J?8DTC h$@iJO鈦ILТ hL5taOFo)pE軧75=%fFCz)ų$N,7]zLa~0MjtL5"OsN( ~Hl"Lh4u}:yli oxlic=zSlM=4?7zTgj;`oPJ|ʵ{7fQ!uƺ`?6:t{ BdPXx5fx}nmgjY3ravMm,~Dwpήʹshr0֡nyv~$S v{nZx,L;-Mr77?|nw6s`>zq;[הK_z+_q|޸_r/WKWڹ|U;D\^3n7Von]6B3GDfUWZ֢oX򲑧w ^/5HшV i&?monek+?<"kˍ}PTyEDxΉRS$JTQZUG)p"zڨF eiwWd؛$ yR@x& '@eI˵ЮIbt iY"|Yx[P ώ>+՚\*ÍA *K|$UeWRXG|_.TTJbsJQjzAdN+]qΟ)RU.]~,)|Ys(vjMĦZ)Juҵ8*SG Kk>lY]6l`;,gY1d %,Vf躞ey Xi͙ o13[J܋7XlCwl84t6yYy=;l1'+,Z z]&.VyxYVOr18,jlA,P, [d<&coqç`,*9jv8xF-4zJ7na.0 0J5 l{a@4-.$t;sXnvL-GC3JC)x`Irvg!z]n-jn_5w! lM: N smνWslnGop~~3 qɬjw|fm-8=}V⫫zTcme`SZBk7ϙki_UZ0f QeVW)q!vӭEJN+4V5£2HFGD rq` ( _UE8^e^_BP|XCtP*.' 
[E+ E8@_~]*V e-+@\rEx|A*vEP$`˛E,d鎸pWؚM &q`W3H- ͜ &R` zuzneklU&Y4?9F<N`X0Fd_)#~3^53i q qCPj6Ulm X\?nh% :w8H '}%F Cnا='R;fJ8PoľSl&4%#0$#G h4qxzB)|3͔a7As =`{p"^c?ʽ u˼ALl3&% CX݉hk(M׳^U"Ot:Ctɘ[HBZ(~{ iSoi+} _I9If Lo]3??ߔl󔺹 5Hʯ'Gߊ#GS{5w_,o Ws߀QrUsqq'T,B=Tfdދŗq㔸X-^|yZq}qqޏR>A~,b):?>T('!Sv b%\QZjpJ)ǥ\4^."^N:~iG)!/)GBUF܆j:uX˟z.K?QW,qcKRAMOقu}* ;x`]{SŹ)RU( 0;ƎP*!PyKWj*(-TE10/*GP}\ q¡tDp,?T,/=YZ-alqNj 4;LtUg\lmҳ -0%Wgz1/2[pGQ*e EtTK-H*Ty sD@(O$Qv$+ CJM;Zd,PlО$#~؜tR1ώX ;SrV0]j"/S(cz+ 7-a0+=#ƀ+LZwscsɜl>pI>g吸bLLfne[W,kK[W(\] 61'`[VYv߻G-u:z^ 5y6@aus!s97g SnUd&l̈́,WzzY{qpr=Φ]..WY..;vʅD|qr%dBҹn]YbWk,BcG,oT@uR릣`5Cj_YM /c5ȫӳWjM#tCi ζS~|*F7};ct)RQA%#YDJXx$[,Ln;+hY! I%[, C=z*zU4>U/#>ڢ @iZ=~[5^WT#+D *Q(>zXG s* |G {( {T=(c uTWo[>y2' 'S+#EJqGZBEtXQmfv!U{Q}oaFlM0`Tҵ. sNn(5VTgn[ao>qL'7]1 XH^ٳj8Wf >fJ[c߰7#=s//v/S`zgvU׭U.ᑶD_*QeM1vd]Tb]shSҺDy:~5H.wpICȪ!rJ>)diAOǒ,}ɱyʊyeHQG~W%)db( {Ĕh[rKښ~ dS;Q5]ou񡬶v?hdukm'֏`N{i^K:]vIߔ[G-I)Dzmy$p$>vWy3dR/0 X0TKi81@wa lž'( -@V@uVp\'(` t)oQ:B@F[BQ Ga3FGcVy0C#EzƘ<jˮU җO@#;N[BCk;D6媯=]v=cx(zT|*4S yA`2q8f1W 0cZq,.+0{'@5+#z|S4p]j* [EdD/ۥavS+yFrDkY>;g%B5mUv&k"ˎ;aJ $_YL E rfaxUʃ-3l%rU%Zu*ȪlcU3kfi^$MQܭ#éwsySيz.` Ǟ .UtY56U;0obk|{b~I~˼ee(- >Jw52ֈv[MfͦfN~]$E9&6E&Y[_Z#E^k#<^,i:zfV7)в^.@H'ƭsk_?> VϮan/S;YacY>ee%D_A‹GpOCyeا;/\: 5uc"'QnZp DZI8bvN(͌M"gj3^"|v<ԇ=w #t6 Bǥ9ܳY|`wfLxq9-V9ceA4^4(K7.2M< eATR>K8?g<-RGɘf2ࣩYrXf1*sme2dr' b$C#4m6d-8OVoQ粪L|osT]JrQC)b1C|E  K]bk&f90⍛c]:Y>;P{IȬ^G m7 &?i tEgUs {vN-nbI~uyS&26"u.Mn=nivXdB$76W40uury:At|-zx NG?k m՚'WnZz[%%d+k}7V?ҁw;CaG5u`{$l3܏LS[t %1%:)hŒ8bpU~B=:e(>"iFkFȕS03 EұG-id>Fؘczba4)phؾFI5Scw* bLbdL­P"̔a+Sa&F9ތG#yn(^UJ M4.b9O-,<_y٬`Y%;_%}+EDE^3?fQYEʝ$J,/)ꌝh{3.<;KfI gVPe`ӝfs OϲIt)`c^ " ?xQ"Iizɾ: ;ZU|Jely2w e|ƃ4,uR,N$/g< C3{oTԆQMqbo݀}46dXqU/yQM!bY1CY28‚uh0\s-X .e\ 6ի[bj=.HN )di/q[R[L  "&n %R:j_[ ]:wҭTt+b^s^]2y5^sfOm~<71y)u?H)=7A+Vqdxrt֥u"rpYr{eߵHbz48ٔVVa:Xӫ&kH> 8)`WAW{c}Hyul{QL|uÎw6KPd.Fg*ځR[lJf*{-C"1QZ?G'iԎa*jj#"#'}F阆.P,h9 Flj%ih091_@hSd;B3@dqmxahO#j2'4tѣqG0)?DZ:>,%HB ]9h|pj:bQ& sWȃd_83@^ވpI*5aOh< %RdzZ0.Γ|glW"Cs9O9MY<&%˸Y:M<_Kꮒ.ʲҟGY&9 ] 
Ckv(zHOeWHW+󫶭Kb͈ES:0tHdƚF.+̏%UH;I[y}udvuTtno;42Pݽ#uM]H:+_e'+~GR>!=n[U{-3[OdH%S' }Ytw{J.驊vތ=!fdF ށ,uwh:E5m&\G pVқ i؏[iP*`LM#EAMVt2: `( ݘY,t7 $x>Ƅ`RUR}ϲ$⡍lysR|[5Y6+8<63ϳ23UIP) UK,=/Q> .o_k:$v^F mX*H-V(`YX77)YԵ.N0e7V2Â}gJ(9v6^ǯKFGSݤEf 8ot9K5BC^{l,K~WCrLL=O !#iuծ׺RVzHٵd8R6CAǀ?C O$$"Haۛ_̐3CsލֽdOW׫p?Wuݤ/1CE`K>"ҏ࣯AViLN8|0>e7='. `/ hKh-ձ?xUxus+w:pusVv:~}ƪϡ_mOͮ5Y>;j^6g5߼i_+N\IBЮ֥lNꭳE}]o/fIj]fUhog7'կTUSO.[]_ߴfZr~x%< yoOMk|.Zm?vGeN!ު[W^Om,OMkzw*am7 U9+XAGH RZPK!6kTxf׃&am3LM"K{3װ}@:2|0w;=vVc&^_]g}~zm:pͬ24ec=؁;0^@kЗA|sȦ#bFҁ&S:Bԥ]?׊lFR͝Xù=x@ݩChHPQꍶ^Lk^f+}rq+hQ//.fTc.+ uMtͺ⧟zx w)6>VZDxzQdY`;e̾dEy$ǜ(=+p':,앹INP"/0'_9Ysc+ KyZPaMt{TTW|_F?G*=It8TQDCH(ykR5J'=rs(VQ \I#Y,SE1g} u$øT-Rr/|U\N8D6 /*|M2bϗrD"n~Vۦ=@,dovv߀GolZd(}lh: @ Fz0m0L 3Ru{bϵ\X]&5mՆAo3{c'& ķ=FC,hF;qMYea=fl0:$hGFK}Lt}Jq c=;a#/5N0`wdCI@jjR/wQ2 vxx07b-q<( xjƖ %Ω$ ۳]o4ݳ84 ]`+^ՌAC&%4N?Tz@C:⠃Fnw8^h#[?whO0iг aOqhq CXԶc{L6Έo{pd؛d%*2v֙dah0",u0Zw80SۛyAoq:XawF&/;Hmc|fAa?Z GY Hr٨#;s=(Yڠf4f[閚?j^˟*5#1|U"ZB=%Iv =~ۖ׈-nss_ۮw%>7@r֚N.Ecv=<__4L,U&fZ]:nKXmgsr)ϚuKk˫IQ׀Y[1%yV?S %75}fp޾7g ugJw wa5[mڬ~k&G%ޔk٫L]߶ionpaUcгR/ Y+1<ߡǢ5Rg;WrL 69,Wk";_l8)m퍗u/P=5N IF¾Tyrz\Ew((rbN9*b@o>G9H[:Whr2K{~9@4===fEh7+o/U+\~T\ui/UO_z =$x@r;fʟ;b/+/_ry7, %-qA<9UFT듯8{R<{f_J3g^C'ROժUEQsM?AUE (zPs@Tjr:3g ?k.ƤtXPpLdzw-vckL v0nwhcWLfNp6*ض SLmn5<Q0>A M}|{ 6l`rv;Z=gt;s rGYt ׽k {؀puUQ_N ,]7;z똆itMӵ 7h{МuCyL8qC3rH/ I:JС* `2(}AԆ{|A 0nҗ £1$JNϴt &9cYahJ"8(=k&iȝY$pТeFN $ӐR:00=G^~ #L,&N7QCbTMa1u'n;"+^ƴx݅EiI/k@PP37}0N?#39$<ۦ*Txod;Xփ ld9nr3Y@֔lvr1_Me)!a2{PMcSd)3w!)>Nnxkfgl7<^&[ܶyz>e %~c]K+v9J6[YzNGMCei .n0?{- 8IIZq?Qf+ uVKWqyݼ_.3zl^ZkƯ_ysZ4Mꏓ}f,CKշ}Smkvtmaq˫;Y;o{*w]؎ˋKo23=SB[bYݹ 9g Cܹ|qi8f'om~lrٗx1r'o墔֣Y矊r"_$_qrޫC(߭ \ufk%q{KZIO&6}N z_@G1zWkmO=йܐ^'M2Z$||R9/׸žyDxT렠<:hՔ=w6\ PVG߃o's,?V~ɿp_W,V ~By yZ O+nMPQDVWV=pK:8BC='(.c]+r q&e8S nvz@].pt{`&c1knGgv# (z>@X=:ayLYuPAt iЧj2ڴ&;Lfy;Lv[3]kz-߳};fb 36Հ)gwE:f(ôV0 MRyQ8&3BB 5f0qS ' c:(=9~ lw€׌OZB}IL' $db,0$nt>NCS7J/1IMC#z qR'&IV<-¡=MKNǦl` aF08Ox@4#G3ڏԍN$}'$ tb`DpbD$$Pq5 ց%0h֝NwrV,29-+R9WD{N278'dv 0t`ْ;nU38)ڏqQ_[|HyM{+^>ߤvv\[e+b~{k#V2վ|jle6Lig 
ۀ̭z|޽Փ51 Zm/0_0gn^-qjiNZS7Wuf:jry='Hw@yb5^^fJ&I]Õ`lf|Ӵ5u;tҿ™eW.:&2[wǛ֍2-%ok׋{뵊P/>cZmYt OuWmZM6>h,̌Rf͸Zfbfc6] Nf*D;0d$@ |# {Xgб!1Gfa .|<RFadDa蚣tqiD1uJFCB4rh<lRϧHuE>u,!ЦcMO5 -]<;shFD =XI4'qjOmG ]=I"o܏c Dgi6G^`wLH?u#+IBPږ9 Iib'/1s &^ҡ?tgՌ}{kE̺dyK)w4 x )1,I^ɾ?{ oY_썈Pv{K=xѧAQQtHcm~wBm#5*vhh[]l)Lکڨ9p;}oB^}vX\ϴdYMm$/E኉ߴ1?T^dvA%3^dNd9/Ng'~%$3?7τOϥzwHOUꕺ^oq|wmJlkƻw0%cj9VApn8ASD Rk0Ix4bLƦ׈L?ɐ$f0 chw.ux G<}ux$|%:! 80) Ɖ)94t#Mñm o֝N2{7t2}$ l?9>PJC: MVdɼ -ldd^z VFY I[BW^%s[Y{n$k Gsnn;mZXo-=868J? (*m{B!M[u ݊ݩ{%nv̢վ8s/uίڛm]K\FڳM4}EWWng voh\ܨ8]3ޮÖv` iilZ_ְ1ܗm*6J[7`a񞫳# _mVK"'ޞﱗ~]v k|P}v*;:@X:#gxZCjRWjGB*J]Kb皵g|EHlJ"ħ ^@J^{"OyN,WK<qʸ, ^RP]:v]AD[`@ӵ5&~|c%|(-;lgDIfyl<ױO}Ÿ-GL/-d̋g( !ӀSxR׀24t'0e6bWP2cf.13?ap2hx*Y]dr2Sڳ1 P1рnyqHvP<ě$E.߉a <߲} NiЋ``gHȟÉ?iH$0bxf3 FڙL0Pq cN"{é;}shLbؔV FN#{S01 q ڟ?E0˂Ѭ6tԟπkTlEn8spc;$ N<& ~Ɲqm wCVݒ6V37 c3] )|u݂v0󷗘 01>ě((W]⏸m\ Zeߖ3ԛ]i ushz0MRU/ېvdehՙ>9ia"F폓>崃V[lrkz_5;ezkߡ ˬ4xZc^;idv/Leq2KѬ$ݺKt1<OSE qEt X+ .8=|k$UsW9DJx(* Wqe\9\"͐p)(=VzʓGVQRqN9(PI/WPi)bR*JkO)MT_*KKB¾Q̿*E;( )JM v:-ϲUљcSL1[~՘C%dPElXK`pCh̴[g[֒ l;VԎځ>nװ;j{ap!.-0vL3mZ[w\3NOevs1L1hc RFFY$(2 =ƣ`6n&ԅǥO3F@`@:8p8;~@ɐNplC#,xO0OaDx$QӋaߎ)v-^0NG7GtޏRf3o2{9X#g>({։gCw23o3N?7F_ј?><pSU`Bx:F8%KeG?5یvivQ^Nu}no\tgY?8CK}h=Kvv~Saδk.$/>f ̓;lI5<w-5Iq O9dNbNE z fٱoPx3>1$CLxK)ckxA⺣Ku;3gapnoK2Mܐ_9]}kv0?v'=퀶;w0aMn5Ԥ,}?m&Q}DϜwE}ik]&Z}e˚qDF'n+^m"f~љKQk`k].ZY[e(P'L^Z]l-_NΘTW}WgK狺\78_#JO^nulKu?ow}Geխ 7 m6N_f}Ή :bG7_m|<@گ4zS}ثAUn:0;]nvsVշM3Rd_MkbG%CQ}>2b{4D#T/Jŧ %* R5c c l= .HGR!|뀷Z+s H,PUPผIbiCn&b-'HQY*C+"SDJaW2')'OPm +5Y>C(Zn"TTGʡ?ehƉR5 YPY|Yګ)ŤgN4vl,)6踃1Bf{g0m44zభۺ|J]_%, i$[t3kPh= aлIbaLadeN-wھe1=a_qoj7 3-|pkȝwxx dM>\,w6`À ۧq `9/'p69j%ɀtL>LlܡY썼g8N]ֈ!8pi̟hO'CB# q˫f~Ѹh/YvKM|n6êz6ߨ֌y_f.uGm-3ni _M1M>LT&FCr+wj֍NYp!mN0ڦ67߭;"#2YwݼeCim|[; w kP콌%3:C`,"/BKHZݯ?+WHeEA BYxɋ TRW B^Ńs{ꥊd%/_+|) p cX#ҏ#'(D8CEV9_k3Tx֔\Pt=bm`A,w4v5׆B-0, 3OnР S7L4bLf{$IY0~޾1~kK~0 &1O=x8yqx@$hhCUS:ui⦾9/ Qb|W/ց$49/G B|@>v#?SOGz?q܉;K0w7=(?d]G_g}?a$c]VtcZvOv,)2 yo$?JeC6ЦT CZYj|}>-m"bhBk2SpKIᮃ}2hm]^ۗ˰lë҅ebhFƶzf qZbnռ~oᾄ1|I.&CXŻtָRfnV&kz}=XoFwKwTh _Gϊjbw^m%m0X.l 3x= 
>.֬vT@6zs]e2JXlkwe0f7@J[Ws HJN)Pq^Rba_W~J]rZy\ )JYA<@q gr^Cz/2|1 #X@ZU\t*,WQWWXefGQ+VHB #*2|! {+ZI U]?U^5I?y|{zHlUa]frRbs0q,%bf@3spY&rLu%όWe ]3kcdpвcv,= -? Z;Ij};.,bwMǰI%qzfs cM3Y}l=Vs>0' XmX㙶k}3sȆ(Fˋob_e{,y`[w51/CY꺚cM7b)!A bh*= fIxy4==5=0qdSG 44m.3`G?@x'ыԧ9"@BI8ҐLh`p4Jc:yqJgC?f(Kov ݚ4^GA4t&x~?AAm㘺8#2Ń) bR;Sš ӨgrLAK-n@O˹ٿٯ}1Ȇ./Xb,ܲ췖,ǐrG~n23pD-[ ~렚*XFk_ kCM]"r-+vsvre^o7Fl]7uMZFd̖r ̳mT KP+CɆ^6ͺwڷ);lwmw ݧVۥno>(f15s~`e|ɝoo;޽)}6gk =)TB*4eEQ zN+VCBpT蹢;r (f"*\qJV`6FTPeEKWlI<vvau(:S fƮ6P3ALXNg釥`+t@2ٖQ& 2;9Vf:acTiGfv sǵPj?3ŚBÓ,}i. @ffzs1c ӌݳ1^3>g6,`L[nvfloW`OsB$$ΰ?5pO'b0 ?I> y9=Zx;kO`Єv@A@56G4C>p,k`M!sMFaLؤv>HEob[qHݡNY$pq'N p44 :w"sS3K?FdN =&S 5pZ<!ɇTxRgdSv^zc!]7g tDzwC*A_ZjX*O7<*# ̶ܭmfP@472 ص=lܶ%V=Ylvڏ\kům:Ez-1j^.fի+VnԠPvs:kYheM[B}R;cP>rNo0ow~ai=ѷteٙKm;=T| 6'h2~`7WDT.# hݒTjqU\+| 42䅊$ˢ*9$s| HG2_r T;(J"*{/΄g^Q&c k>J((M^*OCQxZ'E^>)WWB$p%fˉE { T*Dz<+ Q+N's'y S[b79!' 'BT*%qEFeNFTR|ˡFil">[BTox`a{,4IY(ufɒ[YX Fo1L$n%._[z`Y:+LyVW7g6,ؑez]-e=l/Vg!L߲0a`20>Mqcw0|LeԌ=/ɌV[LF$˜HXt$JRz_d"eD9-R_ Xh~~0b7u>.^4 С,H%h0a1 94x4 7MԧS2oJOMLx0)VD'80?aR&MB2 #yԷ]FMbڥcj,0t{V»z)pdz<`d$ [؋ ТA̎8N rGKS}>ѫ~{=ZXCw_]k(TV?JŋU!ָuskv:G[mYn{]72%m@.ZnXצ,ven]c$wm\KSIs6шEl5u&\ QW]M|!ᓥwĺ|PKR]otgyۗ~/> jظ };/Žk{o(n'nzm썪*9tXr ZH4 _{V{Pڂ|T+aX=p qbv ն9TubP PUr8Amx0Iͺ "LŜ[ G $HyyOti񙂸}(WP11*4K" ׅ~ G|"{(! ԗf[;9NT<::DBX/e`Td% z^(9e(2 A,0&3d!櫖v Y+Z~Vxf!YJsRcnl?72 >ej|S82-;6Tx :[.3u=O;<;3\]ׅ ]SuW7l$`lleC 6mP1l%A:ƒЧ) L:$G>u\ Y8{tSUJƦ)ZlNwO.є'xw&bƿnw'hZ_AJ\??4 p~I~UEov܈Tu(}CMs~~wCe]Zv[9ڄ64VlCfn5CRF=곅vg+@LC!ӻB+ðY}f7"2u i[DCͽwiw{30e_Tϥ $N/{U8x|Z?QH!+k'|pPW%/7s(x*0xBN$$z0?#*ԠIQ=)r%zHNz[:I9 vtBQ! "eA>*e1䐚2Tqt R9a^JQk<_It{傌*\S,Ըwi2UkUsy$IIB\:`JRܾ+|F,Gטh2dd2дzd0S^!̠Y)*a?֗ѱj1fʚf}ekvXd/evQ$᎞E"|u>l73d3TY@.balL a{뀂ܹpfvXwˌTY=I!=:=С3:Nhd{/`aJ 1`!´qqIt@IL`6 qA0 dЋgc ZXn&3z dkO[#n`xd> =s ڝїM7xזU?V4RM6Z`r/mÁݦ30h{VE$(Lx F>Sk Qm- Zud}m*_ɖF껊ihs.+=2܅ 4Q򢚩Fב,Yd>[]ߩwXz?4|0Cɝ׿mk'{vp;rknsw~"Mx(}q-êȗs\zC* "LDTUg"?FǢRP$aR rؔq F1ǜryP<' !/",/s U`K/E~Oq\I+P' (B UIJeBpTR. 
%rC$=bA~OrY% D+pyIDد %Y8OB"/\O*8o, vW';s^R37TtTҽ2a؅ 0YslLe '6 2((hko1Q:[ۡg3{ݵв&v,r s(Y:|0M3Reл mab\e[#&.3:di!nye 4m`PL&eŖ5TOXMz]>l'O){Ϊ̄635uVCj7!Gtɀ:`R j"Vd^{SyQ?p&g>Bo:RN|'p<:?8 iU#n FaؠN7FING7vÄsF$I;GcOM; ~8o'ѩCo@ b:R}ţu(6aԅDfM_z;F\um|"a% bDŽljtc!ػ>pT1){9e5M^d\C&֘C@$ZC1?VP!,ʞv5U_t+|cqZMm)~sK?[}_漢DU˥#OJ^FO i>~ 7QGg,$*I)ڎu;RzcL_7eTײk=P]TIz',T|uVg={үLҟnVH3㾼U Ȉo֭I<-8˫l.t 6'o/k8`a;ncz5| tYu9m&PnϸiŗB 'juE%dmNw4jtJqCPkmkՓ%{ډ@ /H wܕAi$*|dUdEepk^[GC5!Y3 ^>J:E<`Ú(])9v)v֑"({'bW=N?@uV+z(j-Se$ QSŶÖk1RWR;T5Qoxu?Md]D)Хg0YRv|ȉ\'uY.UM=:$˸!sbJ-e=9 Q0;TXCEɜ<LtW<UpC2$^EYﳮ"& ? kgSaeA2Pfı*fΌ+daԀpˈ2#fLrL-z4%ssd8H<SoZeq,6d;Z,'YdoDifa桕9oc<,"Qw bߟ<1-r+f4J$m-xMR873 qF#cz1i $ȏ`xDF*0blθFalabqR$4SY?94ɛ 1meNA?yEz߮<\ub ̉uѹUK|&][{:ZϱLR)6L|9[RcVr{u孕6?.kbYI{u_,_, Ɋ3PK8d;=^?pV?:!駂)«m| !:†˼x%HA5ؑ;ޖBSQ95;+]i8YC`_W^M޸+zu󀒅w"|Szʫu^GXGyz\pI(  xNÎT8U:* 4>#4/?ڡ$xN$P`cQxE}%qp4*鄦``͎lv3V! T *X2eO8"Ȑ㞇}PjzycQlE>\cOt3W߳>SY¾;^dZasECT2NdW!MEF.UզJ ,X] ֞o|`ad%, ,T"ƴrb<:{㓄yzYbP[ 4I|I.,[4Ct$$dь2.,NR'lN*lI,֥7sdR\D~oiۣYM(6mGPyЫ"-ßg MS"9NMNp^(Ki1?V2c7/*FNĨ8̏?~lMV.9ǧlg c[Vx^M([ ,wi_7$bx:\G6:P8SqMt[ۥBm1wgRl_٤-C%.{ONŸWG;Iy֨d{nyiQG=CQݺqqfVx8k[{Iw6~ ˾!ikXً't^5 |s4䆌BcOD 6Y"O)WJţ:']^5jo]>C^d1/ k5PLii~([׊RysRubXSY<3R$N}ѡCoJSU^-PTYn +O{耗hx!U7xA੊bKyH-A/Ct@NM?^"袺uZ^ـ&sO X_R}&_%߫aUL[ޤ6cYxЭ$JFimP{GH5CseBBZVlTxê0gӵSV-n'dVP!Y] : \:Li\Կ*`tL|._@84rfԎҒtM>)hd~JҴbqQK;PJX㲲OuK;h'4#yF˒~8 hƬ x䓸'yWZ9-h\ΆEflZb?OKj8SзӜGa_ai4f^ϋs4'oBsGP7+=^o,Nݢ/oA$,Ks:Qu+ZEt]Ĺ))~sn]_]usuNWZc2,{+Rl7kҥYWaYsbMn电3U+ ׷M?}x [W6؁pOb~ &>-?|(5IɵO`zZnyd=ܛq鞨p KGX>&*-SI'j qb3,4zOS8]m 亊&|EEԾj|?:9TNYK"MEl!^Sk w!uKMqAQr"/v$i'uΧr3as݋=̃!S=s\>=")s7[ѱcU*$/jA[i|֔NFz>m Ct( ZQo׏){ N@>W?s^d4?Q,5A{KqO[MYs C]Ul7T{ ( Q|M/k$AwOC|!b;Y_dW$|/Qx-"$ r}E=7D^xiIuS-4DܐB| T:-)F} -̶ʘ7,&ЏB0b޺ %}'* xM2z?!”٘~7`&Of۬=K)Dy~xFhE6 {Z&;u|&+93SS#\xx x tS۬ з ׫PITu9v ֡3hMY0=Ы<(l6p%As;Cd@elPg% #/@<̳,+t> (ey2d8~XM"nNpD3jL)N1E-ȼQZѨe4}9he^eA(q8Ќƣ̋1S:ot8̦Y7qnGcR&d>O]?XOWc3]+wSA7 [(խUDBCai,]z{DKeUeݽ*#NKuޥown-mۦe[.dQ Ќ,!>0|Ahhx"d4$F 4ω̨ʪ^5[72"yΉyo.MQ?\rl;l׋?{, m/߷"Enڮ!? 
>m,Dn rRK00ڀ0mzݝ ?_c9[`}^W oMVz{oIw98j5./ZV۞:_ew޼Z\>|uuqy"7݋Udvh\Z'˥qr~1[[ǍeZ^g|Һhzuh}ju.]{}պj4WLn_=]ˏ}O?36o~u༇f߹;qSJ'o֑ܪq@G~g/Cy}ٟ6a\SYXa]F]k7~y ) xW<)i,1\:hBTC) t _jq< Ux*EIP?HEJ,-Z:(Pb "\kOj - SLG ו*tY}ȰE.2RH= JY?(<KՓzX FJL+Up%IhxPbQyOWk$k,WfUTiMIHU8tK47膋T݁WW\ۅ  DZa6ئkt48BzFgR*No";Ez1~BH7e@`K݇POBSdZe˜,2B: t4?`0az6rM !p5`L*"Y+.: j02 3mCg2DMl5LxN0D8%t#MxVO'Ʈ68.~闿S?7E7~v_<|? _{[S5a›a+D-glw~܂7?|oPȎ?Aȯt < ~{OM@ڀfc sj; wdwIt8o.fKl^ϗ%Յ|~8phby%_4ǯVs0;_5'v_[Vcš͋q=;i(:o5FOڍWcY-g+;\lS6ƹۈv0'쟼}Gw䪙~7; !݆ۨwfg&4zFHW4̷"Uİuy}rBmRQ?(`5F8AP]c.ts 8L=R 8սt|ƾChy1DGn<6g]gؑC'E7 u"cKR줩NTF:F~ӱ;X #Cx|b/!mslA<#NGV`;pc75f&r(`1>V ohg$H[Yzz{ߖ!a5g[6/;Fom`{Ho.}0?aalδ 7ZTmn1fЯliƺZ]Ho7EEm%~ɲ?QN.^U.˫=*O^5ϻˋc@N®݌]rN4 ^浄h^.q- \3)m^lK impS^*{DӁ)s[d߶/{w㺗oךlF|.f~]/|JncEu])ηN~LQhR=`1N tT`ڕ"%TG|$TPnsG\QeH4-,ڮ;Dȵ7ufidp4,up:O&@q B/4Û ՂսXM1T텾7qGj2cg4"XkxՇ>z>Ʊe&NQ8.(.R;`8a!æNC?{.vcOS aa$89I$b?0iz( G:z o-<+ lLfbviǓ)tq2J";2'[,8ޛzy;{m~mşŮvSs<Me~}~z;ǿCpõ5o>Bo5\_Yl.d9Sw˩W\42&%r|FRB+ }6^]6dC\/3-%_I?"Mer<_vXu6J|]i6ކ]${?>eFeVY :Qr+-@iY-T]-zRUK1]30o=(F>%1Re:hD\gi*ҥ`S5JE*R|Ag )W9K}Oxb(hgRQ<*s͈ X|IZ=(-)2uUX0C3{e9}J8F*䬝Lb:["[ {"qu4 PM 7c1<8)̷۶EQMk-Tm:rDkevɑL$Eiɬ?|?Nն `no|ԱMu =dL1u5m@Ϋd3pm02Rںeu plvmP4c` !HP"#8-p rhx.א;D/%8nddc͇Qō4Izsɩaӳn80i'!h89،4O/t]=w0o?χPOc=T5l|SY$8uV2b'qv#)};0=c7G8 9~+COl4rFc8ܩ隣=u2[NBwk'noDfOQ8@ r'̳>{[0w FjV0yZ1c M_q)w\;,zB":Ec¢T8aΜFt4ʜesOdisv6q`|\94Tj4 i+`mΏXF{I8tZCxu+;ԌWbNW#jj9{z7nʷ¾fo7,Fe(lFͦ n w6rw + }vc/Ure\HÎĈG1ג5vXnpbNt|/"{>^` (Ej Ƨ9zc'ct ] ='HH}x} s=3wl1qO@)\jؒH<Z~ÊU3AxZgP_J 40|MkEgG4[ ".T)pgBbjtϗi5 Oj] ,83e,~pLͰٲl`7i0xŔW3.!8eemS!DB5n (peh!jZķxAQNa=Y( 8j*9H!X6rl}e _Չs0f @aB1RБQToJ<_X2K4ԯR>_$''2 3$|H^jָ}Q UEN1-J(R5 sDWψݒdh\)K%>uŖP*X>ƕ*KS{>4_BQʒu*ŗ  `\O,L2u;d""!; Y$H#elS5۪mtIQ,c~@\``"7KGH] Yk<525PM4r-&:&U#U4 r,تcjV\Kx ^1kuD~^]sV1RK(Hv 32Yx:&!ځ`CQD0J|OS;ldz$#&C+Xg <C]F?M~`}G-q:N1ҰNpbcx1sQ0"R;hk3v#-$8{!H2 00p} &cwWt:ۂȰ/?<ڲn}6*ݕKr85/mg0>A7?㠶6onC}93nLs;?تY5^!N:5;_qUsJj`krmnSW'7rNE׫LsmjZlN։ZsZ%ѷzSy=fg?Mrf+_ S%g/@ck b];.Ez|g+o~"ˤsbb2ס*ɌVT`9&ߐD LcB}@15K{G'aMh#q^ħP*pY2R=2'0iO]YR _G(V-g,ᙆ5ax8|יF: 
;Ǟ#YE] cq` ,=c<82X(0<< Ì́3:‘gk2vb'tc-'o΃ Ԛ#kiV0 !v`A22} ג gԑ7H/L/j 3B?ΤT f`Xy5e?mFu;tVtJ@vT8iDPc@95=X3OWz婚h{O?iwI,uDJqmnBf3P^lG:n׵Z_]6U$S&j+x*ﮐsS^FOur+^Gq иܚ!Ёߣ7 z&h2t#לۉ". ؽB"1%rHlsİsRYIJ\V=jPBgi"*ZqTxxP Q4/JR1%P{De1ݨTȕxɷFE8(Yrc\aI`xabeZ(SbD[ g RBYHSؒP8xJ ˓)q;*şDVclSe:(3Z吥hJ̐~ xJ,> k,r:8CQl_Ry5Ul7 ![RML:2-KsLK#@#)Lӵ 8 F" @RUtH%a-Y6nkUmλ&"]7X[Ր jDuuU5QOSud :fC>d0PFgߏJB-4\4φcn#]椅 B}l[k16E k:X0T "+gUzhf( E8qqKʼnZx;a:x,m7 ߟEac{ZtIdb{whC7 x:ƾQ8g9G^Y0荧X; f܌#?,7kAYA@{:ƞ(+[$jfMG8>^,Qδ{c/G=> J hά2]#{vFs=~s+nO} Ab|{RP}: ToUnqwV\zG;4~\Z;\9o#i͟fl"3jcw{KPN[8{Hĩk9Nԫ|>o︰=ZN%֬9i:ld_"{~Z/;=V w2k{Tcnv.w"yg6u^p}'hQA+1IBIb) .;TԪ25&`eb^j0Z)ϊ})4OsRW$!Sbɏo)zP&RTǐaSk`*IR坲,P&kSv+jׯ8UDZȀV 6I>",0 ,Հ:gnZm5U1j;:I@RM ,6\Dcd\$fd Gdhn` ~ zN u@pcCG6.PGkbĦ#쥶ja`F-O連jéGࠡ,#[&xȱ)Fubab?uҙAjC#O"mEZFx#<+QGn!VWR@8toYq[)G8qv0ǎ.f^?LOGd \{g>L֑M9np;QH?[n~mW,S;oȽwif^VJ`Mez~mKUm3',$YdojRŐ*:nmNDq+喔Z9,:?QR"7BVz Z8}Lhusw/; fѵ5mxM{zG<]-og)ʲTݝ[>;=۵rȼ w!n&uE/߬ށgwWeJ3r[2}y۫FXFY~\.}R- Gi UTx>* eXo<oJE`LhW8Qj$@Lg*E ͗0 -F]wu伻&[/{1G'{2(sB*Jz(S 0>}iR(uWOE^[lR%x)a/,pHttTĂx T`Z,( eS,e^"k']8@*~zDrfh;zq4Bxdy7ZF] ت%w.kd6 Rt2(Yleelۦb95t *uMM2{ptYp{{ ;f)嘲 &Rl$6N]dpS6g4K&OIL"IZWF~UDj/\,8;qUg:(x4/E~ccY>P1&*Yx ?'m{A?B?p^ &CXSٌd 5Fx{ic&ڥ%^?J'JLnSx{q#ɴ$`D<]aESę1F(ϜOBf70c%" =18PGN<&"_LC+*G/gK_ =W-yrW|C6m,٦kiPv"{Hxp>q1LsB\L^)OȟMooxnV@I[Dlyo+̵nٖg6V}F밥MJf#'Vۖ)9My绕[.q\%O7nrvC[zޚ{V2Xwgwʆ<voy#z_ Yzs);ĻGu lm⬲&X{%_)1 mUE|fb~*+!hJ , bP"+%Qk=xAQWtа;ZP-i"(]R$AضH p'% E^UZ 0bV`$xb) ehyȕnBK\ m ,- Ua\).5H]C5'鑉f3[&r:eZ7dy":k.l"^Q)$ n6=.+J4,"[#qKd Zji HщH2Bu-,^G,<= n'I,- _waxyn NNW : ,!dj2=D;zd֟ f N08x+dVd83Nj;9F*1ɹ9/ #(q,L4P M -xx]{n<5P|C;5i{8oY))$4$p~B5i:]W&co2d۩C@s5\5t ttOI4TDct#׀S Cs;'HRJZi]h o1A]g%ΣwY3~"D]@~!2|$ P6a-)%瀘dn'ímj ylR>K5+/(Kngߎ=z襙DzJ$W,UID3vm&[W'W'IsJ/y>l\*\,iў/.6W)OcJ{c\i\]\Xv8ԟ$XvWQ9PƝk~*$Vi>5 RA%9 n+CVFkU3B[}[=յ7UTmKճVe +yL2țjj^UOJ/-|@ZtK"MzG1 }Gʵk)uSGӗmݒ; 5WvU;ucmtT$D@ي)}CP^;宩[NdZP4d7Ce : z5owH`z>mC5s\ܨ Pol%Tvu┫YFTuC@O6\g_uձlXNH|pT :/S'Ic?M,Q¡7`C#!kp:4NXqL#st6Ҹ DuUjs0SM^:TY沯zmYesVGUܬVoUv R49p̍{Lfg;^ 
[`˳5"Wp=+lˍne/VU:\unF2^=[*Fr<[+~OS&-2&W)PEj#a HLNxVc JX-?SF(+JTu Ob> W+ʳlZY?-8hPצ4Ju{_́l)ȲȯwUuLC#.QaZMkj* J'Id6VT , + ێ~J5Aߴ, LmuS(YXbIRUU8\&z::)eIIVeeFc /Y&hv"[,ònw;%YItnd>V@W-8,a 8 S< 8vp%:rTس41Fid$ej/`Xvc7"~s/Lhf')P4N? G8I%b'\#+q}#tf;1lH;Uc<TTdei=Wq] q|axȟQ8 RBwb˟|DQL}Ϸ>lQ@1Fy8(\Vߕo?ݼM_[-[ncolԄg*c{'FNs;zqok˹mA|gM\9{; im;MlseU[Kf+_]wm7U{yyʥuy!7nd,"f-bnkW+3q4ϻ"DY |~",u<3N>xXu+L 2`TX;Ib *S:Ku'uR*-u0qMa!6l=yϪו9lЫ\q}]_6EQo-YɩEkA8vy hKċ?b+ yYA. P{#kțS.MkQ;uZ}]kjBhq]k/hHX;m'U)*{BD$fҗV{Td:RZ;拌z*%V-Y0͎PG m \Q1C]JT7_Jw$~cK)GB*Bt 7}_e;_6ˏYo1T%\/'iK_,gM > ky؀1{d!/@PUԌnLTT`0HLm:C ȭgK}҅ZGշZ%[Y;shdy/y7펁tU;O z_dIȒdyXMU}bXP3@$LSlxA;ݞ}47tavWjɒ3tsu,ĭ0{4큡w}=Һ]ujNf'6;Q<}s/Q2)>4]#f4waO8At6OcsQ9vloV-#ZUIZH QI͟n1vG롎l@Q\drW䮥+ec=cuzŽL蛊(Y6"DR6Lٱڎb̮t^'6F8[l2uT xU~H g63P)bd$~X%[ٷ 5sUE':aJ)a:q0GN Ȕ50@SɒhqddX,'Śo'Crrp% G^>p @I`&'n0y,GA" v^$`V;0%4 ǧ8q3DyТp4pXK80Eij~Htét<A'D 3v>h דS4;Ǹ7% 6?gÛDG'Q<:ųxicsiFd6 ħd{93!V|K$GONmP-o#7Y"M៸X GvX\fqωO3ٰf3xs"1oy֛5?ou<kR3`U3qr/!ѸWKWj@-|=GN/LDG/,aEXiO4);Ĭs"I{u#$9VXUP-|h-o3%ȼeM]W^5/cj-sJo-#z;^nO^)mt]j kSckD&KUX1bqL.Mt*{a"[8*3?>Lk[?Ys~Έi^,FΙUQy_ܴصƙ٨]5Vb.uݯԮ|O6~L;*$N+;4^/ g&f7=u||rz^Z V)j1bX̗VxE6!%,dCX+`8  { g\Mn#. * MACdDȠ" DMB t;dW!K( TQEUUHUe.Ic`HmIDCV!fH#Ժ05uiZȣY*d!}IVdQX\ǿH WPo Wݪ*}Ԅmh@Ƈ HmxI606HSͱ}뎇#TXat=3*;`ˍ\^y:aZ'8hpǾA#]{Bykȉ|G#D74ca3sZ,11#Җ7}߉b N|z<Ђp< Wؚ4 ͛"ЍFre%DB4l1;790_b(h<41ylٶg!1LV}"7MϮ'?FfO]5s}/0#Le3tF+|E g*W"~o|HF4ߴќuŶp*no/) w[[߶ڸ-h2jBoa2'^n ̿E!j'%Ɠw2}Q)dMlyNع#}?ol=9\YWկv`掰#ock˰nweQyCO_ ̔WFn;Jo:[ ;N^RE[U}X}M d$c|+V~+N}{vie2[<bV'B+uH*߳= ״EY0XƜZA >ߦLXN?t:xޮz-_&Wݣ '0UФs@J0<f NXFՏ A:겿KЇ~`W,;(Xv 4yyGBG=88L0%*W80Ukra:QJc]L@5Nt TMYU$ȡ#H~>UdH:k~ #DM+d19 ^Bh:RC޾`aC?þimUę:hʦ*i:qD|ӐŇz&(*J% $!ш?H] Bꆛi 6.Cpv 8l M((m9g tS;0Dq*6nC:BdžuEG{=@Dx8aBLL0+vm 2w gY$f(A컓ojnz XQl$D>ز&cdj^Ù$ b5j+c߷Bn;% Y<@ p!،ƦBWWx Xq]#qV(-urr{ ?rlbL"_nWo{ω-߰Bs8k<[=N 2MsG|یvؽ{`?|~`¿^X;g=f{Dr]l9]ar^tGc*3e?polx&r.2tNwo;d6N:n..F#HbqK{?_6/Ϟvis P R7~Vi$8V>(HsMQL)"lH.$K _d%(< 7ӮY\"^Q:Jdv^!B*jBDžg!sX{xfS/Ƒg]H׮X"cN_w "pp0,EFh(Ysَb9I|P4DB!F` E‹j@Q>?VP`JCòR%a(Ғzg [n񦖢%! 
,Ca:ej%*^1*P(G| uuAxt5S#G7BԥHzk,b_TS预`Ȃ|O,Q*/<z=ME6$|-Ĵ4t]I b ,Q}g2xBCLDDU&zH<,(I ;POCs%F4!lW0*!F 1V2~`'c% K,!MAB;kF2pkvdI?hL'%^HԙgNs】>#OD Ht'f޶]kv``>d8G@A`|%yiznz׼)C^((=A 0*3ԏ+]u1$.ptǶDY9BD|+ w~'{C .Ξ܁o|ڡm{3t(՗{[~s]uh.C"r]9J7f=OfL&vGi@2Ybel1,/_4m웭EZ_9o\\ Of^ CѸ۟}a;,)d"DXѐoI$yq;F Ρ_"ax'-1AfU}s ("i\\lJ4&U,Os cj7a4YŇY +$^HyR<)Q!Um %&`ԮK)Q'L9V!*Kʊ$Zvc5Zde:Ge+`?_)6/c2ĠsfgZI.I˃oBAbp|}Ypu6}X(Pai;?&Z>Iv18=x~Zod,O#5$$Ϣ G(0rrCEغ P[B~'H?!%)BU^cޢ~ iJk}oHV:Э'FmyA{,Q*GXg!wW 0 0PF JY 6TQPj^|[O En*w;U 6ETzLQKUWIx[`I~EQǡA$ll8֗Up.ykMp|ϱ}? s$},Wۖap =T@{O6N;;v67ϗ׻ޑo:Å9Nu"|F~O w 9-<r;)U(&qvM6d=QފzT!X{PLe^O9` c8/9l0U3n^9)wyȫ_:[Oqh[&BER\ei'X42;֥)"֛08n\~PZZE>*qYТF&щz4IH\( .ꐬUe08UJ,nW) U`֨ SLfT=/&7b~UJ`8[ alq,wu:E3H;D٦{U!Ѩ_:0L Ϋ}ܤ1Yos2װRN,!4:&XZ|3wDCQ{>|a=T~E 몪 4q5tu*zG8-H}Fb7%AAc -#A*RCXGsAU}CҭDhULBHE$Iw RA9\@)1RuG2QBf %+(/ɐM4F~,UI#(2<ns3T6c*'!$)Jp<›=^ !1AG7=0M }DۮT[w 4T K aK%[3BC:`Yl,Ywb=I2܅-+UݷG1hG,|ˡF҃! @Ѐa0lh6m~x`lC`If&P̈E^F/?m}_E!zжv`^s`^:oF 0]ǃ~1|[/xđwY?]_D%1[2bTgwAZuPPWv>zNrsfWS@u~@ՋuoQi"2\a7KRji(P47J] W/р'lp$)@]1okia<& 3EH5uH,ò| +) ة< @׫qiJ:, (7.Ux"%?hILP3̈́}Ͱo@mj.tjjhcîӱ +됚n8;[t]kCKum\1u!aYeʹGm۴q6> #SӜ^_vgl_ѠoZJ&f2vd-\tV *anNTY@ɥX5 vƯiY LC%oZ ~Gy0 Az$h7P: ñc{S qȃEY4~x`!8@E(4\=sdbGq Q܋Bgre)27D JSVfm$41YiI%+i Q0qbٚeRKk k"?&3;u &Zp8@+/+  u5g!:KR߳!rFg}ԍ'I4 BdA4Qkwhd; eq#~>gѯ~?GUoM~ϲ͢˲ޗ&ˢ/_^?[㌙|?k8 ?>4 fs)3U^om08yE%/J07GTu듫e}Rl̉q}ݙ6r^ruu}*;WӛInr:9.էMi}֜ߝN''WIgҺiwW9y׺i^LnNӛus"7Mo^yre;~ fuﶒlA"q;Җ>7Q}Vm/uA-&^[R|b=gpPaV |g?D=\G*Je+,ATE t O"(6zosB˖Uۂhjht9;Z9-QU_8^ !wvpH3oXOS40G1{LWiP]BR-(E0Խ!Eaj>-T^p%(R@IBIZξ_mPTlx8Y?Xj1NSlч=I1ttYH2)%A٠}DX:ǣ&Si `RB[ؚ˔څ9皦mȎQJ>epcbss%KweĘo՜MMLwP:Um|9X[.~In U;*au:cGƛM2i*ewTcPv_=kt 794 b%W̾MC61SCIGXaM&]Ý0\2cȠ183 ZkЍcgҁ$CU(?,q2#/z&!0,~u!geCaωՊ{}%pPb'7Q ]UGQDfI4qdt6Gre3̙N"ItaboxGDNl0Q쇣O"P0|͵hh9cs5b 0&I~߃Zfꎂ!r#|ԁcz6Ǯl{B_o[R8b7Wкh+"0DM ]?:;;,_/ T%N 7 x).rde :ջY}ϡfnN73uC͕|yڐON]cZY}:̦'{s4'|/]IU?%|؛SWa&ꚮ,H˔u6|>ц^*kU٥n;X{ &Uמ[jna71xrqqe޻2y*N&d| ̳6;Ux9<Ztڷ[<x{ m/)t/x рzV|V8RI:MW٣g,c2ۤ(-j^0;ߒڎU >ݤ#F g#01gݯj=[]+ /x|ĴzG=@5X,OXiz~ҾJcaK)Hэ%힖D J+2<+ҵC7 j%]gJ7kG] …žΟSGKLc.55 
1e0[foi6Amm'25ѮQP" {;ۇUU+fΔ\[ m`YGrd"b4lntLMt1uUe˄m54ES0*edV5mV6諸QSFoɏj*g`wԞsGG|P]!d훖7=EΓ(5V6L&'ws,6ltq198λFgy٧뇚/⾬WЬhPr @wDp@0=䞅&%e(Hǣh#ILY`x`$}C87Q\360μ!mLA40|4~dFԼp@j+hE{W"2$UIfHC94\#ևDHsl(q Ë`2!t !&p䅩;;CFSo0._&xi/c6@ߜ.6`"~6U1W|ܾJItİg'+Z72o҈bh45&[;tvR/o |B,7ӹ' pn!nZ)׹''pCxҺ>!ܒ[si>׆W.90r[=؉?h}.uYY*Lot%ߓiݵndbyY3 ߂C `sWhyac6֍K[~+?z_2~S"`S>(qUF;"#REVB\i&)o24#!n|QbPaYkuA`hiX`;\+XTm5/ '1?*2 %dヂxD{Tw@8GZdJrѵ I$P8Sش;﹍Y-wre-dSi1bvmLUs5+N&wlbʰ vU P"32"@ڹ`bV9e5׀jJb-kX-˺ifiZ&|Kb̾o[1Ce;6ݕ U3W1u͚Hʝv&[> w]?6;xsɓ0F2Kb4patvfg3sb tFD.sw4t&vB3.ف*bdGajB/<>vc&$&0uC=O$ }^\$RNr;pճ (;@L.M4J 4LP~9;Z8CJz(JtLO&ay t4S?<MH)>(2RC_:c GZ 4XF( I0 <%IuRn#,~Ws>[\C'pЏcjɊ{M{wUn`7:u^nXK5]V}a:>\_Or^7קDf^.ȳweq}ur}& 4o,kN q`uzM J F <oG(Z]@ASߨyz|OӞm}чL{*nlm)Ed@hŵn ̽ /[W9u{>p>LyiHf2}]M<,.ɛ\ T(.YR4+'SW,_av@28s-l xIL,K:%@,CѥcfSIh?(b(g<- 5'blŝW%abR"2i(vR/`V)5X#hs,iGu6*&z+2ߤ"(QR*eLu)L~/S8e)@5(@,f U#TBA͐!sOI(b>df1=)v;S0nj@6;1S >y{ iµ:ΗAX2Zd][p~ úbA<# e(|׋$07a`llmKQTO|\!n L˚Z܅Qk- xv& ~m1Lص(NpP͡5s^b~-}w)d^Ѳ`Or.$]*d!^ 1glP*4%v<"eh绡gQrr8cf(μ!aQHJR'#6`+Cg"?F MhB׊،  adٙ^# g$󓑏hNLd^LQ8Ox⏒Q)#;P]h6jz:W@:LǙxe()]0AS35c#@KR8pУj/5C(SJPdFUk{A7>0BQ5kݕ(YZK2ڤ UP0($Xح2XJ *`J"[A`}YeB_EniwgUбUq#פa_14cs=8}zz!nXs2>,=Xx@lW[b3ah!,̶}knåAVؙh;XwU4vMM,#CsW@M5BmgXU۲ٕ mZFa, 6Uo e9Z$ۘ-`?n 5<7rL$#/qht4 gw^annHgIg:򐮺A졯%Ff%#q8·a:f4{oe'Ċhᵁ5_co m_ ,4 }[<|ꄾ&V ScpdAY|aN$B>B:qft0ygIb9[CdȱPDL1$P7rP<qa0뼎F=ED,+DQdNQ0؋MoviA}Wni߻?d׿rrZx-[j=f:^m\/"ͅ+_ARِ^nqycm Bz+;MNe ӻk~~4 }w7yp NIhaUgyjXS  b9Wj2D.4;(Z"_zA易D "ѳxgq-xU9I2#;r0}o82SCh ckZAЛQ3JLY dœd"× Ʊ)r<DQ[#"ʯC1 GI1El()C ƁB/HY^ b{Gȋ&ȝ&JWIPs(BdûM;b}0:{tۀ0Jo(q>F ⻞(_/wG֡ckE]K52b>XWUfb2jTe%^oX·@o vnz-Scbn]ݜ4+`f|/$MԺyO0 Ǹ,m{o`E40?4zvyE v T`#aI–NUV&.Յo~~C!ݱw#L .QkoH|5C[{OCb:͍9΍-vMt:[b]O~I'qCܸeƞ5UW~?(SC`6q)'u»My03+a+NV}Z&b7߬RS~JyqIwi!\T  ćک/_p-f˘RA<.#D<HsD9>N:Gv^*f>h\r/,.sP R*rNBX;5*[G3Rr̔'㹝ʳ]~1}V#W(@JM}Qk3#r,--lH5i =_[k"ߤ0Rߤ@p5s*Gۺ*cZд;Hg.KbNlku\ynuVσ/ Wv RGl$,Yfts`*ƟuKV=k1`(b$jDŽ1مNǔ S![ɂTup5S! ~Z| UpmǼ[lg#Ŕۆe}}.-kz%i/n-պ0C3K/G"!>ى2h : eG>f_Cm#1b"4NB5q X~/(" M|٬{( aЂ0ŝ ~2qz@Kǻ꾥o9e $ P%m $@-x442=k,c7z /h`Ƙ^ud! 
Q |D6֡-9NtR&Fc\92/H?UU}Om\˽W˖.dNjAt7hD+0FǽJlD[Ҧ@E*#T dKl ߪu5zٝu5Syzt 5Vx=Y/]MeyBz%kzL֟l#'&ۢ|ᲈưօÿ{٬9ùd|hۑ龟=w注4%U)*N+t31ta-ITaUܤD(vZy `Aia n-R ]Z|A#J/ݣ)% ށXʳ*FW% O2U{ǩG23P;ӕ#o@|4R8P&J1dd⊓(Ng1JRwʼnFGНٖ$h8kvR>@U||;!ЛE pV]Ɨc䩺j:k'( [yT'\>y!cmcx~np/nnܻ9:MFelkj/V~J' RRL[fԱ|V}vTt5[vs 53NZ7UWY>Dy_M"ˋ~os0Ϋl*|%yeۢS v߯k[d-63]!5u-;ΧOe@f:ǣ50*+F~0 o %ey>k 0MՉV:_zO`81q$(V:Ek)pB|Up,K7BIbiU`/L1La8)>wJg`0s<'xUrX|M)KF0w,[\r\>cˌ# (wڎX=v+a H"Z 4a%pC/rFa:lowl JU&aoҭyގj9 uܰkhmJ Y]x}5й9Σ*'isz0fcٵ )tMlpcqj1Sm'OLy/mYu. K^(؍V~/':};għε\wS]Z/)>m;VqEIChhm~k:ɚlX`,.Wc2j}.LC|5NgfVx.IUt%"|5WaW$f;׳f[:ӛ^WK~(.u8h[}9U]@g:~dؕ~Uen6 &[ܨwL~~h[Yc=mݚ/4˩\CDǜ(.UU 8+NJTP8aư4(<6ŽW/2Ih rLYTHSEŏ0R{ ]>%p^xM 2!UzAϤ=)r j2hJeS(KP.?/xRR6=J3֙r٩\$Hw((7C5yړJK]fR0+Kb>K;IRm[daSP YdzrXy2{%~'O*l(}85U9KHBkl?.Aÿ֖.op -_qJdgˈV?փ.YE]Z];lXK ]9̈`ǮVPd[w;Y]u2ť~SM7B7[aV4nyn Ig AUY-z]oo=j-߿>_>$H#n^w[)tE~ftU>}7+<0-&ɺvSt~وs2&n^ak,EQWg|j %1-VY~EXJ(*8@Ձ0@&Sa"Kx>+DLG4'Ѐ* "CkZS,EorfgMF4Kj OxJ JZ`0685Wt'A^5e(Net3+Qυg\P-XzS '#HR N?Ś5~(H0 vƶEAfDr*9nh䏿?va@g{|ܠk@ƆޕEva-7IhZ#Z`:2QXmTnbZ]Lmՠg`6L$^UK mP6[9 [娘sʲM1ˤY~b̷d]6- bP-[7TC3چU.&xk 늩NR;xdRl zp_9KLaM ]2 I/AeҩcNqS1/gM7sa(^-=N"3FYE&BS-o3]F) t/JjÙE}y4/"+DJY8Fpf;~f aFi%P g3CmobahAtQ`,1'GgNPA4|)<Ot1,m ;F dEK~EWRH %wKpm_wuDXRݚc~۠/}qvmŰ;=6 c;4w^}X-pl-/}lܗa#K\ Z'["L`О.a-|vy;2,fl2,œ?O [CͧOX<>eŠ4>q)yh)C۷ mnR]^mSxhZOvo&[.'iVd>v_PbEcBtb; (a(n4%F,@Y*4_9d@{ec(3 s\I`^1E 1\a$ώ%U8F؊T^HF<{9;YJ[K<+Ajˆ:u, @4ժL#>-tHQuܨU0Jұ0#)Ţǿ6*%Žvi+$Ywp@P@h+ $H(6tdcq@Tși ýpXi218iVK@Iݪug6>u[X*+4d#7=21HX97*ڎj;_E,Ēn(z֋|A2$a׈kH4hʰou[V>j\U_KCld2 VjaI qx!g&XL-Q*t(q7Avf,nz:ya^N5 MϏ aLF(M03^(qxxN;,ߟ P bu`${HY Bzir@÷41A1 BGO=Gz(+dӜLON~2]xV+ӥJSuIU|-qMwY#oy>[|H vpa6w(0 =]:K&X:!o1vF]-H0]dSdq.\{= ['r\qF͵U;Z;i^6#5d{m9yg͘xZTuGMM}Zc%4H䩥<t$kWias0qUvIH$|pqE2 A"|[ +یcyl,tI+w>l'u&JltQ1ҥY_) =+Hz됽$9 nj\ݤJLA=U2 v,WsyTSps*E,+~jc\fV?^{f I*WL`YkM; sts$d[5$5U7`_tL8yth!_j$,UcE<\XS]hy{~t$lܭ Hb[|1FˠvZmKq;'6N7{f6DJnr>F&@nd9Yjμx!Il)c( bryf8AUB -g&lC{MP ¯8 LOPxqt g?,EHOi!Cs~5;CyOK$v Zfz23 e\4Ihhg}+94kn8HA[G>Z{nΕ% d8=<}etK hY 
!MB|<ܧD\׵hz?oSuC.xb"+f=*?i4啈w`O`h/ڧF6l{)Y>;Y΍//J0.mNyBzTo?^rS]Wo>jF'-9 w nn{m$ClUGK ${ /=m/ O`8r>gCuޞ/CY6֞\mfE\>ᩎZj/TX,<zU~_2o3]oR/r!-V}G郃{ʵnPqq.,ov~"fb+CzQF  ;ReyޙHu<%-2cy/-#ȬjL3`6 ,KqY=4`ɲ07 %:%.p.9 "3"=6if2cyԙ_~o1_g\Ef:}Pv[9ύ_u. _gmAVQd"Vsb9__QEx_=:,\؜PYP0tr[z\8(b8[b[SaX/ta^_$+i$Dy'M=L}f4Ĉ.6#d~w 목6FKյ.Hf[ݞ H.{ՇRSҕ$3+_uteCi5 0MHɷ^*$=UI:}U$fIXP&xt{) YH+ acl]3b$J.4z:V7 \芙L̊B anv=ΑsK^ q]+m &1!d]h`z5g^ytx{껺~0{C"FcϜm)9[DAZqPmoR/o}ϛߚ7~a^O2~A0s_1I]%׍avGL/7ع>28O2ϳͼԥwOch[ʶY˫lT^ҋf[mڟmaN̶YNH5&&`٭IՋn7XQ`b߰sϐmY҆;x~@buTR'itiةD^THF 3N htc-_()SOA}zš_*:&`kP'h9ޖ޸Xfg[={ֵO,׽s[xy'`&٦ejS]O'HL>=c|Vxҩha'񮘃oׅV1S'*iOѻGRB.OST~ߌ9 t,4_ |ާG ܛ)'w0G&u;TnΈ >yLIt <9-F4|Nu|RgwW M]| QlxYNRCEiq\7[cr1svճA8{j4SZ,{g}IEWb`8 !P4 ^Q5 Ivuw$kIO/THG& $Xca@#e$_e4T,wd]@FԀ,c2@M<1@먰'9f(w%>@CRHvH$`֮_){T׊3nU!Y"f.DK`鉋bDz57d GHE.;Q0IDТi`)fXA(#g<߳4:xdtw/gȜ`u'(`k//åN6ݱ Gh SgS3J~IK*kZfkGX 1hvfrK[;҈&`Fcc08KC!{ KM\> {0[aA,"C*|B__zzX2X|T8^kb1{om.vH*lVM;R N16uJ K]w`l¼<{~ݐe\om͗tUeWsip]TKuh0lzZ Do*/嵪v ZX~ZR~ c{v5p%lF*6ɉ9f}9f?.iw[Pӕk4CnAyqaѵ޲+Ow궻D}Cg6|g;Rn1 ks|=U![<@,TYjEr'L`_<փ|.(N|F^(q j>HoPCZI-6so׹Xci>W[\q-vڻE)8uJ~ۏ]06  5e^+ RK4tdD&.3 1M2`BIS9LP^ j$9`tBrk@ )x 8Y3AB5-P葋ơih 4I00]q`M&nkPI껑gM"]ʮdBhh>9Q4U{Ӟ3QEXYa<‹)فmV<L/2&6ZLN0KI3pʬj!LZ7πKFЛh(\Z98MjbI,Zd?4 ȞxxlׯU0*\;fW7EZ}H nn#\1OMĖ," ٦~٢lZᆊ)-l9|GMs{kMNuh瑢Fcqr}Q0WT~92xz:{ CZdq4qgVY<QoY411 w _9Ϸη֕_Knq$⤴2,;X1ZO)Jׅf6US b.D88y♅SBd-k7JQ]t'H8}=z.9Zq+SŦÌ#܄;c3`M7z3w;cw ;*#R@VzfJ|:ЏP/)jzH)> w TO+T)?ߊ S:#.LA3G ˰tG5>;5,oQl,1ONGTV?ЬW(P.?ڙ7Vb. 
l8ȍ9S/0W):bjSs v +01Ojt9ÖJxmvZG"",IM_K΀xGk&7N뾡+D|1L">!ֹDL*ս/:c[&s}j̗ir`jN<2 /1\cgFRWL7-az8 I06px|G19y4AA=4yh18ʛS?R;yYvvԙ`*xjI\Z1k7 -=E*[|ן+cdښ>ޛxx2Yz횓I@ՠh:x"O繁  qiHQ pO?nFxε9xoyǧu?Xt:א0*nx7z7'",lVSHs|[ 8UwZޚZvM.^l(6MY"#+ {@6at22CNN׻uubIαw]곓}w {GMY2\{W{ٖ]B,1|zU+ ]9ZKF*ǒ`7{vW*x&5*mQbP>@`NƗg;7zݮ`À)r5]^HCzO,{16Nc|uqlM<<Ŕ|N(1\UaQbȘTu]&=m= dQ-S2K:$t eCĿ 0ւ  A2ˈ8 5Q1 Ȥz+$ZPb< K.I m(ĴB4FCՀO(6'nUÄ&Wf6tL.y?&a `L,)QGU쏌>1+!lnhsᴞxޮ?ucS L-KE Nl0mklÁ |{25C?-?m34 +n0`2-E=!gHccm՟bLQ0u<҈ՉWR9cQ26ХY,A \iw3ݞ!_w,١1Cg( (pc]@l{ -UOy[e~Bi]L +ft__LYc4ʆ>8 WXnUjdS&=@SW&ҧP=[=zA_sb`/̥n/SEpVӢp{9uZJ,bnZN>`v%Y G8 _/6to}vKb)&K z"e|rS>Z[4uZ&hފS0n]lrln `SԂ rׅ]8!|ҍ9ʞrB#E5殗5߶-01pډ-gUڽ5X8-/Id,V˴75ۀ`wMhX]Ojw)SnhFw-*=s~3~i,huow+*\s\ϕ B@y[&#YIכLԪA.5Z3ǔ[S(IJXGc#ru VQ'`K ȵe'j7Ki1wk˱^Zh\e.E4˝DQ_KJ.b;g\Soe' _,3xH*p ;c*Jds-E Pyx`Yl%>kag]!dpl(6#n$ 5F}bT6PI-TȰKtD::.@]u(u2QJ$`'?q5)bZ70ZO:2~lc׺dKHL: y}`]V1:IWDL*"L@lπS`l&7@`0B.G*Fxnhc虦-l1é2H^!dꞏ%tǷgں;D(.`'cΖlyFH7nl#gS4c7xNQG;CJhӹrl5K`fCmf 0"c'Y[Ih1}=?0;.kh-&gãS,_K;'l;-3o<8}]8_yG#BC(^y,}wj};wS[uܙD$Ɉ"҂tF-V$ ' ؉,pEDp [GD$9]3=3^LuU潪oF ܤF_x/Cf陶X0t>N[k  m7mˁnuOozgU:r ? j?'gkR~ުZy_msiZ__Gڪo{Weʶ~ .KQ'eϛdv}]5/}:ouNM'y\lNbNfzkQ_$oc>LI3I.WuQ͚'v}VWf7ՓE~yq\\ԯ/ϯZ틦xվj&bbv_Os&~ڲTk= S,KNrm~O5$K\\Ou-]&gN|sDfrʸGr@i.N_թ{:~h`\zygzz}xMJف釬9,x+Z';7mg3m{blf &svU/WlKO7˼dfYMf𥟘KI@ PV9W-Qqh,I8󅳆$s;1lqaB5!(v)gs,] HeM4_X_w>d r 1=3Zp fs%qW9P-AՃ';תWCNe4򯅼"~髟Vhm;Ʊl|o ha*2u mͰj{p0a4-SGMtӔ- B++MnȒww::n6qLXRli.j]GFO9dv q-7h*C$=P5C:y!ga_fM"NjkZ hHyhfv%CǕυ{*s4҇K?!nJH(-LI s/cd̕zL-(D0lENl[p{ Yy3u<8G酖3kam9h6a0q?zqv! ='aWlDz `n3{4vDǷp9ZcXb0AEh 푯}͵sMpL=phBs)|~4BV#'ȋK#.&pnMehYQ|>f=#݊FņNȷ,FH a'F+?%P_/Yx/w|?V[Nُ_zѿ$_??׿I}xs)z/w7L/_#T(x7=2zm84&wz9Ȉ}}dHV[a5V/0z`Uovm?lDF< fEhv"iZ'fyq3:_tgMj!&Gԯ.Y޽\\^t'׍dmYw1/tнXqC\\e{qr~j5N''狤ۺZ$%W]{rѺn^]'Wxdq}%&`kq󜼎¢Ei.5F[v#߹ͿpJAd;-3%P6Ӣ2 *\?Wӹ ԚФ1?j3xکW Ϝg8IWh0X8|\y9(fghq7E{]mո| fYSLayrDT+ݧXBemPېxUPwgB-h(7Tk?-E͡aN'ë#ֆ24,Ϧfۄe=k܎!1'}Y/xL_hH)#AHfB2 wUա.Je<0ljA=4;d`avxaPlb? 
JqWLG&}ŶR'#Od!%uaTŒ[#WV"ezO<Ǿ۟!<]szha;TG0v]Fmxn23<~f8 3[CrӱC Q0 f#WK{tWEa .-Yț@1/Oޚ &zg!_lYE ESm`ԷiؗO1HwPS/P}4L)?>J7X׫w-8ןۢl4Sw' >Am;cjS_۳ +Ͼhh{{[یQ o}o44E~p8O}>./0~j6[bV_ BW(S'Xf&ˋsU_ ,ULw}=:؄M)7S<[($PFkyKΉ՗O%WVо8;?DM?{qsٻ󄴳&y~\w=SvM3"x:yW\@r,m~ԯ7fY~{V:?t~"U;սf3?f~-<="#@ TqL^#Vۍ_)v Ľ@8V(˹S:.V0r;\sߩ w?*x/jOJS LT:&>8b@FYq.hNy;i߯|M|D6? wSbste[9n14]+f(G I?(4*v^\j| B8}[<(0˚+Z*Ĩɳ$P1u Ædٍ̘Z'k jnMM1F(ꊎ1t\g'BJ*˻0̞ـ=RU*Mi tR@h!gdնz C[+*H\ wNVxvDLflHzmu53aH-ݶe[$o۸Xo.>Dg)W mL.\O5yc)OpQ:0Fљi}bbrtQi?Q7T(40%ÛH(s3CSpƱ;]?"c=vL&Hx;YFql#v`C} =ΞYruwE^tƾgHs:qwB "mGo؜0BɹrU=ӧ#gbRI0!;Б̷/ٞx< c<цCAĊXmI ɟ4`zҴoC|7uVΐu6;Cΐ}ZWiC{W[+W-7ZSeɚv YGȯݿ*n# mD[/*=--@ZǓ,CQEʝdF5}x_2Ǎh@͆H;6β+|)'9y3i!8k? 8~>ˍ}A~V(F!qUBv"5 /b&-n<٫1qۻ*J~@  =|;9{q<+|-s_D%߮WC0ps6W8}v sZTi͂Vg}b*g=@Qir}T{K1N" |ȻC[4.+Q+O*U@(;|n0Sn2;֑` ׌^lu1+ 1 +EUQ%"j Jc|?@h 2460LIdU5(w꺡%SS5l; {bɁ"~Y4j9 c!JEW>i`@{f*nIS4]%®%+v󳆱tX6%h)2e<ڀ95%-T4ĕLI>&A0BKar&nOH1k#ɩ3y[5! έS45۵1POa<L!Fc Cs9 q<y81fluu=M 3m#9̽bޚтL 3|1Q]bqx"O\7sӦ،uyeZ;XYz7j7\.k m6;d7dGkd!L 53x#dz.o) l"p$C?XnRgyzg-jߧ6`_\4>䋓dȻN|ҽ A7rdI}ŇkX7ܐ[wRJXeWzGܜ߼ s d~/o&~L^V>{hpӋE5(uJ߮i/ǷOn_7%)S֢V +[,50۩EBG<՝[+_TK9YĔ T%BgBUͧ:(QC+i~X ElrLZ3r|t梣y=)Q;_. ^SŠY)1U:_+;O<]*ϔYڨҹݣvHP}0iPŃS?Zgʏ~ɇAm'kJ5;^n5*'s< %<}Z(kU(cG8'u'i9sY3.?f{zCMMh ;oObv!hKc2uU '_a^L4 ôL&b(ޘv_p>E5Ueqă-_!,mAUSnpJ햪j,5tOSU T;ZGd(2au;#z /OL0g?Ks3YubXSݣg?Pk'& >E=~15'jI׀2o|/ZQ %Wy1<><{s xGo}>& 7wU-r vk_0*;zLe̝.E}$e ,yWtyZ <Aֳjb55_)*_aGH(y rW^ WiYn^@?+pO+B?`ScZ\yp+*{TDVd)Xk& ~騆NZbR7D˩u[U"_uRDe=SI($ =YTz{55(wqU%C%"\PQNϔ/隢IHl;&r-̰_U'T 6>'aSѭ2b,]HAꂑS Iz:p2`25-Iʬ$+wu 82Awb?Ec#Ty,bۋ#._x:+m a8ȡ5I8"kd;ظ\ҭ@s0aq' {6 aypA膮; dg̦H7 [057ML1(ܱ1,"+@"U N8tGcƣoI8F&Z̶˴'VXNd(אq_}i? 
GG'_HYG'`ޣ`'oغK-lv5<aΐ2ØY12}y=v$~$oJt?WJo;6氮'5h+QOv8:ZKuB$GXa뙧fdhd;IS?@7^ꪾpK%:␨@35pcnaZQxDzpZU2;z릾oyp-Lp|:32N%#ʼOjUSӲ.K}8~t,cC#rob4_2lBGiH> 4%b`(iRtbt$,= h߄BO!>`d^ꨀĊ`Sb&aؗ0)fjP2J8)[[Ո$MKEpk9لсo} )vjN՞JH5ò3'LTn`Lz ibg"ߙ`I4Ĵ7A3dFd"i,x$Z(!"# &?6] KގkLb C8CWg)rMF{c25f8Q0G>xj@|e𿣅"#~}- &iӪhgB~8&}tOA3{PKx$c9SSg1H<[LPId!w8?TkM5@c40x|/; <喀9&%4,׵=n2l_CaGkUlY~^V~[~ޖ϶gΊfn@Y~B "QcLnkjtfh]n~\ /vqn/ۄ1UWFjk^_IIH$JncnI:clq5k$޷,EM]9X]CuwS]WDX릭}OmKɃ#{;Iaq=kjR^կ&i#`ԺԳ^̓[*3o燝ַ~mYmR˽s7 TN^F 2v |xRB9٭׸}UKQzJ08O4c zHX`Lr\P`;;y-Q??tHZ,Kaj$OS:LG3)0LcRb*x*p-1O)2ub 9a;y\$yCw(aT$,^ӻUiv)./TJ B/0@"ҋ(C@54q2?@\.O5ZTSG^hc<7*? 0]$abA"mtu05H*q-nIZD6 C&״HV2t4!aɆj=@+~gɪ!0sno +ю1otH^OIpo Nd .Е2„]UH!k*~#u2~:STM%H钞FAB[t؛=3Tk9PzDEgH|wEnwbƁttwS;N}.ǽۡYq` JF(|6ӡcWu(p]ىoja'#wa+P1QcH=EЈ\P0כ~ zfNG3c8vЃ^l\Me?@!@儞97XR4۷^y/Z.\{Ʊ-#م]# ܿ yM e z1M]Ƕ”- 1{[2~*n 91R*u /H Ԛ ,IjsqHn0foZ77Όv}:ռԚq6w~1dUfee(,Z{?XU V>%ۤ].yH}UH2 ~Aw+=vut'-SJA[*b%rt# UC1hrw @? 6|$WbS2M?1hZOAS,'Ap{%G1Z)oJ/_`F .f("wt}P+ "U+X.%\1h1~@NIQxǵ@Q(P)*0BTx.[{@5*Q(Jt\5 nDg ɛeˀK!1Gr|J(w~Tww| g(#)OבQcuK=*tevӈfG6vӗt217 JfّDk"bjjvYaA v!g6-c_kv'[WM"@L"TipkBP7lvjFUhlIL 7-EgEک5-P!98M4 /cKUl3] _G%^$5q%b8 (>T} "9ЊT d4~!'Ls"Xm Ë0}t8Dcf _^-&Qo&ZIr~~ݼZf㺁K/'`l_Y} :+3|6N[ʫ^:o ěr+Yw&=2l/L9Y leP>Hg ff}q #0OJGKKs'e4[_\-\^l*U-Y_}8eE=?)5^S6\)G3 B`U+fBPKGe@T&Y\j3^sŕ" T+RQ~ڗ(#+ 34hpl1J$;" 5ߩyڕ!"Mh]8g)Z4A\bR|E-ՄjRpypHE]ReoƢ9c#>wFj]+MAlhi^`h6 б-c<W4]E!|WS %- <0P[hR<| )Lz1=3Q QxC#{ G91#Xs<{4Qz}_gǪ&_+omi9޸_&#˒kLԷe|E6&o]pv:ӫMrqlo^u˷Cޭw6QNfׅ,Ztj'ݫubdu 3k=f0[F,]dv{'j$C)ɂ(嗛K$0Hwi7#OSA6ܬeROU7n?~JW殮.ybKs:tv{v_`Gb˭uwWq\V~SsMlE eik8c?COĂN!%R D zUe1w7eưnjPPlUgx<_l6(~?1>U ),S 8PuyB0 A%UVHI+a$w ucx0bJssK#z(tn:Q%BQEuZTKAKBiL.qҵ M]L[#Wu}Bb~d'vBP=EpԮ}X{!t>t iu M\I_k=}{dxSWp~ծdhN,[K\Lu&<ӱ5'(MG 1y [ab[y:GG!nV`h2 p:E;6Ƕ 4#ԛ?@AfY'좮>Ց4U,W 4qq&R"4#u P:02`Frxh[z m$곶.i8uT74`Mo؊ڒoQ_OY{)+?{/=2]#t{F^#r'mq_[*@fvw[`3"ܶ:ghm^ΔuSmV\oxgڽNg}6I0K=N"v/n+`-'6W^^Nz8\eM,m-I2=ȲNmQvr#YeY!ծL?͗*ur3&j4y7ij>d4~2%L@S|GpUW)!(%Pt1j>y:# TU#s4|vH<K"#;CQFa=x h60K`s46)%0bAᇠ8'uˇ<{X"W8 W<],UD E1/Xy;9|IT;O)>_”^ 
6C81(@m@bU`MçIb3]JCƔ_9D F8Hdde7zSW6z"!֔עNCA[xʺDhȒ=2ϐD0x90ӘzW'i;},W0dʝMsїRC) a*IGI\㞹46/Q.U]]ڱMK2YPL44"˟x0Pl` |G@ckA@eԬOb "ƣ ^0rqlG+=sЁ# C-,Sf=Bہ}+pQ³ط8Cs 9ʀ,t6G/ONQ/СoȰqӞ; BJcj<]ܡKWgv䆞^k;cK9>(PO1 1@c8F(Z-u_,^]ΤdE@N>+}ٟ`H2m [}!zD MzB3-a .l3[V] Y~+_jn=2]$go,ˍdqc~v~- z hMv~\S{_8n\-K]̖;#lyž, }o&-qח9~>݇[Zx) !#oZV۾cx VnZ >ܗĭky5mgJEʗ}X<7r߉5C.I}]]R%b˘?M C|L8c,h-qBJJ`Gck|. O' xL(=?| GfԎGyj%z 2Yl[K?%b ~=W:;qmVgUܳaS1<"KPW ;_=6jl 7z8c+-zjBeY}y\+PN1@40~Q$F1,ǰ3J4bƩZ*WWr~*UlW T<σCev84(l-B57e-֩#은9K{Z;RhPOYA`^}3.@8ipUQ oq,끒Pkp-Leks|$2G*EUvVc Ѐ)bdLwF&Z3 NR0 QYZZ% ahv0@ T>Ӹ.&EY$t5$LT̵M(xNԍcՈB{Ոk[Yé&EkChVsndkْ0ɤKھ(tjӛH @뙃ۀ^t䯐V&R&x2d ~l/c߰coP6nvt z4l˵Ccc4631ASoI-&q!nmDHs{PZCD]abןS َtu'(8 æ!;'Q9=6Kw*xAEU{I5eՋEG;Zo qq-W߇ih wNf@>-p/|Fř^| ӛjmpc^1U6lInYm}%1.#+=7nkx1-Z$$DML I!I5mՉV{t)t?X*M;ݽܭBhfm;u'?OizJXZzÚvTp_zZ6FCk= ?ύtxup]}If\s9TU8fOsqrIx"RRRdKu*@ cSјL?wi*SŸ`@TΤ7$=Ñ{U,JTSjI5=V.gm4lh ``~m-n70:b_|m0 ';,&Y,)[ @@%"DV=|ޯպO5tFY'fDr_5ڂ9(#GcTIf 5F TW2w՞ 5Q,c]Bt:pgYĹ:GSNv,{e@XMX+ά ϓ{qlV:,|z3ZL; 8"KIL_ߓDkgtVHe\ ~HUD-AdvHEE[,hN&cNͷ/zQлoam&W?x~tJ麺!w7A~C}|M ,~Yj[!IVȸ,·/vWVhcZ[oW\_9`-LQzӷOtz'h}IoJICU%tUBw~w^ZtOӈ7F-EgAq {C(/ܳe\&@ax5?XnвnγYWz4mUE?Su\ /ue mUkK:=JPjKl7ڒڕrH zCiɲT:muk-vZ/rXHY5uv%UDuI?Ґqw8jSe N֟ͦ\u$fЖUTkԝZJ"rk 4b˲HeZrMjBo?\{*cE責 z :#Gc\?'7%)y[H#MF_,LtkVYUf17 Hw%BKd$S"7,q,*1s:˙%gX8 -MݱeZKgheGj@| S,@I 14drkukhpP-xo ~,mf}H$sVDY2?RƢdnѳYK;Ìrg?8LI,,~D׎ 00 2YI 3X.7yRc+=3O0i%Μz,#IXs XBczf8{4E+ZxINO񊦉4Gc1YKΈ, ;h<4m0Ʋn ? 4ϋ-rews?[,S)m1*(N?D>>|7po;wؐ +~p{/hSaéxécH4"eWś Jjv5ضݼ*71lo5TY3嚭`-9{*.^U$Amg4p-]隽cbD5P6'Vo-|U{о҂lJ܏nݻ|v`#~wռخGt9t^ˇ.^b{ߠDRSD'jHԶ>d=7)VZĞ$i_q`C $"+"A[HVakNC M/l ؒkۭAXk i2jM^I|%Ijr(J%tzCl&7T/qЕcMMHj`I  ȬЬEDQMloȚ=k7j!Q 5Aji/֚"Z-UXC tHpo1?F(7;.gёit Z1P-`gp#i{>-K-pU~A''(<}qFJߦ 5%iN,;0=qY! mA;d2v tSO:An^ps=obʗ߁:wIhM.;GYBRCzQE|q]:ŒZy*"#p8V;asF*Nrfl[qp"sp42? 
ONVm=Kp: ؛!`J-~>qJ;O~=iq__ˏί!oH%Y{]UE^Zqvdv+r~kjpz黛pے>`}wu}aԻ ]i~{ҽki ݍ5Y4cTu{~@)7=ݿ}{;{LQ^.˦{}d:`;fGGց9 ͙goU#vt7]⽽+Vx5HW5sRFd]@F=;wU:cYY)&R;H/bOT/c],@UuUPu~O|=EURJ^ ToKROБ:>Cr1USibUmv 3CՖ&bKXֵWFSTYU$1:VQEUxHͮPSlJMBņI#]XOڲHkz-fnWX qJu @e~)2n2PZi'egs$KKkpf LB{Z(x]:UȁKAtSZaap$uLZ/%քWKO,!jF. +;.Y2xD<ñWĂĐs-&4wZJ1^:LM  @_ Cq'V'd悭(lb>蒳& eY@smsɌ}F*)ȤK Ȍ $Zd^"l7~b=,1M,`CŔSf) gěqFa:)Ҝzr;'^8M!eF7=*lg*t} 0u++žW+=W{z>εwh&8U nݻ?h+NϮ`YIt6xOoryC:WR`O`> j`gjwwm|{ |D}~uϾk~S~)=# ᣚ!&j~H{*ʿ>%uUDUjJu֡U%6Ƹ%+_: 4#IWH<≀MQLEԑ RCVγ5j^_u+kte[M>l,Km`Y=>Uu _Q9B:4[{*`?I?ғ5Tg R+"Ut˳,le/Ђ- :tLrJs\.4G{XC£G@є5+YBK^l Ӥ?5(V[Z ZP =, ٴe4'6JNm~~`݇ $۝)# E#pQ\`%-PfPzD7&Re%ԅ ⹛:.πXW ̕fN;13ga2 XEr4J ai=m ٲ:#γYiF,:.[ױ\Y$\pGγܣQ de5l/J\}; >\ ?EȰzêgj&$*mߑSת` Ԫ쬰?Mliy>7 vKΫB= @u{-FvSV@ΎU|=\FHk/ xБP&1g~7ӸO`yv6]Ok9ws]/zt}ulx'뫻;Y{/7ܛZ{O֭Q'2J`bP+ unֺt=\ՔV~h&GΉZA AbE|*pDYG ~%]B I ~S[ɂ{.HQ+h.i:j;u]pWJO9ZSR]U,r 4DQC)aEmih/ΛX (J} )5~ _ 76e;e]Ru-S8 ET4j&HL]F dw G5=ʛO]3qOK'* *ќLl|.5AI58ErM6GyBWj6^> cm_|;(p (?LJ[΄֧+0NhpPtQBC0I']j`j 4 R4pɂ3P2 YlB-71c1-N8VH.OxqX b3r6ciX,i20 WN<4N#: o6e&ZEA5 IePfEiEg4Qv򔱐zaa/3?HΣa̝%aUNt&&^$r. pcdf)(&͒MIquč&)Xjٶ?oJIčXrmOpZp||#K_`9ߔBC^[$K~yϬ{UWwWO\떕.YU}s.wZZ%Ak&z86X$Oh~`~ a,2 4  _#gp$}⠧"V}:3~ 6}>oգlH/AZV-Ǎ^6WYFyV`h; ˱]_~2#˴q q0Ǎ7z~OZ^K->l~`9忹l[C}}}6.s֙geYsyUgF֚^No2y~56nnrvuZ:kȧoo\^_7k4o|*_ή^g̲us~s_^MgWOם Qc@ mǁ퐷mݰKB:І=emĭ:@qlBdj ha*i(ۆTk`{j=FckV޿B ʠ+6h=wZd&YU3qo:γpo}W}ݲ } 놩k1,SSL˒qٳzm]2I*SBYRw NCy~JȶP Ps/I=m*4' -%aG6{T`d3)s lJtQɺIh۾,{!tE7SY[_~[+_4_<>ϳM~e!oR&yZۻ2w5]@YW[7~cd_>'W|z_| |Cx˧9 ikW+n\q﮽朹Ky}zŒ΂%u$t$yssvsuV?{ݚ_^9덫Ε|9[d5O[89o~\!6ݧ0m\Xނf5JrL!2VoyeRN,d -=\PVKժX(A,E[ %J<ۨ~YBQhe 9lFfr*`ab+"ÃB-8怮S*2\Qd-V+)nnNς}ǀSOa[PW6|ozL&~`Ö2kWtw"+T}3 Me(.zy40aʓ1rRB6A BcO1]ibHQU^},>P ԗ-Քԗ\MY2[ٲVw?!"l!d77W|fg7OyE||]޴q&kuc>w67o||]oFfd74kNG>NonBy5 Řy8C?; 6:BۥGFy?K}Ov0[rL:\b*؁ UW>emb=vٝv4o s!W[PڣO-^SU/4 'ڸcE1p,ψ}tbZ"b@|ӀfE/bSį WY4ܠ-jH5A(@K3 u@oOkE]d\*Ժ).s#VICR4],-bBR[>CZ(3P%q,*SG P1|lEsl("J M{xAQ,  8Gmvu Q*BVMjZr+**{`T20 XV.\8d5sY1 z ^M2tNЉnI*&&6o!FNZt ش sMuŝEBMl >hYXi:ɥĄBsD(u<-L=r-Fcj036DEi! 
i902<8/PF0ȜxbĔaNy8'Ȟ`zVDıi7wә=#D^ Lrg41kdGopK-RQ2L( !H3 0 q]ZҬDR6Uah&F; [7I#ő;KxFQ8ǡƐ?!/އ! vP޽]ƷJ?fqjf=[HBiKwctmEW|[&W|,xL.\ ]*N-`{.ǃ^WǿKAh} Yh<\?<77ͬq}ՔX#Nm |KjL˖ 7ɠMCo ~C-oc-No]N%.d(-|vªw gCݶkg])-0{YUPa* QRR\p|j6DQ˕0Ăr U!2Fu?L,l幝%IK9 bδAgn??{]g?~{QKoo[z&ul땃s]V CrQ-w2X_.1xgk꯯t]Ne?Jˎ`p=.#H3` ӳ*Kq7ӛVNhQp+Q1o9ެsp:Ƚizi+5jw̵$Lr&*;[,nVy[OFtv y+ݒ=ۅVv-2}P\嶮Y/7/z{l5lmˍۣ "UKH?h+XG/p=H>Cɲ>*Tl{UJT*V(Vۣ `"-K=$=+ʈGfEPg`$9=)( w\ /S28rgF[&IMH5aAKEv wvǔhv"=X[ɜ.d0-бe؄-cjmhf&-occlۘ*-a=Z I׸Ub?L-pu]naoOųoyDPt ML쨪MBa"LHNw xl>9aNLvUp W\k9 ZF&OFoz /U]4ݝͣ 8 P6QfyϷt4G U}t1F / '!#+pGЎߟ {E&(ʐ#rt9#qq Bq;SLtj%qYcudN^޹ 8^"80ţ檞Ll<<3A`sG0p#ۋ8̌a2Ot0 @ă[DmtM}X]<{JZc߮s/ -B*{Yjb_" ]エ:rGJ7^ߗ9>V*mN=m A\eEO|IxFTK086svP( FPdSdj@rij*i0 ~m( e. ?)0૿j8Vh |,TI%i"G=>nsso>nIAtK/^StX(7E|nWaEIGta\ P[lC+R%,BQDvOh7uLG5P8/p|l|>a|-YMa@ U[遡 Ƀ6 5dSi۲ M ** m%j0ma5̚dT>Эkib)VW驝~G3M""w i8mEKSۖ 1K(q}]AO@l241)2-Ԉ{610:vJlqG]& qj@XpE T<^- ,ePi @  ^`[|˵u)+$^a%񡱦:~n Wg]9[ཱྀbB+Qŭk~ &Ks*n*g;%ڗzg\KݹY3NWL|u- ֍lAůߜN b/ ?pz0cTd%˵p:XS%?.G~Ow9ޡ0s0&6[6L/s.+nQj͈r-7/wb:zoV.w|]K k݁{Ndط WU@W.G=+!*tSVEug=Pyy+O8x4VY<(WN&fI(v/UJ( U G`SmOi 9) Xn[LYd4vTi;Pc$W>sT*O9C 38$R`;⟕<ӇD?)p@ @a+p,h.9R@ w>LO t)VXk2&b1k:=1!~%,ҎIѱ;udv㤮f*p!$+99%_M,}t5S%Mʘ#Qu1;-g86ٶ&TNo؆5VڷJc98yU+~tb sÄ`xqw]|^-d[)˲I->]Sv׏/+[u?ks'^5+-ʧY.^7ٚn oss'tz,/E?J./lVF-25a-Nށk?SsVn]ywCz %,z%.マXvOFR˷,_ wH^<SεXISp4QʟfPZoF#9Vߘ\;(֩FSQTz|v3}BouGZw:l[W?+2WDܵeE1޶nBs{9n[U|mظ\~;NZ\i ~o^ {xPBf[%c+J1S`/jdL Pz(j L-Р%8bM<?b%N*TAZl4X^:>6O3tc( 0Wœ:sbV/9iDghNBGLTj<`"P(Hby^glU)\P(~Tt5S_Tʵ!hӔY#vO\Mi>SM g {!%u X,G*qZK %Dv҃ i0C60^,_@y -b?p+gs(lCR_a4a߁cwFmE503&Z 3LzBx %JwAP1ba((m]u,LĶL|}4y00Tܝm S%'q˺a ȝ$xl+ `?on61y Tzk$$1m@\DS r1'1&4p|׊+ YL|/<썐FŅiyx맣dM.h읣8Y6H]tQ0@%r(LeC;y!sٛq$j{#1Rq.n|2E&GUD9Do34J-Opqh'ش ?(3  4e h4Z rFLb<Ӽ|Пs+b,O[ ~M?4cC+`n؎nh`=ח㭖ͭƷ(VWg[AWQ-y*[^z7U#{>ɷ'ꋠ͝9O,Ptn:hhbt!wd[Bk!rk0(b!cHuݙpRMհ5Rx˼dlY];{{,ֺ+-\WeYoqη gmĵllk;zޡj|L.{F {"2^f9P>/luYڥ^Ez$X}ͲH@,\.2GTM+"D>8O>-}~/_WJT9+ }SmUFؠ̳=-0^7OJ4Wӻ(D=K͗ 3U6 'IgX*РJQ 6efycfGT KUtON_t@5Fy_H \*~M" GUfYVU ;SKP#pH,$ljLޑ?b.O kij\5RvW7r7~ 
aѠA#%y!A4LU6fQ[:߁ڋP6mKa|}^:mbP5{!(|ٜTNF2h[f}YEA/5]H0 -0Yg/B9`HO3/L1Z8 %86rf 8<5.O]ngn1ha!sa4#7&%(r%x8DŽ|3F! kx:37=L NZ<qft%QGbpF` ӿ 0fdua w#1Cb:& 9;vRby[d D+4~C7]_2rf%$V]b߾YӓW;W =Vo9C6FP~M广zr!*֙pMΝ+ެ"Lz nܽ:[Fi{q1f9vrmoUͼG2-ފu @mu3m,mn-6 fȦR,Y*5 B2M3g%Ph?Z0moExTr10.0̗ՙ}rݲJײd(=bJC CcJ$ΕUcY"mĭmcT-U3f;ee>&䶥ULu[cð,6c xEwd:YPGpM%m *^ճtupdiNۦi(mohŊI`ŸLbilgYCr+6|K'3ē,:M4A=cu cbx4w9%cEwǷ(%ǩ~B?i40&c>B;Mv$̭߯~4 CG"8AZ2#&bѝig ~ BM{HOVן1{2nZ9KuL3Z X/Q*8l!s(4(!د.`D"(&]c@]0 V* WU>3C(PW2:0=Celɹi8)Ul]]XcSeI '+L^1z݁&Sͮ  2l;^[Ռ>Ar>BjQ.ImLP.1q 1tÄ6| hj 7,5ݶnJO%c`*BJNQ >Ugypt "t4$ ]W$ GYETImtn?_Hsv#hן\b5"}TދTPE*"{}σw@heaݶ~ڢ?\7nq%ho?U/Y_ U W捛Nh]Κ:F\l$ w֧blӛ|%W}tǵ%<4>U\.PA6B~?>ϿsZHp^:N.|MGޯG>͡  N+ekzߺu[KͲًρ]ZFR·䒼'X_G,{P*%ϖ 135chg T Tb:C+mr +HT,R0Ega 2 gO,@ "U6@XşY[=QJmpZfH)@ŽbI:5+p.T1@1bf8pRU+#\@*U1EJ=Bc.K{<ӣ&`2-b̤1^ev@ 4 $*ADB24UDP^Dcb#4%v0]!6Mg&|ff[M$ iXK ZF883=Go^LB#4GJ o#x >2HP6h8G{dAd:?5hеi<б<"}?NG4C2& ph覮7$!=\Wv01/ΐeW%k4)W; Ai`I….B$=i@dޞsk+׋1ɇ[:&:m҆F$Q"b4ax@aȣbzUXJU`.NHE1b'`H47 Cx6yB}@JutD@&fpjdiVco*č|4=7dtjElnÁv3$1Pl<"ӎ$NLOv2M"/rx0~I@mi!rGq0R(0E(2 !0C0tnqзiejw=$71!=͟ĞcB䣡‰yApab`LNFfju24 QgIF}4l8eE2A9QohC4E.l>4χ'.4'npbe{=GX!é ~qu;-*;*?T컾"7>xv+-:ݐwM]Zk|5+9X ap@l .b}I9[g7VNV^.:t68{U׍svZsgV݈s涕ozq:[ZUWk3v }NWY\ѯ޻bQ{8;>+m%PrSeK^_tݺ9Xִ]ߑvq+_\!q_Fukr&Ws̰TiFQBzPT b4 ŃvO=zǗ@Mb LN G%Q(R'"JYb9R3!Xj?e$ݧ9TBA%>|!qgt{.jP%a6$]8ޤJ*2 'QUaY ðĖcJiXJ`?fYA+,/60bĈ UB#6W0`w4ªdu1o3Mly\Vnd!;qfa {uaT%"upfçb)G[k2+Vk:jː *}Po^vXho묵/o[P7㱾 X1qUUotYb~_lڹb]=\g C[x7n'$(+zvK;ƾ-I~e6{w"N*GOD^NW[Q=/u 9u|<&ѷL3 n@V,x"G,/pBE*ObtGDA)$[g;UA_ i\,7r}}P.2E E- h 2" hBaVV%NmtA9~vg'cad} ~q> ]GЏuAC/TWYoѕ>  PT7>*J7*a̖(Q䯞U-] [6Bh#w0+F|#>\vl+ZkH`wMli\1);*`3r`2qb`&ˆ}ŶNn6T۲fZj-m`pJc*֠ȃb1vOT˒Aݖ{Ƿ${ݞކ$rQ=+d ُ2qהkS$(L (xȟM'8"?q 0 (t2QE#}aQNf! 
&B 91b4FWOd!7@#(/]6r1 (1ƸYۗ g;AO1x;3|s];; 0IA2?h>CMlpz wp!S vd 0r zJl\l .km}js?Uj_[:IYvM;h {ɛ|G@ k[``"?]5o|M^m[42_]hL6a=WJ‡ɵu]r3]^CSyhW2[TE:({{̾C}]ԁ=d U[EeyrkmRl ^o+ɽhf;V­mwof{ޥ2vCsoj)(JT@Qc+,E{ S k_(7f]f[gLiW9K;Lw4<~&EvA@AؔEU(P萕>-=q'(X~XPE>Kj1R٦<XΞUSn-Oj=Y MQ[\,*T cH-Z,te‡G t~htVd`N"S% ӴLsN!3iYwCB-Y#qr~] 7n`pIuFηKw}swCŽdif nM2P\& ƀa8d9{&[25o?Trq6cYZ'q47.XɆd/h.b b6X ivoۺ 3/pɞŊP(~Dd ^R5Dx?N$KHlndv:D/`;á?1a o7pt>vSd#Op)R⢎0!tx>1_cdM slڡMuk^׋$ѐd#/$Rᅃ"ӋDv2DI{a#Q<&fg{4x5F}1fqWq0՜xyLv [ݾk^xdc/"UdUt,u镞"Ztx+|W\lR=nz3]m`:Bv5]mvz!7:Ϯۻbs6]C߹:Ȃ7t4܆۝^nKА7);vwIvAY^U];:x.mxnβ);vޮ_bkUoD{rޕ6j9k&6j>W"S*2<\uI0ڡ4(.[(SJ]@=W(W@)Lԓk<+ʔ!SDx,rqw5B9mA,YyG]b(.0|xT>;rG<h-I^n\8]Z^D8ǖW[F>Z%;˥IfHzmB'( '1xayExıIύ$?L?RW(*$W[mI7+ۻo;-|vɒ?n!?G>/,ק e] e]U* ls9YMb6!uݓ(%hbW N]zt煶.wvW9o*OUߵ Nn*,^Ts}|ܐ$y!Kb.5Q{$4ĶVEv 1#6;'|úKQm|g:LI|SI̋A\ft G+vYI#ÂTc5E+i _68g͡>iA95R-J!4P3hU3uW䓪$[E04<jq,vYr.1nvrC@(=0 * T攮8Uo˪"vSU)2VPN?n>eLNW+osg͝,]6IS Zohժs=F_niu 60=E$401?={Ӫmۀ3i߹ >(tJ졡1Miulg蝕wҿw7wl<ƝdXaXb~v{jskw˲HX0cz)o'>?\Hw]g -(ۍQ ]yNTH ` g#':N:HL@pqͱ+:l\y<ӚcV͡>F#mj :WTsdPĦ)dOCNimb`H43/pb_(=ipv7QYo/KSqd؁SJȍ0Ks7LE5xq8Ϣ 70Vfѣ[l&ϖ,#̈u[Z#<}OŽsqYTy%8 ~='o`q2=!8.r ۛlE6ja_R1u(I]/e0=E.Њ7X3bcUg.e _m ?̝Gg#״+DCfYbW U<\r|˫%V饐gYΥW{Uk}uX?]i^ siW=BCjsUlKʍt5V+W?ȴ]ϕ=UDE}8z\N=1Xz쀾ήퟁzZZf~|kټS [~@v-\M Yjf^kbH3ÿx&ԎEvY"[5;|-ׅc8C.K leD¾"#Xn3 Qi\ƀb;sKB)aS}iFt RTTP3;pg9R(Cn6Ǝf̩w~bPl bCr:"ֶBb}iMUEeDU5J&ʃ0$#vKB骈}$'A، Fob2-ˌՅ6.R+'17^l^jdqYz1X<'o-Il&eZ(Xw3O7W 4Ɨ"y(2 `t%2VY楋PTQ(ts'MLhZ'g&hmhx@,E84xN=nƺ,H+K]mgy, 6ᢪ4OoOwT|zs}`y`%XRR'_ t݀NÕb7y^`ڲd2V[2/+E3~;C.ܕ*ce^ĈݲٗVζjҠ4j*&OYw+$bX7gқgxn3ޟU4u!{:Uu?woh>NzN7&d/ ^*@tKCCIccuw ׇ/X[K[K:=Zo{xs?Cc=9鬼0p7]Af6:k_Y-pj`T~(6ܟ_IYP3N|be5Ŵ%(eXKnp'!Ʉ-G|L6aCAbnKh e&+ -~q I뇭'#FkbٮsБA JRyeX$ո.l NCZHΪ!{b&S'@n4QDh` r|pAQ!\YA6gO8P]9b305 ݲf~v 8PjO<Ϧ4a&X:FiꅆCKY~/\#Q8zbbs %s ͬMw"N8\b?Ij[E@ԋpO)?|RW;- N}C|A@i5˟ ޤ;v۴ewuo ]Rwɟ߻E֓l6wK}ߕ>I*aJAڭ XNξpq [x e &pEߺ=cI?]-&#{U!n a~-{WosϯֽN a9ތ¯!²wy=ir{Ȳ\z+T,w>=;>s d22 Mz]ڽ rozgz-eD3'}WY~^tm?}? 
qWr)nv5ji,]gͷ~ aR^>B<]q"f[13"O@yt6 +7./t5y|[,B:Cu)ߗw@FįF{0/qG q 26&b?z,:z'51k5I9 ^*,#!{$'ZXƾ$=s5o#Fڝ.8pʬp@/ 0 r\q-"tf%@dv092c%ܜRMB2>+ 'S 4 җ*z!P22@Ɩ19s2 C*: jF *lAv62xr ܊#ИѦ>cM״Z .ub̠SUS~UGm6[zdPɜl:yڿZVQ0SGgyĴV{niy&A>5΃U.R+C=1Ȳ{i`n wu[)A U$ܷ\mu7T{ʱYfc”YE»^]湓ٹ`kq-88] ߝYFnN,6p(ސj&F>w`5&O5TpE:>x/ bjCtR>W5$8]N[ԻC`3}BCa{#}WI6=h>wE{ftG,ٖ-K~+9ۖ묽1I"M0 (l ["Yd̝{57=vu꼊,~Η;O oe.K\)GքR&Sv|2r3ܻ,U O,߫5tJeJ-arEg=j=gee%y:Ip~ij/;ȕ~=@(+4\3+Vy.[Mؒ`g9bRy>hrbZ7XC1B^,ҏJ엾Ydd##0-soI]jߐVK4SgSy+ȐMЕ~SWa""`%]\,i4 nAqPDV2)z=$uUU,x[pPMdH;rSuE7>3%M-G2gco}3N fV `<sñŞw=CM Za42.3V];T31>.ncwL gi?z6lxkg24=a<`ۏX{ Ll kC9|НC}b /{O|&/}bLQ؟?7I߿g%R8W>;|ɼ+S{]{|}B/_'/Aws@,O>9P/oЛA D m0~~gإ o'DfyϘ5(WR`O}Eqy\' X^a'|NqQ=?Y\^ "jD'pk&]u1,jzIF|Y֪ԥe[]W]̮e_t ~ tӫ=/׿]VUrPz(}ϴY;M7l{ڛo|_Fm}CV\foak馌]iorhZVSnHcj>0fA;ld] nfQ1* ktUݖѲ SK'l܅=hx5L!4tap`(Hx-nh8pig&p0ceSə>Ɠh2#GhzfKW(ht;< `9#l[p0&j*`?5ma=1-YDvԳOԵGwϔq`O]2(ԍr ݁`)f Ѝ@x${!GNTs댬P9YdMa0t$@fv0ݿ9x+ZTʦ-%AοB]PwQׇ9K0t5Q4ƚ qJ=[iiD{7r&zˬ4JշuU TR]bE%^WĻm\_Ts"^Nj+mNDjuVf@oc6+WFe.L\|zkxWLwnM7u o ҫQ/ߢ[#I뼷p^q1nOֈy:t .BfJ^8J`C%ϕ3HL2U9blP2MxCJ ;XBX6w<:dyv˱|P4K3b.HJ{@&41s>@gQ!KnBrHS9^?XE y%si"/e(T Ui;8LV{ }BP8κ ET% YۀvXyᇀLKmBVӈUkZj?>)L8UuV2i= U8Hz3@M"Z2+#dH@Z 5j}YֺZ!:MY1]M{. 6 d-@95Pcҕc˝>Z4YTK~*khWae-84zp}l8Th՞1wxر<=XG#,Er_:$F6^-,l2K`ܗoqM޾ S0;sMmd¸4Ubz*?a״̛;IȻ$7HKط'=z݃v~~gU?9skq곑j#ynn7:WI_wNˋDZy H1_V*:n\]4'׭{!'Ҙ\WO.*ĩ1plJ$XԮX ^['5O+vjhQ8}(WxjeϻU^o)AW0iA[JRΆދKq3B{DֶucЇ_^Sz4:Ru"Ż7̐` M^f8[7^^,K /n`8$il g xtR+=J*- Fijb|TG/̋,icK+eGeԞ]Ja)Q~T8ߡٗB9?ϵp7WR7q?oj*\~+{Y|)AH2j##[(?dL9&5g<)gmvH?iv|*o(G=:s|o.p2rь#E;yapC:ttȜ\6zJO^l~}ٗU'%?f=xẔ;d&Ͳ?Ld}{MQ:uiڷPPfKS;JGtM4b=MPdޑ:ͦuaHCVZPWksU֋'QZ0|J@JtsՖRzﴺM(BI]I4'-%JNK،f:&&8-sz66Lmj;w~07OF8tdcb^GD`la;O7Eaq a8<ì$4s12'C mt>>:tc7!)\=mFą) >ǁUYr-ՃOFۃMi&LìqC}#4 /64]ߐGO5hp}oO<7a :!e;z=KK~O'"x"{>~3l??;OD_&"+C5pl$gh0TO؋qc)٧&ق`t76Μ_u2U9^OU>Sg)9)LUo]7d90QqLeݜgK.&Z;YӾ7Wߡ79> !w >k%Z X>eFmo$jn],_r-5Nc}0. 
_fRl?=PkAZ{oi`f`wJEoLz mAyb+ҝ~է>xZ6)bٳ*29}#R-m֖yf$/9L݌aNfB&[i:O=ddG,CqEx nLsl) T!_D{.bv+RrTgR)9@1< SZA,ɶw)(@|ݡ4 ]`L,#\DA2pgS Ol1K *r͝Yy6oYQxI._:_*s\ec]}{(b@?sM7LK{-Pv_ Ǩ\.e&,:zܿBsW*QLqעjN)Q g;(Z<{j<'l>E3"}P U*S&^L9%WE [-Dm-oF/^gGÊ:1[^"o"y-1-bˈ&@FWM΄ Wt$F!K5ي"ˀyWYbo2@*a^Y{mU ]tl=ۑ B[:RaXqd 2ڊf}YԎ&AM$D5Envz B׍nڕȐ1BwHW[1mkPQ3ZbȊю7ċPao< 3Cy:!d8ƔZ8 7;h[o:hƎmsƮzÁcyANTD3/M@ϴ@H1ೣ gpZ!Eo>Zawg>~Cӵّ (G84܉pCmV4;^S4ScZ7C9u Čp^8ZzOp zQ.%{,(6N᝹ ŽK%,Cpoܻx %LZı&JjHGH.M5טּ8ͭi~ńk#T jPwq WzaTZW+R&GWWzѸqq$2pB7bN@y)_"mzy}٨]*J^jxQN@f|!)t5ْAfG wefbmNwrX_qw/x&_/xoS7J)1: of>K;_-u7L~I;8/_ɝ#JgA='LB l^4*1R ^~Bk@4>k<b+(TeBчY98#X( N>hn' JT>*A,Kgzep MYFs0#B\gjbG+\ ũ  JْX-7?X1\RѹR@QtAc2{4Mq r.w ·'Hv[:HfRmJͶf ĿCqxnhčQ"etYiz{rl2`&rvԖI66ns=p/w`4MbTcvR{x'O^*E57r"zѡH* MjN25\"NW*=$b'Rn n.y#"D &*A+- 3 fzpFXғx~F؝؆<dzasd똾F" 6ihEL _g9} cӳcOX 8p46`&X& ,{soGx>NGr6X3Px:m)= tbm#A>jRH ]_r3,ⵆ0?\G׍8Fm]K)6]e$DZ}᮳2rWqnU\B>PgRRs.դtt4/(n@]2P$m v7vA(L[08e0x|{Fq ^$|i:>[udž8oS^HbM:kZ0';Bv\I.PNQ;ψ|G<)<8:kղQ't+<׎;jln!> /c%=9DR&Qjyq^y+>ğO8{/ U:SJa^P(C !]:Q '+Ӣf2,s︖eQ,WQυJ;30jϋX>>):Ue?e-v4V-(7E2uUVf*~G'qٱ,/ oY0H8@8&JOWU"ր-\ږUHUzZ;Ni:Fmu |l! D 17zO5)jk mm5q>HLdĠl\lO3RƎ7VG3Fi.&A F9B]Kx{&vci:SLiDDw!"Q㭇uOk:(&otմ DƊTNR^yH}{Uk+*Mt'o|?,wB=ɘ5o&ĺXi4VXWիXd'Z-eUj rƆ݋$&r F -4T߈aPn[z7*(HE ykIXoӠJW5~Nz+uevwK[Bk}oߡQhyCym}1~7NERnݍ/n&Y۶^8[N[]ܾi܏rOhVzƗtTwne9jqU>֬DD'/yƔs[L>{\?i柳a|TrcMU/<(t3 ]Pfv\-[2-S^D(+ljAJ~;WY&Va2vJ GNvpo>[\EЕsZ9V(Dg"B|gPf82%vդ{}']eiJ INcsAD8c1Zf7<Ihe%SGh)jl55VK vbywSHeߔ*ZjO#6^ ڵ2{m6q"b%v 5{] !A?bH*iW#zFuTb2HdѬ82A_-*W QExhНcy c B( dlEK Lp:x؛ GXOeo,! 
ybXܩ0N{clcӲcx0\ F<'cs4L9\x&剉Ńh4h 9LX F> <;90pL ' Ӱ; 쌱$'>fCM,E!42G8 ̩ ĜP,2'; 00a y#r#6q/lkV_,C-% Z/C]f4f"~m IdYſulm-_# *^Cwc|e]iW֌nKZ֑Tk*ڻ:WoKsrr)ĭ,Gv?p۪oH.}:ƺ-Np1RgY샦 ^voąx&㤨F){nt}nD:Hq]ts1.ؽ#xVꛥ1܊.rZ,>:;)S|WŖe7ReQlY$#sz\9.W+ G# wXҏP|I2 ]hp42la!+ҼPQ( Gy9t헳\xKBX~J=`rTnt@2dܡwOv|*֫G9xBإ)xOL7w2^d:s,yAy),vhQ =X~okeV8W M07T@;Gnѕthi& FL퓝4dSRLeXWwu87tz$*2$GVlI֠M6$oחt[*"=Muto[E16Wv@'*ъc;n]H)p,EP؈؂,4Cy+[*%(NǙ'+bQ3Mѳ WCK3k(zMM&ܚٖW4Cxrjc/@ x699;z 1‰3A`:Z ]Us$fpWf@m9xX~0|uO hdZ4x؝ '&dl;`84O`:;7cY8G0: L7y$P{?&ȯb}jtssߴEpI[TztԳ|%X%~qQ\OH&4w&)Mr:֤/gzyˋzoY\bVEds[{k+u-TItW8Y_x%ѩh<ΆNzgofol:́gt{|aNTڒZt wۥp+>:+7&Or| 6͐O" U3{E<\w(_L:isjb8[®dL߭ كX(W_Vi^sp/=57 v)s?OijqD1żL2܌e9*dɕ8an7h&-}9LEM|nSg,z{) t2b]^>,S) 3GrGψ#g7*2r@jw)"› _jB"- B1-ITo1 !" е OUSIl^2H*z& D b7>v53LTS.e^B7}f6 *jd7F$X)J4UHl;j(f̢E kD6du#$TBkoSwth\)O L(g7z H5p1MFmoy" رǎnAW;{|?n3Oh[dVxnΔb/,w6G3G+ Lsilcf !\s:0GcFHԆE_b3?p5{giطl5G ltSD=4JK1.[m_EޒLZ7P&QmӻB#mWM9Lf$»[罭{kbHxD=^lEztXR?M6PNӫEupWVXAM Tukyc E}VO[ov.PF{?n%C}`Zi 3񳾢d lRI8tGXչ׹^o%lncƒwtc)X|u|]"{+EG/nJcl}bKʙ,# R\Lw)00WwD"mYgvh-g)vcK|!#ѢPة,#şqX/g(>J34GLe|iU1rBbXu CM\Őa|gfwi1+ԉMxFɮohQVo-: W@'YDTО l*@[z]"m3E}Smz)qJF_ۺadIS{]YE]DrI] `$˺ ]푅ZH E#VlXVۈdJAy}UG$oXM] -Ôtװپm`qKk{]6l<}< 1Vz34V0Y9u=AsVj΃mc8MI "zAsfϚó`xSKuI.<:b|ƪ9o?ul9ۆ v3gК ]#Waq'CZpC; <hNJ?Ŷ'V `;񽱌t'R}oeKʐnWxU$_5λb^fl+L7){;I=Á:^^Jfw>+"Gto_P8YtkmmMi/ax![R.­iQ:3zm[\ި:O3~;}ܤTY0¹Rj=3*$Vn3 JVN82o\eV(RL;+2B55+XZ@=3'Ԏ/|+U{r fΜi8(2jp5V8"09y*se)O@B*6QxaLJ$ߊ1j* Yu{$ZJSmD[oj='oh "fMh]> C5k75цN}Uk%75b VUliMסUtvɎz.7&F$ FMbU MnvTE q<آ:: v8?aBb&to. j䞡j^+n̐8C|NCz}2aM1Y5POz #%zfhXu1תӍ6:Nۓuaz};bexbsO(>n֜kH\KF k{bx#W3{DZC- ٰ9ڦÞgU6s'N#XBs #˰hx2cӌf|xdOmz< } ˖l ug f&cK- +,"p[ȑjsKoAn>(ߝoėz ;+;6;[ll/7KۀyIPRM7|+Yrl-jss3js(_VUdy!w!wRQSB Ւ듵S^i\Uc⇇H^I[Dl'vY'BEC"WCIkXnl$8oOBZJ. 
e%TkhqR$1gy/*;s'\gR,(F)Xw|x|KRgwh n6Rf_;\K,ʹ21]:*Ts?+-DgK1]Q#>43d1@U"]q_1ҟ*Z2uتR,]Vd81 ;EKJP;- wr#6^J ,ǽ|ʴ*T\{Q0|Od6=8xkՏtXQK?,p;F`mw9*A1}Ǝ9#σ3ױnl-7B<-'HqXx40=`8sk` 5F8؜.5I]_]طxYLԑf lltCr:L#ov;p >`樣D7jhHd?/ vPy44`!g 6~`4 'ckIzvrk+8`wK?Cy"N _Rfmcܷ-'aRc_֮g$TFL 滑c5g[I~gۓ$O9xïsRhDWReN:h8hlruE{-7E2;YDRmPZ×q4BU>emX&RǛWXwn+4_|SHo+6(+6~]G_k72}) 'L}("S,+@ϰ -<+[8v ZA8#iXf -e)L/f2P Q S8lE昡S:Ȳ]8 v>[|~ l,TCqoWlqLy7SkCy]:i҅#?췪ʋNq=`tJ^2+e EP@уZaսgU2dxLAn6Lاb݇X]om2!]drfa1 ljuxG]x項VBS%M ȽLN$YF[qLYwIL @{-! K}ukŽ)$&Bux /`dc|Yj-+nUJPU$T2nB?%pa(5zL imMWcşLp *ްTf=3Γ;FX(8ѩ:XsnStz8=n쌽^bXO|}@R= 5ĵ$C2H"g#W >rM,;h yb+55350'0]ppz@A!We,u (X~4C/CTv6ar0=E63Y mmCxai r +ŀ?ϣ?ƯN; /98XL.wkYj#-7{17'Q7Qjs.C_ݪO!Ot29ݙ"L@ŚzhM㒂 /xKDN< ~/Gz愞ouzSN4I,Xފ|vM+ Q:Jd[*F#x{]qǻ>גF E~\x{Utp^8-bD}wi{8ēoowKM*G?w:0G؝\m=izu; JIXNwnoXb%[8JU*C^?zbᐹ/d~d]l :s"]MO\9EZTxF*A~n˼=` ܒ99*ܳ,S 5ٿ"5b )j:F̢d}atBB,WbTpM5I ĺhĭ UE=b&nm $ F?˶$ Wt]Fѕ{LB1*m@SR.EjS#{!SqDM]5M;m!ѡa->o> ao8l&0#- !;Je8:  G ংM MMTc4~7Q/y@mZܳ|d١ݷ6 l߅W |Ә}{Ҕt͖:N`k}ǒÁ׵гmv noxThO045+j)p:ȶh-@]S9ɖ+d1S=K="l뮧I h{1fM8VWDd;H>wO&,{jvLqy3)*&ERH`4#1)Vp@՛ N_Thm l-Z2,Կ{G\8m3K_sL-&iŒފ%M &hHld ʚDH3p&Jp.Vm7#wk],j$z 7#,vZ\-;N^vMi4fqZT}d YH+>pzZ"s+!_  ,Aصy;͔ Jw`#7Vrou%p'P)nJ*K@,eudK,g r Ks|+2 0 M {92R*s<}Խ_)3.O}[t+T2ygb^rw+yJdYvz2+ ۸5V~PEG<{e?a颸PO_sE :_(uY&_c;Y&#(! Tp9k {Ar̎V3|:#\aV &"jZ˥~$U `%q.)f9*2n$逨ܣ49Τ7l='ș5u7Sdĉ9CV2^0䅤#Tyy7]xOR?@  ܭ/"#3fUudFFs"b>ޗ~eC2@:* \̸sѪB@D6ߛfm0ںܛ3DU>1yRJ\gqV;(͚ExGZC:)1Uzo8%b `#K/lɄ˳E9s.#fi&Q, ]ux : ?A⹓vTd&ĵ\&Ƌe8I~; S\`f~}Z1kpV&q?xZ52e¯oW^Ldn"N~6kNSBBֻm7$[-y^Z=Ete|zRC$w_RO49wR)/1gV[˃F|l M rXe vwXuNj bu]Yu6wz7#ߞI1dIy G3Ⱥ eZGt^#n,}ki򨭑P0 崯˲/C CP dY51ǎ#𘴉HwQ_.1Brǐѷp܅36ZxA bk^0 m3!S DZ9Q“x#LJ=6# QehEا=?7v?>||t_P}ZskO'TIvh,w_IWWz1C(t}ǔÎ%2N`Jt]ρctUSzG'h:`UpUxr)JO'83 }!l6s4CP桵afu*G@V1wΜY/PG!,%j\x0#&MqnbG( 8.3:왰D !`"ލXB]!.Yi 3A`! 
1 % [rVװ)`5U6 w-rҎ SxKԅgj).LY3;̨EG^X~丑o q+~%o#!Iν[p\Gq_w&ډ4г"8HCq*) {`nk_YT ް?p/_jD~egÈrŇY y;s7dPZ*/.7dLQ^W@Ct5\/j`]hÞAG&MVUbյ`r|=Qe+h?2}ONȦ+rjbMZT3y\X7zАTmWRtZl۳ͻJjP^{~WGa+w hh ꈐwРq<3'KP˚/G3OnhdSZO×#C۞(-!z۪#?=SA$&)=tUX%H1eR[GЕ$u5b3xLLZʴ8Pxlh݁uɨ-)t:|-+& uґ_Mz90:!v֍1d$QDCEq:U9Rz9iwŀ<Ʌ!M eDA-Ht}EnI$@eUm`pֈnI^sYWPImSR% 6OECwMN   uao۹6Dr S6fk 1]]y-΀Y=O`sl2l3/ \U@с'Q:\)RZ`<1;6 &e]}Nz Jbh@m6C14uG鴲R.aP6,dY:4J.ai2 /^P:~9bћ$@A,7,q B*'a%6__: yWQLUmGV!r.Θ&E/#%b:<͂4e$e!n[4l]8i*➝gXvL]P2DN9JT%X>[)BUzm8eGUi/<\?:;UӣΩuTkNo?ũv_W.p7Īw_=E+,?16'Y%a<3iKEhkِG@UDi֠{5F6QT@&vaK׻z;DY#Y%]:~o4KǪ,^t;IZPDLtcʦ$RK}ajoY;'}]2zݖF Eɡqkn~ddс[@Ɗr=^řSm)X=HO>$iґ"&rW:1s,I8U0yU҄VW<1XO\<&X Bo}YfܜS1u.ltν6~X񄜎}XL^S &ܛa/ Є1-oQǥ8/Au,iv!^N0Yp1V}n˺͙+ػ)hY|:C9Dőp9r+_y `5Q6}?Ey"/Û ^, &ϲ,28ZxQ\`hE6`q+M]zMD6'⺸/7alq Kо M"V@E&"cQ,^c.WWpY6su!'}i=ͤ;}{n6GMtnۣjՇity;9{Q `Fͪ~6Z;O>YD}taZM]<*?Ix869tDSͪ~j䤥7T[ʘRO%J”dݔyhHGdJ c: |j]ypl7\Ǫ~>(y,Kp*k"s -C_"P4r7e]hLW#jvyM4LunQW_)ՔIcUTS텤`6/ q[(M5UTJ6в6=]W[p!%CX6Ei6 Bק|fpVQV%bcc;؏Í5|PC4 P\x>Զ8|:ym 1c?NP1r"L80"ҵP $>_[1+m T`P$3رfY5Fs-V Kv'BDGD~Th>-xK;<-"-cEr8IpD4-6vn6ƙ9}{a  YPLBެS7Y\"\^Wˌ0" K.R"("y3E0!b7[aE:e]EK'ӋqO~|dQSߋ| /+WbRipæg_XDMn=U.Ag}leQ5(-elۮ]rw auZV4P 5/ñjȇjUw-*.K>[gA..A_AC?·= \_ӈi:/ֶTWR.1kWM?#bZqyXW^CY 4K)yXEd-@CqTۺd=!bY {JX- @շnG=~?DPmz @MQ3~m'ھ +sVVy/ok@/U~zRYC`V$S%I&[CzC{14z}U6du!RI%+5hyBn(=I' !dh,3"/:F Y#f>7 HBHㅥMEkb(AWy&D`hX? yL"zQZ,=Pqbh@~iv3yLÀHR{~])˚f%Un)%$]! N_r,Kk-ŞSwƐ*O~qa ˛g5,On;w2®>BPuǖ / x=Re^o52&0Sbc&2NRX-eg<Z;A'jb^Y )x̪fߛ36q|Xh( whX\{SGuЂaDs8# ctAxn~knNqgY]!z)@]EngjY+Qɇ7oh[UejY^mCUK[Չa9I!9V*"7x}ߊB="ހBCZa{l,K~WpCrxUyK}ܕV\]i#ޕ7dI+cc9. Q%؆M  1Y'?"C !H@p?,d陞!ϹlQu/9=Us8=/Ua%AGB]Vs'ǯp=?y$_}"'8-$v=E\q/`s}E<'woO~oO~ৱ|꿳ז?xǏE/7X>fXί'ז<Tո=i]6uhv{ۼwRU/ѭھͥz{=n_Q=o̺3q|'=.E+£vSDEz6[{~˕D THQ |Y,\"'U·}F%KuNa3iJQ{MI -eOP+Vw*oq"(T/ ,jŊ q +I ZcZD-$}o9Kk;D %P٘Ǟ*K:.|gO>qg{6 UP']t.t@pj M&#[st8YzzLdžpoھvz c~6;اkNseg]ı0Jl1, 9 &._40 (5ϨCs72L?& FzCCǪuI[}jڄQ'D~`fM\;X4 Cc.IܞZCxj0≙C7t@`'ԢXs'C*01ihy1ȍb /|o8?Sɟ/O~g?+w/3;/~y<G_]1g_8Im'U16F}b't/Q Җ"K{yvf叵ff2;Q$?g{lqJf-6C+? 
xo Oݏ>IjES{+c7Dso.I1#H-#%16^%%afdmǹq4ȸJ|g=fSJJBJ|78 s",((IRS\;DG ~W(0=Aa-7FsRQZ,+|,OVK,TT]Q$TE$|mY DG2diQ ?RbU"WD%T _oA \Y*8{Q܁Q^86HM`RPyY+k3u>FOy^XPd0alvM0R=#v <x,zdD\Ӵ=l$O@G\`mX@մ;9\'}?oZ>TDzlDTN FJU õ0ʾ2\t݅y&*^P&C˜.L 骚`hR{yFM (iExD< e&!B߶ 8~GpdOp\z'1Mلx=7JϺ8[ :] ~${>10>RxGpȳtnS% 4Mlzux!q32:'ƌL8tG6ѓvq8IF56W`{N  @R: S-L`k@Vܷf6cF2ֻ(~ Iഞ5H2Ig|l!Аl tI5X 8bwQo| #WtAc\ uZgV%@keI_K}+" bW qgIb 5vxE?. e%rP-pȗ9 /乄 eU5P*!;"*O'J lNT^a, W%T,2/-)&RqTe,? rPҾ$#Xi'iD r}-<==GqƁ[#blbu(6&AL{&#@oؚCO*qOG:ħmGNly@%$ڥ:$Js7)n;S;P@0pn:!t#˥4]"p"C;O&).>CaJLpAqc߃:ƇdAT 7 Mq&C-JDȰ]0D˓(P c;9>K[#kL>\q 3(8rEwp7TuBr} (2-PUA1=6! o sc^>@.#:x 6o.6ڔ='K6M23/r\ ;̐sښ.u1gۣM :gӣ՝5 ༞21o[7vqttsg15n[^wMOS^ wmhcMo޽Y^p~OJ䧖kޛ&wE~e'xn))z>BuMDv9*Մ]_i)A.Xk*!CX攂๸#pDAԄGĝZXDB)V+:/ۥE.+ŚȕЬP@BP U%TE3 |+[{6 m?gȵo,ׇ}|}=mG9b.Qu>/شuw^GK={yw2m)CS1S%.BY5\ϺK!O^y=o5NdtT/:}v,tTҝ [k]Z;6S]J퓎ޱ,vC;aL0 ]v/koE"tT:, 6; m60aވg=$RbLvtM6 nĻM:n`򘟲z 6^ml1m@$FdQӣeaD}ֵyDiפnc&1 #$,$iajEԟNՁΨGQD5Ǿƞ?6yNhYQFN1IAd؏,NgɈZIb0J.&0x"O-#*Q2'YG9[MW?wS1HR%&_xV88CTqv9ݓy2JH>{-DaZ "#XHWg`c?P0LO_[1";[#CF=xo'+x$8U[ۊݷR,N:X^3fUۘ iw60LSox2Fi>%uЉ@{ÏS7X!uQnxD}5J~%[xӈJ)| Ii뀟sO󩚜d 5;$K?P11: p N 2֏@S\ 6.Jw;b`-!^E %ad.a_ 2Z\,(ѪUZ6+Nin-j36CRs/vqs4Xpijk.W.# W!06)q43A>ݿ FAw˷p26Ng/|o2np9Cn~>dZE^R:]/basE\|ݽ׳Ƽպ.o.w)E2:x{q90=wVm<7>>uo#.tn ̍rmF| ]-'^}ɼ󜾍83]*6E/p5Q/sZCq޾S<(quEdw9( S}΢NTD$8g_"nIyV BAQ?V,<{2ld Q)(JT9Ujb"((4RBCQs| ejvb.pBU NĢwJP-XVղ:@Z΋^Q*b]GoPE?Z$U;uzˍC^ބ?I؞1p b,"^Y+MPof"2dL*Tkg*Aެ0vX`q(OS -5ضK0gkdo;:ƒ3&,zaQ0p59&=ƹV; sf灃P&ӛZBCe` }` Zgewm|s>p\<Zѐ{37CpsÏh4 Oi`CtaloCq;ĉ;E4h!* thYC-/Z87=0JWP% +Uqxn+MݗbJ= }Cx=h Y|˕n/P-?UQ~M?<8?w˻;~cKMXf 9`ҩY$N;i;fadBӨ$φvcG`;x@\Epw`Gz|:0zȬkf&6&F[,Yӛuv2PHgs410x״ KYzz:Ǝӱ=`QW!_f`v:]M`o\Z\qq,8A`YK I"˥5mkH #2Sg]C<8N܏ Y6(t)5 (ҝ}9MۣاУ>|0E$ qߵ jФa,j4&ZQ@O'!a:O}! 6Ogf@`T NS5<ӦVǡSjFD̋F/NOiҳ:a;@5Oq@hxO@! HcSN4a+NjȨ||a3|#<?Ϡ,5Ku"aܚOor. " ͻiN]k,"r)N$mdvjq# _bڸ>( BFUa^r򀎬 TL-=W3t>b7O՜`I5 a/[,Puf0t5! 
ާqM7V-(,?ӑ/b#J m!]KZC!kjED Y8VrME̍8/ =NGcr,+<_Ud1ns1@}I , AUvvL*5w]c`eZ2븚f0|FV.q[]f5]ZŲ>[60tgu{ T6,Tu &=b`]}}߯ VqH:LLe`ͱf0Oem;'c} 1:0Ca]6>3F.y58>sp0`8'pl|=:=tI4LNh |׃I ʥ: 8 oT5)>fЉMkp6tF7zNލftXy kڑ8K#QhLƯ1퓄:fРΪkGX$ @ f6tW0';8uNN N|zC7,nҥ޾qBOiN qtNh$%ȧD)^{NƐ9)IQ]ux)ثՍ4O)xnd Gga+Ac[m]^Rh%/oF.Gaʾ->>{8U@(ՑU-ͬ=yNM6|B,a7=-8l3[}/g4X}πKpf̘3n3yշ\R L[6qw=_Lg咫oY{ 8:nwuE'#up`˶DMضXhف#TxTsuC5MEX_wT[Cba|k-Z_ғ.稱_B =+ +?_r^{N. zI֋Tt+**)8 v/T]ԟZ#t0O~?#Ug-/*eT,B+x;-s:|_,ToJqRq_aӝ /R*ck|4<]\N"ϏL]oz`Gg!P{abRyCĴȢM8pBߍ\FJ4cϤA8z`41i41 OuBahBw.0!0S7Nl݋У!q4-'К:5l $VLsj̱a[f:q"% f+S.uzsRN]:Aoe%ʭrƱPe.⭱[ӹJ"΍°=q%;??O|:9v~$H3(Kv Ϥ,[ktk&3m2r&Ao3sf2):+4jqWlnۋw+r΃|2NgeҽYx,˛rSz!>?^&ԂQ[w,ohӗ?& ۲b:]1~Te$}&\FniX߻Y=7wƹ6]5w3ŶS![< þ64{8Jp5jm>?zQ'f^vD !N|ǣQr+bA]{9yiQ~RU3x 6k-FW[lz6Cx *̘Y!N~s̠2Y<]>Wf2m>D:%>Z:k>]@Su.~SUT)h:Ϯn6qw{n"ۼ亊?H𽔷2,bi6ͭ5KŪ`;.w/ײ(rfk]@e,e^ciMRXfC}c΂m[8س{O0X' ZioE-Kv2kxI^N[S4eWJGMNBBz&ȥr^Ux(?Aӥ{I١ťT>SrSWJNW#ʮHeR*)(~Wd^zՒ/q- %{5.Evˮpp^#UvH?8͐?ϡ]aQ{Ρ(퉥j h_'zmEWګb3[a{߫ >L/ &f_ ,p_ڑ~+di#>_-$^)Md=*rAWG\9h#| L2H쮇-YN=%[t.:쏹ÖlVGl@LӲbM+E buS5 kMu1t# Rtx]Cf ;.vN_s0Y KK;rMX0 5c#*Upk :Lne$68[jmS YSO- 0}ѱ a;S'qnj\?@ɣ$ŔC+} .ь1u<*8 V<;/>tPO ]zFZlEA`a hre{t> awL3N)c驍x|ď ԝIOC/q;t"Xl6~w&/Gÿ9;7Cr}8LWw oP7Ȩ~SakjY!xA\txhRpm@}Mﯵ{oo͑JbHa:gWp PKUD+D旛ʸ4ۦ8?3db9q7M/?3|3?b5F+ wNӈ^7.\uv͛,oQw Zo1b{=ciwx[ofBڷǭV3dkEZHGlVQe]zƙeQ8v{ <<< h̰`efM[?Vwl97N-x(}}V][?󫫼< .u9 {EW0y  /7:W 'Ŧ`}5ȝyĒV.r%gid(| eBPJUdg+ rd |lA2f]˜,S-繶z+ M;LzjN3:v:i]"arbt[Cw`A s~kMșLhDӀ~0JHsSS@aL5 N"74G)N\8$!!NBKbArJ/iC_''06<<c{,<'0Nh8#j8 F^00ca05}%d!̍MhgP/J> ΔQ7I8q(RhB`jq/Hte, |]BMnVl\ukB/sPaOPDA*Hߐݢ!We Hב_˅˂( HrܯJl"rR,I*K5ieA|w})Jx ; Wn }bK!fX)܋Bې%Z*/DX zRB^y,tZrXjU΋MA|"좶reW8-/h7|+ۮN?9֖v2zX iu> 3&[aXV۠^4YBb1Է #tL TĄNV;怭?L4i4 EnKyHy&yYFvTѱa lWؒnxala^ƺ0#4fLgUwNGcUS^}3 È^B (m/ ň҉[!13/6()~{4ĎO9Јh`x80rt9dQ|v đSrBG~sp.&0>e j۾RO?P8z}j^RD3B4FY't0}àCąo Ix ; #jSyC:tHf_ABzOGcKh{ZIw(]o]ޤ/y?kqu[RmQN9gM ^'_8f֊o򛀜08_=ƙ_%f1qdm+S2n7i{> fV7ҳ1r BfF Ը_6Rz0-NݦNvXyywyn.ó,-'Ãܹuܥm+ xUz3uPK3^͗{j6گjh\$ r=,$v>&yM̭٫ib=z g=:Ļ+Xjc>|j)񲋭y[:)˨"4W]Tm$ş}RPbOixtȣF"q.WR;˼P 
{ÒPRm(B+*RUj}$*jO4*EUxSKJ$<4gDQb a pRKOd%_zzQ%^xÈ/xQ^]E&} b¥XةzwbAB]TJ%ԬRq\#(^[ϑccciq*`.aIY&4qA]ftH3P@Cxle,kig2SG~ 3,%6lf&*[nз1a_YvXxtfS{ s,m eXgXaϰS>e m=찉ZNJٽtc &LX zgTt,˱yXÖ[E4m ϱ?0Ep#t:БhDGN)Ȏ/BFr~Τ3>N. !AQ8 7(r` <h8:s)(dL |/3/6$8z! |:>&; '471$8GNioJ3)̼6 a s1[ q09w)ps3md!&._l')О .a큒g)~]\je+OCͿr"[=mP,\!Wn)~\- M[Ɩ\Z췄i;(i({Z_[f;6Y806Czͻs8^y-rl7Z݅⸡^_ώ zh=jlxvtl3mNm2[J_\< ]mqYsgr~VAs[5٘J^LT5\\0LM6Gn$L3(|]yhŃj9’ϊb匧 A'2\-|/Ŋ,QmW+eGFHxc@IckVkm IM7=&7H5vyC.W.sҳeBox ?JZkTEぞ?U-0v"_* eiWvcTKHk%Et@klAb:rE\Gs^K/rr]*„R5kO(4'kwY,/ Ďlt5 X7#^OOW襉;2dZ3K5:l0 dֱWnBioLN3ҳ8?f^L.SOc_ȇZ%4<74=GAxzo(zz˳{3[d/L\9q9eYvim>WU'HcQ(r |sT:,s2*W.*9/!y|]g~D$!!X""+(Ȱ$kQATBܬvDA*H.yHuX()\M% IQ^W]^ܑ3.(TyO(Vmz *% V`nWR$^Rv2Ǖ tJ%AyNlWÜd)j[k֋'苲T֎&b`0!5dٺK}ˈM,B,ּke-`*.Vt`-+P03paIzYtu &ʲ,.B̅٪Ƚcni1d4ptxnj8&n2z$9ٵ,Š8NGe)ɡ.dǥo16;z;I#rϬ-ܻؗgd$lO |@I3jA  Bp  >"3֞iK>TVdlY:#vFT ]RF4k%BCqq`"+[xD1rlaf`E2E8"cgj^bfr(0 Rh> lL680KLUkBV'adx$^4e兏us9}Lwy1% #2l6('K8Mp:I1 |؞x'5-,Rsbo26Y6.DUhkn u/\-~%1,0u$۶mo%Dʯg,a׺l bWBitxJ.k; =]E g/.Π$\Pv>达4(qxC\g%$u_e?׺oAQ=;gm7 ^!ҿlU@[-nt+P _5qz0{9/u˽c{4_u%=1kWX@we!h ՓΊ2V+FNxfZ'-9,dQ8pNj}<,%+|*Í<[.uNXZnv֣~ Ch)TO5~CiٖeQkPxꔩod`[A{cݤ~tUkطD6r-VLo푍FȂ C"J~!3 kQ7r`QAcg\zYHwl-G=?ݾJu:ՂF1S߲RKi0:UR޷hMR,gIV,=A llalx>a\mIux+;16b*Lp:+.Xps 2+/0Гyi10 rr3Fレ$<"5Ϭ@x '=etwr,ecȞdx2CGi:g1U-G,ȏ-ui CUi 5 <[ϑN XX ݉Y-cOYΝvǵ:b/ۗODy#:ۭ'~qz{u~!%>`+5>%[l`5BuKKv[!`O]ئZ&-^WB/VokMk{be! 
Rz9E3.v.Ep777j]AB5t2ܣ|ǯ 7͜^K:VY$%,>^zڣm>SzmnEoX|x{Xzl]4 \r)JUb u^Gr0, VEd͊|Lxw̽8|9YD' #r[@z^;yYv+VAw=8œ=YU9ZL01jrX=B2cT["7k,9,DŽ,JL]+"+`+p-YHbfz$p# +mj'm~F#uHtic[5þplY6oR_0[gY:>h=Lc.7cdƳȉlgGYAI*K%ب!6,jKg&Np_c"㖉}hn4ь5DEeOEUMO^KfhoB])T@kK- 5mpUq =C#,aj svӈ a9tj!L`>##I4΃$Z!,rB{dL<p's/{f _]K䅾fᏒϝ=;B (p^h:@d ]b"W YF^9yp'gx¯ˬ#c(L x:ؚRu`9-Yxh "pR/PEbaKݝuk[ܿkxͦ\J񙎯!WHZ'[iI_?OO%eMu<{=W \J)sv4lGu-l"v ׿;y*(arig\ be:|[za773ګu%~6az(xԦ2X5omo~hYf>;C#{5oS!MC+d޳1scT+KR5TJKԎ<3 DP}P[j Tn];ɛ =a(BClwk@!sғ @gdA:m}Hj H JUO$*R8V9$BLFpIEdI+݆W;49PXݐȠRNoI ǶDU: t̝bC 6J2W/0Gd `6f{~ =N ˵]h@CHuM9%FҺ"*'Y.:c:(U|4 Wp=O63x!M)W!8:QSLCd7"Aߨ{ZTǷq/ݺwp"#d{?C1A#l O|%neۖ!w[M%VM*Zshvc҂Nƙ&gO4 C0tOu9G؏=l^Gns/N<6 ‘z3waEtb9œ8 S$aeYNoL1y.}<3 wti Ϧbg!FNbZn؇F\׷Ì3 &]LvZ%omN|[&߾ӵy]k{4vy{4hi4Hb&|̬)ZސP]( dwLJ*{U(Xoγ=t~5LJsn;/NVgwݦ6rl6Իr {j[ۍM7e;96xQ:>綶D@fY+z,]ۭWo+ul{y-?;u>]{Y%ҷN)bT*[|M])gNe1GRsp0mPȑ|"06Pz@;+j$!YLi2J`,v9";tV@}>O8x1ty%oNQmoTB­=OQl<ǔ!=v*1dHsT+}}<ܯ(r>8)`Y341JLsuݶM4\wQWWóMrF4mW8zcO9Dr84Ymmb.:zWW4rChUiYݴ ˑ)TƏ#Z RT%|J\zhyc{] eYD(ԥybK͍0R 灹ĉEI[%*Y]ț1*И*v_RzT֤egzݥsMJEػ5A $kCn-}~|u̮뼥"d#/+p,7*{ [ol*EYށxZ*f80lϹ9 ئj@Z[ fj 筟 ZKZSe^n2@#XȐYI}ҭq8eS I\[{"Zb Tv JayQ=eԩCȟ֫N2:&vUUsK<!~JKvkMryqڧg@x!0]V98m68 :2"—"`KA~h Kw&'ʥmx:OJj?'cD lXMuQѭIs]|]gP 9QDTCIea]' ш\FQ9OG TtZCz}{3>g!Pdեv.%C4u5Dlx[B:)5${ BCKd&&vFA Ȋ#NQ.h|+ǴB#b0Q iOmCACNʎssjLf1NJaijİ)in2N#;,l¤ - NScZ0ϊ†EF~Rw9 ȟI7Q"i\o2Ans<F{E4zL,`8,oZ?8ʹv5}C#뵟OB=__BC6_Y,Yzwbόܳ^['%U=2ghp78eH,ZC_뫫)̫ճ|Yާ|[};mnkihJxmܪ|St]٪Ҹ0ZkkۥG_l^@[A_mv+rf7t'G_^%V.8TJĈ$˂x$z|I%9'(ܱPdD?dr]J T8ORGfETmI'e5.|ƺ,#Wy^F qU8F74$K VJ,' L<}ėXf,{O1E^ϡrCbX5h9*XVB36m"5cTD66؊^  ⏟99gBT3$J1^/~-> vУlYׁf}U]5gjaW v\o`c6®mTg6҆Mz`XU5XmLf ]uT_f ƚj,vabim4Es~}: B鞾l_QFZ1reK?eo =knpkNߕ˵[ ZfN=p5%qnr5k=/vul{`wAߦvV^ag/^cn)#̉~ep+0#Y*-9F*P`[D"',G$p,QbU+S8X?^?Z,eU 1b/wjx(TU$r%^\K3F9\apglY_\}a9os)p@o-9눭*mVP9WzIzՋ"S?蝵"T Z2+֔FZ {"1M8#mSV C]@몊u([KS^F<wU1xغ{mž02Enu5\Xk{:vz}vthbJh6{ t1^WugYT0|;6Lbj60 䦦mƠ]7tAݞꪥvA |lCO7 pup~gɤ Y<,`z 2.8Cl݊j~;QNijx@8o?}޲{Met˝w vLK軄$ϓĉr2&Qkhn&WhwVn Ite^~{nnCZGuw>{uڱa7חũrzzm~վ ۽ZmnXggV弱Xn5vUdDݚ)_QBrI;;9 
wҪqCڃFF?{;~]sս^rw.:n ynkgs};&a9~mޮo֋p9Vӫ.:s|Y.TP@Fy֛酹+c =.W(J%<Xz@)uXQ#q,["m<8{QzT])3fA=vQ=ȷ%*HFJj"ތ#'Źĩ_/tȭrG{ĉjẻMcEe[$&p? 1א!KOZbA.R$:QaT "۬*QfǗX->¢aRjs[H{h< Uz*( O `00\(8]tc *iSu `cGBBU; 8w{ݾu2zVcC7lT݁ny՜kƶ`TZ,Ӂcot0]EUJ=* &obuςFn׽a^V-@p\35긖td͆P%PwYfGq8$`ml}D{DYjY\9@ <*3=}9RQ=g Л̲ $ϣذIB zq@a42b⡚c%K'J`${YF4"q.?-,&J/3 #/R'hxnzadecob7I8=x*I!dDi6QG'Gw/ytdon6Zo1KeʒO0ue >tL/#ywжByxe]k ]G=mv^[]rpcCri Vp-W E37k̕0=],!*6^n~uvzMRRܸzyu1S{SJv2=DyGyEPαo U?vպn[W|bnl;zql v~={QJLE.?)W,2_9.x.z-Q]dK%)#SbѱRoVx@/"DŌSUZx\f;b~]ʜT8c.[KOAf'>n,İLC"_:#" Hn,bE Xau  XlK0jGg }\xvxpxh0EN*=$A(XzR]Yev R)#@ q-VGÊdpb \ 87@8Xͷ$]%_wq s0OP 432[\=l]{ףbkkzGzhB:JUعcIVԁm mM+O @7*>ЏiZ.5YkXKklT,J 5NO`aɁV. :``-dC' {kW?`bs'Lx^6@S&SXApaaBF!%iBS/աhf23)9] !c<3_ Cr2NGnZv` Sq1Kxd;$l'!}>1("oN.ܝN&$ 8ǀh-;~zD[$n8i<Dw a$ZF1Iq7_7r}fz/\rm l;z pCd|m_ 0o(se ?F.y?iסּV*6AwXנ[]+w_\*ׯΚgڗWWgWמ~ʵN>mu_N[k,ʱr;|n(F&sS/,wÊאxnM=ȆrvwiGh; ljk=h_0ދ Իc;'|Ŧ.vk"Vp (s5mɼČEGE1á\PbUR`N+IFUX3EGُ p K؂At!JdBܸdO:K%\PCPo|ʰ3Q#XPb_,K G,ѱ|Rcw‘\BQ*$ղ4Ɨ YJ)Wf/pBcBl</إ l.—^k\-La~bذPL\\=E3` Jئ=SdlS65 j=h.QxKF&\M1{]S(VutVMr90k;=֩Ҫec֦&fCcXz}ƴ' V$9gu(u; */}a4rMWaܫ`x飡=?NKoLd^4ԁK4ZAĀFN34pUN:L1,0'8ͱX %z%S7Ԛ$Lj4 ut>Qf؈$.xG#wq롡:v:Cǎ 23#+A8ADGqy?}O'd2Jȹ:? '=j"{L8 q, ωy?[u1Yvr⻌fiG= So䥈ݐ@Pκp_?n_mv5(Lz-Z`nwӈ;;nVKЍb-XF\2o[y9wA5Wզ20|zmblrȝf b3|/_\574_?@KS\si r>뢭{FUTwsۭ7R߭ rmge6{1ubmr{$oͻz2㍩߭\,kiY.EP+sr$dTe|࿶\K[8cEir z$ԋBk{RJ9m'  ˢ#T±Eƒ +V7d=d8$V>3BϔS\C|(=`1QcEZLqi"/ǘt}2m_Yh$D2' qaGgZԀk ł\dqOJo0\1`<H0*=C_:,IEN=K U` ]@Jxi ԀJͬ12 cϦ[X{vS%Ss-5ݼ+ AE5%Y3k:]Gм[FG [ ;*vMՂOC_t)[:P@{Ӂ/mь l:^:\`r KkcK%?b 6LBУ08'oLh@Ɏm--v @;. 
xԞ8^ȍF9V>ܧ朼:J-~X~wmhG[{koלMQ^io.keD&6jVH% ˆ?9qX,n^mzS!7=[4TDsy3\^y |o{!ݶtFUۜN[^ /+wmn-[lbݺX oDw6iX.Wzd"[ܖ/WtIZ-6Pf\gP޷ %T<plY&2!%pyPfDZOLX*zU*u ',A"EY>;RRU,5DkJ;2zPQ&BQVXl6*ȶ QIza15X /eN,Hr|MPKOJR?xP,"fGUe]qAy(պ**=9i)ac˦2nLInbe4j=\ R18۶ ;Fxi r%-&U8I M,=LEYEp(˰pb*9qhxXc(nPYmR]װ[<v4zz7`jT۞IeOaiKG$bxz4v3JzYȝA!I?ӑ6,r@K,,qYqI;C=Ѱv((9Eihnl&$Y8O(PxzjW@Oo{ovL5)L' D~ ;ܸ;c1 1'uk]-6bn ?m- ya]3=7 V{~8m;˕yzq-2(Mn] g־ sʲ3u=˝ _mY]Dc-n;o0g.ۻu 7D~S\|}~s>iz].ڱ5=c)ًsWZj/VYoU|BOzU=@{p"'w2*(VUa%b\*s#!P2TT,P%jE^r3_}P.BeZnHtv(QŇ\e\/J\UD`)KWTP\QDb@D/qEвvPfqџD!/9$U u {(',%8QluZe[ǘ|x\"QSB0 smv-h6Оc(Qpk\QU K Ԧqrlk.JtW ܷeteԏ̠HIFV]]ϳzԨ>|Чޮ= h>ƤzYKnd^m=i[s-⮫ذr6r3.q.rK* , HQ3lr,G38< ԕihGKI?vFx$7MÊL!F>!q>$C#8pHL!ʈx$7&$ /G:!SGYߠ$ͩK~ xK]>=[k~+fxWxڢTOKHruX#f.scm:1-g]NiV;y*=2b^MʫF.bmmZjXWx_/^ ^g^=ҳk2i//XTe(k8;767Vy##qg=RPy]nsMۋU{ʋ 7~o ?2+ XPw'BSkNj{ sq4C+Vb ÔY>f"؆VL$x&I`PT-0ޟUbYLʡjQd )/@`bkB}?b*/ n U~pА`g# sE|pTẼ2!Ĕ'b+yiEh+s窒dBKea>?fY"3F>dk ^~\؂Z*+TuJɜUA=2g>~iIa:d HQÓ1q)hMPd:‘B0>'ifG<5G1qcYNpfz@$Gm !V(V0ɘQ'84>O^ T+Gq(pî8Ȳ؎ƺ=8i`P5`^lh-w%a2Y-#' A2F7|s>? 
o7~VP`}{c%,?e8 sAe[%(MuMùkuL؝~?\C'ڶ!γa t|}1jvJ<]BѬg˥.V4d,WRPlS{yu- mA:1t r;>_d˝Js_0|ڃ|US-ܷ/wޮ,~&M] G;y-ܑY:uF خf\[sY2vvG/7]֡gk:rզubRv՝WQ*5_npN{m|dӗ-U'Ӹ-W2vm3DZ]6&-7\n٭;*0 `{+OfjӜ<|8O9@]ng҈M#e._ͣx*5ڹ]Rq\X[ZT5pw] xqLA\4WCUe]zU>2Cv4f<`Mrs64,h cu:pPUowFC]>-;cQ>晿0Sj8zWJ{H#oxҷmľa:$"+dek$(iZ$0`LlU?Ñ9&T$Qtw:ڄZb)ɲo1Kc2衛7QyDS) q>'4zwbXS?QdÅ~xs00NJVYw8$vYBA>,'L#kdc' Nfy"#;#$sw3\y4alxՔ6{E Խ|.xgԽ3,]Oꙻ<}k tRv~6K{c`}csgk7_[qhmv]K׉{w׹6fD›nK?bnbnpc=rS~Oy&ENk%6;`'][f_3_UYZz)-sί捗a[߳6 yQ컬6jؼʸw;}x{_,myߍqZ0rONj{̲Qi*%>G5w} R4(pMm4\T?PhJY@:¶A!;aQ%۷MI`p{Fo3g^0?^6Tk9e[_-n}DCIGv >4={s Z鹙ul딾`N~Mknnf7FێJkvz_)Fg/nIgWyׯ^k*ɛ'+"{J]rϤYk')7躨]MUr嫹ƫ۬YeFnz>[ΙNgvʓw M^2Œ;%Vy1*1/2 qrxU/bJx#ZXG+P7%-1S.Vh 2sP|*me/I5F?f8$wNYW<+)|$ ^BV+0"D?LIb* cV8pDB+ )WaY!ǠĈX sŊZڜ>*!+\ƈ!ЄՔ!zPYC#y@/Et(*H8CgPbGp.pƖazѰ ١/ uiWWttnPi@ɶ .|}hy8j4%QK#-ynbSc]`d1sۏ +4g΃ F q|Q3?M0tI b252󓴧gxj.-;}{MRΦ?M[5ޭ/].=QMb'(~y׵MM.]]Zs:LV5m[Ge鳥u9=DJ#3Q׌^<[:0W7zurydn_,w"q{.Ty* *o/0ݷwS0zR/v 7y`%7۴s]QEI"Tx_ &;EwF+ѣnjaτSB(R\*K"W[H*5,[+JekW+UxʬPy r!+dؒQeY"[/eXl*JLJ|%A,0\/93c -H=fT-y\ʠEgQlprY @1[K_d7LC˰U4*B1tTyQdR#uw,WP{)eTn۵-mGSfu8E}Oc+{)᪔o<#*mz}]`G*uc;˞C< @3gtߥFaUFǠ93u^4ھ=S7-oԐqՁFytNi2cF鮰TiR#w60fX](]5$$ŸِLԷ! 
IfAf~v`='V:$lsDJcdscl Fh ^?2Q(@8!J6'%gZ6\`@k̀Нġ3{`b{c $6h@#4-Te&1LYCd='zn~`'y <7ač`@ޘ;;ȍG8[K6`'_a?gc s 7FldWjb'Xekwrͪ=37T5ˮ.{;uο], U6/2k7{u2 \zbo/6_]Qj\]XKm/v3(uҡo}IQ7XR߭hMT5N{[ +dV5]on6Hmu[JXT6FhpSa' b%d!;bpg^_YeƘwִ{ն%n;ADF", ybyta`]cr>0Mbd<Y4>2q}Hp IF&|32Ht#&:yw̏datboNsn$ rǑK)M&L%v3rF7?[?{awb-7H=(P?߭` \o 3ҍ6S`揁Knn Z# k`bW7 mEj=&L śMuԂ(q2]5]nZܸ_-]YQ:*kc^}>eJlUs+7_^grzul\k hԬ\W=[zj%]4h?BcZ2@8Swe5/ërX冀U Y#̞gRU_?vBgWsQ6#:Ao<{d3ֱQ՞/;ne®{3|n|w~,Ko+{Wx}:}NVO޹MټmXFp}zuv}q^%ozȍ$[H3t'7 \QԹ,WM[J_g1B|RxOу0uV6ZIeZ"+5ْg ?kHN0;Db7 Tes^*x'B|<3l B)WN誕fov՟<t͋2#rǟuQ*|kf=WjOhX~±ɶ +I7݋5VO~=(~\T+ԾI³"I:ekJȈsG߸b; "=Lv;ck4 lN]5뚺QP@ ϤR0rhZg*cZc}kFA[zT{Fٖ55Uj hdj;y/yÁ݁P,[WXmU3#._Ǥ"@[[1Φ,w`KosAǎ1~灆{:etfon.'$`H&qdg$JNI_Gcbx$ )SBIK?Iz8i0 iS 2(^ghy@.ɔďFEnipalM=ŋӇY2;؝X书+opDjB Oٱ5" CϚ>.s~J&c Bd2a*"Cd6 y>  ţs3I|6K0m0SZ{G_‹MY9x/?r~OyPtP 8mk/M]f] L^kdžm{sd^Fn3`ovo7o|#mѺZ ۗ3Xkܴ[},lr1:esu溏U+OK{Zۂ aA$$>d*_0)ŷK]lU>Sn}[y#;/Md]mV ]x"?*Jw3믍!גɻ}}7%kT>|{iUA{y!u/1ϗoݸ`>#4rgvHg+If7M9'[@ U0iGmжPTw#]vcnK|owkWcW^,߹g=w3w9Sw?^v#ccU^l>\~. &7 *Cv(qj S{*/m{JTX3,T-A"%GC3O@ h*3 ~N-x֯{Rmի?is[[Pߏi.|BBwF|K[wU?ޝ޻v Ƈ6 ~۽y kLj |n]Uvtp@iP+uO.ðu<^j WitZ%$P+)^2]^fI7Sz,ׂ[bɗk`Z0] v|1SLoA??e1HȱNVGʬh^}4ߪ0-ꐑjK0U*N p'rSzDse}-Q5@1j_UfK- 5tPOλB/jtFaA8oZC,$ZB50̍_F{E`M b,%LXcam߾dȝMdk@(U@ k*1vlR J _@ِ9$dợz,"zT!$GθWԉI"p S7/IF/=9U40-~ Ǫ1c 5ӒU nţ{U;Ԕ[,6َ=p2ǪMp? D~nn7ζ/wCZqn֤LqGhg$S =/Kq{"?tY@%-/c5 I zƈn#sE+Ld E.Fߐ(Ի|\mRlvZ OhJd@FS z*1{ݠPi+Mb~[nRߟSrMlK\&4zuiHϘ#Zl+(QaRܱz@%b=7$MVvGb5zk]6,p6n›<ۑOQlQ|o:1 e{A6%?t :C4J?ĻPIfb`X6>]SLjMplY)I@Gv{aAӴ,]WԢpw+Pç{h4ᘈ?YFBX=G&q.eC CbWu泋0e@ K=;Ù]1#r5ݜ /9 tFLEϼ|^yy4H]xv1<{Wx:Ѽcol@\Idʥq%Y=YjSd}$S&@b(S"8CxFCD+l࿌gsʍfWY9.C-=γ 0{LU_jm4<0ޕ ;BCe_i,kz/Ug?UEƾY}9HAwܑQ[285^@3Ec#c1'c6 #?odfUV:>R}﹙o[9'M4P} B{ضv3CUh{q9.vvQٯ~ͶgFa?!__K{1's~}oޏ7vބns5~n活N.7eѸh˓E,t77U}TitUs~rݼNF.sqzzq5oͻ0p7.[݋7 E9=8i_^6NN.eI6JU˓FM1I2u9~_U~[?G-ݹ-ŹܻqtGNϽèwVtY-S{h+12X_oz[n>ݷYܿӻ;[ykkw5{TڻᎫ6률mwk$qPėqtTj lklbP%,Ӑ8FBeVdPU6z\y{"b+HlXD:/qkҳ*H%ZI- 23,*b~5_fH$/2,sruYd '/'T0բ Q(EZQDUƳ1v&IFWDN8'$? 
4c8L 1I%ͧ$㉞/SؘjKFј,v>ԳdQ`x'hj]q4I0"bi6M4ÝxݍNO(!._.w_{oK7_8~Dz??_ηFw`XIܿYѿ+,o-}?_PzQ:Y8GQ7h;;]ߟ6m\#ts =7oΟ.mEif [l)o4\)1/yJ8m*'o.oFMTul[vTN[ˍf}3gz:iezAyuz0za nIoh첟~Cխo5[p]\v|(`Ko]~0.Y .^oրܞP׭/ +ٽ`PczSzBMQnmXaOXJ )坱\F<_aX@"WBqbADEVlE@TY5=G9dXgbByasտQ?/p➱)0I ׶;TwC`Ss1y]\A<-[[Ajlv(-چ^`w,eo?'FiL݅ o&y5x𜌴 ;YU!&&ɒjǪ?IJ@4#r>'.@%F<2q42u-<+ I”JS'XB#33Q"gb8 4l6|&d܏Ep6ŋtL=2rY8w=~9fw,/#? p26|-VF$t1N~@OPx4 %S7-h' Acїa}EG O}"[M ۮ2o̾coYvæ￿1 3wvc݆u]>gc6 fîWkvmd;nK)ٞNMMGi/<eGA< "/;#~$z4^S⫶YI@B# hg#;$I'h۩g'L#31Aza&Kk&~:'r7Q4 dlVދ|} _+5,Ԝ;{~`gl;N5Y@b#wv3m2n ,'J3N8J":h>k?Ezr(<}eo)#$lx>cXY`:s'>'RdrU1:P,ң(;Eh8ISsh0#l%$ xYL/'!#nFHHĉu09__⅃e@`ځC S7%p=$(S)pt2[G}lOo&ȂL$n4n`<a@죑feh:ӈxy~uYD(͜ N,'d;pEaR+G8 xec`5 |5!!8'I`$]oAt?z١翿'PMg3=3x19sWctW$%&{;cmdgp~7n:B쮱й}cnm7'W|4-E.KnkKu*_NZYv^6vnz!u۽QgW'0W˻wt{>h- ݵN9[ ߼6]F=wʍz[4nN)vcT{_ߩٷ]l;vU ̔Z mo 'X6x^BYJ̋ +M:€9X:JHT$!\ Hy&hb'-5ʈyVj`q-H-?R[yt\-pR-U㧢xv"KP eBUFEpJU$Q."yVr`<_\k}I`JMԔ* L Oez/".eQD" 6X`B$-y7>o2Wy![X l P|Bz!Z!8Pըצن>AUmct 2 հL@K˶l[5YJPmgS,-T/5 |ڧ/e &0)HW5b(5ֺ= [kRkoXMU͜]aQ SǀX7CjƶT*va͍pr*s3M:OتS5sM) Y4KaxN0H5_O"@NHefn2F wQz3|ɒ *F7 'lfx>NBaLIF ӈ iIpx:Zβx'd4] 2 B{dQJ&g!vy05r!L Gad, pXN6CMcwDiGp B"oόW͖$Ml=D VUE(AMeǤK]J/JعQGdKA2CNtğK[pk6RIxFiQ ot`Wr׷@ھOrV Pz'iV=Ѹr׍-cn_u=)w.]wa2ޏHyU;o%z큽8ƾUA Rw"`F7]a;NK_~ ݺ5lM^f|l[vm]|6]ߵH#<Xk|Ghaޓ+O& ;@b SJ,WaJLTDkl%jpR)xؖȔB uQJ>~xpLvR@v*3RG^S;xQx597Ecv̗9TB\KQR:ՊzGUk/_Hc~4Os, Z'FX-,R`YR]r;^T{|Re2xWDF[ stL?`Pob6)Rv*(4;KB)_3Q1ry&aS[ai6\+.3JVքAU[ӱR tյ%olkpen~thbTlte}7_ᘊ+NOӬst ۸Ko+*`PϵJ3 Lc$:Hԛ C7H#bNn1$ 1qfIGqO8jd"uFz*͂xd4!M4LLCI\o'w-Ȟ>ägM }-HFV?Jd2tinsu~ xwGPwT0@{N8|G+1wFluU7~w]'knԉoƥoQ^.Tp=8bHPUqwq8}sr)}W/e#7λsjm*ʚs\U moNKen"Hl/[_9~0С<伶UmTok뗶`]-ncf5oŜx00B>s]bFwqrvuS̰wY{‹wVru +;B&)rs&2LI&cd)I!cJ:*آ\-)zTd9YCOˈM:jSyHeA<*%,UqSAe_>_ۏeXadtPb_rzr0/Ydx?"T|Ch=b { Vǧ%W4Keަa齞 \p!WF`=] 4ȃc.]\7N@J`awbGcT;zX*=7Hp,.0TFژ O0 qꩁ-|?rMV'1_z83s lOĵƁVydfcℳY0B$'Kp4BZI;Q f$qv:rD;_xO TbNʈKA&v8܎$QHvE:Lh~Jb;5*-gyce ߎ6|mcw+aj!{W<޷ݚj$ ؇>~Y0+2ldoL[Lm6!MP75 '[&tkސ6m*ڜjZ7'W @&0uXmy ŀW|~}j^f隉@tħvXQqֽ v+bbY1|+3:þKeW=7]}MyK'ULm@%XjwHgXBAg4o7q]W츶sW+ 
d7Gwa>zYwon7^C612`r7xru[wz޸5/M%X/׌=\4y8k C}`ЭۈF)|UU}ivUwצw}ѝwSw_13}ZEb˧A. Y|;O۶_"{-!*@\+KVyrPaK  eh|ɾdQ N\e8:[kO|EhTkV-qe>/NEI~ 逯HK I_.evt0zRIdZbAxL:ข\n _c gxP UQl1r3B\Sq(.%ģ/W+G@8dyTBibjyI]ʨ?jUGtXx FqmCvTJGQs: ]9My\<-ˎMʓu1P 7܄'ug^ lmj԰nۚ`XD bj{6{ LM܇q 4&OE#?/$G@<_xi(" rwq<$vd5o͇YF$&ј #YhLBOq4JPLlBU-}ՃlGS@`E 'ALȈə:M CK'8$YXcG (hSt>$];8 1,⧚$o%AĊ'YH!gf9D?~q1 q{X|x???yGq<g8 6:MN.?ocn# oF^d!~=6YOwkm0%q}N[$X[ڦ|u~q9ok\_.㔯>HE]AW-{2|oG0pH5-{[vxޕs .I[x M7/o!{*no{+z0n!__8=(1MOS+bI( _\#;~+reD٬x1ŦW#3U`?ϺfB%7 Fd AZ-DCa {ˈŧl z\egX.WyVbCLm )ϿQ.JB$1Pj OwNdx$ܱXa,s 1LdƋǧS 1\( LwPj\QR<99Ι[隶k:vVt:&ڐCA],UZ<>}ݴ2Cؠ&th kM ih74lxA:P6uCCkHV0[:Mmv)::0ءW$A_roއ}[;;W8\zwwqPt& mL?w[&^S'V3 +ߌ4WbBj2NcmtHBlE3b/D]a, lS5QǦ)şs~p~Qv1s7yշR;%g%7t: b;LmǷ2 #hfbQ0&MG.( gßцkEKcChJ"vSmuc*E'쭘qmw&\~[kﻫel&vn{w'vn@ n,{}3Ϻ+Ko tau捯bBe}!5nqOpDIm{~߂4t٘uBbݏCf.ANM Q=j_A$ b(Xii* }V=jsOD(ȡ6#x͒_e% 2'[*|`xf$WZXT@ *JV/Q=b]y\^R/s?K3$ґ_Ep /TQE,K,kBBIR?dPzZEB\q'K]6[cgʋnQvP$D"ZgXXI_LZqE:+M8wJV$HS3k\n;|h4`¡uu]u,ʵ< X. [Q- tkU[5,jO W{*ºb]Wtkk]Ղ/4*kgY]m:PʇbL=3c'P=e6vMkJ60O I>7eRP u1:: aL6ʻaG q3g,#KB&Ěٚ;OPؒXk<\BQө]S8&T> |EJpj4a+|Z`*]3 2Z4\- d,a?S xXŃ i ;GSkGyԵkw}}H-ցizbL}~צy/ch"ܮWJSu,+4+Աitlz١cm4xӵi^ IfJ 쎮LCыZYWɐ]ʪx`F NkWacZp0al4Wa=FQ m8Q6BQھzŀĂg|X7Ҭ04R3J={Mw(1hZqQl-^?vIJF6%'ċA9QJKG}D:cKĜZ=,4($ xM&$d<8nxIyH $KblTi둱m--m ~'ƮХq' I@&̵BKߛN_̋ޙ-`r`=@`}"[݃:P-kiw;F v-;c+bACڇZ{yaʂ?]<7 D>SMvA{M-v!-[kbݷ׊JRvۧWmL^–Z˫hr]qy[7vŶnn}=nZ-}s+@ !ʗQڮtwۘm^Z=%y?kom#vaϼuyomGpǃ-Sv.{&|̓츉p^@UNdXH6:QCP~BfEs\{2Sy:,#S, CDS )ʊaR)HEeb%B O#Igx?eq*S֪#Q|\e  HB ȖK]XT$NDOjK^>$eE)JԿU, rO4qT(a1C,/rP布2 s/2+UXHu8ApP*Zz@$=^D=Xe+$5b+LݥJVSuTʌF<Р= 4%q. @)3)נXL\\V`eh+sl-7tb}}O?Cդt*C^.up#lUhU ".x9A3"6M՜k!z.KŽ@Z.6A+Tk$fz4WtPc` d#Ǎ#O>$zd;t:Y:7$$!"UÔ+I 8sǛ;Y&n2xFRI#14LM"GM}' f381mm2؜d~ Oo4t3NjN`lޔ`2 }wC% z p䦆Hw"+Fd1od1]@D1/5,~1.H{C*䠛u~Aɭ&=0߮ufd |ww>!k|/v0^s5wsunSOlFp靾;hٻZmn>.C>]bmDK5XRv8r`-5h|mrWfp6ߞZ+nje?L8Yvv ApO}GPPT߮[q杺 Z>f`u*^i{a=+EK!v;̝7h'=-ʝoϴ"뾔jrK|z,KהQ@P}̔&55DFql)j9RU,?FcR! 
nH.r2/XY%I\,GCšX]s/p<[c8VF4\e ZA㧼P+2*.LZq -<tA\b3LPBUn"&邑NJuي$L>qT@bddJæ,fDD<rD\80uBmRm \4x]oԙ @kh:ߋ\g*o;*ˠ<-vt,&ͷ֥b`wwMj-ywV:1 \;ѣ&`2\dGƋ8y50cM !OᤎẺ1"9$ ƙGa2udd!cKx}D q6II46 1hŚ8Hsذ,QbFG4ȱS7Pds#ñ0] :FCTSG1<-JдGz@߄@Fd}9;'očD\L#`{ qat}Gf=h@ٿ:ƾaZ5fϿO8j?{ B)&37hˍv;J/o2w:rmes,Zq͖b`eTm%ri+flMu_m]\ Ou֠r*k QּX˔wQrp?k/ܼɗvUN*t){?=fxGnI;KN*qQe/bM, m#{\+%tX$ S-1-D%1 D 8Peu:%+GScK NJ4>RbU@$^r,*`%5y:խZj0̢̚46kհ8=X[Z@kYWP/(ӭ\re cs\]t*71l7?q/3 'x!t#cmnnNЭ5b96bY۾h6rE # X1B2RUhy, {mPm..-k2IvGںn}Ų?xQsQDFv a} ÝBo#h){hwg\m;GI7؁]u5jo>ݶ Cɥ~||~MTݻegm+ vnjM_sޥS΋K$5  b-9⊄Ce\IÆ(0̣"" Tz,U <`O<1'rW- (#'Yk Tf*׸(>_ah"6A,JMNXbQVX VȲWs^WPt>P4kEж-4Lf`sN~XѴsS/=45[Zj66`:'t]>YXV7d "UձmM /Uןazm0eE.xkdXt:̤ỢZFāld8E$Zğ{.!c`7SJȍ<[|1?c$t'$$Z;ZDn0N~J$MȲljb/Q8hI8" d#G:!:hd2pEqּ(siv2?{3 1qw}SC޽{5QvZܼnu&ΉXtw?Y1|-Aڞ[wm?pQǬE&t`7ۃv7^\ݬ7l넿ӫUnc,|s1_7o;H۽6iāmlFzd-ߏ+^_^68Kmw@m~Aϡo>>g7X-LdcnMwYn#w`fV쭺75v-=w͹?=JouYIՃbEl +tܳ|X`D$pT9fzhHDԔ8S-^h1ONJGr) {eXT*?xQz¤B*&-G߮r%ʬ(~̠ VM*h*:8c(H+Y6GIBIb kPzTBS5S$A,3^B+nֆ}lpNoNǡWCS4A .@:3<>ZfBuRr"l6U8OQW,E6E^P-S;0UCWjWvC Pe[}[# j}o}guU̶4i屲(?\a%j6::mW56vZ'"+0g$FQQ2!#IƄJ64dfy14 3? QN(&Y F$ w$S'HƾxO 69=s+pЃne.kd{Ө)&w͹fR]>]5w6?̉pwۋ{x*[bvMϷ$3M0X/:ee\cJ $Sڶ(=*bs Ί#ˡXcc$T3R`[m>qqṤ1}п~k~#:^KWUomǞ۽־[e&{opr*dE"@2@@ Kn^ !BAR?jWwu=wnf<ܣ>Uu?g0sXW5Čw(|@y&W!IW"Vͯ1H#f":,J9b紁E~cv!OP?Be1&V3$ޛ7A4AWdu?I}(l5]fV{p98Ek~b" ;.PMRDAq5~%AD.$l I+8Dx\ْdɛCT*o ts,e3Hًj-O  Afz XD˗p$o,A շ5bȉOO]3Hڹ4<$Z3GT6t \{dioeIuq# q4UYMF$CV@`ūOWS^pa`*G],ו!+NPDm - X#Ԟzn" P'l Y6N7x4|H;c6?)=î`2?7PҩV jka-SkVi4x+=Qd4 >/98ȪRB:HRN}2ͪirεg7Yi:ot!M^c-B+j7^.<.٭yyu}_HP$wqbpܐo'+}V~u%ϖV&,ݜ:=-^JUg7d4q17qpʚks3ӟH*?.}H.&(+:ͩV+s[T;* :D/na8dn+RTH[T d8V;(5 ;g><0 ?.5hB`݄[Zw?-by IiVd$rvK$GL!'\^CP"IyN dor4LD(vT K7 iaItL uDoa1̢/ JVBCҞKbQ|BX.tx .' ?l4>BUU+P֤_JQ&+tZN$N*,!jȉR2IxbTC]?@l%Za, /2Il" . 
_ [:R< ف:,sof8Q/D6Alڦ Pk %:FC5#@\LI䑫hlE; =E%p8g^`+wLu*F(sEb`(.ЂnءgDKvYb+>r ;)~Wu=ehiJc7xHHkY@e81jƖ&XU =r< '&6@9p5lv%z?c ADPv qHz[mu7g<{bmێ=F$a2&[ R콚]-zاvQx{_@27`Q^ԶbO ]4 OoWmՂl a \-rUp6xV_?p~+bKÅT.]n`,.2q;Xmӫ| k!`kEg8c;04Ѯz5HzIx~>tSt !_w +zyWJ 5A#QVƗC/EIbwheӍEk.&5/ijsw6KwKNe*$UX)Wƈ2ꥈPmܾYHڿD(lae\c?(v(Ycx+E^;~?x>%Cpzk!ƃ~7Α?`$}7Waj"IT|ۭ+;i>o4<Eb?98(7Q9AP:TN*( 1aN2Hua'KUc X]y5EaQU9"1M+A81O]'Dpm!'yh7v/QXxLAd!%7,*΢-C𖑡£|uQYuBjjI4NNz JʕP;[r1%̢r%Q@Mx1< .\TIDGA g~lgclˈC7L fp O$G }KJ:(La \XÎ4K}a3OP/ǝ/l}8 EQ=_O73'&ba#=0X S->┱'{&vXZgV,x o m<0͚kSAfdv)֎\[gT3l0r: WZj22\Sж@HXPC}ɂ1~o1}}ǏْxY>92liVeַi7",RW%Yζ9kx)[v׉e׵^L#k {ux*#CjʼnaeuDr`jTvۓ$:MRyS+Kk$-;ٚ )+hYMrSo[w{>q_őHN'm,=t3<\~HҒb`2r7;< Yɺz*kZ68ʣ̃u)dJ-ҊgE [u7Sb'3E{w[Yd}&YSFnOc>n}zO`GQm $I{{ӹV]bwT;r ;s# vjŻ۵Gx`;l) DJۄuwQ.kjR y 9HGA軶%I4_rolّ*!b` AX׷&әFАۆ>Т,n1X &c9 ]sbScز]#rn]QCp5P3?\ "6MKs!--Z3̈u}2sjP &=7&GG*1ei45>朏$2 -1WflKM<1g2xS~3ܛ;46 ϶q n){y|6WE5,l/gNs,&Yngf{fomkvzf_tl gp eg'"fIq+/鵗mfH2 '@Av=+T:j0NH"=9^_)|ƈ!jxn ?ewa00L/VV:l|x#gNO3xT˓\wZ45ꥺbI"Q!?r|!W. 
Dk9TţR: n..lc^hPDNnޫ;5vU*5hz'wYH}䬸ENR4ƶS<)Ey·7XmDvM[Jba*̗jM%*0V'5M{T28NJAס٢S_ƶ$aJ`D9¾h(5mNa{X>?)9$VH}D&!B_Q8 YRԹNEpHk ^@Xę=]BCI/C&jlON(*7H"VYS4NKUߤ_MUw`e@.}$qDxd6eE vxQR } YI@ f$ m H]x: ru+Nl$bu$_>\Q9Q=EVCnedD,BȲ EEUyҮfGc6m؎F/8i [Xa-੐xm9Eh@j#̱Md`YPÛǓ!lD@ D0Q`xCS G Vvca,ƶđ ^{pK82 觉~`m^nQv>;sV_Yvhe[!?wP~:,MDlڧVչڳy}15ěY'\_vnN/oYy!nNNyvF{:EGlt:Bkv:mZ}qsiqyi^]Ys^]^fWvi[iռy{/?xw~Kc&7h2%[UEGwTSߋ嶨dݖ.ow [sζ ]do5NTmN\*JZJ⇨/pJ=r[ .S\!m2v鶁;ݦ^nZ1϶}l/Tm*gx|EVъ1˵ZjjuGe˕z{L3`ѩ3t 0|W}zݤz MT)IBh9[`r[#3TkdB 3׫G$/B*?̣P, K뿝~ӯϥ/ho|qi=}}_Ss*W}vys#;4_E/AoXQoNTw6A˿S/ gStN8Il6\?f x5(^w0f/غYrU}up!^5g҅L}.r@\.9{vոi7OͶph.gҙ@-Q^Qz&.ZW;;/;܍1SZCdIaSjSa;oCm]5;.,=#|&|p;6[5uսTm>9o3TחKLנF;ܻ[-Knλ~&,[sN^xn9ݹroǼ<~DƯw=~[8YXZ4E |?WA8yv"{FVⴋSGzkA}\`y.+?j]m@]szƝF{D Y& MT,O?l>,Ub>Ge/|V+}]c_f;9O7\= lM2 [;d"=ɡRVY<-ic oPE'"%|~ %*bSŰJ_պ^eX@`THKC#|4gY@F7e24GDȖ" <ABZ # ~ 0侤= Q3b)䞨J5d2F]C]5kE, 60t f Y`"=P5C "yJiÒ a?Z;\ K.Cr?Ctd&2E 1uMA9}X}c$[1;ƦkF~`;dlCh^dj~D=TFsg~VxE<2PI]q9RjwvՇ`Ӟ=kqZlT\\ B]WY_],2ͥxh F;b ي[F봅d6B)j:mZK^[7)ZRu9rs- vFKtn۫}9'J,kn Q2P͔-2OTOXVE25ȪS6\6نȌh@YXh9{&5GW@m˗N7kߩSγ:.%ٟ:4ؗYyu'؜OlKF"6|gGЪV rah7&Mr5bZ/(YqRFvR4KDc(?<(> C2?OWBioN uxL{pԨ6QAGA=BZ9nF*]r^*ՏDO]|6b\8?xSd` U}\lҬiL$ U>M#G=|6ROU~S(҈5IQu0uk>bNQFj}M"*m"|U0 :? f؞Ktlk)E C*jבj1tʎ)n(MITMU =-(w-4EMS%?&&Kh=U EYѯؗ "YUwhZ01W[4?!-rGDn ~}/z ~ϞDqPw!.Գ|4F9Sj݋pj2ACwqQ$=3N-? 
릑zs4IF8|џxN`;#C _#cQsNȆ e_L(X#'#'ɺdz$}D& Yp4uЅBcS N0 g4Ir T 0cuěžl??jfts/?_8N?7Rn;tו[(L_%ٟ]>l]־W[WvYnN+1z}V+J5ClSoNùw8]̿~cs1ee% e>u{ K)Vb,>U.h̖7Bk1L6:Xq1v)Hק,mN.X.x{()+-|MP\k;h4֖f;X*:ѷH8kqlX}6[lz+ i]Oq_ye)>9:u\g0>nSnDMoqvtA۠İT-Y'(='H98(Ru@ (sluQ,9hbnjǑQ,VAr;` hDyrH6߾` Ŗ!~2IFd5T(?aZ *id%A Q)uDT 'H@=Opl,S?N@yt TY_b)w(xﳗcPF+=Lu}tG|^"'JS?NL&ð~-ɴS7FB}Fp6B7/PwÒ,x}E%UkH K/2`wl˦Įa鎈,3-ږh bA֝aBWPu;pF"vlC4ƴ4Ge:{[5M~6&^_{!)n"v[&dmt}]yS/zbd7áfd&fb b{2é{㉛D;&~I%0GԴAXBdL=!nGhCݎ\͂3 ə'ȑA8Dσ~ `ہ9 G 0tFcǚ'r兜L6\vr6iOmt/L?$tYV1L}謼"iC?/P (vز&p,x9 Gy'Ιмw5ܶҝ=xUo.ޒC|wM^*p<oFWۘl;2n|KeQZ?|)^a]]++V5,ykoM&^^.Z"4,ݮ]\]+oE]Iy{t]-Ok3˪%I;9/N0*ں l7+M0o5QV8,zs7qt9V/oW0_:O;z}/M޲dSCrIƟro5CO|{<UhO)WcJ^Vb C#j $0&YK $;־\* vηJ| {\9J|X`CVAa^\4LK7J厎("j`t &Ǔ>SlF}>qYDѠZh}h3e m# [ |24UbUU'e76E?HzgVU8,ц?qEL XϔME{3SeAbg 679uZ9UL6hϿCnVbqJbjsc>v]kh>K zX T[;W)ƚ3͕x̛5h.=iGh68JMX߬Բ 4:߹3hSRbNq;v:Ģ ˁΎ;QlBC7޺lgZaBD{wy[t| zovm[2 RDvt)GtE~+^,V}*'?@yB}̨ Uz=OVoQi5XHA NXiBAJ+?>0uǀ;pļHJ |?m ,IPeNDQtǏcB \qHW*=Q\f|bH%AO2s@ ^JgU  $n"V(V g *hǀQtK4MQ2朞l+-Z8PO|=:GNvjuE¼Xgh4۷lEwzӳ>`_ p)YҠk*=#f,< G ߨ Jя}3 $L+*3-'QUGUbfq=V˖q>L轩E1Љb@VM]ʸԁCcE^B/~Й`Pu`8ԛ3dHe$|.qUx1 (:q}1'I[vybpG^ xS!E}74bsg6BU`-_qS[:SGfVE?|;lߜh3n~5ǼRTBwƈT߹3WC2'\tA^`{Aó?/w|[S`;mdmny=dYJ<#NN<}tРLk0S@ǍGݽýg5S4v&[~ȼlIڪWo~]>RDi0O, ;n=teZ_@_(鋽cV4 {Xu]׺Uh.l]ou{c3mZ=, :9XN2-E0bA p^ ks]nԙ7YS5K-ѐzFWCzW@2G0L 0l;i nbbgfg@%k0$ma1}!#d\2˨3KTKPu27?J p8L }05U-0xz1eZ0Vi#0c g `4O;;s}h4kb17(=(sO)[|}廌Qp%pio5]wd;,J7^ 5ro+] [;sv _,OnOZX\t.[[xvSrXڍW2Ɩay?|uZ]T÷WP^+}6j׫uP+f o+v"r[仺PyVl= OpO[ޭliV̍,KsK5C+Ǎ>%c-ԞQ<&$A6h*e XD#lE*ǥ#?٧bYOSbj&[,|t-?zDmS{Y0,VXnF˒EBAŚc;8i][EeR`F;]`M,z؊6{/+NQq́t[IZYDh {vu8m=,WLiW@p-au)˾ nS1da60&ֺcX.c>t-@54y o g[$bGSWeAU4rOʲA6,9:X\L|X x)D6"O'žk)Wᅫg~ б3T׋Bߚ$LP'$/F ,g8%#T3éG5ܛ# Q0#@zD#8y/4#MmτCu8 oPJgMY7|'cTu22%m XZqX(6UQ]sЀЅcn#DN /?p}u o ;%b63\%ֻW ,Y@ n3 g~Zr>n&(8H㛾dr_ssw3Aj\\Լ8fy4ވ[im㦱֦ļ؛Ŧ6<\/KnQ8] 0bŢy9ԙ]ѬF[A=~!fYg R&Yqϔ <m`+Ǩ~X( )*2a1ZGgDP,8Q)490hrHw RI$Y6=RݫJ/~ǵs]J8ТB #¬_l}[]S5]LiK! BHv)lKkѝմkDKlt YĔfOMӱjU%,ç fl뒌_UGnitRVESA$b6\d]X2YűoZ8. 
K+Kh8 9d˧rOÑ[:[b}UP3E a*՞'Ϡ ߟcM5> :jcFá?sZز7C;rcs95 CS}kxv?1a|=73wЄ:z,g1G>=sj]}ϸ~`@dsZM~czÆ+QD77Hj١[ƵEKU;]]э'L;R76vp,WؿXaGfl<ֺC&m|'woztj!UF!O0~7t72d偈{˒%nm۾?@J /N/sHzuqbq|yyVלmH6|!ǩ Fb}#~"B !*-TK,ˑ3t*Ej0<1kRE=8/@pO+8'݀I$vR'B Oz`I^,?#iJ)YG HIh!j*4K?$ S`)td+&-@ ":O21E+wA{E;Љ|1/CW8y+P$QwoU}dƲ{}6 d G|`bwqb+c}~3qsE7Vb/ ^H xuTQ8M@@,6_Xfj"D}UDm}70 }EQ$uyhLc%lI-mvqHdF~gsx^ 'K(x.2_^HP'ݾXg*vO7q4vHW%`C8`y&:j{CmpZd$*tyl*Dvb94=5O|6QYXE?GԋEqIP5þNF0/&2^hZ^<9&fvgNuXAF $Bz>}san#od 7>y* MEY׹uþU4%c=_,$Q9@@/s Y3-T bIbQ&KD($x0MNTCW*{,[3GTCOHlhP$S yL1O?2Ut.?i?A&KdǔdP P;~]mU)̀)C7IȲT <$GepE´N _NX}ʂcΦSݣ!d[' &^geM0lmK(店i\ lqb_⫗IvlOax/醎=ٖe'*Uh5~ 8+H#.2DG(oK)+ kK*nfQֶc: ϧ"NnRgDɆewmi .EOp4&Ql s`8[<^H]M tXl ьtjP{ ߟꚛ9 YNpLN8zA02wA;T8 tN&IxlŮm.Lkꇚ@ILa9C΢ "}Yԇ;7އ)dZL4]^OoyAXЈ|ESx1v@o!>GX|{显R==C?pa@|4"݋4$#ϱ -/%1Љe2򟔽wgq7"'GwZXFw}rve o1,.vFŋwrAlcƾ;)& fY>q~|F?k_7ڷW7}wqXq7dA%|%ԺuL ӧ(jqIԍ$˨F8]+6o.;ˤisѾ~iӢ0K;F<=ӧlCGӛx{Vz`MUe[P|]"]Few6߼VwQ1(/#sk3ڛgp'| m\^| ȇv-'CoC5veD}\Cot"f2햻 쌥ep^ `ȗQ`A Uܶ~ːB L%eh)gN)D׃Gl-)K2^T 2 5r?9!y)Cuc2{Qh)2mgy8簾29p ~Q3E૦ h"#' : 55uO4vgJ]膶+a Z89gq< šڞꇾx E,:@3G.NgX-,"r4˳Q b/ 8'.&mLydFJFJ4O0#UOa)&%LF1'5q& q"pQlj^D~2q,G/o:' `s۽[e@GG=å &;+K_1_psџ}J]~n-Ng̚7!>ͅ6.yCѹբ7k6ncᄊzfgil)Iml~Lpt+`\{ȼ3˳r4?\\,,E+Ո(Q=ݟ.{HqT?9[i3d}{%r gH|w\c'XFib<ӗU.OHV^K.[Dʢ6E0D1Xs:h(Գ RtIid n!Z-(v&('6Qޣc5>搪P lGT4 ŗRoEYU) jWeJ5T$}U"FJ5Tz (SM~IMX?hԈ#-t2m?4U'IV3 X˷4 (qS pReJ*],ג]0 ^CJo; ,چ UUʊ(E&qeyGu;A$ê`7{[GuYFxܗM lX[޳ѽ@EvYX>Pg+¤ٕFV-3 yfOSZ"^ɲ2z4C7[SJeKAi(&G lq`ljq+cz) } [%S_2L#H`칡1֠Aӳ'v\P,tHj(CcBߚudjV9_Lpa1= GM"+.,߁  ፇa#ȓ"qQƎ TO4񊟮ÑFHQ!4\sˎ,ן -h&9+Do1L\wa/A;ejdn;0N]E(7 c8NLi_g,=/ao{'gSJ<0?X2=r;LPw cqZ.勃|}Uft!y~ku@kߌ:J 7#64L7VZ&]*T}q'rB^diy^8d:GŃhr ,R&E>#XE4NmdhGO7ihW)d$k]ՙd6qN[ kf?eeAvA$ZJ ;{7~ [n=E5#űvipS&xI4tɲTYԻFMQ%ɒt25Ra5ȒpfSev&P*eDl)M,<@z=YUEյ[ N@:Щ_i8 Y۴Yu=sC>k<B͞0t'%Оz-GqE!xz`ءhb&fNPَ֟s= zvIF, >Ni&O=I:ChMBE $p%Y&N2r=187k-Of%83пPv!bA {4q꽗 =ƉD(#,Pc/ ^PN@(V~9C8qڦY0DLmj;4Q g[G=m7Ϥ({ogͤk=j毑l9?ArC/IE{r3"p8)ߪvK `ܚ.^ $I2>dK<< Wb@Ǻkk5ʓ3G ^ 1g1+C6T-AܣSbV%$1u\,U͡7ɹ'A< =R3Owsa܋m[B"&YʮE')jeeО'۾_̲:Y6t=Udnv`gM}J,汅Z\EޮK 
M㴎V$j%K͢/fF7IvSVqyng$<ﮮ^9*KŁ9vWb=- u r[U՛5f7WM{6kuV-iJj4y\\\_4@#ܴ:k)BCX\4w\z},#Mގ~qP%ޕ5hy]ż# @9+ t?{yo*~m42n7}xF+D(NSOjVf444S@b)O#W,3ngTgJ*'DZ>FMt`~>":oyFu(=a $xO IlKH2EvZyTjfE(E9;>!*8,.hf&XITb 5!G<џ"S*$W}a*Y%JwGϞ,T _)e"A1c bMp ΁dK,+(Ȇ$ٖbغ%XI7{8ޙHduܷ=r26%Т9'!\AHV ās! X$RK|<+##{nmꮌcg_<q][Ǭn1́cy-(CkCbY2`lLQ)p'u : sl &54B&.\C2q\ab52t?ܽA)Mn[dȱFcM&k Mw-[ljP Nkla:Ѕ834Xh0`84Fpc8h#W:^ 8w+cW@Gx6'5ܠpӖx.^4U riYc Fpu0ޝ50l_O~. S1WgeyT$'re w:=,euP܍V;1o"cMLpbsoX3w#1$M9U&S{>Øz:wV;xlqgP,Zh"\e,,(qϋ|0f' g1Z|y #1X2OBYI`NxE24y;ɋp~#8\?hn\,fg84sΧQFS(LҜlb8,%xF8^虛)󴥷4, !Pp@s~f>؞rN2z?YݱEKz{ t%;"u"pVp:' ->#|4͌aB"H,/ ^R?M,j,AՋқvS3fv< cϏx2s2gI|Iؔ$e^o 0cIetl1nI44lFG8UTh^#˂!"IϬffLpysx͝4 4lyQBX^0NV"&>M<`ASŁ%[S%қw Wa mL~sؾ$j .Mc 7r$Y}ͥ=w{}z{,?xVt,O~s/(L7>ɿU9o?Bnpo[7ɪqL.[VE/Urp%FK(fiWEGG7jbL;gM*dmݔ}T|u{=﮽GQFq߸7vB;;}OwXnx׻f~2w'mV__7&'pk\:U =/kÍ=)`T*}0{?JOɨ\d0U oD?:5USWZcQy*9=ik4V HƼtƕ[R?nJ 1褯H ]Qc< 浮PҬהs:k5 jZքnԔ)ԻgFY艖>,)`