FreeFem-sources-4.9/000775 000000 000000 00000000000 14037356732 014352 5ustar00rootroot000000 000000 FreeFem-sources-4.9/.clang-format000664 000000 000000 00000005740 14037356732 016733 0ustar00rootroot000000 000000 --- Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: true AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false AfterExternBlock: false BeforeCatch: false BeforeElse: false IndentBraces: false SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach BreakBeforeInheritanceComma: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 200 CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 2 ContinuationIndentWidth: 2 Cpp11BracedListStyle: true DerivePointerAlignment: true DisableFormat: false ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH IncludeBlocks: Preserve IncludeCategories: - Regex: '^' Priority: 2 - Regex: 
'^<.*\.h>' Priority: 1 - Regex: '^<.*' Priority: 2 - Regex: '.*' Priority: 3 IncludeIsMainRegex: '([-_](test|unittest))?$' IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: All PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Right ReflowComments: true SortIncludes: false SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: true SpacesBeforeTrailingComments: 4 SpacesInAngles: true SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Auto TabWidth: 8 UseTab: Never FreeFem-sources-4.9/.gitignore000664 000000 000000 00000021054 14037356732 016344 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . 
# ############################################################################ *# *ListOfUnAllocPtr.bin *Makefile.in *TAGS *.deps *.dll *.gdb_history *.libs *autosave.edp *.DS_Store *.a *.bak *.dll *.done *.dylib *.exe *.o *.orig *.rej *.so *~ *.dirstamp *freefem++.pref WHERE* CheckAll-out examples/*/*.b examples/*/*.dat examples/*/*.dt examples/*/*.edp-out examples/*/*.eps examples/*/*.faces examples/*/*.gbb examples/*/*.gmsh examples/*/*.hb examples/*/*.matrix examples/*/all.edp examples/*/*.mesh examples/*/*.msh # build with gmsh tool .. !examples/plugin/cube.msh examples/*/*.points examples/*/*.ps examples/*/*.trs examples/tutorial/theTables FreeFem++v.*_linux-.*_libc-.* Makefile-for-Checkam *Makefile *Output *WindowsPackage.iss *.htaccess *aclocal.m4 *autom4te.cache *build-arch-stamp *build-indep-stamp *c-.* *config-version.h *config.h *config.h.in *config.status *config_LIB_INFO *configure *configure-stamp *configure.lineno *debian/.*.substvars debian/files debian/freefem++ debian/freefem++-cs debian/freefem++-doc debian/freefem++-doc.postinst.debhelper debian/freefem++-doc.prerm.debhelper debian/freefem++-glx debian/freefem++-mpich debian/freefem++-nw debian/freefem++-x11 debian/tmp AppDir/ 3rdparty/arpack/ARPACK/ 3rdparty/arpack/arpack96.tar.gz 3rdparty/arpack/arpack++ 3rdparty/arpack/arpack++.tar.gz 3rdparty/arpack/patch.tar.gz 3rdparty/bin/ 3rdparty/blacs/BLACS/ 3rdparty/blacs/Bmake.inc 3rdparty/blas/ATLAS 3rdparty/blas/BLAS/ 3rdparty/blas/CBLAS/ 3rdparty/blas/OpenBLAS/ 3rdparty/blas/WinNT_PPRO256 3rdparty/blas/atlas.*.tar.bz2 3rdparty/blas/atlas321_WinNT_PPRO.zip 3rdparty/blas/blas.tgz 3rdparty/blas/blas_xerbla.f 3rdparty/blas/cblas.tgz 3rdparty/fftw/FAIRE 3rdparty/fftw/FAIT 3rdparty/fftw/cxxflags 3rdparty/fftw/fftw- 3rdparty/gmm/FAIRE 3rdparty/gmm/FAIT 3rdparty/gmm/cxxflags 3rdparty/gmm/gmm-.*/ 3rdparty/include/ 3rdparty/ipopt/Ipopt-3.10.2/ 3rdparty/ipopt/Makefile.inc 3rdparty/lib/ 3rdparty/lib/libipopt.la 3rdparty/lib/pkgconfig/ipopt.pc 
3rdparty/metis/FAIRE 3rdparty/metis/metis-* 3rdparty/mmg3d/ff-flags 3rdparty/mmg3d/mmg3d4/ 3rdparty/mmg3d/tag-tar-158547 3rdparty/mshmet/ff-flags 3rdparty/mshmet/mshmet.2012.04.25/ 3rdparty/mumps-seq/FAIRE-4.10.0 3rdparty/mumps-seq/Makefile.inc 3rdparty/mumps/FAIRE-4.10.0 3rdparty/mumps-seq/MUMPS_* 3rdparty/mumps/MUMPS_* 3rdparty/mumps/Makefile.inc 3rdparty/nlopt/Make.defs 3rdparty/nlopt/nlopt-2.2.4/ 3rdparty/parmetis/FAIRE 3rdparty/parmetis/parmetis-* 3rdparty/pkg/ 3rdparty/pkg/blacstester.tgz 3rdparty/pkg/freeyams.2010.02.22.tgz 3rdparty/pkg/mpiblacs-patch03.tgz 3rdparty/pkg/mpiblacs.tgz 3rdparty/pkg/scalapack-1.8.0.tgz 3rdparty/scalapack/FAIRE 3rdparty/scalapack/SLmake.inc 3rdparty/scalapack/scalapack- 3rdparty/scotch/FAIRE 3rdparty/scotch/Makefile.inc 3rdparty/scotch/scotch_.*/ 3rdparty/share/coin/doc/Ipopt/AUTHORS 3rdparty/share/coin/doc/Ipopt/LICENSE 3rdparty/share/coin/doc/Ipopt/README 3rdparty/share/coin/doc/Ipopt/ipopt_addlibs_c.txt 3rdparty/share/coin/doc/Ipopt/ipopt_addlibs_cpp.txt 3rdparty/share/coin/doc/Ipopt/ipopt_addlibs_f.txt 3rdparty/share/info/dir 3rdparty/share/info/fftw3.info 3rdparty/share/info/fftw3.info-1 3rdparty/share/info/fftw3.info-2 3rdparty/share/man/man1/fftw-wisdom-to-conf.1 3rdparty/share/man/man1/fftw-wisdom.1 3rdparty/share/man/man3/nlopt.3 3rdparty/superlu/FAIRE 3rdparty/superlu/FAIT 3rdparty/superlu/SuperLU_ 3rdparty/superlu/ff-FLAGS 3rdparty/tag-compile-pkg 3rdparty/tetgen/FAIRE 3rdparty/tetgen/cxxflags 3rdparty/tetgen/tetgen1.4.3/ 3rdparty/umfpack/Make.include 3rdparty/umfpack/SuiteSparse/ 3rdparty/umfpack/SuiteSparse_config.mk 3rdparty/umfpack/UFconfig.mk 3rdparty/umfpack/UMFPACKv4.. 
3rdparty/umfpack/UMFPACKv4...tar.gz 3rdparty/yams/FAIRE 3rdparty/yams/ff-flags 3rdparty/yams/freeyams.*/ 3rdparty/ff-petsc/petsc-*/ FreeFEM-documentation.pdf bin/script/PostInstall.sh *ex.*.eps *ex.*.out *ex.*.ps *ex.*.so *ex.*.trs *.log *.log.err *.vtu *.vtk *.h5 *.BB *.xmf *.pvd examples/3d/A.txt examples/3d/B.txt examples/3d/dd.bb examples/3d/dd.meshb examples/3d/sphere.points examples/bamg/square/*.am_fmt examples/bamg/square/*.bb examples/bamg/square/*.mesh examples/bamg/test/DATA_bamg examples/bamg/test/*.am_fmt examples/bamg/test/*.BB examples/bamg/test/*.bb examples/bamg/test/*.mesh examples/bamg/test/*.mtr examples/chapt3/J.txt examples/chapt3/graph.txt examples/eigen/machinefile examples/plugin/ThFF.txt examples/plugin/ThFF_Metric.txt examples/plugin/TriQA.txt examples/plugin/TriQA_Metric.txt examples/plugin/ff-c++ examples/plugin/ff-get-dep examples/plugin/ff-pkg-download examples/plugin/include examples/plugin/include.done examples/plugin/ipopt.out examples/plugin/machinefile examples/plugin/pippo.data examples/plugin/pippo.dx examples/plugin/tetgenholeregion.edp examples/mpi/cube.idp examples/mpi/machinefile examples/misc/gmon.out examples/misc/speedtest.out examples/tutorial/.*.ps examples/tutorial/A.matrix examples/tutorial/f.txt examples/tutorial/lestables examples/tutorial/machinefile examples/tutorial/makeref.edp examples/tutorial/mm.points examples/tutorial/plot.gp examples/tutorial/toto..* examples/tutorial/u.txt examples/misc/.*.BB examples/misc/.*.points examples/misc/err.dat examples/misc/machinefile examples/misc/toto.txt *freefem++-...*.zip *freefem++-....-. *freefem++-....-.. 
html/ plugin/seq/ff-c++ plugin/seq/ff-get-dep plugin/seq/ff-pkg-download plugin/seq/ffmaster plugin/seq/include/ plugin/seq/load.link *speedtest.out src/FreeFem++ src/FreeFem++-g src/FreeFem++-glx src/FreeFem++-glx-g src/FreeFem++-mpi src/FreeFem++-mpi-g src/FreeFem++-nw src/FreeFem++-nw-g src/FreeFem++-x11 src/FreeFem++-x11-g src/MakeDepend src/agl/FreeFem++-agl src/agl/FreeFem++-agl-g src/bamg/bamg src/bamg/cvmsh2 src/bamg/drawbdmesh src/bin-win32/ff-c++ src/fflib/strversionnumber.cpp src/glx/FreeFem++-glx src/glx/FreeFem++-glx-g src/ide/FreeFem++-client src/ide/FreeFem++-cs src/ide/FreeFem++-ide src/ide/FreeFem++-ide-g src/ide/FreeFem++-server src/ide/Thumbs.db src/ide/testhighlight src/lglib/lg.output src/medit/compil.date src/medit/ffmedit src/medit/pinking.no-optffmedit src/mpi/FreeFem++-mpi src/mpi/ff-mpirun src/nw/FreeFem++ src/nw/FreeFem++-nw src/nw/ffglut src/std/FreeFem++ src/std/FreeFem++-g src/versionnumber.hpp src/x11/FreeFem++-x11 src/x11/FreeFem++-x11-g src/mpi/ffapi.cpp src/mpi/lg.tab.cpp src/mpi/mymain.cpp src/mpi/sansrgraph.cpp *stamp-h *stamp-h1 *test-driver examples/eigen/modes.txt examples/plugin/ffmaster examples/plugin/ffmaster.dSYM/ examples/plugin/toto.dt examples/tutorial/A.txt examples/tutorial/toto.Th examples/tutorial/toto.am_fmt examples/tutorial/toto.dbg examples/tutorial/u2.txt examples/examples/J.txt examples/examples/graph.txt examples/plugin/pipe.png *.ppm examples/plugin/testsavemedit.solb examples/plugin/testsavemedit.meshb examples/mpi/dd/ ff/ff.xcodeproj/project.xcworkspace/ ff/ff.xcodeproj/xcuserdata/ config.param config.path 3rdparty/ff-petsc/Make-petsc-download.mk 3rdparty/ff-petsc/Makefile.inc 3rdparty/ff-petsc/do-sudo 3rdparty/fftw/fftw-3.3.2/ 3rdparty/gmm/gmm-4.2/ 3rdparty/ipopt/Ipopt-3.12.4/ 3rdparty/scalapack/scalapack-2.0.2/ 3rdparty/scotch/scotch_6.0.4/ 3rdparty/superlu/SuperLU_5.2.1/ 3rdparty/tag-install-other 3rdparty/tetgen/tetgen1.5.1-beta1/ freefem++doc.pdf FreeFem++-CoCoa FreeFem++.app.tgz FreeFem++.app/ 
freefem++-*.tar.gz freefem++-*.zip FreeFem++.pmdoc/ FreeFem++.scrpt-txt ff ## file create by autoreconf -i INSTALL compile config.guess config.sub depcomp install-sh missing mkinstalldirs test-driver ar-lib mdate-sh ylwrap py-compile texinfo.tex # Local Variables: # mode:conf # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/.gitmodules000664 000000 000000 00000000153 14037356732 016526 0ustar00rootroot000000 000000 [submodule "src/ffgraphics/asio"] path = src/ffgraphics/asio url = https://github.com/chriskohlhoff/asio FreeFem-sources-4.9/.travis.yml000664 000000 000000 00000001106 14037356732 016461 0ustar00rootroot000000 000000 git: submodules: false dist: xenial language: cpp compiler: - gcc before_install: - sudo apt-get -qq update - sudo apt-get install -y g++ gcc gfortran ghostscript m4 make patch pkg-config wget python unzip libopenblas-dev liblapack-dev libhdf5-dev libscotch-dev libfftw3-dev libarpack2-dev libsuitesparse-dev libmumps-seq-dev libnlopt-dev coinor-libipopt-dev libgmm++-dev libtet1.5-dev gnuplot-qt autoconf automake autotools-dev bison flex gdb valgrind git cmake mpich script: - autoreconf -i && ./configure --enable-download && ./3rdparty/getall -a - make FreeFem-sources-4.9/3rdparty/000775 000000 000000 00000000000 14037356732 016122 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/Makefile.am000775 000000 000000 00000021005 14037356732 020157 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- # $Id: Makefile.am,v 1.16 2010/05/06 21:20:38 hecht Exp $ SUBDIRS=blas arpack umfpack EXTRA_DIST= \ ./nlopt/Make.inc ./nlopt/Makefile \ ./f2c/Makefile \ ./f2c/Makefile-MacOs \ ./f2c/f2c.h-int \ ./f2c/fort77.sed \ ./f2c/tt.f \ ./fftw/Makefile.am \ ./fftw/Makefile.in \ ./gmm/Makefile \ ./gmm/cxxflags \ ./headers-sparsesolver.inc \ ./metis/Makefile \ ./metis/Makefile-metis.in \ ./metis/metis-5.1.patch \ ./mmg3d/Makefile \ ./mmg3d/patch-mmg3dv4.diff \ 
./mshmet/Makefile \ ./mshmet/Makefile-mshmet.inc \ ./mshmet/mshmet.2011.03.06.patch \ ./mshmet/mshmet.2012.04.25_i586.patch \ ./mshmet/mshmetlib-internal.h \ ./mshmet/mshmetlib.c \ ./mshmet/mshmetlib.h \ ./mumps/Makefile \ ./mumps-seq/Makefile-mumps-5.0.2.inc \ ./mumps-seq/Makefile \ ./mumps/Makefile-mumps-5.0.2.inc \ ./parmetis/Makefile-parmetis.in \ ./parmetis/makefile \ ./parmetis/parmetis-4.0.3.patch \ ./scalapack/Makefile \ ./scalapack/SLmake-scalapack.inc \ ./scotch/Makefile \ ./scotch/Makefile-scotch.inc \ ./scotch/scotch_6.0.4.patch \ ./superlu/Makefile \ ./superlu/patch-superlu-5.2.2 \ ./superlu/make.inc \ ./tetgen/Makefile \ ./mmg/Makefile \ ./parmmg/Makefile \ ./yams/Makefile \ ./yams/freeyams.2012.02.05.patch \ ./yams/freeyams.2012.02.05-return-values.patch \ ./yams/makefile-yams.inc \ ./yams/yamslib.c \ ./yams/yamslib.h \ ./yams/yamslib_internal.h \ ipopt/Makefile \ ipopt/Makefile.inc.in \ ipopt/patch-IpBlas \ ff-petsc/Makefile \ ff-petsc/Makefile-PETSc.inc \ ff-petsc/Makefile.complex \ ff-petsc/petsc-3.11.2.patch \ getall # FFCS: See [[file:../../../configure.ac::tools_problems_all_platforms]] for reasons why some tools may be deactivated MPI_SOFT= @TOOL_COMPILE_scalapack@ @TOOL_COMPILE_parmetis@ @TOOL_COMPILE_parmmg@ @TOOL_COMPILE_mumps@ LIST_SOFT= @DOWNLOAD_FFTW@ @TOOL_COMPILE_tetgen@ @TOOL_COMPILE_metis@ @TOOL_COMPILE_superlu@ \ @TOOL_COMPILE_scotch@ @TOOL_COMPILE_mshmet@ \ @TOOL_COMPILE_yams@ @TOOL_COMPILE_mmg3d@ @TOOL_COMPILE_mmg@ @TOOL_COMPILE_gmm@ \ @TOOL_COMPILE_nlopt@ @TOOL_COMPILE_mumps_seq@ \ @TOOL_COMPILE_ipopt@ @TOOL_COMPILE_libpthread_google@ all-recursive: bin lib include pkg lib: mkdir lib bin: mkdir bin include: mkdir include pkg: mkdir pkg # ALH - /3rdparty/yams and /3rdparty/mshmet need /src/libMesh/libmesh.a but /src is compiled after /download, so we # need to compile it now lib/libMesh.a:lib include cd ../src/libMesh && $(MAKE) $(AM_MAKEFLAGS) test -f ../src/libMesh/libMesh.a mkdir -p include/libMesh cp ../src/libMesh/*h 
include/libMesh echo libMesh LD -L@DIR@/lib -lMesh > lib/WHERE.libMesh echo libMesh INCLUDE -I@DIR@/include/libMesh >> lib/WHERE.libMesh cp ../src/libMesh/libMesh.a lib/libMesh.a all-local:bin lib include lib/libMesh.a pkg $(DOWNLOADCOMPILE) install-other install-other: tag-install-other tag-install-other: if test -n "$(TOOL_COMPILE_hpddm)" ; then $(MAKE) install-hpddm; else true; fi if test -n "$(TOOL_COMPILE_bem)" ; then $(MAKE) install-htool install-boost install-bemtool; else true; fi touch tag-install-other WHERE-OTHER: lib/WHERE.hpddm lib/WHERE.htool lib/WHERE.bemtool lib/WHERE.boost WHERE-LD: tag-compile-pkg WHERE-OTHER touch ../plugin/seq/WHERE_LIBRARY-config ../plugin/seq/WHERE_LIBRARY -grep LD ../plugin/seq/WHERE_LIBRARY ../plugin/seq/WHERE_LIBRARY-config >WHERE-LD # BEGIN HPDDM install-hpddm: ./getall if test -n "$(TOOL_COMPILE_hpddm)"; then ./getall -o hpddm -a; $(MAKE) include/hpddm/done.tag lib/WHERE.hpddm; else true;fi reinstall-hpddm:./getall -rm lib/WHERE.hpddm -rm ./pkg/hpddm.zip -rm -rf include/hpddm -test -n "$(TOOL_COMPILE_hpddm)" && $(MAKE) install-hpddm || true include/hpddm/done.tag:./pkg/hpddm.zip -if test -f ./pkg/hpddm.zip ; then \ cd include;rm -rf hpddm hpddm-*; unzip -q ../pkg/hpddm.zip ; mv hpddm-* hpddm ; touch hpddm/done.tag; \ else true; fi lib/WHERE.hpddm: ./getall if test -d include/hpddm/include ; then \ echo hpddm LD -L@DIR@/lib > $@ ;\ echo hpddm INCLUDE -I@DIR@/include/hpddm/include >> $@ ;\ elif test -d include/hpddm/src ; then \ echo hpddm LD -L@DIR@/lib > $@ ;\ echo hpddm INCLUDE -I@DIR@/include/hpddm/src >> $@ ;\ else true; fi #end HPDDM # begin HTOOL install-htool: ./getall if test -n "$(TOOL_COMPILE_bem)"; then ./getall -o htool -a; $(MAKE) include/htool/done.tag lib/WHERE.htool; else true;fi reinstall-htool:./getall -rm lib/WHERE.htool -rm ./pkg/htool.zip -rm -rf include/htool -test -n "$(TOOL_COMPILE_bem)" && $(MAKE) install-htool || true include/htool/done.tag:./pkg/htool.zip -if test -f ./pkg/htool.zip ; then \ cd 
include;rm -rf htool htool-*; unzip -q ../pkg/htool.zip ; mv htool-* htool ; touch htool/done.tag; \ else true; fi lib/WHERE.htool: ./getall if test -d include/htool/include ; then \ echo htool LD -L@DIR@/lib > $@ ;\ echo htool INCLUDE -I@DIR@/include/htool/include >> $@ ;\ else true; fi # end HTOOL # begin BEMTOOL install-bemtool: ./getall if test -n "$(TOOL_COMPILE_bem)"; then ./getall -o bemtool -a; $(MAKE) include/bemtool/done.tag lib/WHERE.bemtool; else true;fi reinstall-bemtool:./getall -rm lib/WHERE.bemtool -rm ./pkg/bemtool.zip -rm -rf include/BemTool -test -n "$(TOOL_COMPILE_bem)" && $(MAKE) install-bemtool || true include/bemtool/done.tag:./pkg/bemtool.zip -if test -f ./pkg/bemtool.zip ; then \ cd include;rm -rf BemTool BemTool-*; unzip -q ../pkg/bemtool.zip ; mv BemTool-* BemTool ; touch BemTool/done.tag; \ else true; fi lib/WHERE.bemtool: ./getall if test -d include/BemTool/ ; then \ echo bemtool LD -L@DIR@/lib > $@ ;\ echo bemtool INCLUDE -I@DIR@/include/BemTool/ >> $@ ;\ else true; fi # end BEMTOOL # begin BOOST install-boost: ./getall if test -n "$(TOOL_COMPILE_bem)"; then ./getall -o Boost -a; $(MAKE) boost/done.tag lib/WHERE.boost; else true;fi reinstall-boost:./getall -rm lib/WHERE.boost -rm ./pkg/boost_for_bemtool.tar.gz -rm -rf boost -test -n "$(TOOL_COMPILE_bem)" && $(MAKE) install-boost || true boost/done.tag:./pkg/boost_for_bemtool.tar.gz if test -f ./pkg/boost_for_bemtool.tar.gz ; then \ rm -rf boost boost_*; tar xzf pkg/boost_for_bemtool.tar.gz ; mv boost_for_bemtool boost; \ else true; fi touch boost/done.tag lib/WHERE.boost: ./getall if test -d boost/include ; then \ echo boost INCLUDE -I@DIR@/boost/include >> $@ ;\ else true; fi # end BOOST # to reinstall mpi under window afer clean reinstall-msmpi: -if test -f "$$MSMPI_INC"/mpif.h ; then \ echo " copy msmpi in 3rdparty form $$MSMPI_INC and $$MSMPI_LIB64 or $$MSMPI_LIB32" ; \ mkdir -p include/msmpi ;\ mkdir -p lib/msmpi ;\ cp "$$MSMPI_INC"/*.h include/msmpi ;\ sed 
's/INT_PTR_KIND()/@SIZEOF_PTR@/' <"$$MSMPI_INC"/mpif.h >include/msmpi/mpif.h ;\ grep KIND include/msmpi/mpif.h; \ test "@SIZEOF_PTR@" -eq 8 && cp "$$MSMPI_INC"/x64/*.h include/msmpi && cp "$$MSMPI_LIB64"/*.lib lib/msmpi ;\ test "@SIZEOF_PTR@" -eq 4 && cp "$$MSMPI_INC"/x86/*.h include/msmpi && cp "$$MSMPI_LIB32"/*.lib lib/msmpi ;\ fi # FFCS: need to stop at the first error to make sure that all libraries are correctly compiled compile-dir: @echo "\n\n ****** $(COMPILEDIR) ****** \n\n"; @if [ 0 -eq `egrep ':$(COMPILEDIR) *LD' WHERE-LD | wc -l` ] ;then \ cd $(COMPILEDIR) && $(MAKE) $(DIRTARGET) ; \ else \ echo $(COMPILEDIR) is in WHERE- files ;\ fi compile-pkg: tag-compile-pkg WHERE-LD # FFCS: need to stop at the first error to make sure that all libraries are correctly compiled tag-compile-pkg: bin lib include pkg FORCE @if [ -n "$(WGET)" ] ; then \ for d in $(LIST_SOFT) ; do $(MAKE) compile-dir COMPILEDIR=$$d || exit 1;done ;\ if [ -n "$(MPICC)" ] ; then \ for d in $(MPI_SOFT) ; do $(MAKE) compile-dir COMPILEDIR=$$d || exit 1; done;\ fi;fi $(MAKE) install-hpddm install-htool install-bemtool install-boost touch tag-compile-pkg FORCE: re-install: $(MAKE) compile-pkg DIRTARGET=install WHERE: $(MAKE) compile-pkg DIRTARGET=WHERE install-exec-local: $(mkinstalldirs) -m 755 $(DESTDIR)$(ff_prefix_dir)/lib $(mkinstalldirs) -m 755 $(DESTDIR)$(ff_prefix_dir)/bin $(mkinstalldirs) -m 755 $(DESTDIR)$(ff_prefix_dir)/include cp -rp lib $(DESTDIR)$(ff_prefix_dir) cp -rp include $(DESTDIR)$(ff_prefix_dir) cp -rp bin $(DESTDIR)$(ff_prefix_dir) clean-local: -rm -rf tag-* include lib bin WHERE-LD -mkdir include lib bin -rm */FAIT */FAIRE # FFCS - make sure that all directories are cleaned. 
Thisis especially important under Windows because there is no # compilation dependencies control there (see # [[file:c:/cygwin/home/alh/ffcs/dist/configure.ac::dependency_tracking]]) for d in $(LIST_SOFT) $(MPI_SOFT) ; do $(MAKE) clean -C $$d ; done FreeFem-sources-4.9/3rdparty/arpack/000775 000000 000000 00000000000 14037356732 017363 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/arpack/ARmake.inc000664 000000 000000 00000007647 14037356732 021234 0ustar00rootroot000000 000000 # ARPACK ARmake.inc modified for FreeFEM # $Id$ ########################################################################### # # Program: ARPACK # # Module: ARmake.inc # # Purpose: Top-level Definitions # # Creation date: February 22, 1996 # # Modified: # # Send bug reports, comments or suggestions to arpack@caam.rice.edu # ############################################################################ # # %---------------------------------% # | SECTION 1: PATHS AND LIBRARIES | # %---------------------------------% # # # %--------------------------------------% # | You should change the definition of | # | home if ARPACK is built some place | # | other than your home directory. | # %--------------------------------------% # #home = $(HOME)/ARPACK # # %--------------------------------------% # | The platform identifier to suffix to | # | the end of library names | # %--------------------------------------% # PLAT = ff++ # # %------------------------------------------------------% # | The directories to find the various pieces of ARPACK | # %------------------------------------------------------% # BLASdir = $(home)/BLAS LAPACKdir = $(home)/LAPACK UTILdir = $(home)/UTIL SRCdir = $(home)/SRC # #DIRS = $(BLASdir) $(LAPACKdir) $(UTILdir) $(SRCdir) # # %-------------------------------------------------------------------% # | Comment out the previous line and uncomment the following | # | if you already have the BLAS and LAPACK installed on your system. 
| # | NOTE: ARPACK assumes the use of LAPACK version 2 codes. | # %-------------------------------------------------------------------% # DIRS = $(LAPACKdir) $(UTILdir) $(SRCdir) # # %---------------------------------------------------% # | The name of the libraries to be created/linked to | # %---------------------------------------------------% # ARPACKLIB = $(home)/libarpack_$(PLAT).a # if lapack is compile the we build un archive (more simple) LAPACKLIB = $(ARPACKLIB) #BLASLIB = # ALIBS = $(ARPACKLIB) $(LAPACKLIB) $(BLASLIB) # # # %---------------------------------------------------------% # | SECTION 2: COMPILERS | # | | # | The following macros specify compilers, linker/loaders, | # | the archiver, and their options. You need to make sure | # | these are correct for your system. | # %---------------------------------------------------------% # # # %------------------------------% # | Make our own suffixes' list. | # %------------------------------% # .SUFFIXES: .SUFFIXES: .f .o # # %------------------% # | Default command. | # %------------------% # .DEFAULT: @$(ECHO) "Unknown target $@, try: make help" # # %-------------------------------------------% # | Command to build .o files from .f files. | # %-------------------------------------------% # .f.o: @$(ECHO) Making $@ from $< @$(FC) -c $(FFLAGS) $< # # %-----------------------------------------% # | Various compilation programs and flags. | # | You need to make sure these are correct | # | for your system. | # %-----------------------------------------% # #FC = f77 #FFLAGS = -O -cg89 #LDFLAGS = CD = cd ECHO = echo LN = ln LNFLAGS = -s MAKE = make RM = rm RMFLAGS = -f SHELL = /bin/sh # # %----------------------------------------------------------------% # | The archiver and the flag(s) to use when building an archive | # | (library). Also the ranlib routine. If your system has no | # | ranlib, set RANLIB = touch. 
| # %----------------------------------------------------------------% # AR = libtool -o ARFLAGS = rv #RANLIB = touch RANLIB = ranlib # # %----------------------------------% # | This is the general help target. | # %----------------------------------% # help: @$(ECHO) "usage: make ?" FreeFem-sources-4.9/3rdparty/arpack/ARmake.m4000664 000000 000000 00000007602 14037356732 020772 0ustar00rootroot000000 000000 # ARPACK ARmake.inc modified for FreeFEM # $Id$ ########################################################################### # # Program: ARPACK # # Module: ARmake.inc # # Purpose: Top-level Definitions # # Creation date: February 22, 1996 # # Modified: # # Send bug reports, comments or suggestions to arpack@caam.rice.edu # ############################################################################ # # %---------------------------------% # | SECTION 1: PATHS AND LIBRARIES | # %---------------------------------% # # # %--------------------------------------% # | You should change the definition of | # | home if ARPACK is built some place | # | other than your home directory. | # %--------------------------------------% # home = FF_HOME # # %--------------------------------------% # | The platform identifier to suffix to | # | the end of library names | # %--------------------------------------% # PLAT = ff++ # # %------------------------------------------------------% # | The directories to find the various pieces of ARPACK | # %------------------------------------------------------% # BLASdir = $(home)/BLAS LAPACKdir = $(home)/LAPACK UTILdir = $(home)/UTIL SRCdir = $(home)/SRC # #DIRS = $(BLASdir) $(LAPACKdir) $(UTILdir) $(SRCdir) # # %-------------------------------------------------------------------% # | Comment out the previous line and uncomment the following | # | if you already have the BLAS and LAPACK installed on your system. | # | NOTE: ARPACK assumes the use of LAPACK version 2 codes. 
| # %-------------------------------------------------------------------% # DIRS = FF_LAPACKdir $(UTILdir) $(SRCdir) # # %---------------------------------------------------% # | The name of the libraries to be created/linked to | # %---------------------------------------------------% # ARPACKLIB = FF_ARPACKLIB LAPACKLIB = FF_LAPACKLIB BLASLIB = FF_BLASLIB # ALIBS = $(ARPACKLIB) $(LAPACKLIB) $(BLASLIB) # # # %---------------------------------------------------------% # | SECTION 2: COMPILERS | # | | # | The following macros specify compilers, linker/loaders, | # | the archiver, and their options. You need to make sure | # | these are correct for your system. | # %---------------------------------------------------------% # # # %------------------------------% # | Make our own suffixes' list. | # %------------------------------% # .SUFFIXES: .SUFFIXES: .f .o # # %------------------% # | Default command. | # %------------------% # .DEFAULT: @$(ECHO) "Unknown target $@, try: make help" # # %-------------------------------------------% # | Command to build .o files from .f files. | # %-------------------------------------------% # .f.o: @$(ECHO) Making $@ from $< @$(FC) -c $(FFLAGS) $< # # %-----------------------------------------% # | Various compilation programs and flags. | # | You need to make sure these are correct | # | for your system. | # %-----------------------------------------% # FC = FF_FC FFLAGS = FF_FFLAGS LDFLAGS = FF_LDFLAGS SECOND_O = FF_SECOND CD = cd ECHO = echo LN = ln LNFLAGS = -s MAKE = make RM = rm RMFLAGS = -f SHELL = /bin/sh # # %----------------------------------------------------------------% # | The archiver and the flag(s) to use when building an archive | # | (library). Also the ranlib routine. If your system has no | # | ranlib, set RANLIB = touch. 
| # %----------------------------------------------------------------% # AR = FF_AR ARFLAGS = FF_ARFLAGS RANLIB = FF_RANLIB # # %----------------------------------% # | This is the general help target. | # %----------------------------------% # help: @$(ECHO) "usage: make ?" .NOTPARALLEL: FreeFem-sources-4.9/3rdparty/arpack/Makefile.am000664 000000 000000 00000011160 14037356732 021416 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . # ############################################################################ # SUMMARY : Makefile for downloaded ARPACK # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : ... # E-MAIL : ... 
all-local:$(DOWNLOAD_ARPACK) EXTRA_DIST=ARmake.m4 arpack-patch-lapack.tar.gz veclib_zdotc.f veclib_cdotc.f PKGCOMMON_PACKTITLE=ARPACK include ../common.mak # nothing specific to do for [[file:../common.mak::reinstall]] reinstall::install # Downloading and compiling ARPACK # -------------------------------- .NOTPARALLEL: # set in configure #ARPACKLIB=ARPACK/libarpack_ff++.a DIRPKG=../pkg ARPACK96_TAR_GZ=$(DIRPKG)/arpack96.tar.gz PATCH_TAR_GZ=$(DIRPKG)/patch.tar.gz PKGCOMMON_PACKAGES=$(ARPACK96_TAR_GZ) $(PATCH_TAR_GZ) # FFCS: need to add $(RANLIB) under mingw64 to avoid "archive has no index" error compilepkg::$(ARPACKLIB) ARPACK/ARmake.inc:ARPACK/ARmake.incnew if diff -q ARPACK/ARmake.incnew ARPACK/ARmake.inc ; then echo No Modif skip compile of arpack ; else cp ARPACK/ARmake.incnew ARPACK/ARmake.inc; fi $(ARPACKLIB):ARPACK/ARmake.inc case '$(BLASLIBS)' in *vecLib*|*Accelerate*|*mkl*) $(F77) -c $(FFLAGS) veclib_zdotc.f -o ARPACK/SRC/veclib_zdotc.o ;; esac; case '$(BLASLIBS)' in *vecLib*|*Accelerate*|*mkl*) $(F77) -c $(FFLAGS) veclib_cdotc.f -o ARPACK/SRC/veclib_cdotc.o ;; esac; mkdir -p ../include ../lib if [ -n '@FF_LAPACKdir@' ] ; then \ $(F77) -c `echo $(FFLAGS)\ |sed -e s/-O.\*\ // ` ARPACK/LAPACK/dlamch.f -o ARPACK/LAPACK/dlamch.o; \ fi; \ cd ARPACK && $(MAKE) lib if test -n '@FF_LAPACKdir@' ; then \ $(AR) $(ARFLAGS) $(LAPACK_arpack_LIB) ARPACK/SRC/*.o ARPACK/UTIL/*.o ARPACK/LAPACK/*.o ;\ $(RANLIB) $(LAPACK_arpack_LIB) ;\ else \ $(AR) $(ARFLAGS) $(ARPACKLIB) ARPACK/SRC/*.o ARPACK/UTIL/*.o ;\ fi ARPACK/ARmake.incnew: $(ARPACK96_TAR_GZ) $(PATCH_TAR_GZ) ARmake.m4 Makefile -rm -rf ARPACK gunzip -c $(ARPACK96_TAR_GZ) | tar xf - gunzip -c $(PATCH_TAR_GZ) | tar xf - gunzip -c arpack-patch-lapack.tar.gz | tar xf - case '$(BLASLIBS)' in *vecLib*|*Accelerate*|*mkl*) \ for i in ARPACK/LAPACK/zlatrs.f ARPACK/LAPACK/ztrsyl.f ARPACK/SRC/zgetv0.f ARPACK/SRC/znaitr.f ARPACK/SRC/znaup2.f ARPACK/SRC/zneupd.f;\ do mv $$i $$i.cpy; sed -e 's/ZDOTC/ZZDOTC/' -e 's/zdotc/zzdotc/' 
<$$i.cpy >$$i;rm $$i.cpy; \ done; \ for i in ARPACK/LAPACK/clatrs.f ARPACK/LAPACK/ctrsyl.f ARPACK/SRC/cgetv0.f ARPACK/SRC/cnaitr.f ARPACK/SRC/cnaup2.f ARPACK/SRC/cneupd.f;\ do mv $$i $$i.cpy; sed -e 's/CDOTC/CCDOTC/' -e 's/cdotc/ccdotc/' <$$i.cpy >$$i;rm $$i.cpy; \ done; \ esac for i in ARPACK/SRC/*.f ; do \ mv $$i $$i.cpy; sed -e 's/, second/, secnd2/' -e 's/call *second/call secnd2/' <$$i.cpy >$$i;rm $$i.cpy; done for i in ARPACK/UTIL/second.f; do \ mv $$i $$i.cpy; cat $$i.cpy| sed 's/ SECOND *(/ secnd2(/'|grep -v EXTERNAL >$$i;rm $$i.cpy; done m4 -DFF_BLASLIB="$(BLASLIB)" \ -DFF_ARPACKLIB="$(ARPACKLIB)" \ -DFF_LAPACK_arpack_LIB="$(LAPACK_arpack_LIB)" \ -DFF_FC="@F77@" \ -DFF_FFLAGS="@FFLAGS@" \ -DFF_LAPACKdir='@FF_LAPACKdir@' \ -DFF_LDFLAGS="@LDFLAGS@" \ -DFF_HOME=`pwd`/ARPACK \ -DFF_SECOND="@FF_SECOND@" \ -DFF_AR="@AR@" \ -DFF_ARFLAGS="@ARFLAGS@" \ -DFF_RANLIB="@RANLIB@" \ ARmake.m4 >ARPACK/ARmake.incnew $(ARPACK96_TAR_GZ) $(PATCH_TAR_GZ):download # ../getall -o ARPACK -a clean-local:: -rm -r ARPACK ../lib/libarpack.a # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/arpack/arpack-patch-lapack.tar.gz000664 000000 000000 00000002204 14037356732 024277 0ustar00rootroot000000 000000 k@Earpack-patch-lapack.tarV]s8_q!k\HNֳPLU`3M^iu%]{t9j:b?8:'aض;a(=ʟۚL:Y08nZdRA9@mőy$cZ8Z;3Ko/ӵ7;wKgY `0B7@0 )d"0I CL9q_tHgqҔ1݌F]=iMo`!]+fIi,SL N <*pGn4qpr~x*){la,C.΃u [@f;u,8k!O~3ɔsO9dN{`˴UɯEM38 <{'Bˌ Fr}LfjimjV/2ѯP[dmû Y_U+C,K_ 7y0nxqjuv.*wz@e4 g7h1`>8NP֣g3NEs:; l~_` QBvX'I@c(j)d,(+zq0gQ(ፖz lϰϐZvs(N7xv~vL?Ēh_; (DAF4S-'η@o!_k|/OWPPPPPPPPPPPPPP 74Z(FreeFem-sources-4.9/3rdparty/arpack/veclib_cdotc.f000664 000000 000000 00000001610 14037356732 022150 0ustar00rootroot000000 000000 complex function ccdotc(n,zx,incx,zy,incy) c c forms the dot product of a vector. c jack dongarra, 3/11/78. 
c modified 12/3/93, array(1) declarations changed to array(*) c complex zx(*),zy(*),ztemp integer i,incx,incy,ix,iy,n ztemp = (0.0d0,0.0d0) ccdotc = (0.0d0,0.0d0) if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n ztemp = ztemp + conjg(zx(ix))*zy(iy) ix = ix + incx iy = iy + incy 10 continue ccdotc = ztemp return c c code for both increments equal to 1 c 20 do 30 i = 1,n ztemp = ztemp + conjg(zx(i))*zy(i) 30 continue ccdotc = ztemp return end FreeFem-sources-4.9/3rdparty/arpack/veclib_zdotc.f000664 000000 000000 00000001626 14037356732 022206 0ustar00rootroot000000 000000 double complex function zzdotc(n,zx,incx,zy,incy) c c forms the dot product of a vector. c jack dongarra, 3/11/78. c modified 12/3/93, array(1) declarations changed to array(*) c double complex zx(*),zy(*),ztemp integer i,incx,incy,ix,iy,n ztemp = (0.0d0,0.0d0) zzdotc = (0.0d0,0.0d0) if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n ztemp = ztemp + dconjg(zx(ix))*zy(iy) ix = ix + incx iy = iy + incy 10 continue zzdotc = ztemp return c c code for both increments equal to 1 c 20 do 30 i = 1,n ztemp = ztemp + dconjg(zx(i))*zy(i) 30 continue zzdotc = ztemp return end FreeFem-sources-4.9/3rdparty/blas/000775 000000 000000 00000000000 14037356732 017043 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/blas/Makefile.am000664 000000 000000 00000026004 14037356732 021101 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM.
# # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see <https://www.gnu.org/licenses/>. # ############################################################################ # SUMMARY : Downloading and compiling extra BLAS libraries # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : ... # E-MAIL : ... DIRPKG=../pkg BLAS_TGZ=$(DIRPKG)/blas-3.7.1.tgz CBLAS_TGZ=$(DIRPKG)/cblas.tgz PKGCOMMON_PACKTITLE=BLAS PKGCOMMON_PACKAGES=$(BLAS_TGZ) $(CBLAS_TGZ) ../pkg/OpenBLAS.tar.gz include ../common.mak EXTRA_DIST= \ openblas.patches # Downloading and compiling the Generic Blas # ------------------------------------------ noinst_LIBRARIES=@DOWNLOADED_BLAS@ ##compile::$(noinst_LIBRARIES) EXTRA_LIBRARIES=libf77blas.a libcblas.a pkgcompile::@DOWNLOADED_BLAS@ # List of files to compile (do not list them in *_SOURCES to prevent them from being included in distributions).
F77BLAS_SRC=BLAS/caxpy.f BLAS/crotg.f BLAS/dcopy.f BLAS/dsymv.f BLAS/lsame.f BLAS/sspmv.f BLAS/zaxpy.f BLAS/zhpr2.f \ BLAS/ccopy.f BLAS/cscal.f BLAS/ddot.f BLAS/dsyr2.f BLAS/sasum.f BLAS/sspr2.f BLAS/zcopy.f BLAS/zhpr.f BLAS/cdotc.f \ BLAS/csrot.f BLAS/dgbmv.f BLAS/dsyr2k.f BLAS/saxpy.f BLAS/sspr.f BLAS/zdotc.f BLAS/zrotg.f BLAS/cdotu.f BLAS/csscal.f \ BLAS/dgemm.f BLAS/dsyr.f BLAS/scasum.f BLAS/sswap.f BLAS/zdotu.f BLAS/zscal.f BLAS/cgbmv.f BLAS/cswap.f BLAS/dgemv.f \ BLAS/dsyrk.f BLAS/scnrm2.f BLAS/ssymm.f BLAS/zdrot.f BLAS/zswap.f BLAS/cgemm.f BLAS/csymm.f BLAS/dger.f BLAS/dtbmv.f \ BLAS/scopy.f BLAS/ssymv.f BLAS/zdscal.f BLAS/zsymm.f BLAS/cgemv.f BLAS/csyr2k.f BLAS/dnrm2.f BLAS/dtbsv.f BLAS/sdot.f \ BLAS/ssyr2.f BLAS/zgbmv.f BLAS/zsyr2k.f BLAS/cgerc.f BLAS/csyrk.f BLAS/drot.f BLAS/dtpmv.f BLAS/sdsdot.f BLAS/ssyr2k.f \ BLAS/zgemm.f BLAS/zsyrk.f BLAS/cgeru.f BLAS/ctbmv.f BLAS/drotg.f BLAS/dtpsv.f BLAS/sgbmv.f BLAS/ssyr.f BLAS/zgemv.f \ BLAS/ztbmv.f BLAS/chbmv.f BLAS/ctbsv.f BLAS/drotm.f BLAS/dtrmm.f BLAS/sgemm.f BLAS/ssyrk.f BLAS/zgerc.f BLAS/ztbsv.f \ BLAS/chemm.f BLAS/ctpmv.f BLAS/drotmg.f BLAS/dtrmv.f BLAS/sgemv.f BLAS/stbmv.f BLAS/zgeru.f BLAS/ztpmv.f BLAS/chemv.f \ BLAS/ctpsv.f BLAS/dsbmv.f BLAS/dtrsm.f BLAS/sger.f BLAS/stbsv.f BLAS/zhbmv.f BLAS/ztpsv.f BLAS/cher2.f BLAS/ctrmm.f \ BLAS/dscal.f BLAS/dtrsv.f BLAS/snrm2.f BLAS/stpmv.f BLAS/zhemm.f BLAS/ztrmm.f BLAS/cher2k.f BLAS/ctrmv.f BLAS/dsdot.f \ BLAS/dzasum.f BLAS/srot.f BLAS/stpsv.f BLAS/zhemv.f BLAS/ztrmv.f BLAS/cher.f BLAS/ctrsm.f BLAS/dspmv.f BLAS/dznrm2.f \ BLAS/srotg.f BLAS/strmm.f BLAS/zher2.f BLAS/ztrsm.f BLAS/cherk.f BLAS/ctrsv.f BLAS/dspr2.f BLAS/icamax.f BLAS/srotm.f \ BLAS/strmv.f BLAS/zher2k.f BLAS/ztrsv.f BLAS/chpmv.f BLAS/dasum.f BLAS/dspr.f BLAS/idamax.f BLAS/srotmg.f BLAS/strsm.f \ BLAS/zher.f BLAS/chpr2.f BLAS/daxpy.f BLAS/dswap.f BLAS/isamax.f BLAS/ssbmv.f BLAS/strsv.f BLAS/zherk.f BLAS/chpr.f \ BLAS/dcabs1.f BLAS/dsymm.f BLAS/izamax.f BLAS/sscal.f blas_xerbla.f BLAS/zhpmv.f 
CBLAS_SRC=CBLAS/src/cblas_caxpy.c CBLAS/src/cblas_drot.c CBLAS/src/cblas_sgemm.c CBLAS/src/cblas_zher2.c \ CBLAS/src/cblas_ccopy.c CBLAS/src/cblas_drotg.c CBLAS/src/cblas_sgemv.c CBLAS/src/cblas_zher2k.c \ CBLAS/src/cblas_cdotc_sub.c CBLAS/src/cblas_drotm.c CBLAS/src/cblas_sger.c CBLAS/src/cblas_zher.c \ CBLAS/src/cblas_cdotu_sub.c CBLAS/src/cblas_drotmg.c CBLAS/src/cblas_snrm2.c CBLAS/src/cblas_zherk.c \ CBLAS/src/cblas_cgbmv.c CBLAS/src/cblas_dsbmv.c CBLAS/src/cblas_srot.c CBLAS/src/cblas_zhpmv.c CBLAS/src/cblas_cgemm.c \ CBLAS/src/cblas_dscal.c CBLAS/src/cblas_srotg.c CBLAS/src/cblas_zhpr2.c CBLAS/src/cblas_cgemv.c CBLAS/src/cblas_dsdot.c \ CBLAS/src/cblas_srotm.c CBLAS/src/cblas_zhpr.c CBLAS/src/cblas_cgerc.c CBLAS/src/cblas_dspmv.c CBLAS/src/cblas_srotmg.c \ CBLAS/src/cblas_zscal.c CBLAS/src/cblas_cgeru.c CBLAS/src/cblas_dspr2.c CBLAS/src/cblas_ssbmv.c CBLAS/src/cblas_zswap.c \ CBLAS/src/cblas_chbmv.c CBLAS/src/cblas_dspr.c CBLAS/src/cblas_sscal.c CBLAS/src/cblas_zsymm.c CBLAS/src/cblas_chemm.c \ CBLAS/src/cblas_dswap.c CBLAS/src/cblas_sspmv.c CBLAS/src/cblas_zsyr2k.c CBLAS/src/cblas_chemv.c \ CBLAS/src/cblas_dsymm.c CBLAS/src/cblas_sspr2.c CBLAS/src/cblas_zsyrk.c CBLAS/src/cblas_cher2.c CBLAS/src/cblas_dsymv.c \ CBLAS/src/cblas_sspr.c CBLAS/src/cblas_ztbmv.c CBLAS/src/cblas_cher2k.c CBLAS/src/cblas_dsyr2.c CBLAS/src/cblas_sswap.c \ CBLAS/src/cblas_ztbsv.c CBLAS/src/cblas_cher.c CBLAS/src/cblas_dsyr2k.c CBLAS/src/cblas_ssymm.c CBLAS/src/cblas_ztpmv.c \ CBLAS/src/cblas_cherk.c CBLAS/src/cblas_dsyr.c CBLAS/src/cblas_ssymv.c CBLAS/src/cblas_ztpsv.c CBLAS/src/cblas_chpmv.c \ CBLAS/src/cblas_dsyrk.c CBLAS/src/cblas_ssyr2.c CBLAS/src/cblas_ztrmm.c CBLAS/src/cblas_chpr2.c CBLAS/src/cblas_dtbmv.c \ CBLAS/src/cblas_ssyr2k.c CBLAS/src/cblas_ztrmv.c CBLAS/src/cblas_chpr.c CBLAS/src/cblas_dtbsv.c CBLAS/src/cblas_ssyr.c \ CBLAS/src/cblas_ztrsm.c CBLAS/src/cblas_cscal.c CBLAS/src/cblas_dtpmv.c CBLAS/src/cblas_ssyrk.c CBLAS/src/cblas_ztrsv.c \ CBLAS/src/cblas_csscal.c 
CBLAS/src/cblas_dtpsv.c CBLAS/src/cblas_stbmv.c CBLAS/src/cdotcsub.f CBLAS/src/cblas_cswap.c \ CBLAS/src/cblas_dtrmm.c CBLAS/src/cblas_stbsv.c CBLAS/src/cdotusub.f CBLAS/src/cblas_csymm.c CBLAS/src/cblas_dtrmv.c \ CBLAS/src/cblas_stpmv.c CBLAS/src/dasumsub.f CBLAS/src/cblas_csyr2k.c CBLAS/src/cblas_dtrsm.c CBLAS/src/cblas_stpsv.c \ CBLAS/src/ddotsub.f CBLAS/src/cblas_csyrk.c CBLAS/src/cblas_dtrsv.c CBLAS/src/cblas_strmm.c CBLAS/src/dnrm2sub.f \ CBLAS/src/cblas_ctbmv.c CBLAS/src/cblas_dzasum.c CBLAS/src/cblas_strmv.c CBLAS/src/dsdotsub.f CBLAS/src/cblas_ctbsv.c \ CBLAS/src/cblas_dznrm2.c CBLAS/src/cblas_strsm.c CBLAS/src/dzasumsub.f CBLAS/src/cblas_ctpmv.c CBLAS/src/cblas_f77.h \ CBLAS/src/cblas_strsv.c CBLAS/src/dznrm2sub.f CBLAS/src/cblas_ctpsv.c CBLAS/src/cblas_globals.c \ CBLAS/src/cblas_xerbla.c CBLAS/src/icamaxsub.f CBLAS/src/cblas_ctrmm.c CBLAS/src/cblas.h CBLAS/src/cblas_zaxpy.c \ CBLAS/src/idamaxsub.f CBLAS/src/cblas_ctrmv.c CBLAS/src/cblas_icamax.c CBLAS/src/cblas_zcopy.c CBLAS/src/isamaxsub.f \ CBLAS/src/cblas_ctrsm.c CBLAS/src/cblas_idamax.c CBLAS/src/cblas_zdotc_sub.c CBLAS/src/izamaxsub.f \ CBLAS/src/cblas_ctrsv.c CBLAS/src/cblas_isamax.c CBLAS/src/cblas_zdotu_sub.c CBLAS/src/Makefile CBLAS/src/cblas_dasum.c \ CBLAS/src/cblas_izamax.c CBLAS/src/cblas_zdscal.c CBLAS/src/sasumsub.f CBLAS/src/cblas_daxpy.c CBLAS/src/cblas_sasum.c \ CBLAS/src/cblas_zgbmv.c CBLAS/src/scasumsub.f CBLAS/src/cblas_dcopy.c CBLAS/src/cblas_saxpy.c CBLAS/src/cblas_zgemm.c \ CBLAS/src/scnrm2sub.f CBLAS/src/cblas_ddot.c CBLAS/src/cblas_scasum.c CBLAS/src/cblas_zgemv.c CBLAS/src/sdotsub.f \ CBLAS/src/cblas_dgbmv.c CBLAS/src/cblas_scnrm2.c CBLAS/src/cblas_zgerc.c CBLAS/src/sdsdotsub.f CBLAS/src/cblas_dgemm.c \ CBLAS/src/cblas_scopy.c CBLAS/src/cblas_zgeru.c CBLAS/src/snrm2sub.f CBLAS/src/cblas_dgemv.c CBLAS/src/cblas_sdot.c \ CBLAS/src/cblas_zhbmv.c CBLAS/src/xerbla.c CBLAS/src/cblas_dger.c CBLAS/src/cblas_sdsdot.c CBLAS/src/cblas_zhemm.c \ CBLAS/src/zdotcsub.f 
CBLAS/src/cblas_dnrm2.c CBLAS/src/cblas_sgbmv.c CBLAS/src/cblas_zhemv.c CBLAS/src/zdotusub.f nodist_libf77blas_a_SOURCES= #$(F77BLAS_SRC) nodist_libcblas_a_SOURCES= #$(CBLAS_SRC) BUILT_SOURCES=@DOWNLOADED_BLAS_BUILT_SOURCES@ # -ICBLAS/include to find cblas.h libcblas_a_CFLAGS=-DADD_ -ICBLAS/include # "xerbla" exists in both BLAS and CBLAS. So we need to rename it to obtain two different object files. BLAS:BLAS/fait BLAS/fait:$(BLAS_TGZ) mkdir -p ../include ../lib pxerbla=`tar tf $(BLAS_TGZ) | grep xerbla.f`; \ dirblas=`dirname $$pxerbla` ;\ case $$dirblas in \ BLAS) tar xzf $(BLAS_TGZ) ;; \ BLAS-*) tar xzf $(BLAS_TGZ) ; mv $$dirblas BLAS ;; \ .) mkdir -p BLAS; tar xzf $(BLAS_TGZ) -C BLAS;; \ esac cp BLAS/xerbla.f blas_xerbla.f touch BLAS/fait $(F77BLAS_SRC): BLAS CBLAS:CBLAS/fait CBLAS/fait: $(CBLAS_TGZ) tar xzf $(CBLAS_TGZ) cp CBLAS/include/*.h CBLAS/src touch CBLAS/fait $(CBLAS_SRC): CBLAS/fait clean-local:: -rm -r BLAS CBLAS blas_xerbla.f # <> ALH - 18/9/13 - Downloading and building the OpenBLAS # ------------------------------------------------------------------ # to activate this, see [[file:../../configure.ac::OpenBLAS]] all-local::@COMPILE_OPENBLAS@ pkgcompile::@COMPILE_OPENBLAS@ generic: openblas:install.done ../lib/WHERE.blas # The library may sometimes have a complex name with version number, just copy it into a standard location ../lib/WHERE.blas:install.done echo blas LD -L@DIR@/lib -lopenblas $(FLIBS)> $@ echo blas INCLUDE -I@DIR@/include >> $@ echo blaslapack LD -L@DIR@/lib -lopenblas $(FLIBS) >> $@ echo blaslapack INCLUDE -I@DIR@/include >> $@ if FFCS_WINDOWS # links2files is required for the MinGW compiler to understand where to find the library contents under Cygwin links.done:openblas.done cd OpenBLAS && ../../../build/links2files touch $@ install.done:links.done cp OpenBLAS/libopenblas.a ../lib cd OpenBLAS && make BINARY=@SIZEOF_PTRINBIT@ CC=${CC} FC=${FC} NO_SHARED=1 DYNAMIC_ARCH=1 PREFIX=../..
install touch $@ else install.done:openblas.done cp OpenBLAS/libopenblas.a ../lib cd OpenBLAS && make BINARY=@SIZEOF_PTRINBIT@ CC=${CC} FC=${FC} DYNAMIC_ARCH=1 NO_SHARED=1 PREFIX=../.. install touch $@ endif clean:: -rm ../lib/libopenblas.a openblas.done:openpatches.done # # DYNAMIC_ARCH=1 allows the Openblas to run fast on all the processor architectures that the FFCS users may have # test -d ../lib || mkdir ../lib cd OpenBLAS && make BINARY=@SIZEOF_PTRINBIT@ "CC=$(CC)" "FC=$(FC)" DYNAMIC_ARCH=1 NO_SHARED=0 "FLIBS=$(FLIBS)" libs netlib touch $@ # The OpenBLAS directory is updated during the compilation, so the patching step should not depend on the directory date openpatches.done:opendownload.done tar xzf ../pkg/OpenBLAS.tar.gz # # ALH - 7/1/14 - The tar directory has changed names # mv *OpenBLAS-* OpenBLAS # patch -u -p1 < openblas.patches touch $@ clean:: -rm *.done -rm -r OpenBLAS download::opendownload.done opendownload.done: ../getall -o OpenBLAS -a touch $@ # ALH - 6/11/13 - Since OpenBLAS is quite long to compile and it does not change very often, it's only cleaned as part of the specific # target 'veryclean'. 
veryclean:: -rm -r *.done OpenBLAS xianyi-OpenBLAS-* # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/blas/openblas.patches000664 000000 000000 00000006547 14037356732 022233 0ustar00rootroot000000 000000 # -*- mode:diff;coding:raw-text; -*- diff -r -u ref/OpenBLAS/c_check OpenBLAS/c_check --- ref/OpenBLAS/c_check 2010-01-28 20:26:25.000000000 +0100 +++ OpenBLAS/c_check 2012-06-05 15:17:47.877934400 +0200 @@ -22,15 +22,16 @@ $cross_suffix = ""; -if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { - if ($1 =~ /(.*-)(.*)/) { - $cross_suffix = $1; - } -} else { - if ($ARGV[0] =~ /(.*-)(.*)/) { - $cross_suffix = $1; - } -} +# FFCS - this test is broken when using full paths with dashes for compilers +#if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { +# if ($1 =~ /(.*-)(.*)/) { +# $cross_suffix = $1; +# } +#} else { +# if ($ARGV[0] =~ /(.*-)(.*)/) { +# $cross_suffix = $1; +# } +#} $compiler = ""; $compiler = PGI if ($data =~ /COMPILER_PGI/); --- ref/OpenBLAS/Makefile.install 2017-02-23 17:40:19.000000000 +0100 +++ OpenBLAS/Makefile.install 2017-02-23 18:28:48.000000000 +0100 @@ -2,7 +2,7 @@ export GOTOBLAS_MAKEFILE = 1 -include $(TOPDIR)/Makefile.conf_last include ./Makefile.system - +install=../../../install-sh PREFIX ?= /opt/OpenBLAS OPENBLAS_INCLUDE_DIR := $(PREFIX)/include @@ -44,16 +44,16 @@ ifndef NO_LAPACKE @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) - @-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h - @-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h - @-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h - @-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h + @-$(install) -m 644 
$(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h + @-$(install) -m 644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h + @-$(install) -m 644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h + @-$(install) -m 644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h endif #for install static library ifndef NO_STATIC @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) - @install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) + @$(install) -m 644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) endif @@ -61,7 +61,7 @@ ifndef NO_SHARED @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ifeq ($(OSNAME), Linux) - @install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) + @$(install) -m 755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) @@ -78,7 +78,7 @@ endif ifeq ($(OSNAME), Darwin) @-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) - @-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) + @-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib endif FreeFem-sources-4.9/3rdparty/common.mak000664 000000 000000 00000003702 14037356732 020106 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM.
# # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see <https://www.gnu.org/licenses/>. # ############################################################################ # SUMMARY : Common make rules for all downloaded packages (request from FH) # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : ... # E-MAIL : ... # Common goals for all packages: # download compile install reinstall clean veryclean # <<download>> # PKGCOMMON_PACKTITLE corresponds to package names in [[file:getall]] download:: ../getall -o $(PKGCOMMON_PACKTITLE) -a $(PKGCOMMON_PACKAGES):download ## rule that forced the download at install time; I emptied it (F. H) compilepkg:: # <<install>> install::compilepkg # <<reinstall>> reinstall::compilepkg clean-local:: veryclean::clean -rm $(PKGCOMMON_PACKAGES) # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/dissection/000775 000000 000000 00000000000 14037356732 020266 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/COPYRIGHT000664 000000 000000 00000111515 14037356732 021565 0ustar00rootroot000000 000000 Copyright (C) 2012, 2016 Atsushi Suzuki, François-Xavier Roux, Xavier Juvigny This is Dissection software.
Programs in it are written by Atsushi Suzuki and by François-Xavier Roux and by Xavier Juvigny Dissection is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Linking Dissection statically or dynamically with other modules is making a combined work based on Dissection. Thus, the terms and conditions of the GNU General Public License cover the whole combination. As a special exception, the copyright holders of Dissection give you permission to combine Dissection program with free software programs or libraries that are released under the GNU LGPL and with independent modules that communicate with Dissection solely through the Dissection-fortran interface. You may copy and distribute such a system following the terms of the GNU GPL for Dissection and the licenses of the other code concerned, provided that you include the source code of that other code when and as the GNU GPL requires distribution of source code and provided that you do not modify the Dissection-fortran interface. Note that people who make modified versions of Dissection are not obligated to grant this special exception for their modified versions; it is their choice whether to do so. The GNU General Public License gives permission to release a modified version without this exception; this exception also makes it possible to release a modified version which carries forward this exception. If you modify the Dissection-fortran interface, this exception does not apply to your modified version of Dissection, and you must remove this exception when you distribute your modified version.
This exception is an additional permission under section 7 of the GNU General Public License, version 3 (“GPLv3”) Dissection is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. All its programs that may be redistributed under the terms of the GNU GENERAL PUBLIC LICENSE Version 3, June 2007 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. 
Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. 
"Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. 
Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Dissection : a C++ implementation of parallel sparse direct solver on shared memory architecture with a kernel detection algorithm for singular matrices Copyright (C) 2016 Atsushi Suzuki, François-Xavier Roux, Xavier Juvigny This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Dissection Copyright (C) 2016 Atsushi Suzuki, François-Xavier Roux, Xavier Juvigny This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>.FreeFem-sources-4.9/3rdparty/dissection/LICENSE000664 000000 000000 00000002470 14037356732 021276 0ustar00rootroot000000 000000 Dissection is a C++ implementation of parallel sparse direct solver on shared memory architecture with a kernel detection algorithm for singular matrices. The source is available under GPL v3 from the FreeFEM repository, < http://www3.freefem.org/ff++/ff++/download/dissection/ > The copyright holders of Dissection give you permission to combine Dissection program with free software programs or libraries that are released under the GNU LGPL and with independent modules that communicate with Dissection solely through the Dissection-fortran interface.
You may copy and distribute such a system following the terms of the GNU GPL for Dissection and the licenses of the other code concerned, provided that you include the source code of that other code when and as the GNU GPL requires distribution of source code and provided that you do not modify the Dissection-fortran interface. If you want to link Dissection to your proprietary application through other than the Dissection-fortran interface, Dissection will be licensed under CeCILL-C, < http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html >. In such case, please contact directly one of authors, Atsushi Suzuki and François-Xavier Roux Copyright (C) 2016 Atsushi Suzuki, François-Xavier Roux, Xavier JuvignyFreeFem-sources-4.9/3rdparty/dissection/README000664 000000 000000 00000004303 14037356732 021146 0ustar00rootroot000000 000000 ** dissection solver Ver 1.0.0 : written in C++ with qd library or ** ** GNU __float128 ** ** Atsushi Suzuki, 25 Jun. 2015 ** 30 Nov. 2016 How to compile: - directory structure needs to be the following. include/ lib/ src/ need to be in the same place, referred to as $(PROJ_DIR) - copy src/Defs.inc.Intel to src/Defs.inc - copy src/Makefile.Intel to src/Makefile - src/Defs.inc contains definition of the place of header files METIS_INC = $(PROJ_DIR)/metis-5.1.0/include SCOTCH_INC = $(PROJ_DIR)/scotch_6.0.4/include QD_INC = $(PROJ_DIR)/include and libraries METIS_DIR = $(PROJ_DIR)/metis-5.0.2/build/Darwin-x86_64/libmetis LIB_DIR_SCOTCH = $(PROJ_DIR)/scotch_6.0.4/lib LIB_DIR_QD = $(PROJ_DIR)/qd-2.3.17/lib - to use intel MKL specify BLAS in Defs.inc BLAS = MKL On the MacOSX (Mavericks or Yosemite), BLAS = VECLIB attains 90 % of the speed of MKL. For preparation to use vecLib framework, command line Xcode needs to be installed by % xcode-select --install - qd library needs to be compiled with patches to handle complex class of LLVM clang++.
qd-2.3.17.tar.gz will be obtained from http://crd-legacy.lbl.gov/~dhbailey/mpdist/qd-2.3.17.tar.gz to extract tar ball and apply patches % tar xzf qd-2.3.17.tar.gz % cd qd-2.3.17 % patch -p1 < qd-2.3.17-for-LLVM.patch configure needs to be done with specifying install directory $(PROJ_DIR) % ./configure --prefix=$(PROJ_DIR) CXX=clang++ CC=clang % make install - to clean directories under $(PROJ_DIR)/src, % make distclean % make distclean - to build $(PROJ_DIR)/libDissection.dyld, in $(PROJ_DIR)/src % make - a test program is located in $(PROJ_DIR)/src/C-test % make % ./MM-Dissection ../MM-matrix/MM.testA.data 0 1 1.e-2 8 2 the first argument specifies 0 for SCOTCH and 1 for METIS the second specifies number of threads, which should be less or equal to number of the physical CPU cores the third specifies pivot threshold the fourth specifies number of level of dissection the fifth specifies scaling strategy : 0 for no-scaling, 1 for diagonal 2 for scaling for KKT-type matrix FreeFem-sources-4.9/3rdparty/dissection/include/000775 000000 000000 00000000000 14037356732 021711 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/include/.hidden000664 000000 000000 00000000000 14037356732 023133 0ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/lib/000775 000000 000000 00000000000 14037356732 021034 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/lib/.hidden000664 000000 000000 00000000000 14037356732 022256 0ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/qd-2.3.17-for-LLVM.patch000664 000000 000000 00000005160 14037356732 023777 0ustar00rootroot000000 000000 *** qd-2.3.17/include/qd/dd_real.h Tue Dec 29 04:18:53 2009 --- qd-2.3.17-for-LLVM/include/qd/dd_real.h Thu Jul 23 15:54:13 2015 *************** *** 25,30 **** --- 25,32 ---- * * Yozo Hida */ + // operator int(), dd_real copysign(), fmax(), lobg(), scalbn() are added + // for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki 
#ifndef _QD_DD_REAL_H #define _QD_DD_REAL_H *************** *** 156,161 **** --- 158,167 ---- std::ostream &os = std::cerr) const; static dd_real debug_rand(); + + // added for complex class of LLVM Clang++: 23 Jul.2015 Atsushi Suzuki + operator int() { return int(x[0]); } + operator int() const { return int(x[0]); } }; *************** *** 286,290 **** --- 292,313 ---- #include #endif + // added for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki + inline dd_real copysign(const dd_real &x, const dd_real &y) { + return (y.x[0] < 0.0) ? ((x.x[0] < 0.0) ? x : (-x)) : ((x.x[0] < 0.0) ? (-x) : x); + } + + inline dd_real fmax(const dd_real &x, const dd_real &y) { + return x.x[0] < y.x[0] ? y : x; + } + + inline dd_real logb(const dd_real &y) { + return dd_real(logb(y.x[0])); + } + + inline dd_real scalbn(const dd_real &x, int n) { + return dd_real(scalb(x.x[0], n)); + } + #endif /* _QD_DD_REAL_H */ *** qd-2.3.17/include/qd/qd_real.h Mon May 11 19:45:05 2009 --- qd-2.3.17-for-LLVM/include/qd/qd_real.h Thu Jul 23 15:41:24 2015 *************** *** 20,25 **** --- 20,28 ---- * * Yozo Hida */ + // operator int(), dd_real copysign(), fmax(), lobg(), scalbn() are added + // for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki + #ifndef _QD_QD_REAL_H #define _QD_QD_REAL_H *************** *** 134,139 **** --- 137,146 ---- static qd_real debug_rand(); + // added for complex class of LLVM Clang++: 23 Jul.2015 Atsushi Suzuki + operator int() { return int(x[0]); } + operator int() const { return int(x[0]); } + }; namespace std { *************** *** 289,293 **** --- 296,318 ---- #include #endif + // added for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki + + inline qd_real copysign(const qd_real &x, const qd_real &y) { + return (y.x[0] < 0.0) ? ((x.x[0] < 0.0) ? x : (-x)) : ((x.x[0] < 0.0) ? (-x) : x); + } + + inline qd_real fmax(const qd_real &x, const qd_real &y) { + return x.x[0] < y.x[0] ? 
y : x; + } + + inline qd_real logb(const qd_real &y) { + return qd_real(logb(y.x[0])); + } + + inline qd_real scalbn(const qd_real &x, int n) { + return qd_real(scalb(x.x[0], n)); + } + #endif /* _QD_QD_REAL_H */ FreeFem-sources-4.9/3rdparty/dissection/src/000775 000000 000000 00000000000 14037356732 021055 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/000775 000000 000000 00000000000 14037356732 022412 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/CSR_matrix.hpp000664 000000 000000 00000005561 14037356732 025145 0ustar00rootroot000000 000000 /*! \file CSR_matrix.hpp \brief Sparse matrix data structure \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface.
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. // #ifndef _DISSECTION_SPLITTERS_CSR_MATRIX_HPP_ #define _DISSECTION_SPLITTERS_CSR_MATRIX_HPP_ // #define DEBUG_MAPPING_CSR /* CSR_indirect: plain aggregate of two int counters (n, nnz), five int index-array pointers (six when DEBUG_MAPPING_CSR is defined) and a symmetry flag. It declares no constructor or destructor, so it allocates and frees nothing itself. NOTE(review): presumably n / nnz are the matrix dimension and the number of stored entries, and ptRows / indCols the CSR row pointers and column indices - confirm against the code that fills this struct. */ struct CSR_indirect { int n; int nnz; int *ptRows; int *indCols; int *indVals0; // sym for debugging int *indVals; // sym for after the first permutation to exclude diagonals int *indVals_unsym; bool isSym; #ifdef DEBUG_MAPPING_CSR int *indVals2; // unsym #endif }; #endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/ColumnMatrix.hpp000664 000000 000000 00000011264 14037356732 025551 0ustar00rootroot000000 000000 /*! \file ColumnMatrix.hpp \brief Rectangular matrix view as a set of column vectors \author Xavier Juvigny, ONERA \date Jan. 19th 2005 \modification allocation of array by STL vector class \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov.
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _ALGEBRA_COLUMNMATRIX_HPP # define _ALGEBRA_COLUMNMATRIX_HPP # include "Algebra/PlainMatrix.hpp" template class ColumnMatrix : public PlainMatrix { public: using PlainMatrix::coefs; using PlainMatrix::addrCoefs; using PlainMatrix::addrCoefs_pt; ColumnMatrix() : PlainMatrix(), _nrows(0), _ncols(0) {} ColumnMatrix(int nrows, int ncols) : PlainMatrix(), _nrows(0), _ncols(0) { init(nrows, ncols); } ColumnMatrix(int nrows, int ncols, T* coefs, bool isOwner) : PlainMatrix(isOwner), _nrows(0), _ncols(0) { init(nrows, ncols, coefs, isOwner); } void init(int nrows, int ncols, bool later_allocation = false) { _nrows = nrows; _ncols = ncols; if (!later_allocation) { PlainMatrix::init((_nrows * _ncols)); } } void init(int nrows, int ncols, T* cfs, bool isOwner) { _nrows = nrows; _ncols = ncols; PlainMatrix::init((_nrows * _ncols), cfs, isOwner); } void allocate() { PlainMatrix::init((_nrows * _ncols)); } ~ColumnMatrix() { } int nbColumns() const { return _ncols; } int nbRows() const { return _nrows; } int size() const { return _ncols * _nrows; } virtual T& operator () (int i, int j) { # ifdef DISDEBUG assert(int(i) < nbRows()); assert(int(j) < nbColumns()); #endif return coefs()[i + j * _nrows]; } virtual const T& operator () (int i, int j) const { # ifdef DISDEBUG assert(int(i) < nbRows()); assert(int(j) < nbColumns()); #endif return coefs()[i + j * _nrows]; } void ZeroClear() { PlainMatrix::ZeroClear(); } virtual ColumnMatrix* clone() const { ColumnMatrix *ret=new ColumnMatrix; 
ret->copy(*this); return(ret); } /// \brief Deep copy of B void copy(const ColumnMatrix& B) { _nrows=B._nrows; _ncols=B._ncols; PlainMatrix::copy(B); } void free() { PlainMatrix::free(); _nrows = 0; _ncols = 0; } private: ColumnMatrix(const ColumnMatrix& A); ColumnMatrix& operator = (const ColumnMatrix& A); int _nrows; int _ncols; }; #endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/PlainMatrix.hpp000664 000000 000000 00000016457 14037356732 025370 0ustar00rootroot000000 000000 /*! \file PlainMatrix.hpp \brief Common interface for plain matrices (square, rectangulars,...) \author Xavier Juvigny, ONERA \date Jan. 25th 2005 \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
//
#ifndef _ALGEBRA_PLAINMATRIX_
# define _ALGEBRA_PLAINMATRIX_
// NOTE(review): the header names were lost in this copy of the file; the ones
// below are chosen to provide exactly the names used here (fprintf, memset /
// memcpy, std::bad_alloc).
#include <cstdio>
#include <cstring>
#include <new>
//#define DEBUG_MEMORY_PLAINMATRIX
#ifdef DEBUG_MEMORY_PLAINMATRIX
# include <stdint.h>   // uint64_t
# include <unistd.h>   // getpid
# include <fstream>    // std::ifstream
#endif

/// \brief Base class of dense matrices: a flat array of _n coefficients
///        reached through one extra level of indirection (_coefs is a T**).
///
/// _n == -1 means "no array attached".  _isOwner tells whether the array may
/// be delete[]d by this object; _coefs_alloc_status tells whether the pointer
/// slot _coefs itself was allocated by this object.
template<typename T>
class PlainMatrix
{
public:
  /// Creates a matrix with no array attached yet.
  PlainMatrix(bool isOwner = true)
  {
    _isOwner = isOwner;
    _n = (-1); // not yet allocated
    _coefs = new T*;
    *_coefs = NULL;   // never leave the slot indeterminate
    _coefs_alloc_status = true;
  }

  /// Creates a matrix owning a freshly allocated array of \p nbElts entries.
  PlainMatrix(int nbElts)
  {
    _isOwner = true;
    _n = nbElts;
    _coefs = new T*;
    _coefs_alloc_status = true;
    *_coefs = (nbElts > 0) ? allocate_coefs(nbElts) : NULL;
#ifdef DEBUG_MEMORY_PLAINMATRIX
    mem_get();
    fprintf(stderr, "@@PlainMatrix constructor : %.6e %.6e @%p %d\n",
            (double)_mem_vrt * 4.0 / (1024.0 *1024.0),
            (double)_mem_res * 4.0 / (1024.0 *1024.0),
            (void *)*_coefs, (int)(sizeof(T) * nbElts));
#endif
  }

  virtual ~PlainMatrix()
  {
    free();
    if (_coefs_alloc_status) {
      delete _coefs;
      _coefs_alloc_status = false;
    }
  }

#ifdef DEBUG_MEMORY_PLAINMATRIX
  /// Reads the first two fields of /proc/<pid>/statm into _mem_vrt / _mem_res.
  void mem_get()
  {
    int pid = (int)getpid();
    char buf[256];
    sprintf(buf, "/proc/%d/statm", pid);
    std::ifstream fin(buf);
    fin >> _mem_vrt >> _mem_res;
    fin.close();
  }
#endif

  int size() const { return _n; }

  inline T& operator [] (int i) { return (*_coefs)[i]; }
  inline const T& operator [] (int i) const { return (*_coefs)[i]; }

  virtual T& operator () (int i, int j) = 0;
  virtual const T& operator () (int i, int j) const = 0;

  T** addrCoefs_pt() { return _coefs; }
  // T** does not convert implicitly to const T**, so the cast is required for
  // this overload to compile once it is instantiated
  const T** addrCoefs_pt() const { return const_cast<const T**>(_coefs); }

  inline T* addrCoefs() { return *_coefs; }
  inline const T* addrCoefs() const { return *_coefs; }

protected:
  /// (Re)allocates an owned array of \p nbElts entries.  An array previously
  /// owned by this object is released first; a borrowed one is only detached.
  void init(int nbElts)
  {
    // decide about the old array BEFORE taking ownership of the new one
    if (_isOwner && _n > 0) {
      delete [] *_coefs;
    }
    *_coefs = NULL;
    _isOwner = true;
    _n = nbElts;
    if (nbElts > 0) {
      *_coefs = allocate_coefs(nbElts);
#ifdef DEBUG_MEMORY_PLAINMATRIX
      mem_get();
      fprintf(stderr, "@@PlainMatrix init : %.6e %.6e %d\n",
              (double)_mem_vrt * 4.0 / (1024.0 *1024.0),
              (double)_mem_res * 4.0 / (1024.0 *1024.0),
              (int)(sizeof(T) * nbElts));
#endif
    }
  }

  /// Attaches the caller-supplied array \p coefs of \p nbElts entries;
  /// it is delete[]d by this object only when \p isOwner is true.
  void init(int nbElts, T* coefs, bool isOwner)
  {
    _isOwner = isOwner;
    _n = nbElts;
    *_coefs = coefs;
  }

  /// Releases the array when it is owned and marks the matrix unallocated.
  void free()
  {
    if (_n > 0 && _isOwner) {
#ifdef DEBUG_MEMORY_PLAINMATRIX
      mem_get();
      fprintf(stderr, "@@PlainMatrix free : %.6e %.6e @%p\n",
              (double)_mem_vrt * 4.0 / (1024.0 *1024.0),
              (double)_mem_res * 4.0 / (1024.0 *1024.0),
              (void *)*_coefs);
#endif
      delete [] *_coefs;
      *_coefs = NULL;
    }
    _n = (-1);
  }

  T* coefs() { return *_coefs; }
  const T* coefs() const { return *_coefs; }

  /// Sets every byte of the array to zero; a no-op when nothing is attached.
  void ZeroClear()
  {
    if (_n > 0 && *_coefs != NULL) {
      memset((void *)*_coefs, 0, sizeof(T) * _n);
    }
  }

  virtual PlainMatrix<T>* clone() const = 0;

  /// Deep copy of the coefficients of \p M.  When the sizes differ a new owned
  /// array of M.size() entries replaces the current one; otherwise the current
  /// array is overwritten in place and its ownership is left untouched.
  void copy(const PlainMatrix<T> &M)
  {
    if (this == &M) {
      return;
    }
    if (_n != M._n) {
      if (_isOwner && _n > 0) {
        delete [] *_coefs;
      }
      // size the destination from the SOURCE, not from the stale _n
      *_coefs = (M._n > 0) ? allocate_coefs(M._n) : NULL;
      _isOwner = true;
      _n = M._n;
    }
#ifdef DEBUG_MEMORY_PLAINMATRIX
    mem_get();
    fprintf(stderr, "@@PlainMatrix copy : %.6e %.6e @%p %d\n",
            (double)_mem_vrt * 4.0 / (1024.0 *1024.0),
            (double)_mem_res * 4.0 / (1024.0 *1024.0),
            (void *)*_coefs, (int)(sizeof(T) * _n));
#endif
    if (_n > 0 && *_coefs != NULL && *(M._coefs) != NULL &&
        *_coefs != *(M._coefs)) {
      memcpy((void *)*_coefs, (const void *)*(M._coefs), _n * sizeof(T));
    }
  }

  /// Shallow alias of \p A: shares A's pointer slot and owns nothing, so that
  /// destroying the alias releases neither the slot nor the array.
  PlainMatrix(const PlainMatrix& A)
  {
    _isOwner = false;
    _coefs_alloc_status = false;
    _n = A._n;
    _coefs = A._coefs;
  }

  /// Turns this object into a shallow, non-owning alias of \p A after
  /// releasing whatever it owned before.
  PlainMatrix& operator = (const PlainMatrix& A)
  {
    if (this != &A && _coefs != A._coefs) {
      free();
      if (_coefs_alloc_status) {
        delete _coefs;
        _coefs_alloc_status = false;
      }
      _isOwner = false;
      _n = A._n;
      _coefs = A._coefs;
    }
    return *this;
  }

private:
  /// new T[nbElts] with the allocation failure reported on stderr;
  /// returns NULL instead of leaving the caller with an indeterminate pointer.
  static T* allocate_coefs(int nbElts)
  {
    try {
      return new T[nbElts];
    }
    catch (const std::bad_alloc& e) {
      fprintf(stderr, "%s %d : allocation failed : %s",
              __FILE__, __LINE__, e.what());
    }
    return NULL;
  }

  bool _isOwner;
  bool _coefs_alloc_status;
  T** _coefs;
  int _n;
#ifdef DEBUG_MEMORY_PLAINMATRIX
  uint64_t _mem_vrt, _mem_res;
#endif
}; // end class PlainMatrix

// The text after #endif is the archive header of the next member, kept verbatim.
#endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/RectBlockMatrix.cpp000664 000000 000000 00000033632 14037356732 026162 0ustar00rootroot000000 000000
/*! \file   RectBlockMatrix.cpp
    \brief  Block storage for Rectangular matrix
    \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions
    \date   Jun. 11th 2013
    \date   Jul. 12th 2015
    \date   Nov. 30th 2016
*/
// This file is part of Dissection
//
// Dissection is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Linking Dissection statically or dynamically with other modules is making
// a combined work based on Disssection. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so.
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Algebra/RectBlockMatrix.hpp" #include "Compiler/blas.hpp" #include template void RectBlockMatrix::init(int dim_r, int dim_c, int block_size, int first_block) { int itmp; _dim_r = dim_r; _dim_c = dim_c; _block_size = block_size; _block_size2 = _block_size * _block_size; if ((dim_r * dim_c) == 0) { _isdecomposed_c = false; _num_blocks_r = 0; _num_blocks_c = 0; _block_size_last_r = 0; _block_size_last_c = 0; _coefs = NULL; _allocation_status = NULL; _deallocation_status = false; return; } if (first_block > 0) { _isdecomposed_c = true; _dim0_c = first_block; _dim1_c = _dim_c - _dim0_c; _num_blocks_r = (_dim_r + _block_size - 1) / _block_size; _num_blocks0_c = (_dim0_c + _block_size - 1) / _block_size; _num_blocks1_c = (_dim1_c + _block_size - 1) / _block_size; _num_blocks_c = _num_blocks0_c + _num_blocks1_c; _block_sizes_r.resize(_num_blocks_r); _block_sizes_c.resize(_num_blocks_c); _indexblock_r.resize(_num_blocks_r + 1); _indexblock_c.resize(_num_blocks_c + 1); itmp = _dim_r % _block_size; _block_size_last_r = (itmp == 0) ? 
_block_size : itmp; int ktmp; ktmp = 0; if (_num_blocks_r > 0) { for (int i = 0; i < (_num_blocks_r - 1); i++, ktmp += _block_size) { _block_sizes_r[i] = _block_size; _indexblock_r[i] = ktmp; } _block_sizes_r[_num_blocks_r - 1] = _block_size_last_r; _indexblock_r[_num_blocks_r - 1] = ktmp; _indexblock_r[_num_blocks_r] = _dim_r; } itmp = _dim0_c % _block_size; _block_size_last0_c = (itmp == 0) ? _block_size : itmp; ktmp = 0; if (_num_blocks0_c > 0) { for (int i = 0; i < (_num_blocks0_c - 1); i++, ktmp += _block_size) { _block_sizes_c[i] = _block_size; _indexblock_c[i] = ktmp; } _block_sizes_c[_num_blocks0_c - 1] = _block_size_last0_c; _indexblock_c[_num_blocks0_c - 1] = ktmp; } itmp = _dim1_c % _block_size; _block_size_last1_c = (itmp == 0) ? _block_size : itmp; ktmp = _dim0_c; if (_num_blocks1_c > 0) { for (int i = _num_blocks0_c; i < (_num_blocks_c - 1); i++, ktmp += _block_size) { _block_sizes_c[i] = _block_size; _indexblock_c[i] = ktmp; } _block_sizes_c[_num_blocks_c - 1] = _block_size_last1_c; _indexblock_c[_num_blocks_c - 1] = ktmp; } _indexblock_c[_num_blocks_c] = _dim_c; } else { _isdecomposed_c = false; _num_blocks_r = (_dim_r + _block_size - 1) / _block_size; _num_blocks_c = (_dim_c + _block_size - 1) / _block_size; _block_sizes_r.resize(_num_blocks_r); _block_sizes_c.resize(_num_blocks_c); _indexblock_r.resize(_num_blocks_r + 1); _indexblock_c.resize(_num_blocks_c + 1); itmp = _dim_r % _block_size; _block_size_last_r = (itmp == 0) ? _block_size : itmp; int ktmp; ktmp = 0; if (_num_blocks_r > 0) { for (int i = 0; i < (_num_blocks_r - 1); i++, ktmp += _block_size) { _block_sizes_r[i] = _block_size; _indexblock_r[i] = ktmp; } _block_sizes_r[_num_blocks_r - 1] = _block_size_last_r; _indexblock_r[_num_blocks_r - 1] = ktmp; _indexblock_r[_num_blocks_r] = _dim_r; } itmp = _dim_c % _block_size; ktmp = 0; if (_num_blocks_c > 0) { _block_size_last_c = (itmp == 0) ? 
_block_size : itmp; for (int i = 0; i < (_num_blocks_c - 1); i++, ktmp += _block_size) { _block_sizes_c[i] = _block_size; _indexblock_c[i] = ktmp; } _block_sizes_c[_num_blocks_c - 1] = _block_size_last_c; _indexblock_c[_num_blocks_c - 1] = ktmp; _indexblock_c[_num_blocks_c] = _dim_c; } } if (_num_blocks_r * _num_blocks_c == 0) { _coefs = NULL; // new T*[1]; // dummy _allocation_status = NULL; // new bool[1]; // dummy _deallocation_status = false; // _deallocation_status = true; } else { try { _coefs = new T*[_num_blocks_r * _num_blocks_c]; // need to be managed } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } try { _allocation_status = new bool[_num_blocks_r * _num_blocks_c]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } for (int i = 0; i < (_num_blocks_r * _num_blocks_c); i++) { _allocation_status[i] = false; } _deallocation_status = false; } } template void RectBlockMatrix::init(int dim_r, int dim_c, int block_size, int first_block); template void RectBlockMatrix::init(int dim_r, int dim_c, int block_size, int first_block); template void RectBlockMatrix >::init(int dim_r, int dim_c, int block_size, int first_block); template void RectBlockMatrix >::init(int dim_r, int dim_c, int block_size, int first_block); template void RectBlockMatrix::init(int dim_r, int dim_c, int block_size, int first_block); template void RectBlockMatrix >::init(int dim_r, int dim_c, int block_size, int first_block); // template void RectBlockMatrix::allocateBlock(int i, int j) { const int nrow = _block_sizes_r[i]; const int ncol = _block_sizes_c[j]; if (!_allocation_status[i + j * _num_blocks_r]) { // try { _coefs[i + j * _num_blocks_r] = new T[nrow * ncol]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } } _allocation_status[i + j * _num_blocks_r] = true; } template void 
RectBlockMatrix::allocateBlock(int i, int j); template void RectBlockMatrix::allocateBlock(int i, int j); template void RectBlockMatrix >::allocateBlock(int i, int j); template void RectBlockMatrix >::allocateBlock(int i, int j); template void RectBlockMatrix::allocateBlock(int i, int j); template void RectBlockMatrix >::allocateBlock(int i, int j); // template void RectBlockMatrix::allocate() { for (int i = 0; i < _num_blocks_r; i++) { for (int j = 0; j < _num_blocks_c; j++) { const int nrow = _block_sizes_r[i]; const int ncol = _block_sizes_c[j]; if (!_allocation_status[i + j * _num_blocks_r]) { try { _coefs[i + j * _num_blocks_r] = new T[nrow * ncol]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } } _allocation_status[i + j * _num_blocks_r] = true; } } _deallocation_status = false; } template void RectBlockMatrix::allocate(); template void RectBlockMatrix::allocate(); template void RectBlockMatrix >::allocate(); template void RectBlockMatrix >::allocate(); template void RectBlockMatrix::allocate(); template void RectBlockMatrix >::allocate(); // template void RectBlockMatrix::free(int i, int j) { if (_allocation_status[i + j * _num_blocks_r]) { delete[] _coefs[i + j * _num_blocks_r]; // need to be mananged } _allocation_status[i + j * _num_blocks_r] = false; } template void RectBlockMatrix::free(int i, int j); template void RectBlockMatrix::free(int i, int j); template void RectBlockMatrix >::free(int i, int j); template void RectBlockMatrix >::free(int i, int j); template void RectBlockMatrix::free(int i, int j); template void RectBlockMatrix >::free(int i, int j); // template void RectBlockMatrix::free() { for (int i = 0; i < _num_blocks_r; i++) { for (int j = 0; j < _num_blocks_c; j++) { if (_allocation_status[i + j * _num_blocks_r]) { delete[] _coefs[i + j * _num_blocks_r]; // need to be managed _allocation_status[i + j * _num_blocks_r] = false; } } } } template void 
RectBlockMatrix::free(); template void RectBlockMatrix::free(); template void RectBlockMatrix >::free(); template void RectBlockMatrix >::free(); template void RectBlockMatrix::free(); template void RectBlockMatrix >::free(); // template T* RectBlockMatrix::addrCoefBlock(int i, int j) { return _coefs[i + j * _num_blocks_r]; } template double* RectBlockMatrix::addrCoefBlock(int i, int j); template quadruple* RectBlockMatrix::addrCoefBlock(int i, int j); template complex* RectBlockMatrix >:: addrCoefBlock(int i, int j); template complex* RectBlockMatrix >:: addrCoefBlock(int i, int j); template float* RectBlockMatrix::addrCoefBlock(int i, int j); template complex* RectBlockMatrix >:: addrCoefBlock(int i, int j); // template void RectBlockMatrix::ZeroClear() { const T zero(0.0); for (int i = 0; i < _num_blocks_r; i++) { for (int j = 0; j < _num_blocks_c; j++) { const int nrow = _block_sizes_r[i]; const int ncol = _block_sizes_c[j]; const int nsize = nrow * ncol; const int itmp = i + j * _num_blocks_r; for (int k = 0; k < nsize; k++) { _coefs[itmp][k] = zero; // need to be replaced by memcpy } } } } template void RectBlockMatrix::ZeroClear(); template void RectBlockMatrix::ZeroClear(); template void RectBlockMatrix >::ZeroClear(); template void RectBlockMatrix >::ZeroClear(); template void RectBlockMatrix::ZeroClear(); template void RectBlockMatrix >::ZeroClear(); // template T& RectBlockMatrix::operator () (int i, int j) { int i0, i1, j0, j1; if (_isdecomposed_c) { i0 = i / _block_size; i1 = i % _block_size; if (j < _dim0_c) { j0 = j / _block_size; j1 = j % _block_size; } else { j0 = (j - _dim0_c) / _block_size + _num_blocks0_c; j1 = (j - _dim0_c) % _block_size; } } else { i0 = i / _block_size; j0 = j / _block_size; i1 = i % _block_size; j1 = j % _block_size; } return _coefs[i0 + j0 * _num_blocks_r][i1 + j1 * _block_sizes_r[i0]]; } template double& RectBlockMatrix::operator () (int i, int j); template quadruple& RectBlockMatrix::operator () (int i, int j); template 
complex & RectBlockMatrix >:: operator () (int i, int j); template complex & RectBlockMatrix >:: operator () (int i, int j); template float& RectBlockMatrix::operator () (int i, int j); template complex & RectBlockMatrix >:: operator () (int i, int j); // template const T& RectBlockMatrix::operator () (int i, int j) const { int i0, i1, j0, j1; i0 = i / _block_size; j0 = j / _block_size; i1 = i % _block_size; j1 = j % _block_size; return _coefs[i0 + j0 * _num_blocks_r][i1 + j1 * _block_sizes_r[i0]]; } template const double& RectBlockMatrix::operator () (int i, int j) const; template const quadruple& RectBlockMatrix::operator () (int i, int j) const; template const complex& RectBlockMatrix >:: operator () (int i, int j) const; template const complex& RectBlockMatrix >:: operator () (int i, int j) const; template const float& RectBlockMatrix::operator () (int i, int j) const; template const complex& RectBlockMatrix >:: operator () (int i, int j) const; // FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/RectBlockMatrix.hpp000664 000000 000000 00000015172 14037356732 026166 0ustar00rootroot000000 000000 /*! \file RectBlockMatrix.hpp \brief Block storage for Rectangular matrix \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _ALGEBRA_RECTBLOCKMATRIX_HPP #define _ALGEBRA_RECTBLOCKMATRIX_HPP #include #include #include #include using std::vector; template class RectBlockMatrix { public: RectBlockMatrix() { _dim_r = 0; _dim_c = 0; _block_size = 0; _block_size_last_r = 0; _block_size_last_c = 0; _num_blocks_r = 0; _num_blocks_c = 0; _deallocation_status = true; } RectBlockMatrix(int dim_r, int dim_c, int block_size) { #ifdef DEBUG_SQUAREBLOCKMATRIX fprintf(stderr, "%s %d : constructor with args %d %d\n", __FILE__, __LINE__, dim, block_size); #endif init(dim_r, dim_c, block_size, 0); allocate(); _deallocation_status = false; } RectBlockMatrix(int dim_r, int dim_c, double *aa) { #ifdef DEBUG_SQUAREBLOCKMATRIX fprintf(stderr, "%s %d : constructor with args %d %d\n", __FILE__, __LINE__, dim, block_size); #endif _block_size = dim_r > dim_c ? dim_r : dim_c; _dim_c = dim_c; _dim_r = dim_r; _num_blocks_r = 1; _num_blocks_c = 1; _coefs = new T*[1]; _coefs[0] = aa; _block_sizes_r.resize(1); _block_sizes_c.resize(1); _indexblock_r.resize(2); _indexblock_c.resize(2); _block_sizes_r[0] = _dim_r; _block_sizes_c[0] = _dim_c; _indexblock_r[0] = 0; _indexblock_r[1] = _dim_r; _indexblock_c[0] = 0; _indexblock_c[1] = _dim_c; _allocation_status = new bool[1]; _allocation_status[0] = false; // _coefs[0] is not allowed to free _isdecomposed_c = false; _deallocation_status = true; } void init(int dim_r, int dim_c, int block_size, int first_block = 0); void allocateBlock(int i, int j); void allocate(); void free(int i, int j); void free(); ~RectBlockMatrix() { free(); if (!_deallocation_status) { delete [] _coefs; delete [] _allocation_status; } _deallocation_status = true; _block_sizes_r.clear(); _block_sizes_c.clear(); _num_blocks_r = 0; // ? to avoid double free _num_blocks_c = 0; // ? 
to avoid double free _dim_r = 0; _dim_c = 0; } int dimension_r() const { return (int)_dim_r; } int dimension_c() const { return (int)_dim_c; } int block_size() const { return (int)_block_size; } int num_blocks_r() const { return _num_blocks_r; } int num_blocks_c() const { return _num_blocks_c; } int nbRows() const { return _dim_r; } int nbColumns() const { return _dim_c; } int block_size_last_r() const { return (int)_block_size_last_r; } int block_size_last_c() const { return (int)_block_size_last_c; } int nrowBlock(int i) const { return _block_sizes_r[i]; } int ncolBlock(int i) const { return _block_sizes_c[i]; } int IndexBlock_r(int i) const { return _indexblock_r[i]; } int IndexBlock_c(int i) const { return _indexblock_c[i]; } T* addrCoefBlock(int i, int j); void ZeroClear(); T& operator () (int i, int j); const T& operator () (int i, int j) const; RectBlockMatrix* clone() const { RectBlockMatrix *ret = new RectBlockMatrix; ret->copy(*this); return(ret); } void copy ( const RectBlockMatrix& A ) { _block_size = A._block_size; _block_size2 = A._block_size2; _num_blocks = A._num_blocks; _block_sizes_r = A._block_sizes_r; _block_sizes_c = A._block_sizes_c; _dim_r = A._dim_r; _dim_c = A._dim_c; _block_size_last_r = A._block_size_last_r; _block_size_last_c = A._block_size_last_c; _num_blocks_r = A._num_blocks_r; _num_blocks_c = A._num_blocks_c; _coefs = A._coefs; _allocation_status = A._allocation_status; _deallocation_status = A._deallocation_status; } private: int _block_size; int _block_size2; int _num_blocks; vector _block_sizes_r; vector _block_sizes_c; vector _indexblock_r; vector _indexblock_c; int _dim_r; int _dim_c; int _block_size_last_r; int _block_size_last_c; int _block_size_last0_c; int _block_size_last1_c; int _num_blocks_r; int _num_blocks_c; int _num_blocks0_c; int _num_blocks1_c; int _dim0_c; int _dim1_c; T** _coefs; bool *_allocation_status; bool _deallocation_status; bool _isdecomposed_c; }; // End class RectMatrix #endif 
FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SparseMatrix.cpp000664 000000 000000 00000047450 14037356732 025552 0ustar00rootroot000000 000000 /*! \file SparseMatrix.cpp \brief Sparse matrix definition \author Xavier Juvigny, ONERA \date Jan. 25th 2005 \modification allocation of array by STL vector class \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. 
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Algebra/SparseMatrix.hpp" #include "Algebra/VectorArray.hpp" #include "Compiler/OptionCompiler.hpp" #include "Compiler/arithmetic.hpp" #include "Driver/DissectionDefault.hpp" // for definition of scaling option #include #include #include inline double SpMAX(double a, double b) { return (a > b ? a : b); } inline quadruple SpMAX(quadruple a, quadruple b) { return (a > b ? 
a : b); } template void normalize(const int type, const T* coefs0, SparseMatrix *ptDA, U* u) { const T zero(0.0); const T one(1.0); const U Uone(1.0); const U Uzero(0.0); const int n = ptDA->dimension(); VectorArray v(n); // Z* v = new Z[n]; VectorArray d(n); // Z* d = new Z[n]; int *ptUnsymRows, *indUnsymCols, *indVals; if (ptDA->isSymmetric() && (type == KKT_SCALING)) { const int nnz = ptDA->nnz(); if (ptDA->isWhole()) { ptUnsymRows = ptDA->getRows(); indUnsymCols = ptDA->getIndCols(); indVals = new int[nnz]; for (int i = 0; i < nnz; i++) { indVals[i] = i; } } else { const int nnz0 = nnz * 2 - n; ptUnsymRows = new int[n + 1]; indUnsymCols = new int[nnz0]; // diagonal entries exits though indVals = new int[nnz0]; // coeficient equals to zero // only used as extend symmetric symbolic structure to unsymmetric int nnz1; nnz1 = CSR_sym2unsym(ptUnsymRows, indUnsymCols, indVals, ptDA->getRows(), ptDA->getIndCols(), n, ptDA->isUpper()); if (nnz1 != nnz0) { fprintf(stderr, "%s %d : symmetric matrix has no diagonal entry %d != %d\n", __FILE__, __LINE__, nnz0, nnz1); exit(-1); } } } for (int i = 0; i < n; i++) { d[i] = Uzero; u[i] = Uzero; v[i] = Uzero; } for (int i = 0; i < n; i++) { for (unsigned k = ptDA->getRows()[i]; k < ptDA->getRows()[i + 1]; k++) { int j = ptDA->getIndCols()[k]; if (i == j) { d[i] = blas_abs(coefs0[k]); } } } // 23 Jun.2011 Atsushi // u[i] = |a(i, i)|, v[i] = max_{k}{|a(i,k)|,|a(k,j)|} if (ptDA->isSymmetric() && (!ptDA->isWhole())) { for (int i = 0; i < n; i++) { u[i] = blas_abs(coefs0[ptDA->getRows()[i]]); v[i] = SpMAX(v[i], u[i]); for (int k = (ptDA->getRows()[i] + 1); k < ptDA->getRows()[i + 1]; k++) { int j = ptDA->getIndCols()[k]; U acoef = blas_abs(coefs0[k]); v[i] = SpMAX(v[i], acoef); v[j] = SpMAX(v[j], acoef); } } } else { for (int i = 0; i < n; i++) { for (int k = ptDA->getRows()[i]; k < ptDA->getRows()[i + 1]; k++) { int j = ptDA->getIndCols()[k]; if (j == i) { u[i] = blas_abs(coefs0[k]); v[i] = SpMAX(v[i],u[i]); } else { U acoef = 
blas_abs(coefs0[k]); v[i] = SpMAX(v[i], acoef); v[j] = SpMAX(v[j], acoef); } } } } T alower, aupper, adiag; switch(type) { case DIAGONAL_SCALING: for (int i = 0; i < n; i++) { u[i] = ((u[i] != Uzero) ? sqrt(Uone / u[i]) : ((v[i] != Uzero) ? sqrt(Uone / v[i]) : Uone)); } break; case KKT_SCALING: if (ptDA->isSymmetric() && (!ptDA->isWhole())) { for (int i = 0 ; i < n; i++) { if (d[i] != Uzero) { u[i] = sqrt(Uone / u[i]); } else { T xtmp = zero; for (int m = ptUnsymRows[i]; m < ptUnsymRows[i + 1]; m++) { const int j = indUnsymCols[m]; alower = coefs0[indVals[m]]; int flag = 0; for (int n = ptUnsymRows[j]; n < ptUnsymRows[j + 1]; n++) { const int k = indUnsymCols[n]; if (k == j) { adiag = coefs0[indVals[n]]; if (adiag != zero) { flag++; } else { continue; } } if (k == i) { aupper = coefs0[indVals[n]]; flag++; } if (flag == 2) { break; } } // loop : n if (flag == 2) { xtmp += alower * aupper / adiag; } } // loop : m u[i] = Uone / sqrt(blas_abs(xtmp)); } // if (d[i] != zero) } // loop : i } // if (_isSymmetric && (!_isWhole)) else { for (int i = 0 ; i < n; i++) { if (d[i] != Uzero) { u[i] = sqrt(Uone / u[i]); } else { T xtmp = zero; for (int m = ptDA->getRows()[i]; m < ptDA->getRows()[i + 1]; m++) { const int j = ptDA->getIndCols()[m]; alower = coefs0[m]; int flag = 0; for (int n = ptDA->getRows()[j]; n < ptDA->getRows()[j + 1]; n++) { const int k = ptDA->getIndCols()[n]; if (k == j) { adiag = coefs0[n]; if (adiag != zero) { flag++; } else { continue; } } if (k == i) { aupper = coefs0[n]; flag++; } if (flag == 2) { break; } } // loop : n if (flag == 2) { xtmp += alower * aupper / adiag; } } // loop : m u[i] = Uone / sqrt(blas_abs(xtmp)); } // if (d[i] != zero) } // loop : i } // if (_isSymmetric && (!_isWhole)) break; default: for (int i = 0; i < n; i++) { u[i] = Uone; } break; } // witch (type) // Scaling switch(type) { case NO_SCALING: for (int i = 0; i < ptDA->getRows()[n]; i++) { ptDA->getCoef()[i] = coefs0[i]; } break; case DIAGONAL_SCALING: case KKT_SCALING: for ( 
int i = 0; i < n; i++) { for (int k = ptDA->getRows()[i]; k < ptDA->getRows()[i+1]; k++) { int j = ptDA->getIndCols()[k]; ptDA->getCoef()[k] = coefs0[k] * u[i] *u[j]; } } break; } // delete [] v; // delete [] d; if (ptDA->isSymmetric() && (type == KKT_SCALING)) { if (!ptDA->isWhole()) { delete [] ptUnsymRows; delete [] indUnsymCols; } delete [] indVals; } } template void normalize(const int type, const double *coefs0, SparseMatrix *ptDA, double *u); template void normalize(const int type, const quadruple *coefs0, SparseMatrix *ptDA, quadruple *u); template void normalize, double> (const int type, const complex *coefs0, SparseMatrix > *ptDA, double *u); template void normalize, quadruple> (const int type, const complex *coefs0, SparseMatrix > *ptDA, quadruple *u); template void normalize(const int type, const float *coefs0, SparseMatrix *ptDA, float *u); template void normalize, float> (const int type, const complex *coefs0, SparseMatrix > *ptDA, float *u); // template void SparseMatrix::extractSquareMatrix(T* DSsingCoefs, vector &singVal) { const T zero(0.0); const int nsing = singVal.size(); if (_isSymmetric && (!_isWhole)) { if (_isUpper) { for (int i = 0; i < nsing; i++) { for (int j = i; j < nsing; j++) { DSsingCoefs[i + j * nsing] = 0.0; const int icol = singVal[i]; int itmp = icol; for (int it = ptRow(icol); it < ptRow(icol + 1); it++) { itmp = indCol(it); if (itmp == singVal[j]) { DSsingCoefs[i + j * nsing] = Coef(it); } } } //symmetrize for (int j = i + 1; j < nsing; j++) { DSsingCoefs[j + i * nsing] = DSsingCoefs[i + j * nsing]; } } } // if (isUpper()) else { for (int i = 0; i < nsing; i++) { for (int j = 0; j <= i; j++) { DSsingCoefs[i + j * nsing] = zero; const int icol = singVal[i]; int itmp = icol; for (int it = ptRow(icol); it < ptRow(icol + 1); it++) { itmp = indCol(it); if (itmp == singVal[j]) { DSsingCoefs[i + j * nsing] = Coef(it); } } } //symmetrize for (int j = 0; j < i; j++) { DSsingCoefs[j + i * nsing] = DSsingCoefs[i + j * nsing]; } } } // if 
(isUpper()) } else { // for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { DSsingCoefs[i + j * nsing] = zero; const int icol = singVal[i]; for (int it = ptRow(icol); it != ptRow(icol + 1); it++) { if (indCol(it) == singVal[j]) { DSsingCoefs[i + j * nsing] = Coef(it); break; } } } } } } template void SparseMatrix::extractSquareMatrix(double* DSsingCoefs, vector &singVal); template void SparseMatrix::extractSquareMatrix(quadruple* DSsingCoefs, vector &singVal); template void SparseMatrix >::extractSquareMatrix(complex* DSsingCoefs, vector &singVal); template void SparseMatrix >::extractSquareMatrix(complex* DSsingCoefs, vector &singVal); template void SparseMatrix::extractSquareMatrix(float* DSsingCoefs, vector &singVal); template void SparseMatrix >::extractSquareMatrix(complex* DSsingCoefs, vector &singVal); // template void SparseMatrix::prod(const T *u, T *v) const { const T zero(0.0); for (int i = 0; i < dimension(); i++) { v[i] = zero; } if (_isSymmetric && (!_isWhole)) { for(int i = 0; i < dimension(); i++) { // assumption data structure // _indCols[k] < _indCols[k + 1] // _indCols[_ptRows[i]] = i for _isUpper == true // _indCols[_ptRows[i + 1] - 1] = i for _isUpper == false int ibegin, iend; if (_isUpper) { ibegin = _ptRows[i] + 1; iend = _ptRows[i + 1]; } else { ibegin = _ptRows[i]; iend = _ptRows[i + 1] - 1; } for (int k = ibegin; k < iend; k++) { const int icol = _indCols[k]; v[i] += _coefs[k] * u[icol]; v[icol] += _coefs[k] * u[i]; } const int k = _isUpper ? 
_ptRows[i] : (_ptRows[i + 1] - 1); // diagonal v[i] += _coefs[k] * u[_indCols[k]]; } // loop : i } else { for(int i = 0; i < dimension(); i++) { for (int k = _ptRows[i]; k < _ptRows[i + 1]; k++) { v[i] += _coefs[k] * u[_indCols[k]]; } } } } template void SparseMatrix::prod(const double *u, double *v) const; template void SparseMatrix::prod(const quadruple *u, quadruple *v) const; template void SparseMatrix >::prod(const complex *u, complex *v) const; template void SparseMatrix >::prod(const complex *u, complex *v) const; template void SparseMatrix::prod(const float *u, float *v) const; template void SparseMatrix >::prod(const complex *u, complex *v) const; // template void SparseMatrix::prodt(const T *u, T *v ) const { const T zero(0.0); for (int i = 0; i < dimension(); i++) { v[i] = zero; } for(int i = 0; i < dimension(); i++) { for (int k = _ptRows[i]; k < _ptRows[i + 1]; k++) { v[_indCols[k]] += _coefs[k]*u[i]; } } } template void SparseMatrix::prodt(const double *u, double *v) const; template void SparseMatrix::prodt(const quadruple *u, quadruple *v) const; template void SparseMatrix >::prodt(const complex *u, complex *v) const; template void SparseMatrix >::prodt(const complex *u, complex *v) const; template void SparseMatrix::prodt(const float *u, float *v) const; template void SparseMatrix >::prodt(const complex *u, complex *v) const; // template SparseMatrix* SparseMatrix::PartialCopyCSR(vector &permute, const int n, bool transposed) { vector dist_ptRows; dist_ptRows.resize(n + 1); int *source_ptRows = getRows(); //&source->beginRows()[0]; SparseMatrix *dist; if (_isSymmetric && !(_isWhole)) { int *width = new int[n]; for (int i = 0; i < n; i++) { width[i] = source_ptRows[permute[i] + 1] - source_ptRows[permute[i]]; } for (int i = 0; i < dimension(); i++) { for (int it = ptRow(i); it < ptRow(i + 1); it++) { const int icol = indCol(it); // does not count diagonal if (icol != i) { for (int k = 0; k < n; k++) { if (permute[k] == icol) { width[k]++; break; } } 
} // if (icol != i) } } // loop : i dist_ptRows[0] = 0; for (int i = 0; i < n; i++) { dist_ptRows[i + 1] = dist_ptRows[i] + width[i]; } delete [] width; dist = new SparseMatrix(n, dist_ptRows[n], dist_ptRows, false, false, false); if (isUpper()) { for (int k = 0; k < n; k++) { const int irow = permute[k]; int jt = dist->ptRow(k); for (int i = 0; i < irow; i++) { for (int it = ptRow(i); it < ptRow(i + 1); it++) { const int icol = indCol(it); // strictly lower == not touch the diagonal if (icol != i) { if (irow == icol) { dist->indCol(jt) = i; dist->Coef(jt) = Coef(it); jt++; break; } } } } // loop : i // copy upper to upper for (int it = ptRow(irow); it < ptRow(irow + 1); it++) { dist->indCol(jt) = indCol(it); dist->Coef(jt) = Coef(it); jt++; } } // loop : k } // order of filling nonzero elements is different from the upper else { // case to keep increaing indcols[] for (int k = 0; k < n; k++) { const int irow = permute[k]; int jt = dist->ptRow(k); // copy lower to lower for (int it = ptRow(irow); it < ptRow(irow + 1); it++) { dist->indCol(jt) = indCol(it); dist->Coef(jt) = Coef(it); jt++; } for (int i = irow; i < dimension(); i++) { for (int it = ptRow(i); it < ptRow(i + 1); it++) { const int icol = indCol(it); // strictly lower == not touch the diagonal if (icol != i) { if (irow == icol) { dist->indCol(jt) = i; dist->Coef(jt) = Coef(it); jt++; break; } } } } // loop : i } // loop : k } } // if (_isSymmetric) else { dist_ptRows[0] = 0; // non-zero pattern is symmetric for (int i = 0; i < n; i++) { const int m = permute[i]; dist_ptRows[i + 1] = (dist_ptRows[i] + (source_ptRows[m + 1] - source_ptRows[m])); } // isOwner = true, isSym = false dist = new SparseMatrix(n, dist_ptRows[n], dist_ptRows, false, false, false); if (transposed) { for (int k = 0; k < n; k++) { const int irow = permute[k]; int jt = dist->ptRow(k); for (int i = 0; i < dimension(); i++) { for (int it = ptRow(i); it < ptRow(i + 1); it++){ const int icol = indCol(it); if (irow == icol) { 
dist->indCol(jt) = i; dist->Coef(jt) = Coef(it); jt++; break; } } } // loop : i } } else { for (int k = 0; k < n; k++) { const int irow = permute[k]; int it = ptRow(irow); int jt = dist->ptRow(k); // copy lower to lower for (; it < ptRow(irow + 1); it++, jt++) {; dist->indCol(jt) = indCol(it); dist->Coef(jt) = Coef(it); } } } } // if (_isSymmetric) return dist; } template SparseMatrix* SparseMatrix:: PartialCopyCSR(vector &permute, const int n, bool transposed); template SparseMatrix* SparseMatrix:: PartialCopyCSR(vector &permute, const int n, bool transposed); template SparseMatrix >* SparseMatrix >:: PartialCopyCSR(vector &permute, const int n, bool transposed); template SparseMatrix >* SparseMatrix >:: PartialCopyCSR(vector &permute, const int n, bool transposed); template SparseMatrix* SparseMatrix:: PartialCopyCSR(vector &permute, const int n, bool transposed); template SparseMatrix >* SparseMatrix >:: PartialCopyCSR(vector &permute, const int n, bool transposed); // int CSR_sym2unsym(int *ptRows, int *indCols, int *toSym, const int *ptSymRows, const int *indSymCols, const int dim, const bool upper_flag) { int* nbIndPerRow = new int[dim]; memset(nbIndPerRow, 0, dim*sizeof(int)); for (int i = 0; i < dim; i++) { nbIndPerRow[i] += ptSymRows[i + 1] - ptSymRows[i]; int ibegin = ptSymRows[i] + (upper_flag ? 1 : 0); int iend = ptSymRows[i + 1] + (upper_flag ? 0 : (-1)); for (int k = ibegin; k < iend; k++) { nbIndPerRow[indSymCols[k]]++; } } // Build ptRows array : // ................... 
ptRows[0] = 0; for (int i = 0; i < dim; i++) { ptRows[i + 1] = ptRows[i] + nbIndPerRow[i]; } // CHECK(ptRows[dim] == (2 * nz - dim), // "error in sym2unsym() : Wrong number of non zeros elemnts in ptRows !"); // Allocate and fill indices columns : memset(nbIndPerRow, 0, (dim * sizeof(int))); // for upper case, nbIndPerRow[i] keeps entries added by transposed operation // but for lower case counts all nonzero entries in progress for (int i = 0; i < dim; i++) { int itmp = ptRows[i] + nbIndPerRow[i]; for (int k = ptSymRows[i]; k < ptSymRows[i + 1]; k++) { indCols[itmp] = indSymCols[k]; toSym[itmp] = k; itmp++; } // loop : k if (!upper_flag) { nbIndPerRow[i] = itmp - ptRows[i]; } // memcpy(indCols + (ptRows[i] + nbIndPerRow[i]), // indSymCols + ptSymRows[i], // (ptSymRows[i + 1] - ptSymRows[i]) * sizeof(int)); int ibegin = ptSymRows[i] + (upper_flag ? 1 : 0); int iend = ptSymRows[i + 1] + (upper_flag ? 0 : (-1)); for (int k = ibegin; k < iend; k++) { const int j = indSymCols[k]; const int jtmp = ptRows[j] + nbIndPerRow[j]; indCols[jtmp] = i; toSym[jtmp] = k; nbIndPerRow[j]++; } // loop : k } // loop : i // extract other toSym[] delete [] nbIndPerRow; return ptRows[dim]; } FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SparseMatrix.hpp000664 000000 000000 00000015205 14037356732 025550 0ustar00rootroot000000 000000 /*! \file SparseMatrix.hpp \brief Sparse matrix definition \author Xavier Juvigny, ONERA \date Jan. 25th 2005 \modification allocation of array by STL vector class \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _ALGEBRA_SPARSEMATRIX_HPP_ #define _ALGEBRA_SPARSEMATRIX_HPP_ #include #include "Compiler/blas.hpp" using std::vector; template class SparseMatrix { public: SparseMatrix() : _isSymmetric(false), _isUpper(true), _isWhole(false) {} SparseMatrix(bool isSym, bool isUpper = true, bool isWhole = false) : _isSymmetric(isSym), _isUpper(isUpper), _isWhole(isWhole) {} SparseMatrix( int n, int nnz, const T* coefs, bool isSym = false, bool isUpper = true, bool isWhole = false) : _isSymmetric(isSym), _isUpper(isUpper), _isWhole(isWhole) { _ptRows.resize(n+1); _indCols.resize(nnz); _coefs.resize(nnz); for (int i = 0 ; i < nnz; i++) { _coefs[i] = coefs[i]; } } // added by Atsushi, 01 Feb.2013 : symbolic data are given by integer arrays SparseMatrix( int n, int nnz, const int *ptRows, const int *indCols, bool isSym = false, bool isUpper = true, bool isWhole = false) : _isSymmetric(isSym), _isUpper(isUpper), _isWhole(isWhole) { _ptRows.resize(n+1); for (int i = 0; i < (n + 1); i++) { _ptRows[i] = ptRows[i]; } _indCols.resize(nnz); for (int i = 0; i < nnz; i++) { _indCols[i] = indCols[i]; } _coefs.resize(nnz); } SparseMatrix( int n, int nnz, const long long int *ptRows64, const long long int *indCols64, bool isSym = false, bool isUpper = true, bool isWhole = false) : _isSymmetric(isSym), _isUpper(isUpper), _isWhole(isWhole) { _ptRows.resize(n+1); for (int i = 0; i < (n + 1); i++) { _ptRows[i] = ptRows64[i]; } _indCols.resize(nnz); for (int i = 0; i < nnz; i++) { _indCols[i] = indCols64[i]; } _coefs.resize(nnz); } SparseMatrix( int n, int nnz, const vector& ptRows, bool isSym = false, bool isUpper = true, bool isWhole = false) : _isSymmetric(isSym), _isUpper(isUpper), _isWhole(isWhole) { _indCols.resize(nnz); _ptRows.resize(n + 1); for (int i = 0; i < (n + 1); i++) { _ptRows[i] = ptRows[i]; } _coefs.resize(nnz); } SparseMatrix( const SparseMatrix& A ) : _ptRows(A._ptRows), _indCols(A._indCols), _coefs(A._coefs), _isSymmetric(A._isSymmetric), _isUpper(A._isUpper), 
_isWhole(A._isWhole) { } ~SparseMatrix() { free(); } void free() { _ptRows.clear(); _indCols.clear(); _coefs.clear(); } inline int dimension() const { return (_ptRows.size() - 1); } inline int nnz() const { return _indCols.size(); } inline bool isSymmetric() const { return _isSymmetric; } inline bool isUpper() const { return _isUpper; } inline bool isWhole() const { return _isWhole; } // inline int *getRows() { return &_ptRows[0]; } // inline int *getIndCols() { return &_indCols[0]; } inline T* getCoef() { return &_coefs[0]; } inline T* getCoef() const { return &_coefs[0]; } inline int ptRow(int i) const { return _ptRows[i]; } inline int indCol(int i) const { return _indCols[i]; } inline int& indCol(int i) { return _indCols[i]; } inline const T Coef(int i) const { return _coefs[i]; } inline T& Coef(int i) { return _coefs[i]; } inline vector& ptRows() { return _ptRows; } inline vector& indCols() { return _indCols; } inline vector & coefs() { return _coefs; } void prod(const T *u, T *v) const; void prodt(const T *u, T *v ) const; // void normalize(const int type, const W* coefs0, Z* precDiag); void extractSquareMatrix(T *DSsingCofes, vector &singVal); SparseMatrix* PartialCopyCSR(vector &permute, const int n, bool transposed); private: SparseMatrix& operator = ( const SparseMatrix& A ); vector _ptRows; vector _indCols; vector _coefs; bool _isSymmetric; bool _isUpper; bool _isWhole; }; // End class SparseMatrix template void normalize(const int type, const T* coefs0, SparseMatrix *ptDA, U* u); int CSR_sym2unsym(int *ptRows, int *indCols, int *toSym, const int *ptSymRows, const int *indSymCols, const int dim, const bool upper_flag); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SparseRenumbering.cpp000664 000000 000000 00000033203 14037356732 026552 0ustar00rootroot000000 000000 /*! 
\file SparseRenumbering.cpp \brief tridiagonal factorization algorithm with Cuthill-McKee \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. 
If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include #include #include #include #include "Algebra/SparseRenumbering.hpp" #include "Driver/DissectionDefault.hpp" #include "Compiler/arithmetic.hpp" #include "Compiler/DissectionIO.hpp" using std::list; void CMK_number(const int dim, const int *ptrows, const int *indcols, vector &new2old, const bool verbose, FILE *fp) { vector list, indic, connect; vector mask; int i1_prev, i2_prev, i0, i1; double profile, min_profile; connect.resize(dim); list.resize(dim); indic.resize(dim); mask.resize(dim); // compute initial profile i1_prev = 0; i2_prev = indcols[ptrows[0]]; for (int k = (ptrows[0] + 1); k < ptrows[1]; k++) { i2_prev = indcols[k] > i2_prev ? indcols[k] : i2_prev; } min_profile = ((double)(i2_prev - i1_prev + 2) * (double)(i2_prev - i1_prev + 1)) * 0.5; while (i2_prev < (dim - 1)) { int i1 = i2_prev + 1; int i2 = indcols[ptrows[i1_prev]]; for (int k = (ptrows[i1_prev] + 1); k < ptrows[i2_prev + 1]; k++) { i2 = indcols[k] > i2 ? 
indcols[k] : i2; } min_profile += (double)(i2 - i1 + 2) * (double)(i2 - i1 + 1) * 0.5; i1_prev = i1; i2_prev = i2; } diss_printf(verbose, fp, "%s %d : Cuthill McKee renumbering : init block : %g :: ", __FILE__, __LINE__, min_profile); // initialize frontal renumbering from an excentric point for (int i = 0; i < dim; i++) { connect[i] = ptrows[i + 1] - ptrows[i]; mask[i] = false; } i0 = 0; // initialization : critical when connent[] == dim { int imin = dim; // find index that attains minimun of connect[] for (int i = 0; i < dim; i++) { if (imin > connect[i]) { imin = connect[i]; i0 = i; } } } mask[i0] = true; profile = frontal_numb(dim, ptrows, indcols, i0, list, indic, connect); i1 = list[dim - 1]; diss_printf(verbose, fp, "- > node %d : %g :: ", i0, profile); if (profile < min_profile) { min_profile = profile; } else { for (int i = 0; i < dim; i++) { list[i] = i; } } profile = min_profile; min_profile = min_profile + 1.0; // repeat frontal numbering as long as profile improves while (profile < min_profile) { min_profile = profile; for (int i = 0; i < dim; i++) { new2old[i] = list[i]; } for (int i = 0; i < dim; i++) { connect[i] = ptrows[i + 1] - ptrows[i]; } { i0 = 0; // initialization : critical when connent[] == dim int imin = dim; for (int k = ptrows[i1]; k < ptrows[i1 + 1]; k++) { const int ii = indcols[k]; if (imin > connect[ii]) { imin = connect[ii]; i0 = ii; } } if (mask[i0]) { for (int i = (dim - 1); i >=0; i--) { if (!mask[list[i]]) { i0 = list[i]; break; } } } } mask[i0] = true; profile = frontal_numb(dim, ptrows, indcols, i0, list, indic, connect); i1 = list[dim - 1]; diss_printf(verbose, fp, "- > node: %d : %g :: ", i0, profile); } // while diss_printf(verbose, fp, "-> optimazied : %g\n", min_profile); } double frontal_numb(const int dim, const int *ptrows, const int *indcols, const int i0, vector &list, vector &indic, vector &connect) { int n, nn, i1, i2, i1_prev, i2_prev, k1, k2; double profile; // compute nodal connectivity degree for (int i = 0; i 
< dim; i++) { connect[i] = ptrows[i + 1] - ptrows[i]; } #if 1 for (int i = 0; i < dim; i++) { indic[i] = 0; } n = 0; nn = (-1); list[0] = i0; indic[i0] = 1; while (n < (dim - 1)) { nn++; int j = list[nn]; // renumber un-numbered neigbours of j, in increasing connectivity order int k = 0; for (int l = ptrows[j]; l < ptrows[j + 1]; l++) { const int i = indcols[l]; if (indic[i] == 0) { int ki = 1; for ( ; ki <= k; ki++) { if (connect[i] < connect[list[n + ki]]) { break; } } k++; for (int kk = k; kk >= (ki + 1); kk--) { list[n + kk] = list[n + kk - 1]; } list[n + ki] = i; } // if (indic[i] == 0) } // update connectivity degree of un-numbered nodes for (int kk = (n + 1); kk <= (n + k); kk++) { const int ii = list[kk]; for (int m = ptrows[ii]; m < ptrows[ii + 1]; m++) { connect[indcols[m]]--; } } for (int kk = (n + 1); kk <= (n + k); kk++) { indic[list[kk]] = 1; } n += k; } // while (n < (dim - 1)) // build old to new correspondance for (int i = 0; i < dim; i++) { indic[list[i]] = i; } #else std::list idexlist; vector mask; mask.resize(dim, false); idexlist.push_back(i0); std::list::iterator it = idexlist.begin(); mask[(*it)] = true; n = 0; std::list::iterator nt = idexlist.begin(); while (n < (dim - 1)) { int k = 0; for (int l = ptrows[(*it)]; l < ptrows[(*it) + 1]; l++) { const int ii = indcols[l]; if (!mask[ii]) { bool flag = false; std::list::iterator kt = nt; ++kt; for (int kk = 0 ; kk < k; kk++, ++kt) { if (connect[ii] < connect[(*kt)]) { idexlist.insert(kt, ii); flag = true; break; } } if (!flag) { idexlist.push_back(ii); } k++; } } ++it; std::list::iterator kt = nt; int kk = 0; for ( ; kk < k; ++kt, kk++) { for (int m = ptrows[(*kt)]; m < ptrows[(*kt) + 1]; m++) { connect[indcols[m]]--; } mask[(*kt)] = true; ++nt; } n += k; } { int i = 0; std::list::iterator it = idexlist.begin(); for (; it != idexlist.end(); ++it, i++) { list[i] = (*it); indic[(*it)] = i; } } idexlist.clear(); #endif // compute profile i1_prev = 0; k1 = ptrows[list[i1_prev]]; k2 = 
ptrows[list[i1_prev] + 1]; i2_prev = indic[indcols[k1]]; for (int k = (k1 + 1); k < k2; k++) { i2_prev = indic[indcols[k]] > i2_prev ? indic[indcols[k]] : i2_prev; } profile = ((double)(i2_prev - i1_prev + 1) * (double)(i2_prev - i1_prev + 2) * 0.5); while (i2_prev < (dim - 1)) { i1 = i2_prev + 1; i2 = i1; for (int i = i1_prev; i <= i2_prev; i++) { k1 = ptrows[list[i]]; k2 = ptrows[list[i] + 1]; i2 = std::max((int)i2, (int)indic[indcols[k1]]); for (int k = (k1 + 1); k < k2; k++) { i2 = indic[indcols[k]] > i2 ? indic[indcols[k]] : i2; } } profile += ((double)(i2 - i1 + 1) * (double)(i2 - i1 + 2) * 0.5); i1_prev = i1; i2_prev = i2; } return profile; } int point_front(const int dim, const int *ptrows, const int *indcols, vector &new2old, vector &p_front, const bool verbose, FILE *fp) { int nfront, ilast; vector old2new, p_front1; vector mask; const int size_thrs = 24; old2new.resize(dim); for (int i = 0; i < dim; i++) { old2new[new2old[i]] = i; } p_front.resize(dim + 1); // for safty nfront = 0; p_front[0] = 0; ilast = (-1); while (ilast < (dim - 1)) { // loop ilast ilast++; bool flag_cont = false; bool flag_break = false; const int ii = new2old[ilast]; nfront++; int isup = (-1); for (int k = ptrows[ii]; k < ptrows[ii + 1]; k++) { const int jj = old2new[indcols[k]]; isup = jj > isup ? jj : isup; } p_front[nfront] = isup + 1; while (isup > ilast) { ilast = isup; for (int i = p_front[nfront - 1]; i < p_front[nfront]; i++) { const int ii = new2old[i]; for (int k = ptrows[ii]; k < ptrows[ii + 1]; k++) { const int jj = old2new[indcols[k]]; isup = jj > isup ? 
jj : isup; } } // loop : i if (isup == ilast) { flag_cont = true; break; // continue loop_ilast } nfront++; p_front[nfront] = isup + 1; if (isup == (dim - 1)) { flag_break = true; break; // exit loop_ilast } } if (flag_cont) { continue; } if (flag_break) { break; } } // loop ilast old2new.clear(); mask.resize(nfront + 1); p_front1.resize(nfront + 1); for (int k = 0; k < nfront; k++) { const int itmp = p_front[k + 1] - p_front[k]; if (itmp >= size_thrs) { mask[k] = true; } else { mask[k] = false; } } mask[nfront] = true; int n = 0; { int k = 0; p_front1[0] = p_front[0]; int k0 = k; while (k < nfront) { int flag = 0; if (!mask[k0]) { if (!mask[k + 1]) { if ((p_front[k + 1] - p_front[k0]) < size_thrs) { flag = 2; } else { flag = (-2); } } else { if ((k == 0) && mask[1]) { flag = 2; } else { if ((k0 == k) || (k == (nfront - 1))) { flag = 0; } else { flag = (-1); } } } // if (!mask[k + 1]) } // if (!mask[k0]) else { if (k == (nfront - 2) && !mask[nfront - 1]) { flag = 1; } } switch (flag) { case 1: p_front1[n + 1] = p_front[k + 2]; n++; k += 2; k0 = k; break; case (-2): p_front1[n + 1] = p_front[k]; n++; k0 = k; break; case (-1): p_front1[n + 1] = p_front[k]; p_front1[n + 2] = p_front[k + 1]; n += 2; k++; k0 = k; break; case (0): p_front1[n + 1] = p_front[k + 1]; n++; k++; k0 = k; break; case 2: k++; break; } } // while (k < nfront) } // scope for k, n etc. 
if ((p_front1[0] != 0)|| (p_front1[n] != p_front[nfront])) { diss_printf(verbose, fp, "%s %d : error %d -> %d\n", __FILE__, __LINE__, p_front[nfront], p_front1[n]); exit(-1); } nfront = n; p_front.resize(nfront + 1); for (int k = 0; k <= n; k++) { p_front[k] = p_front1[k]; } return nfront; } int getColorMaskCSR(int *color_mask, const CSR_indirect *csr, const bool verbose, FILE *fp) { const int dim = csr->n; int *prow = csr->ptRows; int *indcols = csr->indCols; for (int i = 0; i < dim; i++) { color_mask[i] = (-1); } list graph_start; int num_isolated = 0; for (int i = 0 ; i < dim; i++) { if ((prow[i] + 1) == prow[i + 1]) { num_isolated++; color_mask[i] = 0; } } if (num_isolated > 0) { diss_printf(verbose, fp, "%s %d : isolated = %d\n", __FILE__, __LINE__, num_isolated); } int color = 0; for (int i = 0; i < dim; i++) { if (color_mask[i] == (-1)) { color++; color_mask[i] = color; graph_start.clear(); graph_start.push_back(i); for (list::iterator it = graph_start.begin(); it != graph_start.end(); ++it) { for (int k = prow[(*it)]; k < prow[(*it) + 1]; k++) { if (color_mask[indcols[k]] == (-1)) { color_mask[indcols[k]] = color; graph_start.push_back(indcols[k]); } } } // loop : it } } // loop : i // post process merge color with small size into previous color int reduced = 0; diss_printf(verbose, fp, "%s %d : before color = %d\n", __FILE__, __LINE__, color); int m = 1; while (m <= color) { int count = 0; for (int i = 0; i < dim; i++) { if (color_mask[i] == m) { count++; } } if ((count <= DIM_AUG_KERN) && (color > 1)){ // to treat very small matrix reduced++; color--; for (int i = 0; i < dim; i++) { if (color_mask[i] == m) { color_mask[i] = (-1); } else if (color_mask[i] > m) { color_mask[i]--; } } } else { m++; } } diss_printf(verbose, fp, "%s %d : after fused = %d color = %d\n", __FILE__, __LINE__, reduced, color); return color; } FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SparseRenumbering.hpp000664 000000 000000 00000006211 14037356732 026556 
0ustar00rootroot000000 000000 /*! \file SparseRenumbering.cpp \brief tridiagonal factorization algorithm with Cuthill-McKee \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. 
If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include #include "Algebra/CSR_matrix.hpp" using std::vector; void CMK_number(const int dim, const int *ptrows, const int *indcols, vector &new2old, const bool verbose, FILE *fp); double frontal_numb(const int dim, const int *ptrows, const int *indcols, const int i0, vector &list, vector &indic, vector &connect); int point_front(const int dim, const int *ptrows, const int *indcols, vector &new2old, vector &p_front, const bool verbose, FILE *fp); int getColorMaskCSR(int *color_mask, const CSR_indirect *csr, const bool verbose, FILE *fp); FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SquareBlockMatrix.cpp000664 000000 000000 00000065074 14037356732 026532 0ustar00rootroot000000 000000 /*! \file SquareBlockMatrix.cpp \brief Block storage for symmetric/unsymmetric Square matrix \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. 
// #include "Algebra/SquareBlockMatrix.hpp" #include "Compiler/blas.hpp" // #include is inside template void SquareBlockMatrix::init(int dim, int block_size, bool isSym, int first_block) { int itmp; _pivrelaxed = false; _coefs_alloc_status = false; // for safty _dim = dim; _block_size = block_size; _isSym = isSym; _block_size2 = _block_size * _block_size; if (dim == 0) { _isblocked = true; _kernelDetected = false; _lower_allocated = false; _coefs = NULL; _coefs_alloc_status = false; _allocation_status = NULL; return; } if (first_block > 0) { _isdecomposed = true; _dim0 = first_block; _dim1 = _dim - _dim0; _num_blocks0 = (_dim0 + _block_size - 1) / _block_size; _num_blocks1 = (_dim1 + _block_size - 1) / _block_size; _num_blocks = _num_blocks0 + _num_blocks1; _block_sizes.resize(_num_blocks); itmp = _dim0 % _block_size; _block_size_last0 = (itmp == 0) ? _block_size : itmp; itmp = _dim1 % _block_size; _block_size_last1 = (itmp == 0) ? _block_size : itmp; for (int i = 0; i < (_num_blocks0 - 1); i++) { _block_sizes[i] = _block_size; } _block_sizes[_num_blocks0 - 1] = _block_size_last0; if (_num_blocks1 > 0) { for (int i = _num_blocks0; i < (_num_blocks - 1); i++) { _block_sizes[i] = _block_size; } _block_sizes[_num_blocks - 1] = _block_size_last1; } else { _block_size_last1 = 0; } } else { _isdecomposed = false; _num_blocks = (_dim + _block_size - 1) / _block_size; _block_sizes.resize(_num_blocks); itmp = _dim % _block_size; _block_size_last = (itmp == 0) ? 
_block_size : itmp; for (int i = 0; i < (_num_blocks - 1); i++) { _block_sizes[i] = _block_size; } _block_sizes[_num_blocks - 1] = _block_size_last; } _nsing_block.resize(_num_blocks); if (_num_blocks > 0) { for (int i = 0; i < _num_blocks; i++) { _nsing_block[i] = 0; } } _isblocked = true; _kernelDetected = false; _lower_allocated = false; if (_num_blocks > 0) { try { _coefs = new T*[_num_blocks * _num_blocks]; // need to be managed } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } try { _allocation_status = new bool[_num_blocks * _num_blocks]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } _coefs_alloc_status = true; for (int i = 0; i < (_num_blocks * _num_blocks); i++) { _allocation_status[i] = false; } if (_isSym) { _size = 0; for (int i = 0; i < _num_blocks; i++) { for (int j = i; j < _num_blocks; j++) { _size += _block_sizes[i] * _block_sizes[j]; } } } else { _size = 0; for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < _num_blocks; j++) { _size += _block_sizes[i] * _block_sizes[j]; } } } } else { _coefs = NULL; _coefs_alloc_status = false; _allocation_status = NULL; } } template void SquareBlockMatrix::init(int dim, int block_size, bool isSym, int first_block); template void SquareBlockMatrix::init(int dim, int block_size, bool isSym, int first_block); template void SquareBlockMatrix >::init(int dim, int block_size, bool isSym, int first_block); template void SquareBlockMatrix >::init(int dim, int block_size, bool isSym, int first_block); template void SquareBlockMatrix::init(int dim, int block_size, bool isSym, int first_block); template void SquareBlockMatrix >::init(int dim, int block_size, bool isSym, int first_block); // template void SquareBlockMatrix::allocateBlock(int i, int j) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int itmp = i + j * _num_blocks; if 
(!_allocation_status[itmp]) { try { _coefs[itmp] = new T[nrow * ncol]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } _allocation_status[itmp] = true; } } template void SquareBlockMatrix::allocateBlock(int i, int j); template void SquareBlockMatrix::allocateBlock(int i, int j); template void SquareBlockMatrix >::allocateBlock(int i, int j); template void SquareBlockMatrix >::allocateBlock(int i, int j); template void SquareBlockMatrix::allocateBlock(int i, int j); template void SquareBlockMatrix >::allocateBlock(int i, int j); // template void SquareBlockMatrix::allocate() { if (_isSym) { // int ioffset = 0; for (int i = 0; i < _num_blocks; i++) { for (int j = i; j < _num_blocks; j++) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int itmp = i + j * _num_blocks; if (!_allocation_status[itmp]) { try { _coefs[itmp] = new T[nrow * ncol]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } } _allocation_status[itmp] = true; } } } else { // int ioffset = 0; for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < _num_blocks; j++) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int itmp = i + j * _num_blocks; if (!_allocation_status[itmp]) { try { _coefs[itmp] = new T[nrow * ncol]; } catch (const std::bad_alloc& e) { fprintf(stderr, "%s %d : allocation failed : %s", __FILE__, __LINE__, e.what()); } } _allocation_status[itmp] = true; } } } } template void SquareBlockMatrix::allocate(); template void SquareBlockMatrix::allocate(); template void SquareBlockMatrix >::allocate(); template void SquareBlockMatrix >::allocate(); template void SquareBlockMatrix::allocate(); template void SquareBlockMatrix >::allocate(); // template void SquareBlockMatrix::free(int i, int j) { const int itmp = i + j * _num_blocks; if (_allocation_status[itmp]) { delete [] _coefs[itmp]; // need to be 
mananged } _allocation_status[itmp] = false; } template void SquareBlockMatrix::free(int i, int j); template void SquareBlockMatrix::free(int i, int j); template void SquareBlockMatrix >::free(int i, int j); template void SquareBlockMatrix >::free(int i, int j); template void SquareBlockMatrix::free(int i, int j); template void SquareBlockMatrix >::free(int i, int j); // template void SquareBlockMatrix::free() { if (_isSym) { for (int i = 0; i < _num_blocks; i++) { for (int j = i; j < _num_blocks; j++) { const int itmp = i + j * _num_blocks; if (_allocation_status[itmp]) { delete [] _coefs[itmp]; // need to be mananged } _allocation_status[itmp]= false; } } } else { for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < _num_blocks; j++) { const int itmp = i + j * _num_blocks; if (_allocation_status[itmp]) { delete [] _coefs[itmp]; // need to be managed } _allocation_status[itmp] = false; } } } } template void SquareBlockMatrix::free(); template void SquareBlockMatrix::free(); template void SquareBlockMatrix >::free(); template void SquareBlockMatrix >::free(); template void SquareBlockMatrix::free(); template void SquareBlockMatrix >::free(); // template T* SquareBlockMatrix::addrCoefBlock(int i, int j) { if (_isdecomposed) { // is necessary? 
if (_isSym && (i > j)) { return _coefs[j + i * _num_blocks]; } } return _coefs[i + j * _num_blocks]; } template double* SquareBlockMatrix::addrCoefBlock(int i, int j); template quadruple* SquareBlockMatrix::addrCoefBlock(int i, int j); template complex* SquareBlockMatrix >:: addrCoefBlock(int i, int j); template complex* SquareBlockMatrix >:: addrCoefBlock(int i, int j); template float* SquareBlockMatrix::addrCoefBlock(int i, int j); template complex* SquareBlockMatrix >:: addrCoefBlock(int i, int j); // template void SquareBlockMatrix::ZeroClear() { const T zero(0.0); if (_isSym) { for (int i = 0; i < _num_blocks; i++) { for (int j = i; j < _num_blocks; j++) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int nsize = nrow * ncol; const int itmp = i + j * _num_blocks; for (int k = 0; k < nsize; k++) { _coefs[itmp][k] = zero; // needs to be replaced by memcpy } } } } else { for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < _num_blocks; j++) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int nsize = nrow * ncol; const int itmp = i + j * _num_blocks; for (int k = 0; k < nsize; k++) { _coefs[itmp][k] = zero; // needs to be replaced by memcpy } } } } } template void SquareBlockMatrix::ZeroClear(); template void SquareBlockMatrix::ZeroClear(); template void SquareBlockMatrix >::ZeroClear(); template void SquareBlockMatrix >::ZeroClear(); template void SquareBlockMatrix::ZeroClear(); template void SquareBlockMatrix >::ZeroClear(); // template T& SquareBlockMatrix::diag(int i) { int i0, i1; if(_isdecomposed) { fprintf(stderr, "%s %d : operator() is not suppoesd to be used\n", __FILE__, __LINE__); i0 = BlockIndex(i); i1 = BlockOffset(i); } else { i0 = i / _block_size; i1 = i % _block_size; } // const int itmp = offsetBlock(i0, i0) + i1 * (nrowBlock(i0, i0) + 1); // return coefs()[itmp]; return _coefs[i0 * (_num_blocks + 1)][i1 * (nrowBlock(i0, i0) + 1)]; } template double& SquareBlockMatrix::diag(int i); 
template quadruple& SquareBlockMatrix::diag(int i); template complex & SquareBlockMatrix >::diag(int i); template complex & SquareBlockMatrix >::diag(int i); template float& SquareBlockMatrix::diag(int i); template complex & SquareBlockMatrix >::diag(int i); // template const T& SquareBlockMatrix::diag(int i) const { int i0, i1; if(_isdecomposed) { fprintf(stderr, "%s %d : operator() is not suppoesd to be used\n", __FILE__, __LINE__); i0 = BlockIndex(i); i1 = BlockOffset(i); } else { i0 = i / _block_size; i1 = i % _block_size; } return _coefs[i0 * (_num_blocks + 1)][i1 * (nrowBlock(i0, i0) + 1)]; } template const double & SquareBlockMatrix::diag(int i) const; template const quadruple & SquareBlockMatrix::diag(int i) const; template const complex & SquareBlockMatrix >::diag(int i) const; template const complex & SquareBlockMatrix >:: diag(int i) const; template const float & SquareBlockMatrix::diag(int i) const; template const complex & SquareBlockMatrix >::diag(int i) const; // template T& SquareBlockMatrix::operator () (int i, int j) { int i0, i1, j0, j1; if(_isdecomposed) { fprintf(stderr, "%s %d : operator() is not suppoesd to be used\n", __FILE__, __LINE__); if (_isSym) { if (i < j) { i0 = BlockIndex(i); j0 = BlockIndex(j); i1 = BlockOffset(i); j1 = BlockOffset(j); } else { i0 = BlockIndex(j); j0 = BlockIndex(i); i1 = BlockOffset(j); j1 = BlockOffset(i); } } else { i0 = BlockIndex(i); j0 = BlockIndex(j); if(i0 > j0) { i1 = BlockOffset(j); j1 = BlockOffset(i); } else { i1 = BlockOffset(i); j1 = BlockOffset(j); } } } // if(_isdecomposed) { else { if (_isSym) { if (i < j) { i0 = i / _block_size; i1 = i % _block_size; j0 = j / _block_size; j1 = j % _block_size; } else { i0 = j / _block_size; i1 = j % _block_size; j0 = i / _block_size; j1 = i % _block_size; } } else { i0 = i / _block_size; j0 = j / _block_size; if (i0 > j0) { i1 = j % _block_size; j1 = i % _block_size; } else { i1 = i % _block_size; j1 = j % _block_size; } } } // if(_isdecomposed) else return 
_coefs[i0 + j0 * _num_blocks][i1 + j1 * nrowBlock(i0, j0)]; } template double& SquareBlockMatrix::operator () (int i, int j); template quadruple& SquareBlockMatrix::operator () (int i, int j); template complex & SquareBlockMatrix >:: operator () (int i, int j); template complex & SquareBlockMatrix >:: operator () (int i, int j); template float& SquareBlockMatrix::operator () (int i, int j); template complex & SquareBlockMatrix >:: operator () (int i, int j); // template const T& SquareBlockMatrix::operator () (int i, int j) const { int i0, i1, j0, j1; if(_isdecomposed) { fprintf(stderr, "%s %d : operator() is not suppoesd to be used\n", __FILE__, __LINE__); if (_isSym) { if (i < j) { i0 = BlockIndex(i); j0 = BlockIndex(j); i1 = BlockOffset(i); j1 = BlockOffset(j); } else { i0 = BlockIndex(j); j0 = BlockIndex(i); i1 = BlockOffset(j); j1 = BlockOffset(i); } } else { i0 = BlockIndex(i); j0 = BlockIndex(j); if(i0 > j0) { i1 = BlockOffset(j); j1 = BlockOffset(i); } else { i1 = BlockOffset(i); j1 = BlockOffset(j); } } } // if(_isdecomposed) { else { if (_isSym) { if (i < j) { i0 = i / _block_size; i1 = i % _block_size; j0 = j / _block_size; j1 = j % _block_size; } else { i0 = j / _block_size; i1 = j % _block_size; j0 = i / _block_size; j1 = i % _block_size; } } else { i0 = i / _block_size; j0 = j / _block_size; if (i0 > j0) { i1 = j % _block_size; j1 = i % _block_size; } else { i1 = i % _block_size; j1 = j % _block_size; } } } // if(_isdecomposed) else return _coefs[i0 + j0 * _num_blocks][i1 + j1 * nrowBlock(i0, j0)]; } template const double& SquareBlockMatrix::operator () (int i, int j) const; template const quadruple& SquareBlockMatrix::operator () (int i, int j) const; template const complex& SquareBlockMatrix >:: operator () (int i, int j) const; template const complex& SquareBlockMatrix >:: operator () (int i, int j) const; template const float& SquareBlockMatrix::operator () (int i, int j) const; template const complex& SquareBlockMatrix >:: operator () (int i, int 
j) const; // template void SquareBlockMatrix::copyFromArray(const T *a, int dim) { if (_isdecomposed) { fprintf(stderr, "%s %d : copyFromArray() cannot copy decompesd data\n", __FILE__, __LINE__); return; } if (dim != _dim) { fprintf(stderr, "%s %d : mismatched dim %d != %d\n", __FILE__, __LINE__, dim, _dim); return; } for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? _block_size_last : _block_size); const int dst_row = n_row; for (int kj = ki; kj < _num_blocks; kj++) { const int n_col = ((kj == (_num_blocks - 1)) ? _block_size_last : _block_size); T *a_upper = addrCoefBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0 += dst_row) { blas_copy(n_row, (T *)a + j1, 1, a_upper + j0, 1); } } // loop : kj } // loop : ki if (!_isSym) { for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = (ki == (_num_blocks - 1)) ? _block_size_last : _block_size; for (int kj = 0; kj < ki; kj++) { const int n_col = _block_size; T *a_lower = addrCoefBlock(ki, kj); int dst_row = nrowBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0++) { blas_copy(n_row, (T *)a + j1, 1, a_lower + j0, dst_row); } } // loop : kj } // loop : ki } // if (!isSym) } template void SquareBlockMatrix::copyFromArray(const double *a, int dim); template void SquareBlockMatrix::copyFromArray(const quadruple *a, int dim); template void SquareBlockMatrix >:: copyFromArray(const complex *a, int dim); template void SquareBlockMatrix >:: copyFromArray(const complex *a, int dim); template void SquareBlockMatrix::copyFromArray(const float *a, int dim); template void SquareBlockMatrix >:: copyFromArray(const complex *a, int dim); // template void SquareBlockMatrix::copyFromArrayPermute(const T *a, int dim, int *permute) { if (_isdecomposed) { fprintf(stderr, "%s %d : copyFromArray() cannot copy decompesd data\n", __FILE__, __LINE__); return; } if 
(dim != _dim) { fprintf(stderr, "%s %d : mismatched dim %d != %d\n", __FILE__, __LINE__, dim, _dim); return; } for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? _block_size_last : _block_size); const int dst_row = n_row; for (int kj = ki; kj < _num_blocks; kj++) { const int n_col = ((kj == (_num_blocks - 1)) ? _block_size_last : _block_size); T *a_upper = addrCoefBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0 += dst_row) { const int jj = permute[kj * _block_size + j]; int ii = ki * _block_size; int kk = j0; if (!_isSym) { for (int i = 0; i < n_row; i++, kk++, ii++) { a_upper[kk] = a[permute[ii] + jj * _dim]; } } else { for (int i = 0; i < n_row; i++, kk++, ii++) { const int itmp = permute[ii]; a_upper[kk] = a[itmp <= jj ? itmp + jj * _dim : jj + itmp * _dim]; } } // if (!isSym) { // blas_copy(n_row, (T *)a + j1, 1, a_upper + j0, 1); } } // loop : kj } // loop : ki if (!_isSym) { for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? 
_block_size_last : _block_size); for (int kj = 0; kj < ki; kj++) { const int n_col = _block_size; T *a_lower = addrCoefBlock(ki, kj); const int dst_row = nrowBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0++) { const int jj = permute[kj * _block_size + j]; int ii = ki * _block_size; int kk = j0; for (int i = 0; i < n_row; i++, ii++, kk += dst_row) { a_lower[kk] = a[permute[ii] + jj * _dim]; } // blas_copy(n_row, (T *)a + j1, 1, a_lower + j0, dst_row); } } // loop : kj } // loop : ki } // if (!isSym) } template void SquareBlockMatrix::copyFromArrayPermute(const double *a, int dim, int *permute); template void SquareBlockMatrix::copyFromArrayPermute(const quadruple *a, int dim, int *permute); template void SquareBlockMatrix >:: copyFromArrayPermute(const complex *a, int dim, int *permute); template void SquareBlockMatrix >:: copyFromArrayPermute(const complex *a, int dim, int *permute); template void SquareBlockMatrix::copyFromArrayPermute(const float *a, int dim, int *permute); template void SquareBlockMatrix >:: copyFromArrayPermute(const complex *a, int dim, int *permute); // template void SquareBlockMatrix::copyToArrayFull(const T *a, int dim) { if (dim != _dim) { fprintf(stderr, "%s %d : mismatched dim %d != %d\n", __FILE__, __LINE__, dim, _dim); return; } // copy upper part for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? _block_size_last : _block_size); const int dst_row = n_row; for (int kj = ki; kj < _num_blocks; kj++) { const int n_col = ((kj == (_num_blocks - 1)) ? 
_block_size_last : _block_size); T *a_upper = addrCoefBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0 += dst_row) { blas_copy(n_row, a_upper + j0, 1, (T *)a + j1, 1); } } // loop : kj } // loop : ki // copy lower part if (_isSym) { for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? _block_size_last : _block_size); for (int kj = 0; kj < ki; kj++) { const int n_col = _block_size; const int dst_col = n_col; T *a_upper = addrCoefBlock(kj, ki); // transposed access int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_row; j++, j1++, j0 += dst_col) { blas_copy(n_col, a_upper + j0, 1, (T *)a + j1, _dim); } } // loop : kj } // loop : ki } else { for (int ki = 0; ki < _num_blocks; ki++) { const int n_row = ((ki == (_num_blocks - 1)) ? _block_size_last : _block_size); for (int kj = 0; kj < ki; kj++) { const int n_col = _block_size; T *a_upper = addrCoefBlock(ki, kj); const int src_row = nrowBlock(ki, kj); int j, j0, j1; for (j1 = (ki + kj * _dim) * _block_size, j0 = 0, j = 0; j < n_col; j++, j1 += _dim, j0++) { blas_copy(n_row, a_upper + j0, src_row, (T *)a + j1, 1); } } // loop : kj } // loop : ki } } template void SquareBlockMatrix::copyToArrayFull(const double *a, int dim); template void SquareBlockMatrix::copyToArrayFull(const quadruple *a, int dim); template void SquareBlockMatrix >:: copyToArrayFull(const complex *a, int dim); template void SquareBlockMatrix >:: copyToArrayFull(const complex *a, int dim); template void SquareBlockMatrix::copyToArrayFull(const float *a, int dim); template void SquareBlockMatrix >:: copyToArrayFull(const complex *a, int dim); // template void SquareBlockMatrix::copyBlockToArray(int i, int j, const T *a, int dim) { if (dim != _dim) { fprintf(stderr, "%s %d : mismatched dim %d != %d\n", __FILE__, __LINE__, dim, _dim); return; } if (_isSym && i > j) { fprintf(stderr, "%s %d : symmetric matrix has upper block 
only : %d > %d\n", __FILE__, __LINE__, i, j); return; } T *a_src = addrCoefBlock(i, j); int j0, j1, j2; if (i <= j) { const int n_row = ((i == (_num_blocks - 1)) ? _block_size_last : _block_size); const int dst_row = n_row; const int n_col = ((j == (_num_blocks - 1)) ? _block_size_last : _block_size); for (j2 = (i + j * _dim) * _block_size, j1 = 0, j0 = 0; j0 < n_col; j0++, j1 += dst_row, j2 += _dim) { blas_copy(n_row, a_src + j1, 1, (T *)a + j2, 1); } } else { const int n_row = ((i == (_num_blocks - 1)) ? _block_size_last : _block_size); const int n_col = ((j == (_num_blocks - 1)) ? _block_size_last : _block_size); const int dst_col = n_col; for (j2 = (i + j * _dim) * _block_size, j1 = 0, j0 = 0; j0 < n_col; j0++, j1++, j2 += _dim) { blas_copy(n_row, a_src + j1, dst_col, (T *)a + j2, 1); } } } template void SquareBlockMatrix::copyBlockToArray(int i, int j, const double *a, int dim); template void SquareBlockMatrix::copyBlockToArray(int i, int j, const quadruple *a, int dim); template void SquareBlockMatrix >:: copyBlockToArray(int i, int j, const complex *a, int dim); template void SquareBlockMatrix >:: copyBlockToArray(int i, int j, const complex *a, int dim); template void SquareBlockMatrix::copyBlockToArray(int i, int j, const float *a, int dim); template void SquareBlockMatrix >:: copyBlockToArray(int i, int j, const complex *a, int dim); FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SquareBlockMatrix.hpp000664 000000 000000 00000023530 14037356732 026526 0ustar00rootroot000000 000000 /*! \file SquareBlockMatrix.hpp \brief Block storage for symmetric/unsymmetric Square matrix \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _ALGEBRA_SQUAREBLOCKMATRIX_HPP #define _ALGEBRA_SQUAREBLOCKMATRIX_HPP #include #include #include using std::vector; template class SquareBlockMatrix { public: SquareBlockMatrix() : _dim(0), _size(0), _block_size(0), _block_size2(0), _block_size_last(0), _isSym(true) { _isblocked = false; _isdecomposed = false; _num_blocks = 0; _num_blocks0 = 0; _num_blocks1 = 0; _dim0 = 0; _dim1 = 0; _block_size_last0 = 0; _block_size_last1 = 0; _lower_allocated = false; _coefs_alloc_status = false; _pivrelaxed = false; } SquareBlockMatrix(int dim, int block_size, bool isSym) { #ifdef DEBUG_SQUAREBLOCKMATRIX fprintf(stderr, "%s %d : constructor with args %d %d\n", __FILE__, __LINE__, dim, block_size); #endif if (dim > 0) { init(dim, block_size, isSym, 0); allocate(); _permute.resize(dim); } } void init(int dim, int block_size, bool isSym, int first_block = 0); void allocateBlock(int i, int j); void allocate(); void free(int i, int j); void free(); ~SquareBlockMatrix() { free(); if (_coefs_alloc_status) { delete [] _coefs; delete [] _allocation_status; _coefs_alloc_status = false; } _block_sizes.clear(); _nsing_block.clear(); _num_blocks = 0; // ? 
to avoid double free _dim = 0; _permute.clear(); _singIdx.clear(); _singIdx0.clear(); } int dimension() const { return (int)_dim; } int dimension0() const { return (int)_dim0; } int dimension1() const { return (int)_dim1; } int storage_size() const { return (int)_size; } int block_size() const { return (int)_block_size; } int num_blocks() const { return _num_blocks; } int block_size_last() const { return (int)_block_size_last; } bool isSym() const { return _isSym; } bool isDecomposed() const { return _isdecomposed; } int num_blocks0() const { return _num_blocks0; } int num_blocks1() const { return _num_blocks1; } int block_size_last0() const { return (int)_block_size_last0; } int block_size_last1() const { return (int)_block_size_last1; } int nrowBlock(int i) const { return _block_sizes[i]; } // int offsetBlock(int i, int j) const; T** coefs() { return _coefs; } T* addrCoefBlock(int i, int j); void allocateLowerBlocks() { if ((!_lower_allocated) && _isSym) { for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < i; j++) { const int nrow = _block_sizes[i]; const int ncol = _block_sizes[j]; const int itmp = i + j * _num_blocks; _coefs[itmp] = new T[nrow * ncol]; _allocation_status[itmp] = true; } } _lower_allocated = true; } } void freeLowerBlocks() { #ifdef ALLOCATE_LOWER_BLOCKS if (_lower_allocated && _isSym) { #else if (_isSym) { #endif for (int i = 0; i < _num_blocks; i++) { for (int j = 0; j < i; j++) { const int itmp = i + j * _num_blocks; if (_allocation_status[itmp]) { delete [] _coefs[itmp]; } _allocation_status[itmp] = false; } } _lower_allocated = false; } else { fprintf(stderr, "double free _lower %s %d\n", __FILE__, __LINE__); } } int nrowBlock(int i, int j) const { return (i <= j ? _block_sizes[i] : _block_sizes[j]); } int ncolBlock(int i, int j) const { return (i <= j ? 
_block_sizes[j] : _block_sizes[i]); } bool isTransposed(int i, int j); int BlockIndex(int i) const { int iblock; if (_isdecomposed) { if (i < _dim0) { iblock = i / _block_size; } else { iblock = (i - _dim0) / _block_size + _num_blocks0; } } else { iblock = i / _block_size; } return iblock; } int IndexBlock(int iblock) { int index; if (_isdecomposed) { if (iblock < _num_blocks0) { index = iblock * _block_size; } else { index = _dim0 + (iblock - _num_blocks0) * _block_size; } } else { index = iblock * _block_size; } return index; } int BlockOffset(int i) const { int ires; if (_isdecomposed) { if (i < _dim0) { ires = i % _block_size; } else { ires = (i - _dim0) % _block_size; } } else { ires = i % _block_size; } return ires; } void copyFromArrayPermute(const T *a, int dim, int *permute); void copyFromArray(const T *a, int dim); void copyToArrayFull(const T *a,int dim); void copyBlockToArray(int i, int j, const T *a,int dim); void ZeroClear(); T& diag(int i); const T& diag(int i) const; T& operator () (int i, int j); const T& operator () (int i, int j) const; SquareBlockMatrix* clone() const { SquareBlockMatrix *ret = new SquareBlockMatrix; ret->copy(*this); return(ret); } void copy ( const SquareBlockMatrix& A ) { // PlainMatrix::copy(A); //_loc2glob=A._loc2glob; _block_size = A._block_size; _block_size2 = A._block_size2; _num_blocks = A._num_blocks; _block_size_last = A._block_size_last; _isSym = A._isSym; _rank = A._rank; _nsing = A._nsing; _nsing_block = A._nsing_block; _dim = A._dim; _size = A._size; _permute = A._permute; _kernelDetected = A._kernelDetected; _singIdx = A._singIdx; _singIdx0 = A._singIdx0; _isblocked = A._isblocked; } bool isBlocked() { return _isblocked; } void unsetBlocked() { _isblocked = false; } void setBlocked() { _isblocked = true; } void set_rank(int rank) { _rank = rank; _nsing = _dim - _rank; } void set_KernelDetected(int state) { _kernelDetected = state; } int KernelDetected() const { return _kernelDetected; } vector &getNsingBlock() { 
return _nsing_block; } vector &getSingIdx() { return _singIdx; } vector &getSingIdx0() { return _singIdx0; } int rank() const { return _rank; } int dim_kern() const { return _nsing; } void set_dim_kern(int nsing) { _nsing = nsing; } int dim_kern_block(int k) const { return _nsing_block[k]; } void set_dim_kern_block(int k, int nsing) { _nsing_block[k] = nsing; } vector& getPermute() { return _permute; } const vector& getPermute() const { return _permute; } void set_lastPivot(double lastpiv) { _lastpiv = lastpiv; } double lastPivot() const { # ifdef DISDEBUG assert( isFactorized() ); # endif return _lastpiv; } void unset_pivrelaxed() { _pivrelaxed = false; } void set_pivrelaxed() { _pivrelaxed = true; } bool is_pivrelaxed() { return _pivrelaxed; } private: int _dim; // _block_sizes; int _dim0; int _dim1; int _block_size_last0; int _block_size_last1; int _num_blocks0; int _num_blocks1; bool _isdecomposed; // bool _coefs_alloc_status; bool *_allocation_status; bool _lower_allocated; // int _rank; //!< Rank of the matrix (initialized after factorization) int _nsing; double _lastpiv; // in case of quadruple ? T** _coefs; // bool _isblocked; vector _nsing_block; vector _permute; bool _kernelDetected; // true : done, false : unknown vector _singIdx; // with permutation vector _singIdx0; // without permutation bool _pivrelaxed; // }; // End class SquareMatrix #endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/SquareMatrix.hpp000664 000000 000000 00000031746 14037356732 025563 0ustar00rootroot000000 000000 /*! \file SquareMatrix.hpp \brief Definition of a template of square plain matrix \author X. Juvigny \date January 19th 2005 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #ifndef _ALGEBRA_SQUAREMATRIX_HPP #define _ALGEBRA_SQUAREMATRIX_HPP #include #include #include #include "Algebra/PlainMatrix.hpp" #include "Algebra/ColumnMatrix.hpp" using std::vector; /** \brief Square plain matrix This matrix is a square plain matrix with a global indices array to have correspondence with local numerotation. */ template class SquareMatrix : public PlainMatrix { public: using PlainMatrix::coefs; using PlainMatrix::addrCoefs; using PlainMatrix::addrCoefs_pt; SquareMatrix() : PlainMatrix(), _rank(-1), _permute(), _isFactorized(false), _kernelDetected(-1), _singIdx(), _singIdx0() { _dim = 0; _num_blocks = 0; _isblocked = false; } SquareMatrix(int dim, int block_size) : PlainMatrix(dim * dim), _dim(dim), _rank(-1), _permute(dim), _singIdx(), _singIdx0(), _kernelDetected(-1), _isFactorized(false) { _block_size = block_size; _num_blocks = (dim + _block_size - 1) / _block_size; if (_num_blocks > 0) { _nsing_block = new int[_num_blocks]; for (int i = 0; i < _num_blocks; i++) { _nsing_block[i] = 0; } } _isblocked = true; } SquareMatrix(int dim, int block_size, T* coefs, int nsing, int *singidx0, int *singidx, int *permute, bool isblocked, bool isOwner) : PlainMatrix(isOwner) { _dim = dim; _block_size = block_size; _num_blocks = 0; _isblocked = isblocked; PlainMatrix::init(dim * dim, coefs, isOwner); _singIdx.resize(nsing); for (int i = 0; i < nsing; i++) { _singIdx[i] = singidx[i]; } _singIdx0.resize(nsing); for (int i = 0; i < nsing; i++) { _singIdx0[i] = singidx0[i]; } _permute.resize(dim); for (int i = 0; i < dim; i++) { _permute[i] = permute[i]; } } SquareMatrix(const SquareMatrix& A) : PlainMatrix(A), _rank(A._rank), _permute(A._permute), _eps(A._eps), _singIdx(), _singIdx0(), _dim(A._dim), _kernelDetected(-1), _isFactorized(A._isFactorized), _isblocked(A._isblocked) {} ~SquareMatrix() { if (_num_blocks > 0) { delete [] _nsing_block; } } int dimension() const { return _dim; } virtual T& operator () (int i, int j) { # ifdef DISDEBUG assert(int(i)= 
0); assert(k < _num_blocks); #endif return _nsing_block[k]; } void set_dim_kern_block(int k, int nsing) { #ifdef DISDEBUG assert(k >= 0); assert(k < _num_blocks); #endif _nsing_block[k] = nsing; } void set_lastPivot(T lastpiv) { _lastpiv = lastpiv; } T lastPivot() const { # ifdef DISDEBUG assert(isFactorized()); # endif return _lastpiv; } T *lastPivot_val() { return &_lastpiv; } /** \brief Return the pivot obtained after factorization \pre The squarematrix must be factorized */ vector& getPermute() { #ifdef DISDEBUG assert(isFactorized()); #endif return _permute; } /** \brief Return the pivot obtained after factorization \pre The squarematrix must be factorized */ const vector& getPermute() const { #ifdef DISDEBUG assert(isFactorized()); #endif return _permute; } /** \name Operations on the Squarematrix */ //@{ /** \brief Copy operator \param A The squareMatrix to copy \return A reference on the current squareMatrix */ SquareMatrix& operator = (const SquareMatrix& A) { if (this != &A) { PlainMatrix::operator = (A); // _loc2glob=A._loc2glob; _rank = A._rank; _permute = A._permute; _eps = A._eps; _lastpiv = A._lastpiv; _singIdx = A._singIdx; _singIdx0 = A._singIdx0; _isFactorized = A._isFactorized; _kernelDetected = A._kernelDetected; } return(*this); } /** \brief Clone operator \return a new copy of the current squareMatrix */ virtual SquareMatrix* clone() const { //return new SquareMatrix(*this); SquareMatrix *ret = new SquareMatrix; ret->copy(*this); return(ret); } /** \brief Deep copy of a square matrix in the current SquareMatrix \param A The matrix to copy */ void copy (const SquareMatrix& A) { PlainMatrix::copy(A); //_loc2glob=A._loc2glob; _rank = A._rank; _permute = A._permute; _eps = A._eps; _lastpiv = A._lastpiv; _singIdx = A._singIdx; _singIdx0 = A._singIdx0; _isFactorized = A._isFactorized; _kernelDetected = A._kernelDetected; } void init(const int dim) { _dim = dim; PlainMatrix::init(dim * dim); _rank = -1; _permute.resize(dim); _singIdx.resize(0); 
_singIdx0.resize(0); _isFactorized = false; } void free() { PlainMatrix::free(); _dim = 0; _permute.clear(); _singIdx.clear(); _singIdx0.clear(); } bool isBlocked() { return _isblocked; } void unsetBlocked() { _isblocked = false; } void setBlocked() { _isblocked = true; } void setFactorizedState_pub() { _isFactorized = true; } void set_rank(int rank) { _rank = rank; _nsing = _dim - _rank; } void set_KernelDetected(int state) { _kernelDetected = state; } int KernelDetected() const { return _kernelDetected; } void set_epspiv(T eps) { _eps = eps; } vector &getSingIdx() { return _singIdx; } vector &getSingIdx0() { return _singIdx0; } // void set_lastpiv(int lastpiv) { _lastpiv = lastpiv; } protected: // Internal methods /// \brief Set the matrix as factorized void setFactorizedState() { _isFactorized = true; } /// \brief Set dimension of the matrix void setDimension(int n) { _dim = n; } /// \brief Return the diagonal issued from the factorization // Attributs int _rank; //!< Rank of the matrix (initialized after factorization) int _nsing; int *_nsing_block; int _block_size; int _num_blocks; T _lastpiv; //!< Last non null pivot found during the factorization T _eps; private: // Private attributs int _dim; // _permute; // _singIdx; // with permutation vector _singIdx0; // without permutation // bool _isblocked; }; // End class SquareMatrix // # include "Algebra/SquareMatrix.tpp" // ------------------------------------------------------------------------ /** \brief Output stream operator Print the square matrix in human readable form. 
\param out The stream outpu \param A The square matrix to print on the out stream \return The modified output stream operator */ template class SubSquareMatrix : public SquareMatrix { public: SubSquareMatrix() : SquareMatrix(), _loc2glob() { _offdiag_2x2_status = false; _full_pivoting = false; } SubSquareMatrix(const vector& l2g) : SquareMatrix() { init(l2g); } void init(const vector& l2g) { int n = l2g.size(); _full_pivoting = false; _loc2glob = l2g; // copy constructor SquareMatrix::setDimension(n); PlainMatrix::init(n * n); SquareMatrix::getPermute().resize(n); _pivot_width.resize(n); _pivot_2x2.resize(0); _offdiag_2x2 = new T[n]; _offdiag_2x2_status = true; } void init(const bool full_pivoting, const vector& l2g, const vector& l2g_left) { int n = l2g.size(); _full_pivoting = true; _loc2glob = l2g; // copy constructor _loc2glob_left = l2g_left; SquareMatrix::setDimension(n); PlainMatrix::init(n * n); SquareMatrix::getPermute().resize(n); _pivot_width.resize(n); _pivot_2x2.resize(0); _offdiag_2x2 = new T[n]; _offdiag_2x2_status = true; } vector& loc2glob() { return _loc2glob; } const vector& loc2glob() const { return _loc2glob; } vector& loc2glob_left() { return _full_pivoting ? _loc2glob_left : _loc2glob; } const vector& loc2glob_left() const { return _full_pivoting ? 
_loc2glob_left : _loc2glob; } virtual ~SubSquareMatrix() {} void free() { SquareMatrix::free(); _loc2glob.clear(); _pivot_width.clear(); _pivot_2x2.clear(); if (_offdiag_2x2_status) { delete [] _offdiag_2x2; _offdiag_2x2_status = false; } } vector& getPivotWidth() { return _pivot_width; } /** \brief Return the pivot obtained after factorization \pre The squarematrix must be factorized */ const vector& getPivotWidth() const { return _pivot_width; } vector& getPivot2x2() { return _pivot_2x2; } /** \brief Return the pivot obtained after factorization \pre The squarematrix must be factorized */ const vector& getPivot2x2() const { return _pivot_2x2; } T *addr2x2() { return _offdiag_2x2; } private: vector _loc2glob; vector _loc2glob_left; vector _pivot_width; vector _pivot_2x2; T* _offdiag_2x2; bool _offdiag_2x2_status; bool _full_pivoting; }; #endif FreeFem-sources-4.9/3rdparty/dissection/src/Algebra/VectorArray.hpp000664 000000 000000 00000006661 14037356732 025375 0ustar00rootroot000000 000000 /*! \file ColumnMatrix.hpp \brief Rectangular matrix view as a set of column vectors \author Xavier Juvigny, ONERA \date Jan. 19th 2005 \modification allocation of array by STL vector class \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 11th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #ifndef _ALGEBRA_VECTORARRAY_HPP # define _ALGEBRA_VECTORARRAY_HPP # include "Algebra/PlainMatrix.hpp" template class VectorArray : public PlainMatrix { public: using PlainMatrix::coefs; using PlainMatrix::addrCoefs; using PlainMatrix::addrCoefs_pt; using PlainMatrix::size; using PlainMatrix::ZeroClear; using PlainMatrix::init; using PlainMatrix::free; VectorArray() : PlainMatrix() {} VectorArray(int n) : PlainMatrix() { PlainMatrix::init(n); } ~VectorArray() { } virtual T& operator () (int i, int j) { return coefs()[i]; } virtual const T& operator () (int i, int j) const { return coefs()[i]; } virtual VectorArray* clone() const { VectorArray *ret = new VectorArray; ret->copy(*this); return(ret); } }; #endif FreeFem-sources-4.9/3rdparty/dissection/src/BitTools/000775 000000 000000 00000000000 14037356732 022614 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/BitTools/BitManipulations.hpp000664 000000 000000 00000007211 14037356732 026610 0ustar00rootroot000000 000000 /*! \file BitManipulations.hpp \brief to call grpah decomposer : METIS \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. 
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
//
// ==============================================================
// ==     Some function to manipulate easily bits on integers  ==
// ==============================================================
#ifndef _DISSECTION_BITTOOLS_BITMANIPULATIONS_HPP_
# define _DISSECTION_BITTOOLS_BITMANIPULATIONS_HPP_

/**
   @brief  Return the highest power of 2 which is less than or equal to x.
   @param  x value to round down
   @return 0 when x is 0, otherwise the value of the most significant set
           bit of x

   The portable branch smears the top bit over 32 positions, so it assumes
   that unsigned is at most 32 bits wide.
*/
inline unsigned highestbit(unsigned x)
{
  if (0 == x) return 0;
# if defined(USE_X86_ASM)
  // BSR yields the zero-based index of the most significant set bit, so the
  // power of two is 1 << index. The former 1 << (index - 1) returned half
  // the expected value and shifted by an out-of-range count for x == 1.
  __asm__("bsr{l}\t%0, %0\n\t" : "=r" (x) : "0" (x));
  return 1U << x;
# else
  // Propagate the top set bit into every lower position ...
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  // ... then clear everything below it.
  return x ^ (x >> 1);
# endif
}

// --------------------------------------------------------------
/**
   @brief  Return the position of the highest bit of x.
   @param  x value to inspect
   @return zero-based index of the most significant set bit; 0 when x is 0
*/
inline unsigned highest_one_idx(unsigned x)
{
# if defined(USE_X86_ASM)
  // BSR leaves its destination undefined for a zero source; return 0 as the
  // portable branch does.
  if (0 == x) return 0;
  __asm__("bsr{l}\t%0, %0\n\t" : "=r" (x) : "0" (x));
  return x;
# else
  // Binary search for the top bit: halve the window at every step.
  unsigned r = 0;
  if (x & 0xffff0000U) { x >>= 16; r += 16; }
  if (x & 0x0000ff00U) { x >>= 8 ; r += 8; }
  if (x & 0x000000f0U) { x >>= 4 ; r += 4; }
  if (x & 0x0000000cU) { x >>= 2 ; r += 2; }
  if (x & 0x00000002U) { x >>= 1 ; r += 1; }
  return r;
# endif
}

#endif
// (archive member headers, not source text)
// FreeFem-sources-4.9/3rdparty/dissection/src/C-test/000775 000000 000000 00000000000 14037356732 022214 5ustar00rootroot000000 000000
// FreeFem-sources-4.9/3rdparty/dissection/src/C-test/MM-Dissection-double-quad.cpp000664 000000 000000 00000057176 14037356732 027553 0ustar00rootroot000000 000000
/*! \file   MM-DissectionSolver.cpp
    \brief  test routine of dissection solver reading Matrix Market format
    \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions
    \date   Jun. 20th 2014
    \date   Jul. 12th 2015
    \date   Feb. 29th 2016
*/
// This file is part of Dissection
//
// Dissection is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #include #include #include #include #include #include #include "Driver/DissectionSolver.hpp" #ifdef BLAS_MKL #include #endif #include #include #include #include #include #include "Compiler/arithmetic.hpp" using namespace std; static int _stat = (-1); void *thread_child(void *arg) { char buf[256]; int *pid = (int *)arg; unsigned int mem_tmp, mem_min, mem_max; double avg_mem; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; int stat0, stat1; stat0 = _stat; unsigned int count = 0U; // fprintf(stderr, "thread_child forked\n"); while(_stat != 0) { stat1 = _stat; if (stat1 == 1) { sprintf(buf, "/proc/%d/statm", *pid); ifstream fin(buf); fin >> mem_tmp; fin.close(); if (mem_tmp > mem_max) { mem_max = mem_tmp; } if (mem_tmp < mem_min) { mem_min = mem_tmp; } avg_mem += (double)mem_tmp; count++; } if ((stat1 == (-1)) && (stat0 == 1)) { fprintf(stderr, "used memory :min: %14.8e max: %14.8e avg: %14.8e count: %d\n", (double)mem_min * 4.0 / (1024.0 * 1024.0), (double)mem_max * 4.0 / (1024.0 * 1024.0), (avg_mem / (double)count) * 4.0 / (1024.0 * 1024.0), count); count = 0U; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; } stat0 = stat1; usleep(1000); } // fprintf(stderr, "thread_child join\n count = %ld\n", count); pthread_exit(arg); return (void *)NULL; } template void generate_CSR(std::list* ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, T* val, bool symmetrize) { const T zero(0.0); // ind_cols_tmp = new std::list[nrow]; // val_tmp = new std::list[nrow]; int nnz1 = *nnz; for (int i = 0; i < *nnz; i++) { const int i0 = irow[i]; const int j0 = jcol[i]; const int ii = old2new[i0]; const int jj = old2new[j0]; if ((mask[i0] != 1) || (mask[j0] != 1)) { // fprintf(stderr, "%d %d\n", i0, j0); nnz1--; continue; } // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj); if (ind_cols_tmp[ii].empty()) { ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { if (ind_cols_tmp[ii].back() < jj) { 
ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { typename std::list::iterator iv = val_tmp[ii].begin(); std::list::iterator it = ind_cols_tmp[ii].begin(); for ( ; it != ind_cols_tmp[ii].end(); ++it, ++iv) { if (*it == jj) { fprintf(stderr, "already exits? (%d %d)\n", ii, jj); break; } if (*it > jj) { ind_cols_tmp[ii].insert(it, jj); val_tmp[ii].insert(iv, val[i]); break; } } } } } // symmetrize if (symmetrize) { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; // fprintf(stderr, "%d %d symmetric position found\n", i, jj); break; } } if (!flag) { if (ind_cols_tmp[jj].back() < i) { ind_cols_tmp[jj].push_back(i); val_tmp[jj].push_back(zero); fprintf(stderr, "%d %d added for symmetry\n", i, jj); nnz1++; } else { typename std::list::iterator iv = val_tmp[jj].begin(); std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it, ++iv) { std::list::iterator it1 = it; ++it1; if (((*it)) < i && ((*it1) > i)) { ind_cols_tmp[jj].insert(it, i); val_tmp[jj].insert(iv, zero); nnz1++; // fprintf(stderr, "%d %d inserted for symmetry\n", i, jj); break; } } } } // if (!flag); } } } #if 0 { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; break; } } if (!flag) { fprintf(stderr, "%d %d position is not symmetric\n", i, jj); } } } } #endif *nnz = nnz1; } template void copy_CSR(int *indcols, int *ptrows, T* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp) { const T zero(0.0); ptrows[0] = 0; for (int i = 
0; i < nrow; i++) { int k; int itmp = ind_cols_tmp[i].size(); if (upper_flag) { if (ind_cols_tmp[i].front() == i) { ptrows[i + 1] = ptrows[i] + itmp; k = ptrows[i]; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i]] = i; coefs[ptrows[i]] = zero; k = ptrows[i] + 1; } } else { k = ptrows[i]; if (ind_cols_tmp[i].back() == i || (!isSym)) { ptrows[i + 1] = ptrows[i] + itmp; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i + 1] - 1] = i; coefs[ptrows[i + 1] - 1] = zero; } } std::list::iterator it = ind_cols_tmp[i].begin(); typename std::list::iterator iv = val_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv, k++) { indcols[k] = *it; coefs[k] = *iv; } } // loop : i } int main(int argc, char **argv) { int n, itmp, jtmp; char fname[256], fname1[256]; char buf[1024]; int nrow, nnz, flag; int *ptrows, *indcols; int *irow, *jcol; double *val, *coefs; complex *valc, *ccoefs; // quadruple *qcoefs; // complex *qccoefs; int decomposer; int num_threads; int scaling = 1; double eps_pivot; int numlevels = -1; int minNodes = 128; std::list* ind_cols_tmp; std::list* val_tmp; std::list >* val_tmpc; FILE *fp; bool isSym, isComplex; bool upper_flag = true; bool isWhole = false; bool kernel_detection_all = false; int *indx_excl; int nexcl = 0; bool excl_flag = false; if (argc < 6) { fprintf(stderr, "MM-dissection [data file] [decomposer] [num_threads] [eps_pivot] [num_levels] [scaling] [kerner_detection_all] [upper_flag] [minNodes]\n"); exit(-1); } strcpy(fname, argv[1]); decomposer = atoi(argv[2]); num_threads = atoi(argv[3]); eps_pivot = atof(argv[4]); numlevels = atof(argv[5]); if (argc >= 7) { scaling = atoi(argv[6]); } if (argc >= 8) { kernel_detection_all = (atoi(argv[7]) == 1); } if (argc >= 9) { upper_flag = (atoi(argv[8]) == 1); isWhole = (atoi(argv[8]) == (-1)); } if (argc >= 10) { strcpy(fname1, argv[9]); excl_flag = true; 
} if (argc >= 11) { minNodes = atoi(argv[10]); } // read from the file if ((fp = fopen(fname, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname); } fgets(buf, 256, fp); // if (strstr(buf, "symmetric") != NULL) { isSym = true; } else { isSym = false; upper_flag = false; } if (strstr(buf, "complex") != NULL) { isComplex = true; } else { isComplex = false; } fprintf(stderr, "symmetric = %s\n", isSym ? "true " : "false"); fprintf(stderr, "scaling = %d\n", scaling); fprintf(stderr, "upper = %s\n", upper_flag ? "true" : "false"); if (kernel_detection_all) { fprintf(stderr, "kernel detection is activated for all submatrices\n"); } if (excl_flag) { fprintf(stderr, "list of singular nodes %s\n", fname1); } while (1) { fgets(buf, 256, fp); if (buf[0] != '%') { sscanf(buf, "%d %d %d", &nrow, &itmp, &nnz); break; } } irow = new int[nnz]; jcol = new int[nnz]; if (isComplex) { double xreal, ximag; valc = new complex[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &jcol[i], &irow[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &irow[i], &jcol[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } else { val = new double[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &jcol[i], &irow[i], &val[i]); // read lower irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &irow[i], &jcol[i], &val[i]); // read lower 
irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } fclose (fp); if (excl_flag) { if ((fp = fopen(fname1, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname1); } fgets(buf, 256, fp); sscanf(buf, "# %d", &nexcl); indx_excl = new int[nexcl]; for (int i = 0; i < nexcl; i++) { fgets(buf, 256, fp); sscanf(buf, "%d", &itmp); indx_excl[i] = itmp; } fclose(fp); } int *mask = new int[nrow]; int *old2new = new int[nrow]; for (int i = 0; i < nrow; i++) { mask[i] = 1; } for (int i = 0; i < nexcl; i++) { mask[indx_excl[i]] = 0; } itmp = 0; jtmp = nrow - nexcl; for (int i = 0; i < nrow; i++) { if (mask[i] == 1) { old2new[i] = itmp++; } else { old2new[i] = jtmp++; } } #if 0 if ((fp = fopen("debug-index.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d %d %d\n", i, old2new[i], mask[i]); } fclose(fp); } #endif nrow -= nexcl; ind_cols_tmp = new std::list[nrow]; fprintf(stderr, "%s %d : getnerate_CSR\n", __FILE__, __LINE__); if (isComplex) { val_tmpc = new std::list >[nrow]; generate_CSR >(ind_cols_tmp, val_tmpc, nrow, &nnz, mask, old2new, irow, jcol, valc, (!isSym)); } else { val_tmp = new std::list[nrow]; generate_CSR(ind_cols_tmp, val_tmp, nrow, &nnz, mask, old2new, irow, jcol, val, (!isSym)); } delete [] irow; delete [] jcol; delete [] mask; delete [] old2new; if (isComplex) { delete [] valc; } else { delete [] val; } if (upper_flag) { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].front() != i) { nnz++; } } } else { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].back() != i) { nnz++; } } } fprintf(stderr, "%s %d : copy_CSR\n", __FILE__, __LINE__); ptrows = new int[nrow + 1]; indcols = new int[nnz]; if (isComplex) { ccoefs = new complex[nnz]; copy_CSR >(indcols, ptrows, ccoefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmpc); #if 0 qccoefs = new complex[nnz]; for (int i = 0; i < nnz; i++) { qccoefs[i] = 
complex(quadruple(std::real(ccoefs[i])), quadruple(std::imag(ccoefs[i]))); } delete [] ccoefs; #endif } else { coefs = new double[nnz]; copy_CSR(indcols, ptrows, coefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmp); #if 0 qcoefs = new quadruple[nnz]; for (int i = 0; i < nnz; i++) { qcoefs[i] = quadruple(coefs[i]); } delete [] coefs; #endif } delete [] ind_cols_tmp; if (isComplex) { delete [] val_tmpc; } else { delete [] val_tmp; } #if 0 if ((fp = fopen("debug.matrix.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : %d :: ", i, (ptrows[i + 1] - ptrows[i])); for (int k = ptrows[i]; k < ptrows[i + 1]; k++) { fprintf(fp, "%d ", indcols[k]); } fprintf(fp, "\n"); } } fclose(fp); #endif int pid = (int)getpid(); #if 1 fprintf(stderr, "pid = %d\n", pid); sprintf(fname, "dissection.%04d.log", pid); fp = fopen(fname, "a"); #else fp = stderr; #endif fprintf(stderr, "%s %d : before pthread_create\n", __FILE__, __LINE__); void* results; pthread_attr_t th_attr; pthread_t thread; pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); int pthid = pthread_create(&thread, &th_attr, &thread_child, (void *)&pid); if (pthid != 0) { cout << "bad thread creation ? 
" << pid << endl; exit(0); } fprintf(stderr, "%s %d : after pthread_create\n", __FILE__, __LINE__); if (isWhole) { isSym = true; upper_flag = false; } if (isComplex) { DissectionSolver, quadruple, complex, double, complex, quadruple>*dslv = new DissectionSolver, quadruple, complex, double, complex, quadruple>(num_threads, true, 0, fp); DissectionSolver, double, complex, double, complex, quadruple> *dslv2 = new DissectionSolver, double, complex, double, complex, quadruple>(num_threads, true, 0, fp); int num_levels = (-1); // automatic int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); t1_cpu = clock(); get_realtime(&t1_elapsed); t2_cpu = clock(); get_realtime(&t2_elapsed); _stat = 1; usleep(5000); dslv->NumericFact(0, (complex *)coefs, scaling, eps_pivot, kernel_detection_all); _stat = (-1); t3_cpu = clock(); get_realtime(&t3_elapsed); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); dslv2->CopyQueueFwBw(*dslv); int n0; n0 = dslv2->kern_dimension(); fprintf(fp, "## kernel dimension = %d\n", n0); complex *x = new complex[nrow]; complex *y = new complex[nrow]; complex *z = new complex[nrow]; for (int i = 0; i < nrow; i++) { y[i] = complex((double)(i % 11)); } dslv2->SpMV(y, x); dslv2->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } t4_cpu = clock(); get_realtime(&t4_elapsed); _stat = 1; usleep(5000); dslv2->SolveSingle(y, false, false, true); // with projection : true _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); t5_cpu = clock(); get_realtime(&t5_elapsed); quadruple norm0, norm1; norm0 = 
quadruple(0); norm1 = quadruple(0); for (int i = 0; i < nrow; i++) { norm0 += x[i].real() * x[i].real() + x[i].imag() * x[i].imag(); complex ztmp = y[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); dslv2->SpMV(y, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i].real() * z[i].real() + z[i].imag() * z[i].imag(); complex ztmp = z[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? " << pthid << endl; exit(0); } fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); delete dslv; delete [] ptrows; delete [] indcols; delete [] x; delete [] y; delete [] z; } // if (isComplex) else { DissectionSolver *dslv = new DissectionSolver(num_threads, true, 0, fp); DissectionSolver *dslv2 = new DissectionSolver(num_threads, true, 0, fp); int num_levels = (-1); // automatic int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); 
get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); // sym, upper // dslv->SaveMMMatrix(0, coefs); // exit(-1); t1_cpu = clock(); get_realtime(&t1_elapsed); _stat = 1; usleep(5000); t2_cpu = clock(); get_realtime(&t2_elapsed); dslv->NumericFact(0, coefs, scaling, eps_pivot, kernel_detection_all); t3_cpu = clock(); get_realtime(&t3_elapsed); _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); dslv2->CopyQueueFwBw(*dslv); int n0; // n0 = dslv->kern_dimension(); n0 = dslv2->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); double *x = new double[nrow]; double *y = new double[nrow]; double *z = new double[nrow]; for (int i = 0; i < nrow; i++) { y[i] = (double)(i % 11); } dslv2->SpMV(y, x); if (n0 > 0) { dslv2->ProjectionImageSingle(x); } dslv2->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } _stat = 1; usleep(5000); t4_cpu = clock(); get_realtime(&t4_elapsed); dslv2->SolveSingle(y, false, false, true); // with projection : true _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); if (n0 > 0) { dslv2->ProjectionImageSingle(y); } t5_cpu = clock(); get_realtime(&t5_elapsed); double norm0, norm1; norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += x[i] * x[i]; norm1 += (y[i] - x[i]) * (y[i] - x[i]); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); dslv2->SpMV(y, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i] * z[i]; norm1 += (z[i] - x[i]) * (z[i] - x[i]); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? 
" << pthid << endl; exit(0); } fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); delete dslv; delete dslv2; // DissectionMatrix()._diag().free() still fails 28 Aug.2015 delete [] ptrows; delete [] indcols; delete [] x; delete [] y; delete [] z; } if (isComplex) { delete [] ccoefs; } else { delete [] coefs; } fclose(fp); } FreeFem-sources-4.9/3rdparty/dissection/src/C-test/MM-Dissection-mRHS.cpp000664 000000 000000 00000063443 14037356732 026154 0ustar00rootroot000000 000000 /*! \file MM-DissectionSolver.cpp \brief test rouinte of dissection solver reading Matrix Market format \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #include #include #include #include #include #include #include "Driver/DissectionSolver.hpp" #ifdef BLAS_MKL #include #endif #include #include #include #include #include using namespace std; static int _stat = (-1); void *thread_child(void *arg) { char buf[256]; int *pid = (int *)arg; unsigned int mem_tmp, mem_min, mem_max; double avg_mem; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; int stat0, stat1; stat0 = _stat; unsigned int count = 0U; // fprintf(stderr, "thread_child forked\n"); while(_stat != 0) { stat1 = _stat; if (stat1 == 1) { sprintf(buf, "/proc/%d/statm", *pid); ifstream fin(buf); fin >> mem_tmp; fin.close(); if (mem_tmp > mem_max) { mem_max = mem_tmp; } if (mem_tmp < mem_min) { mem_min = mem_tmp; } avg_mem += (double)mem_tmp; count++; } if ((stat1 == (-1)) && (stat0 == 1)) { fprintf(stderr, "used memory :min: %14.8e max: %14.8e avg: %14.8e count: %d\n", (double)mem_min * 4.0 / (1024.0 * 1024.0), (double)mem_max * 4.0 / (1024.0 * 1024.0), (avg_mem / (double)count) * 4.0 / (1024.0 * 1024.0), count); count = 0U; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; } stat0 = stat1; usleep(1000); } // fprintf(stderr, "thread_child join\n count = %ld\n", count); pthread_exit(arg); return (void *)NULL; } template void generate_CSR(std::list* ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, T* val, bool symmetrize) { const T zero(0.0); // ind_cols_tmp = new std::list[nrow]; // val_tmp = new std::list[nrow]; int nnz1 = *nnz; for (int i = 0; i < *nnz; i++) { const int i0 = irow[i]; const int j0 = jcol[i]; const int ii = old2new[i0]; const int jj = old2new[j0]; if ((mask[i0] != 1) || (mask[j0] != 1)) { // fprintf(stderr, "%d %d\n", i0, j0); nnz1--; continue; } // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj); if (ind_cols_tmp[ii].empty()) { ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { if (ind_cols_tmp[ii].back() < jj) { ind_cols_tmp[ii].push_back(jj); 
val_tmp[ii].push_back(val[i]); } else { typename std::list::iterator iv = val_tmp[ii].begin(); std::list::iterator it = ind_cols_tmp[ii].begin(); for ( ; it != ind_cols_tmp[ii].end(); ++it, ++iv) { if (*it == jj) { fprintf(stderr, "already exits? (%d %d)\n", ii, jj); break; } if (*it > jj) { ind_cols_tmp[ii].insert(it, jj); val_tmp[ii].insert(iv, val[i]); break; } } } } } // symmetrize if (symmetrize) { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; // fprintf(stderr, "%d %d symmetric position found\n", i, jj); break; } } if (!flag) { if (ind_cols_tmp[jj].back() < i) { ind_cols_tmp[jj].push_back(i); val_tmp[jj].push_back(zero); fprintf(stderr, "%d %d added for symmetry\n", i, jj); nnz1++; } else { typename std::list::iterator iv = val_tmp[jj].begin(); std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it, ++iv) { std::list::iterator it1 = it; ++it1; if (((*it)) < i && ((*it1) > i)) { ind_cols_tmp[jj].insert(it, i); val_tmp[jj].insert(iv, zero); nnz1++; // fprintf(stderr, "%d %d inserted for symmetry\n", i, jj); break; } } } } // if (!flag); } } } #if 0 { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; break; } } if (!flag) { fprintf(stderr, "%d %d position is not symmetric\n", i, jj); } } } } #endif *nnz = nnz1; } template void generate_CSR(std::list* ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, double* val, bool symmetrize); template void generate_CSR >(std::list* ind_cols_tmp, 
std::list >* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, complex* val, bool symmetrize); template void copy_CSR(int *indcols, int *ptrows, T* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp) { const T zero(0.0); ptrows[0] = 0; for (int i = 0; i < nrow; i++) { int k; int itmp = ind_cols_tmp[i].size(); if (upper_flag) { if (ind_cols_tmp[i].front() == i) { ptrows[i + 1] = ptrows[i] + itmp; k = ptrows[i]; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i]] = i; coefs[ptrows[i]] = zero; k = ptrows[i] + 1; } } else { k = ptrows[i]; if (ind_cols_tmp[i].back() == i || (!isSym)) { ptrows[i + 1] = ptrows[i] + itmp; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i + 1] - 1] = i; coefs[ptrows[i + 1] - 1] = zero; } } std::list::iterator it = ind_cols_tmp[i].begin(); typename std::list::iterator iv = val_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv, k++) { indcols[k] = *it; coefs[k] = *iv; } } // loop : i } template void copy_CSR(int *indcols, int *ptrows, double* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp); template void copy_CSR >(int *indcols, int *ptrows, complex* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list >* val_tmp); int main(int argc, char **argv) { int n, itmp, jtmp; char fname[256], fname1[256]; char buf[1024]; int nrow, nnz, flag; int *ptrows, *indcols; int *irow, *jcol; double *val, *coefs; complex *valc, *ccoefs; int decomposer; int num_threads; int scaling = 1; double eps_pivot; int numlevels = -1; int minNodes = 128; std::list* ind_cols_tmp; std::list* val_tmp; std::list >* val_tmpc; FILE *fp; bool isSym, isComplex; bool upper_flag = true; bool isWhole = false; bool kernel_detection_all = false; int *indx_excl; int nexcl = 0; bool excl_flag = false; if 
(argc < 6) { fprintf(stderr, "MM-dissection [data file] [decomposer] [num_threads] [eps_pivot] [num_levels] [scaling] [kerner_detection_all] [upper_flag] [minNodes]\n"); exit(-1); } strcpy(fname, argv[1]); decomposer = atoi(argv[2]); num_threads = atoi(argv[3]); eps_pivot = atof(argv[4]); numlevels = atof(argv[5]); if (argc >= 7) { scaling = atoi(argv[6]); } if (argc >= 8) { kernel_detection_all = (atoi(argv[7]) == 1); } if (argc >= 9) { upper_flag = (atoi(argv[8]) == 1); isWhole = (atoi(argv[8]) == (-1)); } if (argc >= 10) { strcpy(fname1, argv[9]); excl_flag = true; } if (argc >= 11) { minNodes = atoi(argv[10]); } // read from the file if ((fp = fopen(fname, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname); } fgets(buf, 256, fp); // if (strstr(buf, "symmetric") != NULL) { isSym = true; } else { isSym = false; upper_flag = false; } if (strstr(buf, "complex") != NULL) { isComplex = true; } else { isComplex = false; } fprintf(stderr, "symmetric = %s\n", isSym ? "true " : "false"); fprintf(stderr, "scaling = %d\n", scaling); fprintf(stderr, "upper = %s\n", upper_flag ? 
"true" : "false"); if (kernel_detection_all) { fprintf(stderr, "kernel detection is activated for all submatrices\n"); } if (excl_flag) { fprintf(stderr, "list of singular nodes %s\n", fname1); } while (1) { fgets(buf, 256, fp); if (buf[0] != '%') { sscanf(buf, "%d %d %d", &nrow, &itmp, &nnz); break; } } irow = new int[nnz]; jcol = new int[nnz]; if (isComplex) { double xreal, ximag; valc = new complex[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &jcol[i], &irow[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &irow[i], &jcol[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } else { val = new double[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &jcol[i], &irow[i], &val[i]); // read lower irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &irow[i], &jcol[i], &val[i]); // read lower irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } fclose (fp); if (excl_flag) { if ((fp = fopen(fname1, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname1); } fgets(buf, 256, fp); sscanf(buf, "# %d", &nexcl); indx_excl = new int[nexcl]; for (int i = 0; i < nexcl; i++) { fgets(buf, 256, fp); sscanf(buf, "%d", &itmp); indx_excl[i] = itmp; } fclose(fp); } int *mask = new int[nrow]; int 
*old2new = new int[nrow]; for (int i = 0; i < nrow; i++) { mask[i] = 1; } for (int i = 0; i < nexcl; i++) { mask[indx_excl[i]] = 0; } itmp = 0; jtmp = nrow - nexcl; for (int i = 0; i < nrow; i++) { if (mask[i] == 1) { old2new[i] = itmp++; } else { old2new[i] = jtmp++; } } #if 0 if ((fp = fopen("debug-index.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d %d %d\n", i, old2new[i], mask[i]); } fclose(fp); } #endif nrow -= nexcl; ind_cols_tmp = new std::list[nrow]; fprintf(stderr, "%s %d : getnerate_CSR\n", __FILE__, __LINE__); if (isComplex) { val_tmpc = new std::list >[nrow]; generate_CSR >(ind_cols_tmp, val_tmpc, nrow, &nnz, mask, old2new, irow, jcol, valc, (!isSym)); } else { val_tmp = new std::list[nrow]; generate_CSR(ind_cols_tmp, val_tmp, nrow, &nnz, mask, old2new, irow, jcol, val, (!isSym)); } delete [] irow; delete [] jcol; delete [] mask; delete [] old2new; if (isComplex) { delete [] valc; } else { delete [] val; } if (upper_flag) { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].front() != i) { nnz++; } } } else { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].back() != i) { nnz++; } } } fprintf(stderr, "%s %d : copy_CSR\n", __FILE__, __LINE__); ptrows = new int[nrow + 1]; indcols = new int[nnz]; if (isComplex) { ccoefs = new complex[nnz]; copy_CSR >(indcols, ptrows, ccoefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmpc); } else { coefs = new double[nnz]; copy_CSR(indcols, ptrows, coefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmp); } delete [] ind_cols_tmp; if (isComplex) { delete [] val_tmpc; } else { delete [] val_tmp; } #if 0 if ((fp = fopen("debug.matrix.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : %d :: ", i, (ptrows[i + 1] - ptrows[i])); for (int k = ptrows[i]; k < ptrows[i + 1]; k++) { fprintf(fp, "%d ", indcols[k]); } fprintf(fp, "\n"); } } fclose(fp); #endif int pid = (int)getpid(); #if 1 fprintf(stderr, "pid = %d\n", pid); sprintf(fname, "dissection.%04d.log", pid); fp = 
fopen(fname, "a"); #else fp = stderr; #endif for (int i = 0; i < argc; i++) { fprintf(fp, "%s ", argv[i]); } fprintf(fp, "\n"); fprintf(stderr, "%s %d : before pthread_create\n", __FILE__, __LINE__); void* results; pthread_attr_t th_attr; pthread_t thread; pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); int pthid = pthread_create(&thread, &th_attr, &thread_child, (void *)&pid); if (pthid != 0) { cout << "bad thread creation ? " << pid << endl; exit(0); } fprintf(stderr, "%s %d : after pthread_create\n", __FILE__, __LINE__); if (isWhole) { isSym = true; upper_flag = false; } if (isComplex) { DissectionSolver, double>*dslv = new DissectionSolver, double>(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; clock_t *tbegin_cpu, *tend_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; elapsed_t *tbegin_elapsed, *tend_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); t1_cpu = clock(); get_realtime(&t1_elapsed); t2_cpu = clock(); get_realtime(&t2_elapsed); _stat = 1; usleep(5000); dslv->NumericFact(0, (complex *)ccoefs, scaling, eps_pivot, kernel_detection_all); _stat = (-1); t3_cpu = clock(); get_realtime(&t3_elapsed); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); int nrhs = 40; complex *x = new complex[nrow]; complex *y = new complex[nrow * nrhs]; complex *z = new complex[nrow * nrhs]; int ntrial = 24; tbegin_cpu = new clock_t[ntrial]; tend_cpu = new clock_t[ntrial]; tbegin_elapsed = new elapsed_t[ntrial]; tend_elapsed = new elapsed_t[ntrial]; fprintf(stderr, "%s %d : 
RHS allocated\n", __FILE__, __LINE__); _stat = (-1); for (int k = 0; k < nrhs; k++) { const int kshft = k * nrow; for (int i = 0; i < nrow; i++) { y[kshft + i] = complex((double)((i + k) % 11), 0.0); } dslv->SpMV(y + kshft, x); dslv->SpMV(x, y + kshft); for (int i = 0; i < nrow * nrhs; i++) { z[i] = y[i]; } } _stat = 1; for (int k = 1; k <= 20; k++) { for (int i = 0; i < nrow * nrhs; i++) { y[i] = z[i]; } tbegin_cpu[k - 1] = clock(); get_realtime(&tbegin_elapsed[k - 1]); if (k == 1) { fprintf(stderr, "%s %d : SolveSingle()\n", __FILE__, __LINE__); dslv->SolveSingle(y, false, false, true); fprintf(stderr, "%s %d : SolveSingle()\n", __FILE__, __LINE__); } else { const int nnrhs = k; fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); dslv->SolveMulti(y, nnrhs, false, false, true); fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); } tend_cpu[k - 1] = clock(); get_realtime(&tend_elapsed[k - 1]); } for (int k = 5; k < 9; k++) { for (int i = 0; i < nrow * nrhs; i++) { y[i] = z[i]; } tbegin_cpu[k + 15] = clock(); get_realtime(&tbegin_elapsed[k + 15]); const int nnrhs = k * 5; fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); dslv->SolveMulti(y, nnrhs, false, false, true); fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); tend_cpu[k + 15] = clock(); get_realtime(&tend_elapsed[k + 15]); } _stat = (-1); #if 1 for (int k = 0; k < nrhs; k++) { const int kshft = k * nrow; for (int i = 0; i < nrow; i++) { z[i] = complex((double)((i + k) % 11), 0.0); } dslv->SpMV(z, x); dslv->SpMV(x, z); double norm0, norm1; norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += x[i].real() * x[i].real() + x[i].imag() * x[i].imag(); complex ztmp; ztmp = y[i + kshft] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "## %2d : error : %18.7e = %18.7e / %18.7e\n", k, sqrt(norm1 / norm0), sqrt(norm1), sqrt(norm0)); dslv->SpMV(y + kshft, x); norm0 = 0.0; norm1 = 
0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i].real() * z[i].real() + z[i].imag() * z[i].imag(); complex ztmp; ztmp = z[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "## %2d : residual : %18.7e = %18.7e / %18.7e\n", k, sqrt(norm1 / norm0), sqrt(norm1), sqrt(norm0)); } #endif delete dslv; delete [] ptrows; delete [] indcols; delete [] ccoefs; delete [] x; delete [] y; delete [] z; } // if (isComplex) else { DissectionSolver *dslv = new DissectionSolver(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; clock_t *tbegin_cpu, *tend_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; elapsed_t *tbegin_elapsed, *tend_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); // sym, upper // dslv->SaveMMMatrix(0, coefs); // exit(-1); #if 0 fprintf(stderr, "%d %d\n", nnz, ptrows[nrow]); FORTRAN_DECL(csrmatrix_save)(0, 0, 0, // is_sym 1, // real_or_cmplx nrow, ptrows[nrow], ptrows, indcols, coefs); exit(-1); #endif t1_cpu = clock(); get_realtime(&t1_elapsed); _stat = 1; usleep(5000); t2_cpu = clock(); get_realtime(&t2_elapsed); dslv->NumericFact(0, (double *)coefs, scaling, eps_pivot, kernel_detection_all); t3_cpu = clock(); get_realtime(&t3_elapsed); _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); _stat = 1; int nrhs = 40; double *x = new double[nrow]; double *y = new double[nrow * nrhs]; double *z = new double[nrow * nrhs]; int ntrial = 24; tbegin_cpu = new clock_t[ntrial]; tend_cpu = new clock_t[ntrial]; tbegin_elapsed = new elapsed_t[ntrial]; tend_elapsed = new 
elapsed_t[ntrial]; fprintf(stderr, "%s %d : RHS allocated\n", __FILE__, __LINE__); _stat = (-1); if (!isSym && (n0 > 0)) { _stat = 1; fprintf(stderr, "%s %d : ComputeTransposedKernels()\n", __FILE__, __LINE__); dslv->ComputeTransposedKernels(); fprintf(stderr, "%s %d : ComputeTransposedKernels()\n", __FILE__, __LINE__); _stat = (-1); } for (int k = 0; k < nrhs; k++) { const int kshft = k * nrow; for (int i = 0; i < nrow; i++) { y[kshft + i] = (double)((i + k) % 11); } dslv->SpMV(y + kshft, x); if (n0 > 0) { dslv->ProjectionImageSingle(x); } dslv->SpMV(x, y + kshft); for (int i = 0; i < nrow * nrhs; i++) { z[i] = y[i]; } } _stat = 1; for (int k = 1; k <= 20; k++) { for (int i = 0; i < nrow * nrhs; i++) { y[i] = z[i]; } tbegin_cpu[k - 1] = clock(); get_realtime(&tbegin_elapsed[k - 1]); if (k == 1) { fprintf(stderr, "%s %d : SolveSingle()\n", __FILE__, __LINE__); dslv->SolveSingle(y, false, false, true); fprintf(stderr, "%s %d : SolveSingle()\n", __FILE__, __LINE__); } else { const int nnrhs = k; fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); dslv->SolveMulti(y, nnrhs, false, false, true); fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); } if (n0 > 0) { dslv->ProjectionImageSingle(y); } tend_cpu[k - 1] = clock(); get_realtime(&tend_elapsed[k - 1]); } for (int k = 5; k < 9; k++) { for (int i = 0; i < nrow * nrhs; i++) { y[i] = z[i]; } tbegin_cpu[k + 15] = clock(); get_realtime(&tbegin_elapsed[k + 15]); const int nnrhs = k * 5; fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); dslv->SolveMulti(y, nnrhs, false, false, true); fprintf(stderr, "%s %d : SolveMulti() %d\n", __FILE__, __LINE__, nnrhs); if (n0 > 0) { dslv->ProjectionImageSingle(y); } tend_cpu[k + 15] = clock(); get_realtime(&tend_elapsed[k + 15]); } _stat = (-1); #if 1 for (int k = 0; k < nrhs; k++) { const int kshft = k * nrow; for (int i = 0; i < nrow; i++) { z[i] = (double)((i + k) % 11); } dslv->SpMV(z, x); dslv->SpMV(x, z); double 
norm0, norm1; norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += x[i] * x[i]; double ztmp; ztmp = y[i + kshft] - x[i]; norm1 += ztmp * ztmp; } fprintf(fp, "## %2d : error : %18.7e = %18.7e / %18.7e\n", k, sqrt(norm1 / norm0), sqrt(norm1), sqrt(norm0)); dslv->SpMV(y + kshft, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i] * z[i]; double ztmp; ztmp = z[i] - x[i]; norm1 += ztmp * ztmp; } fprintf(fp, "## %2d : residual : %18.7e = %18.7e / %18.7e\n", k, sqrt(norm1 / norm0), sqrt(norm1), sqrt(norm0)); } usleep(5000); t4_cpu = clock(); get_realtime(&t4_elapsed); dslv->SolveSingle(y, false, true, true); // with projection : true // dslv->ProjectionImageSingle(y); t5_cpu = clock(); get_realtime(&t5_elapsed); _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); #endif delete dslv; delete [] ptrows; delete [] indcols; delete [] coefs; delete [] x; delete [] y; delete [] z; } fclose(fp); } FreeFem-sources-4.9/3rdparty/dissection/src/C-test/MM-Dissection-quad.cpp000664 000000 000000 00000057553 14037356732 026302 0ustar00rootroot000000 000000 /*! \file MM-DissectionSolver.cpp \brief test rouinte of dissection solver reading Matrix Market format \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Feb. 29th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #include #include #include #include #include #include #include "Driver/DissectionSolver.hpp" #ifdef BLAS_MKL #include #endif #include #include #include #include #include #include "Compiler/arithmetic.hpp" using namespace std; static int _stat = (-1); void *thread_child(void *arg) { char buf[256]; int *pid = (int *)arg; unsigned int mem_tmp, mem_min, mem_max; double avg_mem; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; int stat0, stat1; stat0 = _stat; unsigned int count = 0U; // fprintf(stderr, "thread_child forked\n"); while(_stat != 0) { stat1 = _stat; if (stat1 == 1) { sprintf(buf, "/proc/%d/statm", *pid); ifstream fin(buf); fin >> mem_tmp; fin.close(); if (mem_tmp > mem_max) { mem_max = mem_tmp; } if (mem_tmp < mem_min) { mem_min = mem_tmp; } avg_mem += (double)mem_tmp; count++; } if ((stat1 == (-1)) && (stat0 == 1)) { fprintf(stderr, "used memory :min: %14.8e max: %14.8e avg: %14.8e count: %d\n", (double)mem_min * 4.0 / (1024.0 * 1024.0), (double)mem_max * 4.0 / (1024.0 * 1024.0), (avg_mem / (double)count) * 4.0 / (1024.0 * 1024.0), count); count = 0U; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; } stat0 = stat1; usleep(1000); } // fprintf(stderr, "thread_child join\n count = %ld\n", count); pthread_exit(arg); return (void *)NULL; } template void generate_CSR(std::list* ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, T* val, bool symmetrize) { const T zero(0.0); // ind_cols_tmp = new std::list[nrow]; // val_tmp = new std::list[nrow]; int nnz1 = *nnz; for (int i = 0; i < *nnz; i++) { const int i0 = irow[i]; const int j0 = jcol[i]; const int ii = old2new[i0]; const int jj = old2new[j0]; if ((mask[i0] != 1) || (mask[j0] != 1)) { // fprintf(stderr, "%d %d\n", i0, j0); nnz1--; continue; } // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj); if (ind_cols_tmp[ii].empty()) { ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { if (ind_cols_tmp[ii].back() < jj) { 
ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { typename std::list::iterator iv = val_tmp[ii].begin(); std::list::iterator it = ind_cols_tmp[ii].begin(); for ( ; it != ind_cols_tmp[ii].end(); ++it, ++iv) { if (*it == jj) { fprintf(stderr, "already exits? (%d %d)\n", ii, jj); break; } if (*it > jj) { ind_cols_tmp[ii].insert(it, jj); val_tmp[ii].insert(iv, val[i]); break; } } } } } // symmetrize if (symmetrize) { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; // fprintf(stderr, "%d %d symmetric position found\n", i, jj); break; } } if (!flag) { if (ind_cols_tmp[jj].back() < i) { ind_cols_tmp[jj].push_back(i); val_tmp[jj].push_back(zero); fprintf(stderr, "%d %d added for symmetry\n", i, jj); nnz1++; } else { typename std::list::iterator iv = val_tmp[jj].begin(); std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it, ++iv) { std::list::iterator it1 = it; ++it1; if (((*it)) < i && ((*it1) > i)) { ind_cols_tmp[jj].insert(it, i); val_tmp[jj].insert(iv, zero); nnz1++; // fprintf(stderr, "%d %d inserted for symmetry\n", i, jj); break; } } } } // if (!flag); } } } #if 0 { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; std::list::iterator it = ind_cols_tmp[jj].begin(); for ( ; it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; break; } } if (!flag) { fprintf(stderr, "%d %d position is not symmetric\n", i, jj); } } } } #endif *nnz = nnz1; } template void copy_CSR(int *indcols, int *ptrows, T* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp) { const T zero(0.0); ptrows[0] = 0; for (int i = 
0; i < nrow; i++) { int k; int itmp = ind_cols_tmp[i].size(); if (upper_flag) { if (ind_cols_tmp[i].front() == i) { ptrows[i + 1] = ptrows[i] + itmp; k = ptrows[i]; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i]] = i; coefs[ptrows[i]] = zero; k = ptrows[i] + 1; } } else { k = ptrows[i]; if (ind_cols_tmp[i].back() == i || (!isSym)) { ptrows[i + 1] = ptrows[i] + itmp; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i + 1] - 1] = i; coefs[ptrows[i + 1] - 1] = zero; } } std::list::iterator it = ind_cols_tmp[i].begin(); typename std::list::iterator iv = val_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv, k++) { indcols[k] = *it; coefs[k] = *iv; } } // loop : i } int main(int argc, char **argv) { int n, itmp, jtmp; char fname[256], fname1[256]; char buf[1024]; int nrow, nnz, flag; int *ptrows, *indcols; int *irow, *jcol; double *val, *coefs; complex *valc, *ccoefs; quadruple *qcoefs; complex *qccoefs; int decomposer; int num_threads; int scaling = 1; double eps_pivot; int numlevels = -1; int minNodes = 128; std::list* ind_cols_tmp; std::list* val_tmp; std::list >* val_tmpc; FILE *fp; bool isSym, isComplex; bool upper_flag = true; bool isWhole = false; bool kernel_detection_all = false; int *indx_excl; int nexcl = 0; bool excl_flag = false; if (argc < 6) { fprintf(stderr, "MM-dissection [data file] [decomposer] [num_threads] [eps_pivot] [num_levels] [scaling] [kerner_detection_all] [upper_flag] [minNodes]\n"); exit(-1); } strcpy(fname, argv[1]); decomposer = atoi(argv[2]); num_threads = atoi(argv[3]); eps_pivot = atof(argv[4]); numlevels = atof(argv[5]); if (argc >= 7) { scaling = atoi(argv[6]); } if (argc >= 8) { kernel_detection_all = (atoi(argv[7]) == 1); } if (argc >= 9) { upper_flag = (atoi(argv[8]) == 1); isWhole = (atoi(argv[8]) == (-1)); } if (argc >= 10) { strcpy(fname1, argv[9]); excl_flag = true; } if 
(argc >= 11) { minNodes = atoi(argv[10]); } // read from the file if ((fp = fopen(fname, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname); } fgets(buf, 256, fp); // if (strstr(buf, "symmetric") != NULL) { isSym = true; } else { isSym = false; upper_flag = false; } if (strstr(buf, "complex") != NULL) { isComplex = true; } else { isComplex = false; } fprintf(stderr, "symmetric = %s\n", isSym ? "true " : "false"); fprintf(stderr, "scaling = %d\n", scaling); fprintf(stderr, "upper = %s\n", upper_flag ? "true" : "false"); if (kernel_detection_all) { fprintf(stderr, "kernel detection is activated for all submatrices\n"); } if (excl_flag) { fprintf(stderr, "list of singular nodes %s\n", fname1); } while (1) { fgets(buf, 256, fp); if (buf[0] != '%') { sscanf(buf, "%d %d %d", &nrow, &itmp, &nnz); break; } } irow = new int[nnz]; jcol = new int[nnz]; if (isComplex) { double xreal, ximag; valc = new complex[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &jcol[i], &irow[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &irow[i], &jcol[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } else { val = new double[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &jcol[i], &irow[i], &val[i]); // read lower irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &irow[i], &jcol[i], &val[i]); // read lower irow[i]--; 
jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } fclose (fp); if (excl_flag) { if ((fp = fopen(fname1, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname1); } fgets(buf, 256, fp); sscanf(buf, "# %d", &nexcl); indx_excl = new int[nexcl]; for (int i = 0; i < nexcl; i++) { fgets(buf, 256, fp); sscanf(buf, "%d", &itmp); indx_excl[i] = itmp; } fclose(fp); } int *mask = new int[nrow]; int *old2new = new int[nrow]; for (int i = 0; i < nrow; i++) { mask[i] = 1; } for (int i = 0; i < nexcl; i++) { mask[indx_excl[i]] = 0; } itmp = 0; jtmp = nrow - nexcl; for (int i = 0; i < nrow; i++) { if (mask[i] == 1) { old2new[i] = itmp++; } else { old2new[i] = jtmp++; } } #if 0 if ((fp = fopen("debug-index.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d %d %d\n", i, old2new[i], mask[i]); } fclose(fp); } #endif nrow -= nexcl; ind_cols_tmp = new std::list[nrow]; fprintf(stderr, "%s %d : getnerate_CSR\n", __FILE__, __LINE__); if (isComplex) { val_tmpc = new std::list >[nrow]; generate_CSR >(ind_cols_tmp, val_tmpc, nrow, &nnz, mask, old2new, irow, jcol, valc, (!isSym)); } else { val_tmp = new std::list[nrow]; generate_CSR(ind_cols_tmp, val_tmp, nrow, &nnz, mask, old2new, irow, jcol, val, (!isSym)); } delete [] irow; delete [] jcol; delete [] mask; delete [] old2new; if (isComplex) { delete [] valc; } else { delete [] val; } if (upper_flag) { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].front() != i) { nnz++; } } } else { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].back() != i) { nnz++; } } } fprintf(stderr, "%s %d : copy_CSR\n", __FILE__, __LINE__); ptrows = new int[nrow + 1]; indcols = new int[nnz]; if (isComplex) { ccoefs = new complex[nnz]; copy_CSR >(indcols, ptrows, ccoefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmpc); qccoefs = new complex[nnz]; for (int i = 0; i < nnz; i++) { qccoefs[i] = 
complex(quadruple(std::real(ccoefs[i])), quadruple(std::imag(ccoefs[i]))); } delete [] ccoefs; } else { coefs = new double[nnz]; copy_CSR(indcols, ptrows, coefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmp); qcoefs = new quadruple[nnz]; for (int i = 0; i < nnz; i++) { qcoefs[i] = quadruple(coefs[i]); } delete [] coefs; } delete [] ind_cols_tmp; if (isComplex) { delete [] val_tmpc; } else { delete [] val_tmp; } #if 0 if ((fp = fopen("debug.matrix.data", "w")) != NULL) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : %d :: ", i, (ptrows[i + 1] - ptrows[i])); for (int k = ptrows[i]; k < ptrows[i + 1]; k++) { fprintf(fp, "%d ", indcols[k]); } fprintf(fp, "\n"); } } fclose(fp); #endif int pid = (int)getpid(); #if 1 fprintf(stderr, "pid = %d\n", pid); sprintf(fname, "dissection.%04d.log", pid); fp = fopen(fname, "a"); #else fp = stderr; #endif fprintf(stderr, "%s %d : before pthread_create\n", __FILE__, __LINE__); void* results; pthread_attr_t th_attr; pthread_t thread; pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); int pthid = pthread_create(&thread, &th_attr, &thread_child, (void *)&pid); if (pthid != 0) { cout << "bad thread creation ? 
" << pid << endl; exit(0); } fprintf(stderr, "%s %d : after pthread_create\n", __FILE__, __LINE__); if (isWhole) { isSym = true; upper_flag = false; } if (isComplex) { DissectionSolver, quadruple>*dslv = new DissectionSolver, quadruple>(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); t1_cpu = clock(); get_realtime(&t1_elapsed); t2_cpu = clock(); get_realtime(&t2_elapsed); _stat = 1; usleep(5000); dslv->NumericFact(0, (complex *)qccoefs, scaling, eps_pivot, kernel_detection_all); _stat = (-1); t3_cpu = clock(); get_realtime(&t3_elapsed); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "## kernel dimension = %d\n", n0); complex *x = new complex[nrow]; complex *y = new complex[nrow]; complex *z = new complex[nrow]; for (int i = 0; i < nrow; i++) { y[i] = complex((quadruple)(i % 11), quadruple(0)); } dslv->SpMV(y, x); dslv->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } t4_cpu = clock(); get_realtime(&t4_elapsed); _stat = 1; usleep(5000); dslv->SolveSingle(y, false, false, true); // with projection : true _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); t5_cpu = clock(); get_realtime(&t5_elapsed); quadruple norm0, norm1; norm0 = quadruple(0); norm1 = quadruple(0); for (int i = 0; i < nrow; i++) { norm0 += x[i].real() * x[i].real() + x[i].imag() * x[i].imag(); complex ztmp = y[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, 
sqrt(quad2double(norm1 / norm0))); dslv->SpMV(y, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i].real() * z[i].real() + z[i].imag() * z[i].imag(); complex ztmp = z[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? " << pthid << endl; exit(0); } fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); delete dslv; delete [] ptrows; delete [] indcols; delete [] qccoefs; delete [] x; delete [] y; delete [] z; } // if (isComplex) else { DissectionSolver *dslv = new DissectionSolver(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); // sym, upper // dslv->SaveMMMatrix(0, coefs); // exit(-1); t1_cpu = clock(); get_realtime(&t1_elapsed); _stat = 1; usleep(5000); t2_cpu = clock(); get_realtime(&t2_elapsed); dslv->NumericFact(0, (quadruple *)qcoefs, scaling, eps_pivot, 
kernel_detection_all); t3_cpu = clock(); get_realtime(&t3_elapsed); _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); quadruple *x = new quadruple[nrow]; quadruple *y = new quadruple[nrow]; quadruple *z = new quadruple[nrow]; for (int i = 0; i < nrow; i++) { y[i] = (quadruple)(i % 11); } #define NORMAL #ifdef NORMAL #if 0 if (!isSym && (n0 > 0)) { dslv->ComputeTransposedKernels(true); } #endif dslv->SpMV(y, x); if (n0 > 0) { dslv->ProjectionImageSingle(x); } dslv->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } _stat = 1; usleep(5000); t4_cpu = clock(); get_realtime(&t4_elapsed); dslv->SolveSingle(y, false, false, true); // with projection : true _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); if (n0 > 0) { dslv->ProjectionImageSingle(y); } t5_cpu = clock(); get_realtime(&t5_elapsed); #else dslv->SpMtV(y, x); // dslv->ProjectionImageSingle(x); dslv->SpMtV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } _stat = 1; usleep(5000); t4_cpu = clock(); get_realtime(&t4_elapsed); dslv->SolveSingle(y, false, true); // with projection : true // dslv->ProjectionImageSingle(y); t5_cpu = clock(); get_realtime(&t5_elapsed); _stat = (-1); usleep(5000); fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); #endif quadruple norm0, norm1; norm0 = quadruple(0); norm1 = quadruple(0); for (int i = 0; i < nrow; i++) { norm0 += x[i] * x[i]; norm1 += (y[i] - x[i]) * (y[i] - x[i]); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, sqrt(quad2double(norm1 / norm0))); #ifdef NORMAL dslv->SpMV(y, x); #else dslv->SpMtV(y, x); #endif norm0 = quadruple(0); norm1 = quadruple(0); for (int i = 0; i < nrow; i++) { norm0 += z[i] * z[i]; norm1 += (z[i] - x[i]) * (z[i] - x[i]); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, 
sqrt(quad2double(norm1 / norm0))); _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? " << pthid << endl; exit(0); } fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); #if 0 double *scalediag; scalediag = new double[nrow]; dslv->GetMatrixScaling(scalediag); for (int i = 0; i < nrow; i++) { for (int k = ptrows[i]; k < ptrows[i + 1]; k++) { if (indcols[k] == i) { fprintf(fp, "%i : %g : %g %g\n", i, scalediag[i], 1.0 / (scalediag[i] * scalediag[i]), coefs[k]); break; } } } delete [] scalediag; #endif delete dslv; delete [] ptrows; delete [] indcols; delete [] qcoefs; delete [] x; delete [] y; delete [] z; } fclose(fp); } FreeFem-sources-4.9/3rdparty/dissection/src/C-test/MM-Dissection.cpp000664 000000 000000 00000064646 14037356732 025353 0ustar00rootroot000000 000000 /*! \file MM-DissectionSolver.cpp \brief test rouinte of dissection solver reading Matrix Market format \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 \dahe Apr. 24th 2018 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
//
// Linking Dissection statically or dynamically with other modules is making
// a combined work based on Dissection. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #include "Compiler/OptionCompiler.hpp" #include "Compiler/OptionLibrary.hpp" #include "Driver/DissectionSolver.hpp" #include #include #include #include #include #ifdef BLAS_MKL #include #endif #ifdef POSIX_THREADS #include #endif #include #include #include #include using namespace std; #ifdef POSIX_THREADS static int _stat = (-1); void *thread_child(void *arg) { char buf[256]; int *pid = (int *)arg; unsigned int mem_tmp, mem_min, mem_max; double avg_mem; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; int stat0, stat1; stat0 = _stat; unsigned int count = 0U; // fprintf(stderr, "thread_child forked\n"); while(_stat != 0) { stat1 = _stat; if (stat1 == 1) { sprintf(buf, "/proc/%d/statm", *pid); ifstream fin(buf); fin >> mem_tmp; fin.close(); if (mem_tmp > mem_max) { mem_max = mem_tmp; } if (mem_tmp < mem_min) { mem_min = mem_tmp; } avg_mem += (double)mem_tmp; count++; } if ((stat1 == (-1)) && (stat0 == 1)) { fprintf(stderr, "used memory :min: %14.8e max: %14.8e avg: %14.8e count: %d\n", (double)mem_min * 4.0 / (1024.0 * 1024.0), (double)mem_max * 4.0 / (1024.0 * 1024.0), (avg_mem / (double)count) * 4.0 / (1024.0 * 1024.0), count); count = 0U; avg_mem = 0.0; mem_min = (1U << 31) - 1; mem_max = 0U; } stat0 = stat1; #ifdef POSIX_THREADS usleep(1000); #endif } // fprintf(stderr, "thread_child join\n count = %ld\n", count); pthread_exit(arg); return (void *)NULL; } #endif template bool generate_CSR(std::list* ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, T* val, bool symmetrize) { bool flag_modified = false; const T zero(0.0); // ind_cols_tmp = new std::list[nrow]; // val_tmp = new std::list[nrow]; int nnz1 = *nnz; for (int i = 0; i < *nnz; i++) { const int i0 = irow[i]; const int j0 = jcol[i]; const int ii = old2new[i0]; const int jj = old2new[j0]; if ((mask[i0] != 1) || (mask[j0] != 1)) { // fprintf(stderr, "%d %d\n", i0, j0); nnz1--; continue; } // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj); if 
(ind_cols_tmp[ii].empty()) { ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { if (ind_cols_tmp[ii].back() < jj) { ind_cols_tmp[ii].push_back(jj); val_tmp[ii].push_back(val[i]); } else { typename std::list::iterator iv = val_tmp[ii].begin(); std::list::iterator it = ind_cols_tmp[ii].begin(); for ( ; it != ind_cols_tmp[ii].end(); ++it, ++iv) { if (*it == jj) { fprintf(stderr, "already exits? (%d %d)\n", ii, jj); break; } if (*it > jj) { ind_cols_tmp[ii].insert(it, jj); val_tmp[ii].insert(iv, val[i]); break; } } } } } // symmetrize if (symmetrize) { for (int i = 0; i < nrow; i++) { for (std::list::iterator jt = ind_cols_tmp[i].begin(); jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; for (std::list::iterator it = ind_cols_tmp[jj].begin(); it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; // fprintf(stderr, "%d %d symmetric position found\n", i, jj); break; } } if (!flag) { flag_modified = true; // fprintf(stderr, "%d %d need to be added\n", i, jj); if (ind_cols_tmp[jj].back() < i) { ind_cols_tmp[jj].push_back(i); val_tmp[jj].push_back(zero); // fprintf(stderr, "%d %d append\n", i, jj); nnz1++; } else { typename std::list::iterator iv = val_tmp[jj].begin(); std::list::iterator it = ind_cols_tmp[jj].begin(); for (; it != ind_cols_tmp[jj].end(); ++it, ++iv) { if ((*it) > i) { ind_cols_tmp[jj].insert(it, i); val_tmp[jj].insert(iv, zero); nnz1++; // fprintf(stderr, "%d %d inserted\n", i, jj); break; } } } } // if (!flag); } } } if (symmetrize) { for (int i = 0; i < nrow; i++) { std::list::iterator jt = ind_cols_tmp[i].begin(); for ( ; jt != ind_cols_tmp[i].end(); ++jt) { const int jj = (*jt); bool flag = false; for (std::list::iterator it = ind_cols_tmp[jj].begin(); it != ind_cols_tmp[jj].end(); ++it) { if ((*it) == i) { flag = true; break; } } if (!flag) { fprintf(stderr, "%d %d position is not symmetric\n", i, jj); } } } } *nnz = nnz1; return flag_modified; } template bool generate_CSR(std::list* 
ind_cols_tmp, std::list* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, double* val, bool symmetrize); template bool generate_CSR >(std::list* ind_cols_tmp, std::list >* val_tmp, int nrow, int *nnz, int *mask, int *old2new, int *irow, int *jcol, complex* val, bool symmetrize); template void copy_CSR(int *indcols, int *ptrows, T* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp) { const T zero(0.0); ptrows[0] = 0; for (int i = 0; i < nrow; i++) { int k; int itmp = ind_cols_tmp[i].size(); if (upper_flag) { if (ind_cols_tmp[i].front() == i) { ptrows[i + 1] = ptrows[i] + itmp; k = ptrows[i]; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i]] = i; coefs[ptrows[i]] = zero; k = ptrows[i] + 1; } } else { k = ptrows[i]; if (ind_cols_tmp[i].back() == i || (!isSym)) { ptrows[i + 1] = ptrows[i] + itmp; } else { fprintf(stderr, "zero is added to diagonal : %d\n", i); ptrows[i + 1] = ptrows[i] + itmp + 1; indcols[ptrows[i + 1] - 1] = i; coefs[ptrows[i + 1] - 1] = zero; } } std::list::iterator it = ind_cols_tmp[i].begin(); typename std::list::iterator iv = val_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv, k++) { indcols[k] = *it; coefs[k] = *iv; } } // loop : i } template void copy_CSR(int *indcols, int *ptrows, double* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list* val_tmp); template void copy_CSR >(int *indcols, int *ptrows, complex* coefs, int nrow, bool upper_flag, bool isSym, std::list* ind_cols_tmp, std::list >* val_tmp); #if 0 inline double SpMAX(double a, double b) { return (a > b ? 
a : b); } template void normalize(const int type, const int n, const int *ptRows, const int *indCols, const T* coefs0, T* coefs, T* u) { const T zero(0.0); const T one(1.0); const T Wzero(0.0); T *v, *d; v = new T[n]; d = new T[n]; T alower, aupper, adiag; for (int i = 0; i < n; i++) { d[i] = zero; u[i] = zero; v[i] = zero; } for (int i = 0; i < n; i++) { for (int k = ptRows[i]; k < ptRows[i + 1]; k++) { int j = indCols[k]; if (j == i) { u[i] = coefs0[k] < zero ? -coefs0[k] : coefs0[k]; d[i] = u[i]; v[i] = SpMAX(v[i],u[i]); } else { T acoef = coefs0[k] < zero ? -coefs0[k] : coefs0[k]; v[i] = SpMAX(v[i], acoef); v[j] = SpMAX(v[j], acoef); } } } switch(type) { case DIAGONAL_SCALING: for (int i = 0; i < n; i++) { u[i] = ((u[i] != zero) ? sqrt(one / u[i]) : ((v[i] != zero) ? sqrt(one / v[i]) : one)); } break; case KKT_SCALING: for (int i = 0 ; i < n; i++) { if (d[i] != zero) { u[i] = sqrt(one / u[i]); } else { T xtmp = Wzero; for (int m = ptRows[i]; m < ptRows[i + 1]; m++) { const int j = indCols[m]; alower = coefs0[m]; int flag = 0; for (int n = ptRows[j]; n < ptRows[j + 1]; n++) { const int k = indCols[n]; if (k == j) { adiag = coefs0[n]; if (adiag != Wzero) { flag++; } else { continue; } } if (k == i) { aupper = coefs0[n]; flag++; } if (flag == 2) { break; } } // loop : n if (flag == 2) { xtmp += alower * aupper / adiag; } } // loop : m if (xtmp == zero) { u[i] = one; } else { u[i] = one / sqrt(xtmp < zero ? 
-xtmp : xtmp); } } // if (d[i] != zero) } // loop : i break; default: for (int i = 0; i < n; i++) { u[i] = one; } break; } // witch (type) // Scaling if (type == NO_SCALING) { for (int i = 0; i < ptRows[n]; i++) { coefs[i] = coefs0[i]; } // blas_copy(_ptRows[n], coefs0, 1, &_coefs[0], 1); // const W* coefs0 to T* _coefs is not supported becuase of type conversion } else { for ( int i = 0; i < n; i++) { for (int k = ptRows[i]; k < ptRows[i+1]; k++) { int j = indCols[k]; // _coefs[k] = tohigher((1), coefs0[k] * W(u[i]) * W(u[j])); coefs[k] = coefs0[k] * u[i] * u[j]; } } } delete [] d; delete [] v; } template void normalize(const int type, const int n, const int *ptRows, const int *indCols, const double* coefs0, double* coefs, double* u); #endif int main(int argc, char **argv) { int n, itmp, jtmp; char fname[256], fname1[256]; char buf[1024]; int nrow, nnz, flag, nnz1; int *ptrows, *indcols; int *irow, *jcol; double *val, *coefs; complex *valc, *ccoefs; int decomposer; int num_threads; int scaling = 1; double eps_pivot; int numlevels = -1; int minNodes = 128; std::list* ind_cols_tmp; std::list* val_tmp; std::list >* val_tmpc; FILE *fp; bool isSym, isComplex; bool upper_flag = true; bool isWhole = false; bool kernel_detection_all = false; int *indx_excl; int nexcl = 0; bool excl_flag = false; bool flag_modified = false; bool assume_invertible = false; if (argc < 6) { fprintf(stderr, "MM-dissection [data file] [decomposer] [num_threads] [eps_pivot] [num_levels] [scaling] [kerner_detection_all] [upper_flag] [minNodes]\n"); exit(-1); } strcpy(fname, argv[1]); decomposer = atoi(argv[2]); num_threads = atoi(argv[3]); eps_pivot = atof(argv[4]); numlevels = atof(argv[5]); if (argc >= 7) { scaling = atoi(argv[6]); } if (argc >= 8) { assume_invertible = atoi(argv[7]) > 0 ? 
true : false; } if (argc >= 9) { upper_flag = (atoi(argv[8]) == 1); isWhole = (atoi(argv[8]) == (-1)); } if (argc >= 10) { strcpy(fname1, argv[9]); excl_flag = true; } if (argc >= 11) { minNodes = atoi(argv[10]); } // read from the file if ((fp = fopen(fname, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname); } fgets(buf, 256, fp); // if (strstr(buf, "symmetric") != NULL) { isSym = true; } else { isSym = false; upper_flag = false; } if (strstr(buf, "complex") != NULL) { isComplex = true; } else { isComplex = false; } fprintf(stderr, "symmetric = %s\n", isSym ? "true " : "false"); fprintf(stderr, "scaling = %d\n", scaling); fprintf(stderr, "upper = %s\n", upper_flag ? "true" : "false"); if (kernel_detection_all) { fprintf(stderr, "kernel detection is activated for all submatrices\n"); } if (excl_flag) { fprintf(stderr, "list of singular nodes %s\n", fname1); } while (1) { fgets(buf, 256, fp); if (buf[0] != '%') { sscanf(buf, "%d %d %d", &nrow, &itmp, &nnz); break; } } irow = new int[nnz]; jcol = new int[nnz]; nnz1 = 0; if (isComplex) { double xreal, ximag; valc = new complex[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &jcol[i], &irow[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf %lf", &irow[i], &jcol[i], &xreal, &ximag); valc[i] = complex(xreal, ximag); irow[i]--; jcol[i]--; if (isSym && irow[i] < jcol[i]) { fprintf(stderr, "exchanged : %d > %d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } } else { // if (isComplex) val = new double[nnz]; if (upper_flag) { for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &jcol[i], &irow[i], &val[i]); // read lower irow[i]--; jcol[i]--; if (isSym && irow[i] > jcol[i]) { fprintf(stderr, "exchanged : %d > 
%d\n", irow[i], jcol[i]); itmp = irow[i]; irow[i] = jcol[i]; jcol[i] = itmp; } } } else { int ii = 0; int itmp, jtmp; double vtmp; for (int i = 0; i < nnz; i++) { fscanf(fp, "%d %d %lf", &itmp, &jtmp, &vtmp); if (vtmp != 0.0 || (itmp == jtmp)) { // if (true) { // 04 Apr.2018 irow[ii] = itmp - 1; jcol[ii] = jtmp - 1; val[ii] = vtmp; ii++; } else { nnz1++; } } } // else } // if (isComplex) fclose (fp); if (nnz1 > 0) { fprintf(stderr, "%s %d : %d zero entries excluded %d -> %d\n", __FILE__, __LINE__, nnz1, nnz, (nnz - nnz1)); nnz = nnz - nnz1; } if (excl_flag) { if ((fp = fopen(fname1, "r")) == NULL) { fprintf(stderr, "fail to open %s\n", fname1); } fgets(buf, 256, fp); sscanf(buf, "# %d", &nexcl); indx_excl = new int[nexcl]; for (int i = 0; i < nexcl; i++) { fgets(buf, 256, fp); sscanf(buf, "%d", &itmp); indx_excl[i] = itmp; } fclose(fp); } int *mask = new int[nrow]; int *old2new = new int[nrow]; for (int i = 0; i < nrow; i++) { mask[i] = 1; } for (int i = 0; i < nexcl; i++) { mask[indx_excl[i]] = 0; } itmp = 0; jtmp = nrow - nexcl; for (int i = 0; i < nrow; i++) { if (mask[i] == 1) { old2new[i] = itmp++; } else { old2new[i] = jtmp++; } } nrow -= nexcl; ind_cols_tmp = new std::list[nrow]; fprintf(stderr, "%s %d : getnerate_CSR\n", __FILE__, __LINE__); if (isComplex) { val_tmpc = new std::list >[nrow]; nnz1 = nnz; flag_modified = generate_CSR >(ind_cols_tmp, val_tmpc, nrow, &nnz1, mask, old2new, irow, jcol, valc, (!isSym)); } else { val_tmp = new std::list[nrow]; nnz1 = nnz; flag_modified = generate_CSR(ind_cols_tmp, val_tmp, nrow, &nnz1, mask, old2new, irow, jcol, val, (!isSym)); } if (flag_modified) { fprintf(stderr, "%s %d : matrix is not structual symmetric %d ->%d\n", __FILE__, __LINE__, nnz, nnz1); } nnz = nnz1; delete [] irow; delete [] jcol; delete [] mask; delete [] old2new; if (isComplex) { delete [] valc; } else { delete [] val; } if (upper_flag) { for (int i = 0; i < nrow; i++) { if (ind_cols_tmp[i].front() != i) { nnz++; } } } else { for (int i = 0; i < 
nrow; i++) { if (ind_cols_tmp[i].back() != i) { nnz++; } } } fprintf(stderr, "%s %d : copy_CSR\n", __FILE__, __LINE__); ptrows = new int[nrow + 1]; indcols = new int[nnz]; if (isComplex) { ccoefs = new complex[nnz]; copy_CSR >(indcols, ptrows, ccoefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmpc); } else { coefs = new double[nnz]; copy_CSR(indcols, ptrows, coefs, nrow, upper_flag, isSym, ind_cols_tmp, val_tmp); } delete [] ind_cols_tmp; if (isComplex) { delete [] val_tmpc; } else { delete [] val_tmp; } int pid = get_process_id(); #if 1 fprintf(stderr, "pid = %d\n", pid); sprintf(fname, "dissection.%04d.log", pid); fp = fopen(fname, "a"); #else fp = stderr; #endif for (int i = 0; i < argc; i++) { fprintf(fp, "%s ", argv[i]); } fprintf(fp, "\n"); fprintf(stderr, "%s %d : before pthread_create\n", __FILE__, __LINE__); void* results; #ifdef POSIX_THREADS pthread_attr_t th_attr; pthread_t thread; pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); int pthid = pthread_create(&thread, &th_attr, &thread_child, (void *)&pid); if (pthid != 0) { cout << "bad thread creation ? 
" << pid << endl; exit(0); } fprintf(stderr, "%s %d : after pthread_create\n", __FILE__, __LINE__); #endif if (isWhole) { isSym = true; upper_flag = false; } if (isComplex) { DissectionSolver, double>*dslv = new DissectionSolver, double>(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); t1_cpu = clock(); get_realtime(&t1_elapsed); t2_cpu = clock(); get_realtime(&t2_elapsed); #ifdef POSIX_THREADS _stat = 1; usleep(5000); #endif dslv->NumericFact(0, (complex *)ccoefs, scaling, eps_pivot, kernel_detection_all, 4, -1.0, assume_invertible); #ifdef POSIX_THREADS _stat = (-1); #endif t3_cpu = clock(); get_realtime(&t3_elapsed); #ifdef POSIX_THREADS usleep(5000); #endif fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); complex *x = new complex[nrow]; complex *y = new complex[nrow]; complex *z = new complex[nrow]; for (int i = 0; i < nrow; i++) { y[i] = complex((double)(i % 11), 0.0); } dslv->SpMV(y, x); dslv->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } t4_cpu = clock(); get_realtime(&t4_elapsed); #ifdef POSIX_THREADS _stat = 1; usleep(5000); #endif dslv->SolveSingle(y, true, false, true); // with projection + scaling #ifdef POSIX_THREADS _stat = (-1); usleep(5000); #endif fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); t5_cpu = clock(); get_realtime(&t5_elapsed); double norm0, norm1; norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += x[i].real() * x[i].real() + x[i].imag() * x[i].imag(); 
complex ztmp = y[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, sqrt(norm1 / norm0)); dslv->SpMV(y, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i].real() * z[i].real() + z[i].imag() * z[i].imag(); complex ztmp = z[i] - x[i]; norm1 += ztmp.real() * ztmp.real() + ztmp.imag() * ztmp.imag(); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, sqrt(norm1 / norm0)); #ifdef POSIX_THREADS _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? " << pthid << endl; exit(0); } #endif fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); delete dslv; delete [] ptrows; delete [] indcols; delete [] ccoefs; delete [] x; delete [] y; delete [] z; } // if (isComplex) else { DissectionSolver *dslv = new DissectionSolver(num_threads, true, 0, fp); int called = 0; clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu, t4_cpu, t5_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed, t4_elapsed, t5_elapsed; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif t0_cpu = clock(); get_realtime(&t0_elapsed); dslv->SymbolicFact(nrow, (int *)ptrows, (int *)indcols, isSym, upper_flag, isWhole, decomposer, numlevels, minNodes); // sym, upper // dslv->SaveMMMatrix(0, coefs); // exit(-1); t1_cpu = clock(); 
get_realtime(&t1_elapsed); #ifdef POSIX_THREADS _stat = 1; usleep(5000); #endif t2_cpu = clock(); get_realtime(&t2_elapsed); dslv->NumericFact(0, (double *)coefs, scaling, eps_pivot, kernel_detection_all, 4, -1.0, assume_invertible); t3_cpu = clock(); get_realtime(&t3_elapsed); #ifdef POSIX_THREADS _stat = (-1); usleep(5000); #endif fprintf(stderr, "%s %d : NumericFact() done\n", __FILE__, __LINE__); int n0; n0 = dslv->kern_dimension(); fprintf(fp, "%s %d : ## kernel dimension = %d\n", __FILE__, __LINE__, n0); double *x = new double[nrow]; double *y = new double[nrow]; double *z = new double[nrow]; for (int i = 0; i < nrow; i++) { y[i] = (double)(i % 11); } dslv->SpMV(y, x); if (n0 > 0) { dslv->ProjectionKernelOrthSingle(y, "given", false); } dslv->SpMV(x, y); for (int i = 0; i < nrow; i++) { z[i] = y[i]; } #ifdef POSIX_THREADS _stat = 1; usleep(5000); #endif t4_cpu = clock(); get_realtime(&t4_elapsed); dslv->SolveSingle(y, true, false, true); // with projection + scaling #ifdef POSIX_THREADS _stat = (-1); usleep(5000); #endif fprintf(stderr, "%s %d : SolveSingle() done\n", __FILE__, __LINE__); if (n0 > 0) { dslv->ProjectionImageSingle(y); } t5_cpu = clock(); get_realtime(&t5_elapsed); double norm0, norm1; norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += x[i] * x[i]; norm1 += (y[i] - x[i]) * (y[i] - x[i]); } fprintf(fp, "%s %d : ## error = %18.7e\n", __FILE__, __LINE__, sqrt(norm1 / norm0)); dslv->SpMV(y, x); norm0 = 0.0; norm1 = 0.0; for (int i = 0; i < nrow; i++) { norm0 += z[i] * z[i]; norm1 += (z[i] - x[i]) * (z[i] - x[i]); } fprintf(fp, "%s %d : ## residual = %18.7e\n", __FILE__, __LINE__, sqrt(norm1 / norm0)); #ifdef POSIX_THREADS _stat = 0; pthread_attr_destroy(&th_attr); pthid = pthread_join(thread, &results); if (pthid != 0) { cout << "bad thread join ? 
" << pthid << endl; exit(0); } #endif fprintf(fp, "%s %d : ## symbolic fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); fprintf(fp, "%s %d : ## numeric fact : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); fprintf(fp, "%s %d : ## solve single RHS : cpu time = %.4e elapsed time = %.4e\n", __FILE__, __LINE__, (double)(t5_cpu - t4_cpu) / (double)CLOCKS_PER_SEC, convert_time(t5_elapsed, t4_elapsed)); delete dslv; delete [] ptrows; delete [] indcols; delete [] coefs; delete [] x; delete [] y; delete [] z; } fclose(fp); } FreeFem-sources-4.9/3rdparty/dissection/src/C-test/Makefile.Solaris000664 000000 000000 00000002302 14037356732 025264 0ustar00rootroot000000 000000 #! \file Makefile # \brief to create MM-Dissection # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jun. 20th 2014 # \date Jul. 12th 2015 # \date Feb. 29th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Diisection. If not, see . # include ../Defs.inc DISSECT_LIB_DIR = $(PROJ_LIBDIR) # # modifications to predefined variables in Defs.inc # all: MM-Dissection.o .cpp.o: @echo Compiling $< $(CXX) $(CCFLAGS) -I../ -c $< -o $@ clean: @echo Cleaning the trash... 
rm -fr *~ *.o *.so *.mod core *.d *.dll $(DIRECTORY) \ MM-Dissection MM-Dissection-quad MM-Dissection-double-quad FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/000775 000000 000000 00000000000 14037356732 022627 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/DebugUtils.hpp000664 000000 000000 00000005511 14037356732 025411 0ustar00rootroot000000 000000 /*! \file DebugUtils.hpp \brief Some macros and functions to help debug session \author Xavier Juvigny, ONERA \date Jan. 19th 2005 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. 
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _DISSECTION_COMPILER_DEBUGUTILS_HPP_ #define _DISSECTION_COMPILER_DEBUGUTILS_HPP_ #include #if defined(DISSECTION_DEBUG) # define CHECK(o,msg) \ assert((o)&&(msg)) #else # define CHECK(o,msg) #endif #if defined(DISSECTION_DEBUG) # define DBG_PRINT printf #else # define DBG_PRINT // #endif #if defined(TRACE) # define TRACE printf #else # define TRACE // #endif #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/DissectionIO.hpp000664 000000 000000 00000005375 14037356732 025706 0ustar00rootroot000000 000000 /*! \file DissectionIO.hpp \brief higher precision arithmetic for Kernel Detection \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Apr. 30th 2017 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. 
// // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _COMPILER_DISSESCTIONIOC_H #define _COMPILER_DISSESCTIONIOC_H #include #include inline void diss_printf(const bool verbose, FILE *fp, const char* format, ...) { va_list arg; if (verbose && (fp != NULL)) { va_start(arg, format); vfprintf(fp, format, arg); va_end(arg); } } #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/OptionCompiler.hpp000664 000000 000000 00000007113 14037356732 026305 0ustar00rootroot000000 000000 /*! \file DebugUtils.hpp \brief compatibility of compilers \author Xavier Juvigny, ONERA \date Jan. 12th 2005 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. 
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _COMPILER_OPTIONCOMPILER_H # define _COMPILER_OPTIONCOMPILER_H // ========= Append a underscore or not for Fortran subroutines ======== #ifdef WIN32 # ifdef NB_NO_UNDERSCORE # define FORTRAN_DECL_WL(x_windows,x_linux) x_windows # else # ifdef NB_DBLEUNDERSCORE # define FORTRAN_DECL_WL(x_windows,x_linux) x_windows##__ # else # define FORTRAN_DECL_WL(x_windows,x_linux) x_windows##_ # endif # endif #else # ifdef NB_NO_UNDERSCORE # define FORTRAN_DECL_WL(x_windows,x_linux) x_linux # else # ifdef NB_DBLEUNDERSCORE # define FORTRAN_DECL_WL(x_windows,x_linux) x_linux##__ # else # define FORTRAN_DECL_WL(x_windows,x_linux) x_linux##_ # endif # endif #endif # ifdef NB_NO_UNDERSCORE # define FORTRAN_DECL(x) x # else # ifdef NB_DBLEUNDERSCORE # define FORTRAN_DECL(x) x##__ # else # define FORTRAN_DECL(x) x##_ # endif # endif #ifdef _MSC_VER # ifdef _DLL # ifdef DISSECTION_EXPORTS // when building DLL # define DISSECTION_API __declspec(dllexport) # else // when client uses DLL # define DISSECTION_API __declspec(dllimport) # endif # else # define DISSECTION_API # endif #else # define 
DISSECTION_API #endif #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/OptionLibrary.hpp000664 000000 000000 00000007325 14037356732 026144 0ustar00rootroot000000 000000 /*! \file OptionLibrary.hpp \brief compatibility for Microsoft compiler \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Feb. 23th 2013 \date Feb. 29th 2016 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. 
If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _COMPILER_OPTIONLIBRARY_H # define _COMPILER_OPTIONLIBRARY_H #ifdef _MSC_VER #define NOMINMAX #define _USE_MATH_DEFINES #if (_MSC_VER < 1500) // in case of C99 is not supported # define M_PI 3.14159265358979323846264338327950288 double log2(double x) { return (log(x) / log(2.0)); } #else # include #endif # include # include #else # include # include #endif #ifdef SX_ACE #include #include #endif static inline bool random_bool() { #ifdef _MSC_VER double r = (double)rand() / (double)RAND_MAX; return (r < 0.5 ? true : false); #else #ifdef SX_ACE double r = (double)rand() / (double)RAND_MAX; return (r < 0.5 ? true : false); #else #ifdef __SUNPRO_CC return (random() < 1073741824L); // 2^31 / 2 #else return (random() < (RAND_MAX / 2L)); #endif #endif #endif } static inline int get_process_id() { #ifdef _MSC_VER return (int)_getpid(); #else return (int)getpid(); #endif } // Intel compiler + older GNU C++ library may not have to_string() // SX_ACE does not have #ifdef NO_TO_STRING #include #include inline std::string to_string(int num) { char buf[256]; sprintf(buf, "%d", num); std::string st = buf; return st; } #endif #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/arithmetic.cpp000664 000000 000000 00000010001 14037356732 025454 0ustar00rootroot000000 000000 /*! 
\file arithmetic.cpp \brief higher precision arithmetic for Kernel Detection \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 17th 2015 \date Feb. 29th 2016 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include #include "Compiler/arithmetic.hpp" #include "Compiler/blas.hpp" template<> void printscalar(const bool verbose, FILE *fp, double x) { if (verbose && (fp != NULL)) { fprintf(fp, "%16.8e ", x); } } template<> void printscalar(const bool verbose, FILE *fp, float x) { if (verbose && (fp != NULL)) { fprintf(fp, "%16.8e ", x); } } template<> void printscalar(const bool verbose, FILE *fp, quadruple x) { if (verbose && (fp != NULL)) { fprintf(fp, "%16.8e ", quad2double(x)); } } template<> void printscalar >(const bool verbose, FILE *fp, complex x) { if (verbose && (fp != NULL)) { fprintf(fp, "(%16.8e %16.8e) ", x.real(), x.imag()); } } template<> void printscalar >(const bool verbose, FILE *fp, complex x) { if (verbose && (fp != NULL)) { fprintf(fp, "(%16.8e %16.8e) ", x.real(), x.imag()); } } template<> void printscalar >(const bool verbose, FILE *fp, complex x) { if (verbose && (fp != NULL)) { fprintf(fp, "(%16.8e %16.8e) ", quad2double(x.real()), quad2double(x.imag())); } } template void printscalar(const bool verbose, FILE *fp, T x) { fprintf(stderr, "%s %d : printscalar is not implented\n", __FILE__, __LINE__); } #ifndef NO_OCTRUPLE template void printscalar(const bool verbose, FILE *fp, octruple x); template void printscalar >(const bool verbose, FILE *fp, complex x); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/arithmetic.hpp000664 000000 000000 00000032662 14037356732 025502 0ustar00rootroot000000 000000 /*! 
\file arithmetic.hpp \brief higher precision arithmetic for Kernel Detection \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 17th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _COMPILER_ARITHMETIC_H # define _COMPILER_ARITHMETIC_H # include "Compiler/OptionCompiler.hpp" #include #include #include // scalbn modification for LLVM is taken from // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0415r0.html template T scalbn(T e, int exp) { #ifndef __clang__ return scalbn(e, exp); #else if (!exp) return e; T mult(1.0); if (exp > 0) { mult = FLT_RADIX; -- exp; } else { ++ exp; exp = -exp; mult /= FLT_RADIX; } while (exp > 0) { if (!(exp & 1)) { mult *= mult; exp >>= 1; } else { e *= mult; -- exp; } } return e; #endif } #include using std::complex; #ifdef SX_ACE #define LONG_DOUBLE #endif template inline T machine_epsilon() { return T(DBL_EPSILON); }; template<> inline double machine_epsilon() { return DBL_EPSILON; } template inline std::string tostring(const T &x) { std::string dummy; return dummy; }; template inline T sqrt(const T &x) { }; #ifdef DD_REAL #include "qd/dd_real.h" typedef dd_real quadruple; // implementation of quadruple precision #include "qd/qd_real.h" typedef qd_real octruple; // implementation of octruple precision inline double quad2double(const quadruple &x) { return x.x[0]; } inline double oct2double(const octruple &x) { return x.x[0]; } inline quadruple oct2quad(const octruple &x) { return dd_real(x.x[0], x.x[1]); } template<> inline quadruple machine_epsilon() { // argument is dummy return quadruple(dd_real::_eps); // to define type } template<> inline quadruple machine_epsilon() { // argument is dummy return 
quadruple(DBL_EPSILON); } template<> inline double machine_epsilon() { // argument is dummy return double(dd_real::_eps); // to define type } template<> inline std::string tostring(const quadruple &x) { return x.to_string(dd_real::_ndigits); } template<> inline std::string tostring >(const complex &x) { return "( " + x.real().to_string(dd_real::_ndigits) + " " + x.imag().to_string(dd_real::_ndigits) + " )"; } template<> inline double sqrt (const double &x) {return sqrt(x); } template<> inline quadruple sqrt (const quadruple &x) {return sqrt(x); } template<> inline octruple sqrt (const octruple &x) {return sqrt(x); } template<> inline float sqrt (const float &x) {return sqrtf(x); } #else #ifdef LONG_DOUBLE typedef long double quadruple; inline double quad2double(const quadruple &x) { return (double)x; } template<> inline quadruple machine_epsilon() { return (long double)1.93e-34; // need to be updated for LONG DOUBLE } template<> inline quadruple machine_epsilon() { return (long double)DBL_EPSILON; } #if ((!defined(SX_ACE)) && (!defined(_MSC_VER))) inline long double fabs(const long double x) { return (x > 0.0L ? 
x : (-x)); } #endif template<> inline double sqrt (const double &x) {return sqrt(x); } template<> inline quadruple sqrt (const quadruple &x) {return sqrtl(x); } emplate<> inline float sqrt (const float &x) {return sqrtf(x); } #else #ifdef FAST_DD #include typedef dd_real quadruple; // implementation of quadruple precisio inline double quad2double(const quadruple &x) { return to_double(x); } template<> inline quadruple machine_epsilon() { // argument is dummy return dd_real(1.93e-34); // need to be updated for LONG DOUBLE } template<> inline std::string tostring(const quadruple &x) { char buf[256]; sprintf(buf, "%24.16e", quad2double(x)); return std::string(buf); } template<> inline std::string tostring >(const complex &x) { char buf[256]; sprintf(buf, "( %24.16e 24.16e )", quad2double(x.real()), quad2double(x.imag())); return std::string(buf); } // atan2 is not defined in fast_dd : approximated by double! inline quadruple atan2(const quadruple &y, const quadruple &x) { double t; t = atan2(quad2double(y), quad2double(x)); return dd_real(t); } #else // #ifdef FAST_DD #include typedef __float128 quadruple; inline double quad2double(const quadruple &x) { return (double)x; } template<> inline std::string tostring (const quadruple &x) { char buf[256]; quadmath_snprintf(buf, 256, "%.32Qe", x); return std::string(buf); } template<> inline std::string tostring >(const complex &x) { char buf[256]; quadmath_snprintf(buf, 256, "%.32Qe %.32Qe", x.real(), x.imag()); return std::string(buf); } template<> inline double sqrt (const double &x) {return sqrt(x); } template<> inline quadruple sqrt (const quadruple &x) {return sqrtq(x); } template<> inline float sqrt (const float &x) {return sqrtf(x); } #endif #endif #endif template<> inline std::string tostring(const double &x) { char buf[256]; sprintf(buf, "%24.16e", x); return std::string(buf); } template<> inline std::string tostring >(const complex &x) { char buf[256]; sprintf(buf, "(%24.16e %24.16e)", x.real(), x.imag()); return 
std::string(buf); } template<> inline std::string tostring(const float &x) { char buf[256]; sprintf(buf, "%16.8e", x); return std::string(buf); } template<> inline std::string tostring >(const complex &x) { char buf[256]; sprintf(buf, "(%16.8e %16.8e)", x.real(), x.imag()); return std::string(buf); } #ifndef NO_OCTRUPLE template<> inline std::string tostring(const octruple &x) { return x.to_string(); } #endif template inline Z conv_prec(const T &x){ fprintf(stderr, "%s %d : specialized template is not implemented\n", __FILE__, __LINE__); return Z(0.0); }; template<> inline double conv_prec(const quadruple &y) { return quad2double(y); } template<> inline quadruple conv_prec(const double &y) { return quadruple(y); } template<> inline quadruple conv_prec(const quadruple &y) { return y; } template<> inline complex conv_prec, complex >(const complex &y) { return y; } #ifndef NO_OCTRUPLE template<> inline octruple conv_prec(const quadruple &y) { return octruple(y); } #endif template<> inline complex conv_prec, double>(const double &y) { return complex(quadruple(y), quadruple(0.0)); } template<> inline complex conv_prec,complex > (const complex &y) { return complex(quadruple(y.real()), quadruple(y.imag())); } #ifndef NO_OCTRUPLE template<> inline complex conv_prec, quadruple> (const quadruple &y) { return complex(octruple(y), octruple(0.0)); } template<> inline complex conv_prec, complex >(const complex &y) { return complex(octruple(y.real()), octruple(y.imag())); } #endif template<> inline complex conv_prec, complex >(const complex &x) { return complex(quad2double(x.real()), quad2double(x.imag())); } #ifndef NO_OCTRUPLE template<> inline quadruple conv_prec(const octruple &x) { return oct2quad(x); } template<> inline complex conv_prec, complex >(const complex &x) { return complex(oct2quad(x.real()), oct2quad(x.imag())); } #endif template<> inline double conv_prec(const double &x) { return x; } template<> inline complex conv_prec, complex >(const complex &x) { return x; } 
template<> inline float conv_prec(const double &y) { return (float)y; } template<> inline double conv_prec(const float &y) { return (double)y; } template inline double todouble(const T &x){ return double(x); }; template<> inline double todouble(const double &x) { return x; } template<> inline double todouble(const float &x) { return x; } template<> inline double todouble(const quadruple &x) { return quad2double(x); } template<> inline double todouble >(const complex &x) { return x.real(); } template<> inline double todouble >(const complex &x) { return x.real(); } template<> inline double todouble >(const complex &x) { return quad2double(x.real()); } #ifndef NO_OCTRUPLE template<> inline double todouble(const octruple &x) { return oct2double(x); } template<> inline double todouble >(const complex &x) { return oct2double(x.real()); } #endif template void printscalar(const bool verbose, FILE *fp, T x); template<> void printscalar(const bool verbose, FILE *fp, double x); template<> void printscalar(const bool verbose, FILE *fp, float x); template<> void printscalar(const bool verbose, FILE *fp, quadruple x); template<> void printscalar >(const bool verbose, FILE *fp, complex x); template<> void printscalar >(const bool verbose, FILE *fp, complex x); template<> void printscalar >(const bool verbose, FILE *fp, complex x); template T tocomplex(const U &x); template<> double tocomplex(const double &x); template<> float tocomplex(const float &x); template<> quadruple tocomplex(const quadruple &x); template<> complex tocomplex, double>(const double &x); template<> complex tocomplex, quadruple>(const quadruple &x); template inline T tocomplex(const U &x) { fprintf(stderr, "%s %d : specilized template is not defined\n", __FILE__, __LINE__); return T(0.0); } template<> inline double tocomplex(const double &x){ return x; } template<> inline float tocomplex(const float &x){ return x; } template<> inline quadruple tocomplex(const quadruple &x) { return x; } template<> inline 
complex tocomplex, double>(const double &x) { return std::complex(x, 0.0); } template<> inline complex tocomplex, quadruple>(const quadruple &x) { quadruple zero(0.0); return std::complex(x, zero); } #ifndef NO_OCTRUPLE template<> inline octruple tocomplex(const octruple &x) { return x; } template<> inline complex tocomplex >(const octruple &x) { octruple zero(0.0); return std::complex(x, zero); } #endif #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/blas.cpp000664 000000 000000 00000305750 14037356732 024266 0ustar00rootroot000000 000000 /*! \file blas.cpp \brief BLAS function interface \author Xavier Juvigny, ONERA \date Jan. 12th 2005 \modification function from NETLIB source with BLAS and CBLAS wrapper \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 16th 2015 \date Jul. 17th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. 
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #include "Compiler/blas.hpp" #include #include #define FORCE_EXPLICIT_INSTANTIATION #if __clang_major__ == 4 #undef FORCE_EXPLICIT_INSTANTIATION #endif #ifdef BLAS_FORTRAN extern "C" { // double precision extern int FORTRAN_DECL(idamax)(const int &n, // return Fortran index const double *x, const int &incx); extern int FORTRAN_DECL(izamax)(const int &n, // retrun Fortran index const double *x, const int &incx); extern void FORTRAN_DECL(dcopy)(const int &n, const double *x, const int &incx, double *y, const int &incy); extern void FORTRAN_DECL(zcopy)(const int &n, const double *x, const int &incx, double *y, const int &incy); extern void FORTRAN_DECL(daxpy)(const int &n, const double &alpha, const double *x, const int &incx, double *y, const int &incy); extern void FORTRAN_DECL(zaxpy)(const int &n, const double *alpha, const double *x, const int &incx, double *y, const int &incy); extern double FORTRAN_DECL(ddot)(const int &n, const double *x, const int &incx, const double *y, const int &incy); extern void FORTRAN_DECL(zdotc)(double *z, const int &n, const double *x, const int &incx, const double *y, const int &incy); extern void FORTRAN_DECL(dscal)(const int &N, const double &alpha, double *X, const int &incX); extern void FORTRAN_DECL(zscal)(const int &N, const double *alpha, double *X, const int &incX); extern void FORTRAN_DECL(dgemv)(unsigned char &tra_, const int &m, const int &n, const double &alpha, const double *A, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy); extern void FORTRAN_DECL(zgemv)(unsigned char &tra_, const int &m, const int &n, const double *alpha, const double *A, const int &lda, const double *x, const int &incx, const double *beta, double *y, const int &incy); extern void FORTRAN_DECL(dtrsv)(unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &N, const double *A, const int &lda, double *X, const int &incX); extern void FORTRAN_DECL(ztrsv)(unsigned char &uplo_, 
unsigned char &transa_, unsigned char &diag_, const int &N, const double *A, const int &lda, double *X, const int &incX); extern void FORTRAN_DECL(dsyr)(unsigned char &uplo_, const int &N, const double &alpha, const double *X, const int &incX, double *A, const int &lda); extern void FORTRAN_DECL(dsyr2)(unsigned char &uplo_, const int &N, const double &alpha, const double *X, const int &incX, const double *Y, const int &incY, double *A, const int &lda); extern void FORTRAN_DECL(dger)(const int &M, const int &N, const double &alpha, const double *X, const int &incX, const double *Y, const int &incY, double *A, const int &lda); extern void FORTRAN_DECL(zgeru)(const int &M, const int &N, const double *alpha, double *X, const int &incX, double *Y, const int &incY, double *A, const int &lda); extern void FORTRAN_DECL(zgerc)(const int &M, const int &N, const double *alpha, const double *X, const int &incX, const double *Y, const int &incY, double *A, const int &lda); extern void FORTRAN_DECL(dtrsm)(unsigned char &side_, unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &M, const int &N, const double &alpha, const double *A, const int &lda, double *B, const int &ldb); extern void FORTRAN_DECL(ztrsm)(unsigned char &side_, unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &M, const int &N, const double *alpha, const double *A, const int &lda, double *B, const int &ldb); extern void FORTRAN_DECL(dgemm)(unsigned char &tra_, unsigned char &trb_, const int &m, const int &n, const int &k, const double &alpha, const double *A, const int &lda, const double *B, const int &ldb, const double &beta, double *C, const int &ldc); extern void FORTRAN_DECL(zgemm)(unsigned char &tra_, unsigned char &trb_, const int &m, const int &n, const int &k, const double *alpha, const double *A, const int &lda, const double *B, const int &ldb, const double *beta, double *C, const int &ldc); // single precision extern int FORTRAN_DECL(isamax)(const 
int &n, // return Fortran index const float *x, const int &incx); extern int FORTRAN_DECL(icamax)(const int &n, // retrun Fortran index const float *x, const int &incx); extern void FORTRAN_DECL(scopy)(const int &n, const float *x, const int &incx, float *y, const int &incy); extern void FORTRAN_DECL(ccopy)(const int &n, const float *x, const int &incx, float *y, const int &incy); extern void FORTRAN_DECL(saxpy)(const int &n, const float &alpha, const float *x, const int &incx, float *y, const int &incy); extern void FORTRAN_DECL(caxpy)(const int &n, const float *alpha, const float *x, const int &incx, float *y, const int &incy); extern float FORTRAN_DECL(sdot)(const int &n, const float *x, const int &incx, const float *y, const int &incy); extern void FORTRAN_DECL(cdotc)(float *z, const int &n, const float *x, const int &incx, const float *y, const int &incy); extern void FORTRAN_DECL(sscal)(const int &N, const float &alpha, float *X, const int &incX); extern void FORTRAN_DECL(cscal)(const int &N, const float *alpha, float *X, const int &incX); extern void FORTRAN_DECL(sgemv)(unsigned char &tra_, const int &m, const int &n, const float &alpha, const float *A, const int &lda, const float *x, const int &incx, const float &beta, float *y, const int &incy); extern void FORTRAN_DECL(cgemv)(unsigned char &tra_, const int &m, const int &n, const float *alpha, const float *A, const int &lda, const float *x, const int &incx, const float *beta, float *y, const int &incy); extern void FORTRAN_DECL(strsv)(unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &N, const float *A, const int &lda, float *X, const int &incX); extern void FORTRAN_DECL(ctrsv)(unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &N, const float *A, const int &lda, float *X, const int &incX); extern void FORTRAN_DECL(ssyr)(unsigned char &uplo_, const int &N, const float &alpha, const float *X, const int &incX, float *A, const int &lda); extern void 
FORTRAN_DECL(ssyr2)(unsigned char &uplo_, const int &N, const float &alpha, const float *X, const int &incX, const float *Y, const int &incY, float *A, const int &lda); extern void FORTRAN_DECL(sger)(const int &M, const int &N, const float &alpha, const float *X, const int &incX, const float *Y, const int &incY, float *A, const int &lda); extern void FORTRAN_DECL(cgeru)(const int &M, const int &N, const float *alpha, float *X, const int &incX, float *Y, const int &incY, float *A, const int &lda); extern void FORTRAN_DECL(cgerc)(const int &M, const int &N, const float *alpha, const float *X, const int &incX, const float *Y, const int &incY, float *A, const int &lda); extern void FORTRAN_DECL(strsm)(unsigned char &side_, unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &M, const int &N, const float &alpha, const float *A, const int &lda, float *B, const int &ldb); extern void FORTRAN_DECL(ctrsm)(unsigned char &side_, unsigned char &uplo_, unsigned char &transa_, unsigned char &diag_, const int &M, const int &N, const float *alpha, const float *A, const int &lda, float *B, const int &ldb); extern void FORTRAN_DECL(sgemm)(unsigned char &tra_, unsigned char &trb_, const int &m, const int &n, const int &k, const float &alpha, const float *A, const int &lda, const float *B, const int &ldb, const float &beta, float *C, const int &ldc); extern void FORTRAN_DECL(cgemm)(unsigned char &tra_, unsigned char &trb_, const int &m, const int &n, const int &k, const float *alpha, const float *A, const int &lda, const float *B, const int &ldb, const float *beta, float *C, const int &ldc); } #endif // BLAS 1 // dz copy #ifndef BLAS_GENERIC template<> void blas_copy(const int n, const double* x, const int incx, double* y, const int incy) { if ((incx == 1) && (incy == 1)) { memcpy((void *)y, (void *)x, n * sizeof(double)); } else { #ifdef BLAS_FORTRAN FORTRAN_DECL(dcopy)(n, x, incx, y, incy); #else cblas_dcopy((BLAS_INT)n, x, (BLAS_INT)incx, y, 
(BLAS_INT)incy); #endif } } template<> void blas_copy >(const int n, const complex* x, const int incx, complex* y, const int incy) { if ((incx == 1) && (incy == 1)) { memcpy((void *)y, (void *)x, n * sizeof(complex)); } else { #ifdef BLAS_FORTRAN FORTRAN_DECL(zcopy)(n, (double *)x, incx, (double *)y, incy); #else cblas_zcopy((BLAS_INT)n, (void *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy); #endif } } template<> void blas_copy(const int n, const float* x, const int incx, float* y, const int incy) { if ((incx == 1) && (incy == 1)) { memcpy((void *)y, (void *)x, n * sizeof(float)); } else { #ifdef BLAS_FORTRAN FORTRAN_DECL(scopy)(n, x, incx, y, incy); #else cblas_scopy((BLAS_INT)n, x, (BLAS_INT)incx, y, (BLAS_INT)incy); #endif } } template<> void blas_copy >(const int n, const complex* x, const int incx, complex* y, const int incy) { if ((incx == 1) && (incy == 1)) { memcpy((void *)y, (void *)x, n * sizeof(complex)); } else { #ifdef BLAS_FORTRAN FORTRAN_DECL(ccopy)(n, (float *)x, incx, (float *)y, incy); #else cblas_ccopy((BLAS_INT)n, (void *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy); #endif } } #endif template void blas_copy(const int n, const T* x, const int incx, T *y, const int incy) { if ((incx == 1) && (incy == 1)) { memcpy((void *)y, (void *)x, n * sizeof(T)); } else { int ix = 0; int iy = 0; for (int i = 0; i < n; i++, ix += incx, iy += incy) { y[iy] = x[ix]; } } } #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_copy(const int n, const double* x, const int incx, double *y, const int incy); template void blas_copy > (const int n, const complex* x, const int incx, complex *y, const int incy); template void blas_copy(const int n, const float* x, const int incx, float *y, const int incy); template void blas_copy > (const int n, const complex* x, const int incx, complex *y, const int incy); #endif template void blas_copy(const int n, const quadruple* x, const int incx, quadruple *y, const int incy); template void blas_copy > (const int n, 
const complex* x, const int incx, complex *y, const int incy); template void blas_copy(const int n, const octruple* x, const int incx, octruple *y, const int incy); template void blas_copy > (const int n, const complex* x, const int incx, complex *y, const int incy); // dz axpy #ifndef BLAS_GENERIC template<> void blas_axpy(const int n, const double &alpha, const double* x, const int incx, double* y, const int incy) { #ifdef BLAS_FORTRAN FORTRAN_DECL(daxpy)(n, alpha, x, incx, y, incy); #else cblas_daxpy((BLAS_INT)n, alpha, x, (BLAS_INT)incx, y, (BLAS_INT)incy); #endif } template<> void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy) { #ifdef BLAS_FORTRAN FORTRAN_DECL(zaxpy)(n, (double *)&alpha, (double *)x, incx, (double *)y, incy); #else cblas_zaxpy((BLAS_INT)n, (BLAS_VOID *)&alpha, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy); #endif } template<> void blas_axpy(const int n, const float &alpha, const float* x, const int incx, float* y, const int incy) { #ifdef BLAS_FORTRAN FORTRAN_DECL(saxpy)(n, alpha, x, incx, y, incy); #else cblas_saxpy((BLAS_INT)n, alpha, x, (BLAS_INT)incx, y, (BLAS_INT)incy); #endif } template<> void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy) { #ifdef BLAS_FORTRAN FORTRAN_DECL(caxpy)(n, (float *)&alpha, (float *)x, incx, (float *)y, incy); #else cblas_caxpy((BLAS_INT)n, (BLAS_VOID *)&alpha, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy); #endif } #endif template void blas_axpy(const int n, const T &alpha, const T* x, const int incx, T* y, int incy) { int ix = 0; int iy = 0; for (int i = 0; i < n; i++, ix += incx, iy += incy) { y[iy] += alpha * x[ix]; } } #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_axpy(const int n, const double &alpha, const double* x, const int incx, double* y, int incy); template void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, 
complex* y, int incy); template void blas_axpy(const int n, const float &alpha, const float* x, const int incx, float* y, int incy); template void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy); #endif template void blas_axpy(const int n, const quadruple &alpha, const quadruple* x, const int incx, quadruple* y, int incy); template void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy); // dz dot #ifndef BLAS_GENERIC template<> double blas_dot(const int n, const double* x, const int incx, const double* y, const int incy) { #ifdef BLAS_FORTRAN return FORTRAN_DECL(ddot)(n, x, incx, y, incy); // ? #else return cblas_ddot((BLAS_INT)n, x, (BLAS_INT)incx, y, (BLAS_INT)incy); #endif } template<> complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy) { complex val; #ifdef BLAS_FORTRAN // fortran function that returns complex is called as C void function FORTRAN_DECL(zdotc)((double*) &val, n, (double *)x, incx, (double *)y, incy); #else cblas_zdotc_sub((BLAS_INT)n, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy, (BLAS_VOID *)&val); #endif return val; } template<> float blas_dot(const int n, const float* x, const int incx, const float* y, const int incy) { #ifdef BLAS_FORTRAN return FORTRAN_DECL(sdot)(n, x, incx, y, incy); // ? 
#else return cblas_sdot((BLAS_INT)n, x, (BLAS_INT)incx, y, (BLAS_INT)incy); #endif } template<> complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy) { complex val; #ifdef BLAS_FORTRAN // fortran function that returns complex is called as C void function FORTRAN_DECL(cdotc)((float*) &val, n, (float *)x, incx, (float *)y, incy); #else cblas_cdotc_sub((BLAS_INT)n, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)y, (BLAS_INT)incy, (BLAS_VOID *)&val); #endif return val; } #endif template T blas_dot(const int n, const T* x, const int incx, const T* y, const int incy) { int ix, iy; T temp; temp = T(0.0); if (n <= 0) { return temp; } if ((incx == 1) && (incy == 1)) { for (int i = 0; i < n; i++) { temp += blas_conj(x[i]) * y[i]; } } else { ix = 0; iy = 0; if (incx < 0) { ix = (n - 1)* (-incx); // decrement form the last index } if (incy < 0) { iy = (n - 1)* (-incy); // decrement form the last index } for (int i = 0; i < n; i++) { temp += blas_conj(x[i]) * y[i]; ix += incx; iy += incy; } } return temp; } // explicit instantiation of blas_dot #ifdef FORCE_EXPLICIT_INSTANTIATION template double blas_dot(const int n, const double* x, const int incx, const double* y, const int incy); template complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy); template float blas_dot(const int n, const float* x, const int incx, const float* y, const int incy); template complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy); #endif template quadruple blas_dot(const int n, const quadruple* x, const int incx, const quadruple* y, const int incy); template complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy); #ifndef NO_OCTRUPLE template octruple blas_dot(const int n, const octruple* x, const int incx, const octruple* y, const int incy); template complex blas_dot >(const int n, const complex* x, const int incx, const 
complex* y, const int incy); #endif // dz scal #ifndef BLAS_GENERIC template<> void blas_scal(const int N, const double &alpha, double *X, const int incX) { #ifdef BLAS_FORTRAN FORTRAN_DECL(dscal)(N, alpha, X, incX); #else cblas_dscal((BLAS_INT)N, alpha, X, (BLAS_INT)incX); #endif } template<> void blas_scal >(const int N, const complex &alpha, complex *X, const int incX) { #ifdef BLAS_FORTRAN FORTRAN_DECL(zscal)(N, (double *)&alpha, (double *)X, incX); #else cblas_zscal((BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX); #endif } template<> void blas_scal(const int N, const float &alpha, float *X, const int incX) { #ifdef BLAS_FORTRAN FORTRAN_DECL(sscal)(N, alpha, X, incX); #else cblas_sscal((BLAS_INT)N, alpha, X, (BLAS_INT)incX); #endif } template<> void blas_scal >(const int N, const complex &alpha, complex *X, const int incX) { #ifdef BLAS_FORTRAN FORTRAN_DECL(cscal)(N, (float *)&alpha, (float *)X, incX); #else cblas_cscal((BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX); #endif } #endif // #ifdef BLAS_GENERIC template void blas_scal(const int N, const T &alpha, T *X, const int incX) { if ((N <= 0) || (incX <= 0)) { return; } if (incX == 1) { for (int i = 0; i < N; i++) { X[i] *= alpha; } } else { int nincx = N * incX; for (int i = 0; i < nincx; i += incX) { X[i] *= alpha; } } } // explicit instantiation of blas_scal #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_scal(const int N, const double &alpha, double *X, const int incX); template void blas_scal >(const int N, const complex &alpha, complex *X, const int incX); template void blas_scal(const int N, const float &alpha, float *X, const int incX); template void blas_scal >(const int N, const complex &alpha, complex *X, const int incX); #endif template void blas_scal(const int N, const quadruple &alpha, quadruple *X, const int incX); template void blas_scal >(const int N, const complex &alpha, complex *X, const int incX); #ifndef NO_OCTRUPLE template void blas_scal(const 
int N, const octruple &alpha, octruple *X, const int incX);
template void blas_scal >(const int N, const complex &alpha, complex *X, const int incX);
#endif

// scaling with magnitude represented by real valued
// blas_scal2 scales X in place by a real factor; for complex X the factor is
// first promoted to a complex number with zero imaginary part.
#ifndef BLAS_GENERIC
// double vector, double factor : dscal / cblas_dscal
template<> void blas_scal2(const int N, const double &alpha, double *X, const int incX)
{
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(dscal)(N, alpha, X, incX);
#else
  cblas_dscal((BLAS_INT)N, alpha, X, (BLAS_INT)incX);
#endif
}

// complex vector, double factor : promoted factor passed to zscal / cblas_zscal
template<> void blas_scal2, double>(const int N, const double &alpha_, complex *X, const int incX)
{
  const complex alpha = complex(alpha_, 0.0);
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(zscal)(N, (double *)&alpha, (double *)X, incX);
#else
  cblas_zscal((BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX);
#endif
}

// float vector, float factor : sscal / cblas_sscal
template<> void blas_scal2(const int N, const float &alpha, float *X, const int incX)
{
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(sscal)(N, alpha, X, incX);
#else
  cblas_sscal((BLAS_INT)N, alpha, X, (BLAS_INT)incX);
#endif
}

// complex vector, float factor : promoted factor passed to cscal / cblas_cscal
template<> void blas_scal2, float>(const int N, const float &alpha_, complex *X, const int incX)
{
  const complex alpha = complex(alpha_, 0.0);
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(cscal)(N, (float *)&alpha, (float *)X, incX);
#else
  cblas_cscal((BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX);
#endif
}

// quadruple vector, quadruple factor : forwards to blas_scal
template<> void blas_scal2(const int N, const quadruple &alpha, quadruple *X, const int incX)
{
  blas_scal(N, alpha, X, incX);
}

// complex vector, quadruple factor : promote the factor, then forward to blas_scal
template<> void blas_scal2, quadruple>(const int N, const quadruple &alpha_, complex *X, const int incX)
{
  complex alpha = complex(alpha_, quadruple(0.0));
  blas_scal >(N, alpha, X, incX);
}

#ifndef NO_OCTRUPLE
// octruple vector, octruple factor : forwards to blas_scal
template<> void blas_scal2(const int N, const octruple &alpha, octruple *X, const int incX)
{
  blas_scal(N, alpha, X, incX);
}

// complex vector, octruple factor : promote the factor, then forward to blas_scal
template<> void blas_scal2, octruple>(const int N, const octruple &alpha_, complex *X, const int incX)
{
  complex alpha = complex(alpha_, octruple(0.0));
  blas_scal >(N, alpha, X, incX);
}
#endif
#endif // ifndef BLAS_GENERIC

template
void blas_scal2(const int N, const U &alpha_, T *X, const int incX) { T alpha = tocomplex(alpha_); if ((N <= 0) || (incX <= 0)) { return; } if (incX == 1) { for (int i = 0; i < N; i++) { X[i] *= alpha; } } else { int nincx = N * incX; for (int i = 0; i < nincx; i += incX) { X[i] *= alpha; } } } // explicit instantiation blas_scal with real number saling #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_scal2(const int N, const double &alpha, double *X, const int incX); template void blas_scal2, double>(const int N, const double &alpha_, complex *X, const int incX); template void blas_scal2(const int N, const float &alpha, float *X, const int incX); template void blas_scal2, float>(const int N, const float &alpha_, complex *X, const int incX); template void blas_scal2(const int N, const quadruple &alpha, quadruple *X, const int incX); template void blas_scal2, quadruple>(const int N, const quadruple &alpha_, complex *X, const int incX); #ifndef NO_OCTRUPLE template void blas_scal2(const int N, const octruple &alpha, octruple *X, const int incX); template void blas_scal2, octruple>(const int N, const octruple &alpha, complex *X, const int incX); #endif #endif template int blas_iamax(const int n, const T *x, const int incx) { U umax; int ix, iamax; iamax = (-1); if ((n < 1) || incx <= 0) { return iamax; } iamax = 0; if (n == 1) { return iamax; } if (incx == 1) { umax = blas_abs(x[0]); for (int i = 1; i < n; i++) { U tmax = blas_abs(x[0]); if (tmax > umax) { iamax = i; umax = tmax; } } } else { ix = 0; umax = blas_abs(x[0]); for (int i = 1; i < n; i++) { ix += incx; U tmax = blas_abs(x[ix]); if (tmax > umax) { iamax = i; umax = tmax; } } } return iamax; } #ifndef BLAS_GENERIC template<> int blas_iamax(const int n, const double *x, const int incx) { #ifdef BLAS_FORTRAN return (FORTRAN_DECL(idamax)(n, x, incx) - 1); // Fortran to C index #else return cblas_idamax((BLAS_INT)n, x, (BLAS_INT)incx); #endif } template<> int blas_iamax, double>(const int n, const complex 
*x, const int incx) { #ifdef BLAS_FORTRAN // Fortran to C index return (FORTRAN_DECL(izamax)(n, (double *)x, incx) - 1); #else return cblas_izamax((BLAS_INT)n, (double *)x, (BLAS_INT)incx); #endif } template<> int blas_iamax(const int n, const float *x, const int incx) { #ifdef BLAS_FORTRAN return (FORTRAN_DECL(isamax)(n, x, incx) - 1); // Fortran to C index #else return cblas_isamax((BLAS_INT)n, x, (BLAS_INT)incx); #endif } template<> int blas_iamax, float>(const int n, const complex *x, const int incx) { #ifdef BLAS_FORTRAN // Fortran to C index return (FORTRAN_DECL(icamax)(n, (float *)x, incx) - 1); #else return cblas_icamax((BLAS_INT)n, (float *)x, (BLAS_INT)incx); #endif } #endif #ifdef FORCE_EXPLICIT_INSTANTIATION template int blas_iamax(const int n, const double *x, const int incx); template int blas_iamax, double>(const int n, const complex *x, const int incx); template int blas_iamax(const int n, const float *x, const int incx); template int blas_iamax, float>(const int n, const complex *x, const int incx); #endif template int blas_iamax(const int n, const quadruple *x, const int incx); template int blas_iamax, quadruple>(const int n, const complex *x, const int incx); template int blas_iamax(const int n, const octruple *x, const int incx); template int blas_iamax, octruple>(const int n, const complex *x, const int incx); // // BLAS 2 // dz gemv #ifndef BLAS_GENERIC template<> void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const double &alpha, const double* A, const int lda, const double* x, const int incx, const double &beta, double* y, const int incy) { #ifdef BLAS_FORTRAN unsigned char tra_ = 0; switch(trA) { case CblasNoTrans : tra_ = 'n'; break; case CblasTrans : tra_ = 't'; break; case CblasConjTrans: tra_ = 'c'; break; } FORTRAN_DECL(dgemv)(tra_, m, n, alpha, A, lda, x, incx, beta, y, incy); // #else cblas_dgemv(CblasColMajor, trA, (BLAS_INT)m, (BLAS_INT)n, alpha, A, (BLAS_INT)lda, x, (BLAS_INT)incx, beta, y, (BLAS_INT)incy); 
#endif
}

// complex gemv routed to zgemv / cblas_zgemv; the CBLAS transpose flag is
// translated to the Fortran character code on the BLAS_FORTRAN path
template<> void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy)
{
#ifdef BLAS_FORTRAN
  unsigned char tra_ = 0;
  switch(trA) {
  case CblasNoTrans: tra_ = 'n'; break;
  case CblasTrans: tra_ = 't'; break;
  case CblasConjTrans: tra_ = 'c'; break;
  }
  FORTRAN_DECL(zgemv)(tra_, m, n, (double *)&alpha, (double *)A, lda, (double *)x, incx, (double *)&beta, (double *)y, incy);
#else
  cblas_zgemv(CblasColMajor, trA, (BLAS_INT)m, (BLAS_INT)n, (BLAS_VOID *)&alpha, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)&beta, (void *)y, (BLAS_INT)incy);
#endif
}

// float gemv routed to sgemv / cblas_sgemv
template<> void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const float &alpha, const float* A, const int lda, const float* x, const int incx, const float &beta, float* y, const int incy)
{
#ifdef BLAS_FORTRAN
  unsigned char tra_ = 0;
  switch(trA) {
  case CblasNoTrans : tra_ = 'n'; break;
  case CblasTrans : tra_ = 't'; break;
  case CblasConjTrans: tra_ = 'c'; break;
  }
  FORTRAN_DECL(sgemv)(tra_, m, n, alpha, A, lda, x, incx, beta, y, incy); //
#else
  cblas_sgemv(CblasColMajor, trA, (BLAS_INT)m, (BLAS_INT)n, alpha, A, (BLAS_INT)lda, x, (BLAS_INT)incx, beta, y, (BLAS_INT)incy);
#endif
}

// complex gemv routed to cgemv / cblas_cgemv
template<> void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy)
{
#ifdef BLAS_FORTRAN
  unsigned char tra_ = 0;
  switch(trA) {
  case CblasNoTrans: tra_ = 'n'; break;
  case CblasTrans: tra_ = 't'; break;
  case CblasConjTrans: tra_ = 'c'; break;
  }
  FORTRAN_DECL(cgemv)(tra_, m, n, (float *)&alpha, (float *)A, lda, (float *)x, incx, (float *)&beta, (float *)y, incy);
#else
  cblas_cgemv(CblasColMajor, trA, (BLAS_INT)m, (BLAS_INT)n, (BLAS_VOID *)&alpha, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)x, (BLAS_INT)incx, (BLAS_VOID *)&beta, (void *)y, (BLAS_INT)incy);
#endif
}
#endif

// Generic matrix-vector product y := alpha * op(A) * x + beta * y.
//   trA        : CblasNoTrans uses A, CblasTrans uses the transpose, any
//                other value uses the conjugated transpose (via blas_conj)
//   m, n       : dimensions of A; element (i, j) is read from A[i + j * lda]
//   x, incx    : input vector (n elements for NoTrans, m otherwise) and stride
//   y, incy    : output vector (m elements for NoTrans, n otherwise) and stride
// Returns without touching y when m == 0, n == 0, or alpha == 0 with beta == 1.
// Negative strides start from the far end of the vector (kx / ky below).
template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const T &alpha, const T* A, const int lda, const T* x, const int incx, const T &beta, T* y, const int incy)
{
  const T zero(0.0);
  const T one(1.0);
  int lenx, leny, ix, iy, jx, jy, kx, ky;
  bool noconj;
  if ((m == 0) || (n == 0) || ((alpha == zero) && (beta == one))) {
    return;
  }
  noconj = (trA == CblasTrans);
  // logical lengths of x and y depend on whether A is transposed
  if (trA == CblasNoTrans) {
    lenx = n;
    leny = m;
  }
  else {
    lenx = m;
    leny = n;
  }
  // first element to visit in x and y
  if (incx > 0) {
    kx = 0;
  }
  else {
    kx = (lenx - 1) * (-incx); //
  }
  if (incy > 0) {
    ky = 0;
  }
  else {
    ky = (leny - 1) * (-incy); //
  }
  // first form y := beta * y
  if (beta != one) {
    if (incy == 1) {
      if (beta == zero) {
        for (int i = 0; i < leny; i++) {
          y[i] = zero;
        } // loop 10 : i
      }
      else {
        for (int i = 0; i < leny; i++) {
          y[i] *= beta;
        } // loop 20 : i
      }
    }
    else {
      iy = ky;
      if (beta == zero) {
        for (int i = 0; i < leny; i++) {
          y[iy] = zero;
          iy += incy;
        } // loop 30 : i
      }
      else {
        for (int i = 0; i < leny; i++) {
          y[iy] *= beta;
          iy += incy;
        } // loop 40 : i
      }
    }
  } // if (beta != one)
  if (alpha == zero) {
    return;
  }
  if (trA == CblasNoTrans) {
    // Form y := alpha*A*x + y.
    // column-oriented: each nonzero x[jx] adds a multiple of column j to y
    jx = kx;
    if (incy == 1) {
      for (int j = 0; j < n; j++) {
        if (x[jx] != zero) {
          T temp = alpha * x[jx];
          for (int i = 0; i < m; i++) {
            y[i] += temp * A[i + j * lda];
          } // loop 50 : i
        } // end if
        jx += incx;
      } // loop 60 : j
    }
    else {
      for (int j = 0; j < n; j++) {
        if (x[jx] != zero) {
          T temp = alpha * x[jx];
          iy = ky;
          for (int i = 0; i < m; i++) {
            y[iy] += temp * A[i + j * lda];
            iy += incy;
          } // loop 70 : i
        }
        jx += incx;
      } // loop 80 : j
    }
  }
  else {
    // Form y := alpha*A**T*x + y or y := alpha*conjg( A' )*x + y.
    // each output entry is the dot product of column j of A with x
    jy = ky;
    if (incx == 1) {
      for (int j = 0; j < n; j++) {
        T temp = zero;
        if (noconj) {
          for (int i = 0; i < m; i++) {
            temp += A[i + j * lda] * x[i];
          } // loop 90 : i
        }
        else {
          for (int i = 0; i < m; i++) {
            temp += blas_conj(A[i + j * lda]) * x[i];
          } // loop 100 : i
        }
        y[jy] += alpha * temp;
        jy += incy;
      } // loop 110 : j
    }
    else {
      for (int j = 0; j < n; j++) {
        T temp = zero;
        ix = kx;
        if (noconj) {
          for (int i = 0; i < m; i++) {
            temp += A[i + j * lda] * x[ix];
            ix += incx;
          } // loop 120 : i
        }
        else {
          for (int i = 0; i < m; i++) {
            temp += blas_conj(A[i + j * lda]) * x[ix];
            ix += incx;
          } // loop 130 : i
        }
        y[jy] += alpha * temp;
        jy += incy;
      } // loop 140 : j
    } // if (incx != 1)
  }
}

// explicit instantiation of blas_gemv
#ifdef FORCE_EXPLICIT_INSTANTIATION
template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const double &alpha, const double* A, const int lda, const double* x, const int incx, const double &beta, double* y, const int incy);
template void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy);
template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const float &alpha, const float* A, const int lda, const float* x, const int incx, const float &beta, float* y, const int incy);
template void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy);
#endif
template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const quadruple &alpha, const quadruple* A, const int lda, const quadruple* x, const int incx, const quadruple &beta, quadruple* y, const int incy);
#ifndef NO_OCTRUPLE
template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const octruple &alpha, const octruple* A, const int lda, const octruple* x, const int incx, const octruple &beta, octruple* y, const int incy);
#endif
template void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy);
#ifndef NO_OCTRUPLE
template void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy);
#endif

// dz trsv
#ifndef BLAS_GENERIC
// double triangular solve routed to dtrsv / cblas_dtrsv; the CBLAS enums are
// translated to Fortran character codes on the BLAS_FORTRAN path
template<> void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX)
{
#ifdef BLAS_FORTRAN
  unsigned char uplo_ = 0;
  unsigned char transa_ = 0;
  unsigned char diag_ = 0;
  switch(Uplo) {
  case CblasUpper: uplo_ = 'u'; break;
  case CblasLower: uplo_ = 'l'; break;
  }
  switch(TransA) {
  case CblasNoTrans: transa_ = 'n'; break;
  case CblasTrans: transa_ = 't'; break;
  case CblasConjTrans: transa_ = 'c'; break;
  }
  switch(Diag) {
  case CblasNonUnit: diag_ = 'n'; break;
  case CblasUnit: diag_ = 'u'; break;
  }
  FORTRAN_DECL(dtrsv)(uplo_, transa_, diag_, N, A, lda, X, incX);
#else
  cblas_dtrsv(CblasColMajor, Uplo, TransA, Diag, (BLAS_INT)N, A, (BLAS_INT)lda, X, (BLAS_INT)incX);
#endif
}

// complex triangular solve routed to ztrsv / cblas_ztrsv
template<> void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex* A, const int lda, complex *X, const int incX)
{
#ifdef BLAS_FORTRAN
  unsigned char uplo_ = 0;
  unsigned char transa_ = 0;
  unsigned char diag_ = 0;
  switch(Uplo) {
  case CblasUpper: uplo_ = 'u'; break;
  case CblasLower: uplo_ = 'l'; break;
  }
  switch(TransA) {
  case CblasNoTrans: transa_ = 'n'; break;
  case CblasTrans: transa_ = 't'; break;
  case CblasConjTrans: transa_ = 'c'; break;
  }
  switch(Diag) {
  case CblasNonUnit: diag_ = 'n'; break;
  case CblasUnit: diag_ = 'u'; break;
  }
  FORTRAN_DECL(ztrsv)(uplo_, transa_,
diag_, N, (double *)A, lda, (double *)X, incX); #else cblas_ztrsv(CblasColMajor, Uplo, TransA, Diag, (BLAS_INT)N, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)X, (BLAS_INT)incX); #endif } template<> void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX) { #ifdef BLAS_FORTRAN unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; break; case CblasTrans: transa_ = 't'; break; case CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'n'; break; case CblasUnit: diag_ = 'u'; break; } FORTRAN_DECL(strsv)(uplo_, transa_, diag_, N, A, lda, X, incX); #else cblas_strsv(CblasColMajor, Uplo, TransA, Diag, (BLAS_INT)N, A, (BLAS_INT)lda, X, (BLAS_INT)incX); #endif } template<> void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex* A, const int lda, complex *X, const int incX) { #ifdef BLAS_FORTRAN unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; break; case CblasTrans: transa_ = 't'; break; case CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'n'; break; case CblasUnit: diag_ = 'u'; break; } FORTRAN_DECL(ctrsv)(uplo_, transa_, diag_, N, (float *)A, lda, (float *)X, incX); #else cblas_ctrsv(CblasColMajor, Uplo, TransA, Diag, (BLAS_INT)N, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)X, (BLAS_INT)incX); #endif } #endif template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const T *A, const int lda, T *X, const int incX) { const T zero(0.0); bool noconj, nounit; int 
ix, jx, kx; if (N == 0) { return; } noconj = (TransA == CblasTrans); nounit = (Diag == CblasNonUnit); if (incX < 0) { kx = (N - 1) * (-incX); // decrement form the last index } else { kx = 0; } if (TransA == CblasNoTrans) { // Form x := inv( A )*x. if (Uplo == CblasUpper) { if (incX == 1) { for (int j = (N - 1); j >= 0; j--) { if (X[j] != zero) { if (nounit) { X[j] /= A[j + j * lda]; } T temp = X[j]; for (int i = (j - 1); i >= 0; i--) { X[i] -= temp * A[i + j * lda]; } // loop 10 : i } } // loop 20 : j } else { jx = kx + (N - 1) * incX; for (int j = (N - 1); j >= 0; j--) { if (X[jx] != zero) { if (nounit) { X[jx] /= A[j + j * lda]; } T temp = X[jx]; ix = jx; for (int i = (j - 1); j >= 0; j--) { ix -= incX; X[ix] -= temp * A[i + j * lda]; } // loop 30 : i } jx -= incX; } // loop 40 : j } // if (incX == 1) } else { if (incX == 1) { for (int j = 0; j < N; j++) { if (X[j] != zero) { if (nounit) { X[j] /= A[j + j * lda]; } T temp = X[j]; for (int i = j + 1; i < N; i++) { X[i] -= temp * A[i + j * lda]; } // loop 50 : i } } // loop 60 : j } else { jx = kx; for (int j = 0; j < N; j++) { if (X[jx] != zero) { if (nounit) { X[jx] /= A[j + j * lda]; } T temp = X[jx]; ix = jx; for (int i = j + 1; i < N; i++) { ix += incX; X[ix] -= temp * A[i + j * lda]; } // loop 70 : i } jx += incX; } // loop 80 : j } // if (incX == 1) } // if (Uplo == CblasUpper) } else { // Form x := inv( A **T )*x or x := inv( A **H ) *x. 
if (Uplo == CblasUpper) { if (incX == 1) { for (int j = 0; j < N; j++) { T temp = X[j]; if (noconj) { for (int i = 0; i < j; i++) { temp -= A[i + j * lda] * X[i]; } // loop 90 : i if (nounit) { temp /= A[j + j * lda]; } } else { for (int i = 0; i < j; i++) { temp -= blas_conj(A[i + j * lda]) * X[i]; } // loop 100 : i if (nounit) { temp /= blas_conj(A[j + j * lda]); } } X[j] = temp; } // loop 110 : j } else { jx = kx; for (int j = 0; j < N; j++) { ix = kx; T temp = X[jx]; if (noconj) { for (int i = 0; i < j; i++) { temp -= A[i + j * lda] * X[ix]; ix += incX; } // loop 120 : i if (nounit) { temp /= A[j + j * lda]; } } else { for (int i = 0; i < j; i++) { temp -= blas_conj(A[i + j * lda]) * X[ix]; ix += incX; } // loop 120 : i if (nounit) { temp /= blas_conj(A[j + j * lda]); } } X[jx] = temp; jx += incX; } } } else { if (incX == 1) { for (int j = (N - 1); j >= 0; j--) { T temp = X[j]; if (noconj) { for (int i = (N - 1); i > j; i--) { temp -= A[i + j * lda] * X[i]; } // loop 150 : i if (nounit) { temp /= A[j + j * lda]; } } else { for (int i = 0; i < j; i++) { temp -= blas_conj(A[i + j * lda]) * X[i]; } // loop 160 : i if (nounit) { temp /= blas_conj(A[j + j * lda]); } } X[j] = temp; } // loop 170 : j } else { kx = kx + N * incX; jx = kx; for (int j = (N - 1); j >= 0; j--) { ix = kx; T temp = X[jx]; if (noconj) { for (int i = (N - 1); i > j; i--) { temp -= A[i + j * lda] * X[ix]; ix -= incX; } // loop 180 : i if (nounit) { temp /= A[j + j * lda]; } } else { for (int i = (N - 1); i > j; i--) { temp -= blas_conj(A[i + j * lda]) * X[ix]; ix -= incX; } // loop 190 : i if (nounit) { temp /= blas_conj(A[j + j * lda]); } } X[jx] = temp; jx -= incX; } // loop 200 : j } } // if (Uplo == CblasUpper) } } // explicit instantiation of blas_trsv #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); template void blas_trsv >(const 
                         CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex *A, const int lda, complex *X, const int incX);
template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX);
template void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex *A, const int lda, complex *X, const int incX);
#endif
template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const quadruple *A, const int lda, quadruple *X, const int incX);
#ifndef NO_OCTRUPLE
template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const octruple *A, const int lda, octruple *X, const int incX);
#endif
template void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex *A, const int lda, complex *X, const int incX);
#ifndef NO_OCTRUPLE
template void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex *A, const int lda, complex *X, const int incX);
#endif

// dz syr : symmetric rank-1 update
// Generic version: A(i, j) += alpha * X_i * X_j (no conjugation) for the
// entries of the triangle selected by Uplo only.
//   Uplo       : CblasUpper updates rows 0..j of column j, otherwise rows j..N-1
//   N, lda     : order of A; element (i, j) lives at A[i + j * lda]
//   X, incX    : vector and its stride; a negative stride starts from
//                element (N - 1) * |incX| (kx below)
// Returns without touching A when N == 0 or alpha == 0.
template void blas_syr(const CBLAS_UPLO Uplo, const int N, const T &alpha, const T *X, const int incX, T *A, const int lda)
{
  const T zero(0.0);
  int ix, jx, kx;
  if ((N == 0) || (alpha == zero)) {
    return;
  }
  if (incX > 0) {
    kx = 0;
  }
  else {
    kx = (N - 1) * (-incX); // decrement from the last index
  }
  if (Uplo == CblasUpper) {
    // Form A when A is stored in upper triangle.
    if (incX == 1) {
      for (int j = 0; j < N; j++) {
        if (X[j] != zero) {
          T temp = alpha * X[j];
          for (int i = 0; i <= j; i++) {
            A[i + j * lda] += X[i] * temp;
          } // loop 10 : i
        }
      } // loop 20 : j
    }
    else {
      jx = kx;
      for (int j = 0; j < N; j++) {
        if (X[jx] != zero) {
          T temp = alpha * X[jx];
          ix = kx;  // rows start at the first element of X
          for (int i = 0; i <= j; i++) {
            A[i + j * lda] += X[ix] * temp;
            ix += incX;
          } // loop 30 : i
        }
        jx += incX;
      } // loop 40 : j
    }
  }
  else {
    // Form A when A is stored in lower triangle.
    if (incX == 1) {
      for (int j = 0; j < N; j++) {
        if (X[j] != zero) {
          T temp = alpha * X[j];
          for (int i = j; i < N; i++) {
            A[i + j * lda] += X[i] * temp;
          } // loop 50 : i
        }
      } // loop 60 : j
    }
    else {
      jx = kx;
      for (int j = 0; j < N; j++) {
        if (X[jx] != zero) {
          T temp = alpha * X[jx];
          ix = jx;  // rows start at the diagonal element
          for (int i = j; i < N; i++) {
            A[i + j * lda] += X[ix] * temp;
            ix += incX;
          } // loop 70 : i
        }
        jx += incX;
      } // loop 80 : j
    }
  }
}

#ifndef BLAS_GENERIC
// double : dsyr / cblas_dsyr
template<> void blas_syr(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, double *A, const int lda)
{
#ifdef BLAS_FORTRAN
  unsigned char uplo_ = 0;
  switch(Uplo) {
  case CblasUpper: uplo_ = 'u'; break;
  case CblasLower: uplo_ = 'l'; break;
  }
  FORTRAN_DECL(dsyr)(uplo_, N, alpha, X, incX, A, lda);
#else
  cblas_dsyr(CblasColMajor, Uplo, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, A, (BLAS_INT)lda);
#endif
}

// complex : calls zgeru with X passed as both vectors on an N x N matrix;
// Uplo is not used by this specialization
template<> void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda)
{
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(zgeru)(N, N, (double *)&alpha, (double *)X, incX, (double *)X, incX, (double *)A, lda); // no zsyr in BLAS! : Uplo is ignored
#else
  cblas_zgeru(CblasColMajor, N, N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX, (BLAS_VOID *)X, (BLAS_INT)incX, (BLAS_VOID *)A, (BLAS_INT)lda); // no zsyr in BLAS! : Uplo is ignored
#endif
}

// float : ssyr / cblas_ssyr
template<> void blas_syr(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, float *A, const int lda)
{
#ifdef BLAS_FORTRAN
  unsigned char uplo_ = 0;
  switch(Uplo) {
  case CblasUpper: uplo_ = 'u'; break;
  case CblasLower: uplo_ = 'l'; break;
  }
  FORTRAN_DECL(ssyr)(uplo_, N, alpha, X, incX, A, lda);
#else
  cblas_ssyr(CblasColMajor, Uplo, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, A, (BLAS_INT)lda);
#endif
}

// complex : calls cgeru with X passed as both vectors on an N x N matrix;
// Uplo is not used by this specialization
template<> void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda)
{
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(cgeru)(N, N, (float *)&alpha, (float *)X, incX, (float *)X, incX, (float *)A, lda); // no csyr in BLAS! : Uplo is ignored
#else
  cblas_cgeru(CblasColMajor, N, N, (BLAS_VOID *)&alpha, (BLAS_VOID *)X, (BLAS_INT)incX, (BLAS_VOID *)X, (BLAS_INT)incX, (BLAS_VOID *)A, (BLAS_INT)lda); // no csyr in BLAS! : Uplo is ignored
#endif
}
#endif

// explicit instantiation of blas_syr
#ifdef FORCE_EXPLICIT_INSTANTIATION
template void blas_syr(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, double *A, const int lda);
template void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda);
template void blas_syr(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, float *A, const int lda);
template void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda);
#endif
template void blas_syr(const CBLAS_UPLO Uplo, const int N, const quadruple &alpha, const quadruple *X, const int incX, quadruple *A, const int lda);
#ifndef NO_OCTRUPLE
template void blas_syr(const CBLAS_UPLO Uplo, const int N, const octruple &alpha, const octruple *X, const int incX, octruple *A, const int lda);
#endif
template void blas_syr >(const CBLAS_UPLO Uplo, const
int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); #ifndef NO_OCTRUPLE template void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); #endif // dz syrc : with complex conjugate template void blas_syrc(const CBLAS_UPLO Uplo, const int N, const T &alpha, const T *X, const int incX, T *A, const int lda) { const T zero(0.0); int ix, jx, kx; if ((N == 0) || (alpha == zero)) { return; } if (incX > 0) { kx = 0; } else { kx = (N - 1) * (-incX); // decrement form the last index } if (Uplo == CblasUpper) { // Form A when A is stored in upper triangle. if (incX == 1) { for (int j = 0; j < N; j++) { if (X[j] != zero) { T temp = alpha * blas_conj(X[j]); for (int i = 0; i <= j; i++) { A[i + j * lda] += X[i] * temp; } // loop 10 : i } } // loop 20 : j } else { jx = kx; for (int j = 0; j < N; j++) { if (X[jx] != zero) { T temp = alpha * blas_conj(X[jx]); ix = kx; for (int i = 0; i <= j; i++) { A[i + j * lda] += X[ix] * temp; ix += incX; } // loop 30 : i } jx += incX; } // loop 40 : j } } else { // Form A when A is stored in lower triangle. 
if (incX == 1) { for (int j = 0; j < N; j++) { if (X[j] != zero) { T temp = alpha * blas_conj(X[j]); for (int i = j; i < N; i++) { A[i + j * lda] += X[i] * temp; } // loop 50 : i } } // loop 60 : j } else { jx = kx; for (int j = 0; j < N; j++) { if (X[jx] != zero) { T temp = alpha * blas_conj(X[jx]); ix = jx; for (int i = j; i < N; i++) { A[i + j * lda] += X[ix] * temp; ix += incX; } // loop 70 : i } jx += incX; } // loop 80 : j } } } // explicit instantiation of blas_syrc template void blas_syrc >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); template void blas_syrc >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); template void blas_syrc >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); #ifndef NO_OCTRUPLE template void blas_syrc >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); #endif // dz syr2 symmetric rank-2 update : DSYR2 only exists in BLAS #ifndef BLAS_GENERIC template<> void blas_syr2(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda) { #ifdef BLAS_FORTRAN unsigned char uplo_ = 0; switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } FORTRAN_DECL(dsyr2)(uplo_, N, alpha, X, incX, Y, incY, A, lda); #else cblas_dsyr2(CblasColMajor, Uplo, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, Y, (BLAS_INT)incY, A, (BLAS_INT)lda); #endif } template<> void blas_syr2(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda) { #ifdef BLAS_FORTRAN unsigned char uplo_ = 0; switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } 
FORTRAN_DECL(ssyr2)(uplo_, N, alpha, X, incX, Y, incY, A, lda); #else cblas_ssyr2(CblasColMajor, Uplo, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, Y, (BLAS_INT)incY, A, (BLAS_INT)lda); #endif } #endif template void blas_syr2(const CBLAS_UPLO Uplo, const int N, const T &alpha, const T *X, const int incX, const T *Y, const int incY, T *A, const int lda) { const T zero(0.0); int ix, iy, jx, jy, kx, ky; if ((N == 0) || (alpha == zero)) { return; } if ((incX != 1) || (incY != 1)) { if (incX > 0) { kx = 0; } else { kx = (N - 1) * (-incX); // decrement form the last index } if (incY > 0) { ky = 0; } else { ky = (N - 1) * (-incY); // decrement form the last index } jx = kx; jy = ky; } if (Uplo == CblasUpper) { // Form A when A is stored in upper triangle. if ((incX == 1) && (incY == 1)) { for (int j = 0; j < N; j++) { if ((X[j] != zero) || (Y[j] != zero)) { T temp1 = alpha * Y[j]; T temp2 = alpha * X[j]; for (int i = 0; i < j; i++) { A[i + j * lda] += X[i] * temp1 + Y[i] * temp2; } // loop 10 : i } } // loop 20 : j } else { for (int j = 0; j < N; j++) { if ((X[jx] != zero) || (Y[jy] != zero)) { T temp1 = alpha * Y[jy]; T temp2 = alpha * X[jx]; ix = kx; iy = ky; // 1st Nov.2015 bug fund : ky = ky; for (int i = 0; i < j; i++) { A[i + j * lda] += X[ix] * temp1 + Y[iy] * temp2; ix += incX; iy += incY; } // loop 30 : i } jx += incX; jx += incY; } // loop 40 : j } } else { // Form A when A is stored in lower triangle. 
if ((incX == 1) && (incY == 1)) { for (int j = 0; j < N; j++) { if ((X[j] != zero) || (Y[j] != zero)) { T temp1 = alpha * Y[j]; T temp2 = alpha * X[j]; for (int i = j; i < N; i++) { A[i + j * lda] += X[i] * temp1 + Y[i] * temp2; } // loop 50 : i } } // loop 60 : j } else { for (int j = 0; j < N; j++) { if ((X[jx] != zero) || (Y[jy] != zero)) { T temp1 = alpha * Y[jy]; T temp2 = alpha * X[jx]; ix = jx; iy = jy; for (int i = j; i < N; i++) { A[i + j * lda] += X[ix] * temp1 + Y[iy] * temp2; ix += incX; iy += incY; } // loop 70 : i } jx += incX; jy += incY; } // loop 80 : j } } } // explicit instantiation of blas_syr2 #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_syr2(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); template void blas_syr2(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); #endif template void blas_syr2 >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); // there is no cblas_syr2 template void blas_syr2(const CBLAS_UPLO Uplo, const int N, const quadruple &nalpha, const quadruple *X, const int incX, const quadruple *Y, const int incY, quadruple *A, const int lda); template void blas_syr2 >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); // dz ger #ifndef BLAS_GENERIC template<> void blas_ger(const int M, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda) { #ifdef BLAS_FORTRAN FORTRAN_DECL(dger)(M, N, alpha, X, incX, Y, incY, A, lda); #else cblas_dger(CblasColMajor, (BLAS_INT)M, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, Y, (BLAS_INT)incY, A, (BLAS_INT)lda); #endif } template<> 
// blas_ger : general rank-1 update A := A + alpha * x * y^T (no conjugation)
// on a column-major M x N matrix.
//
//   M, N   number of rows / columns of A; X has M and Y has N logical
//          elements
//   alpha  scalar factor; the routine returns at once when M == 0, N == 0
//          or alpha == 0
//   X, Y   input vectors, read with strides incX / incY; for a stride <= 0
//          the walk starts at offset (len - 1) * (-stride)
//   A      matrix storage, element (i, j) lives at A[i + j * lda]
template<typename T>
void blas_ger(const int M, const int N, const T &alpha,
              const T *X, const int incX,
              const T *Y, const int incY,
              T *A, const int lda)
{
  const T zero(0.0);

  if ((M == 0) || (N == 0) || (alpha == zero)) {
    return;
  }

  // offset of the element of Y that belongs to column 0
  int jy = (incY > 0) ? 0 : (N - 1) * (-incY);

  if (incX == 1) {
    // contiguous X : index the vector directly
    for (int j = 0; j < N; j++) {
      if (Y[jy] != zero) {          // a zero entry of Y leaves column j as is
        const T temp = alpha * Y[jy];
        for (int i = 0; i < M; i++) {
          A[i + j * lda] += X[i] * temp;
        }
      }
      jy += incY;
    }
  }
  else {
    // strided X : kx is the offset of the first logical element
    const int kx = (incX > 0) ? 0 : (M - 1) * (-incX);
    for (int j = 0; j < N; j++) {
      if (Y[jy] != zero) {
        const T temp = alpha * Y[jy];
        int ix = kx;
        for (int i = 0; i < M; i++) {
          A[i + j * lda] += X[ix] * temp;
          ix += incX;
        }
      }
      jy += incY;
    }
  }
}
// blas_gerc, float instance : forwards to the real rank-1 update of the
// vendor BLAS -- Fortran sger when BLAS_FORTRAN is defined, cblas_sger with
// column-major layout otherwise.
template<>
void blas_gerc(const int M, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda)
{
#ifdef BLAS_FORTRAN
  FORTRAN_DECL(sger)(M, N, alpha, X, incX, Y, incY, A, lda);
#else
  // dimensions, strides and leading dimension are cast to the integer type
  // of the linked CBLAS
  cblas_sger(CblasColMajor, (BLAS_INT)M, (BLAS_INT)N, alpha, X, (BLAS_INT)incX, Y, (BLAS_INT)incY, A, (BLAS_INT)lda);
#endif
}
// blas_gerc : rank-1 update with complex conjugate,
//     A := A + alpha * x * conj(y)^T
// on a column-major M x N matrix.
//
//   M, N   number of rows / columns of A; X has M and Y has N logical
//          elements
//   alpha  scalar factor; the routine returns at once when M == 0, N == 0
//          or alpha == 0
//   X, Y   input vectors, read with strides incX / incY; for a stride <= 0
//          the walk starts at offset (len - 1) * (-stride)
//   A      matrix storage, element (i, j) lives at A[i + j * lda]
template<typename T>
void blas_gerc(const int M, const int N, const T &alpha,
               const T *X, const int incX,
               const T *Y, const int incY,
               T *A, const int lda)
{
  const T zero(0.0);

  if ((M == 0) || (N == 0) || (alpha == zero)) {
    return;
  }

  // offset of the element of Y that belongs to column 0
  int jy = (incY > 0) ? 0 : (N - 1) * (-incY);

  if (incX == 1) {
    // contiguous X : index the vector directly
    for (int j = 0; j < N; j++) {
      if (Y[jy] != zero) {          // a zero entry of Y leaves column j as is
        const T temp = alpha * blas_conj(Y[jy]);
        for (int i = 0; i < M; i++) {
          A[i + j * lda] += X[i] * temp;
        }
      }
      jy += incY;
    }
  }
  else {
    // strided X : kx is the offset of the first logical element
    const int kx = (incX > 0) ? 0 : (M - 1) * (-incX);
    for (int j = 0; j < N; j++) {
      if (Y[jy] != zero) {
        const T temp = alpha * blas_conj(Y[jy]);
        int ix = kx;
        for (int i = 0; i < M; i++) {
          A[i + j * lda] += X[ix] * temp;
          ix += incX;
        }
      }
      jy += incY;
    }
  }
}
blas_gerc(const int M, const int N, const octruple &alpha, const octruple *X, const int incX, const octruple *Y, const int incY, octruple *A, const int lda); template void blas_gerc >(const int M, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); // BLAS 3 // dz trsm #ifndef BLAS_GENERIC template<> void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const double &alpha, const double *A, const int lda, double *B, const int ldb) { #ifdef BLAS_FORTRAN unsigned char side_ = 0; unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Side) { case CblasLeft: side_ = 'l'; break; case CblasRight: side_ = 'r'; break; } switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; break; case CblasTrans: transa_ = 't'; break; case CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'n'; break; case CblasUnit: diag_ = 'u'; break; } FORTRAN_DECL(dtrsm)(side_, uplo_, transa_, diag_, M, N, alpha, A, lda, B, ldb); #else cblas_dtrsm(CblasColMajor, Side, Uplo, TransA, Diag, (BLAS_INT)M, (BLAS_INT)N, alpha, A, (BLAS_INT)lda, B, (BLAS_INT)ldb); #endif } template<> void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb) { #ifdef BLAS_FORTRAN unsigned char side_ = 0; unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Side) { case CblasLeft: side_ = 'l'; break; case CblasRight: side_ = 'r'; break; } switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; break; case CblasTrans: transa_ = 't'; break; case 
CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'u'; break; case CblasUnit: diag_ = 'n'; break; } FORTRAN_DECL(ztrsm)(side_, uplo_, transa_, diag_, M, N, (double *)&alpha, (double *)A, lda, (double *)B, ldb); #else cblas_ztrsm(CblasColMajor, Side, Uplo, TransA, Diag, (BLAS_INT)M, (BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)B, (BLAS_INT)ldb); #endif } template<> void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const float &alpha, const float *A, const int lda, float *B, const int ldb) { #ifdef BLAS_FORTRAN unsigned char side_ = 0; unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Side) { case CblasLeft: side_ = 'l'; break; case CblasRight: side_ = 'r'; break; } switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; break; case CblasTrans: transa_ = 't'; break; case CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'n'; break; case CblasUnit: diag_ = 'u'; break; } FORTRAN_DECL(strsm)(side_, uplo_, transa_, diag_, M, N, alpha, A, lda, B, ldb); #else cblas_strsm(CblasColMajor, Side, Uplo, TransA, Diag, (BLAS_INT)M, (BLAS_INT)N, alpha, A, (BLAS_INT)lda, B, (BLAS_INT)ldb); #endif } template<> void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb) { #ifdef BLAS_FORTRAN unsigned char side_ = 0; unsigned char uplo_ = 0; unsigned char transa_ = 0; unsigned char diag_ = 0; switch(Side) { case CblasLeft: side_ = 'l'; break; case CblasRight: side_ = 'r'; break; } switch(Uplo) { case CblasUpper: uplo_ = 'u'; break; case CblasLower: uplo_ = 'l'; break; } switch(TransA) { case CblasNoTrans: transa_ = 'n'; 
break; case CblasTrans: transa_ = 't'; break; case CblasConjTrans: transa_ = 'c'; break; } switch(Diag) { case CblasNonUnit: diag_ = 'u'; break; case CblasUnit: diag_ = 'n'; break; } FORTRAN_DECL(cztrsm)(side_, uplo_, transa_, diag_, M, N, (float *)&alpha, (float *)A, lda, (float *)B, ldb); #else cblas_ctrsm(CblasColMajor, Side, Uplo, TransA, Diag, (BLAS_INT)M, (BLAS_INT)N, (BLAS_VOID *)&alpha, (BLAS_VOID *)A, (BLAS_INT)lda, (BLAS_VOID *)B, (BLAS_INT)ldb); #endif } #endif template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const T &alpha, const T *A, const int lda, T *B, const int ldb) { const T zero(0.0); const T one(1.0); bool lside, noconj, nounit, upper; lside = (Side == CblasLeft); noconj = (TransA == CblasTrans); nounit = (Diag == CblasNonUnit); upper = (Uplo == CblasUpper); if ((M == 0) || (N == 0)) { return; } if (alpha == zero) { for (int j = 0; j < N; j++) { for (int i = 0; i < M; i++) { B[i + j * ldb] = zero; }// loop 10 : i } // loop 10 : j } if (lside) { if (TransA == CblasNoTrans) { // Form B := alpha*inv( A )*B. 
if (upper) { for (int j = 0; j < N; j++) { if (alpha != one) { for (int i = 0; i < M; i++) { B[i + j * ldb] *= alpha; } // loop 30 : i } for (int k = (M - 1); k >= 0; k--) { if (B[k + j * ldb] != zero) { if (nounit) { B[k + j * ldb] /= A[k + k * lda]; } for (int i = 0; i < k; i++) { B[i + j * ldb] -= B[k + j *ldb] * A[i + k * lda]; } // loop 40 : i } } // loop 50 : k } // loop 60 : j } // if (upper) else { for (int j = 0; j < N; j++) { if (alpha != one) { for (int i = 0; i < M; i++) { B[i + j * ldb] *= alpha; } // loop 70 : i } for (int k = 0; k < M; k++) { if (B[k + j * ldb] != zero) { if (nounit) { B[k + j * ldb] /= A[k + k * lda]; } for (int i = (k + 1); i < M; i++) { B[i + j * ldb] -= B[k + j * ldb] * A[i + k * lda]; } // loop 80 : i } } //loop 90 : k } // loop 100 : j } // if (upper) } // if (TransA == CblasNoTrans) else { // Form B := alpha*inv( A**T )*B // or B := alpha*inv( A**H )*B. if (upper) { for (int j = 0; j < N; j++) { for (int i = 0; i < M; i++) { T temp = alpha * B[i + j * ldb]; if (noconj) { for (int k = 0; k < i; k++) { temp -= A[k + i * lda] * B[k + j * ldb]; } // loop 110 : k if (nounit) { temp /= A[i + i * lda]; } } else { for (int k = 0; k < i; k++) { temp -= blas_conj(A[k + i * lda]) * B[k + j * ldb]; } // loop 120 : k if (nounit) { temp /= blas_conj(A[i + i * lda]); } } B[i + j * ldb] = temp; } // loop 130 : i } // loop 140 : j } // if (upper) else { for (int j = 0; j < N; j++) { for (int i = (M - 1); i >= 0; i--) { T temp = alpha * B[i + j * ldb]; if (noconj) { for (int k = (i + 1); k < M; k++) { temp -= A[k + i * lda] * B[k + j * ldb]; } // loop 150 : k if (nounit) { temp /= A[i + i * lda]; } } else { for (int k = (i + 1); k < M; k++) { temp -= blas_conj(A[k + i * lda]) * B[k + j * ldb]; } // loop 160 : k if (nounit) { temp /= blas_conj(A[i + i * lda]); } } B[i + j * ldb] = temp; } // loop 170 : i } // loop 180 : j } // if (upper) } // if (TransA == CblasNoTrans) } // if (lsdie) else { if (TransA == CblasNoTrans) { // Form B := 
alpha*B*inv( A ). if (upper) { for (int j = 0; j < N; j++) { if (alpha != one) { for (int i = 0; i < M; i++) { B[i + j * ldb] *= alpha; } // loop 190 : i } for (int k = 0; k < j; k++) { if (A[k + j * lda] != zero) { for (int i = 0; i < M; i++) { B[i + j * ldb] -= A[k + j * lda] * B[i + k * ldb]; } // loop 200 : i } } // loop 210 : k if (nounit) { T temp = one / A[j + j * lda]; for (int i = 0; i < M; i++) { B[i + j * ldb] *= temp; } // loop 220 : i } } // loop 230 : j } else { for (int j = (N - 1); j >= 0; j--) { if (alpha != one) { for (int i = 0; i < M; i++) { B[i + j * ldb] *= alpha; } // loop 240 : i } for (int k = (j + 1); k < N; k++) { if (A[k + j * lda] != zero) { for (int i = 0; i < M; i++) { B[i + j * ldb] -= A[k + j * lda] * B[i + k * ldb]; } // loop 2500 : i } } // loop 260 : k if (nounit) { T temp = one / A[j + j * lda]; for (int i = 0; i < M; i++) { B[i + j * ldb] *= temp; } // loop 270 : i } } // loop 280 : j } } // if (TransA == CblasNoTrans) { else { // Form B := alpha*B*inv( A**T ) // or B := alpha*B*inv( A**H ). 
if (upper) { for (int k = (N - 1); k >= 0; k--) { if (nounit) { T temp; if (noconj) { temp = one / A[k + k * lda]; } else { temp = one / blas_conj(A[k + k * lda]); } for (int i = 0; i < M; i++) { B[i + k * ldb] *= temp; } // loop 290 : i } for (int j = 0; j < k; j++) { if (A[j + k * lda] != zero) { T temp; if (noconj) { temp = A[j + k * lda]; } else { temp = blas_conj(A[j + k * lda]); } for (int i = 0; i < M; i++) { B[i + j * ldb] -= temp * B[i + k * ldb]; } //loop 300 : i } } // loop 310 : j if (alpha != one) { for (int i = 0; i < M; i++) { B[i + k + ldb] *= alpha; } // loop 320 : i } } // loop 330 : k } // if (upper) else { for (int k = 0; k < N; k++) { if (nounit) { T temp; if (noconj) { temp = one / A[k + k * lda]; } else { temp = one / blas_conj(A[k + k * lda]); } for (int i = 0; i < M; i++) { B[i + k * ldb] *= temp; } // loop 340 : i } // if (nounit) for (int j = (k + 1); j < N; j++) { if (A[j + k * lda] != zero) { T temp; if (noconj) { temp = A[j + k * lda]; } else { temp = blas_conj(A[j + k * lda]); } for (int i = 0; i < M; i++) { B[i + j * ldb] -= temp * B[i + k * ldb]; } //loop 350 : i } } // loop 360 : j if (alpha != one) { for (int i = 0; i < M; i++) { B[i + k + ldb] *= alpha; } // loop 370 : i } } // loop 380 : k } } } // if (lside) } //#endif // explicit instantiation of blas_trsm #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const double &alpha, const double *A, const int lda, double *B, const int ldb); template void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const float &alpha, const float *A, 
const int lda, float *B, const int ldb); template void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); #endif template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const quadruple &alpha, const quadruple *A, const int lda, quadruple *B, const int ldb); #ifndef NO_OCTRUPLE template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const octruple &alpha, const octruple *A, const int lda, octruple *B, const int ldb); #endif template void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); #ifndef NO_OCTRUPLE template void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); #endif // dz gemm #ifndef BLAS_GENERIC template<> void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const double &alpha, const double* A, int lda, const double* B, int ldb, const double &beta, double* C, int ldc ) { #ifdef BLAS_FORTRAN unsigned char tra_ = 0; unsigned char trb_ = 0; switch(trA) { case CblasNoTrans: tra_ = 'n'; break; case CblasTrans: tra_ = 't'; break; case CblasConjTrans: tra_ = 'c'; break; } switch(trB) { case CblasNoTrans: trb_ = 'n'; break; case CblasTrans: trb_ = 't'; break; case CblasConjTrans: trb_ = 'c'; break; } FORTRAN_DECL(dgemm)(tra_, trb_, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); #else cblas_dgemm(CblasColMajor, trA, trB, (BLAS_INT)m, (BLAS_INT)n, k, 
alpha, A, (BLAS_INT)lda, B, (BLAS_INT)ldb, beta, C, (BLAS_INT)ldc ); #endif } template<> void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha, const complex* A, int lda, const complex* B, int ldb, const complex &beta, complex* C, int ldc ) { #ifdef BLAS_FORTRAN unsigned char tra_ = 0; unsigned char trb_ = 0; switch(trA) { case CblasNoTrans: tra_ = 'n'; break; case CblasTrans: tra_ = 't'; break; case CblasConjTrans: tra_ = 'c'; break; } switch(trB) { case CblasNoTrans: trb_ = 'n'; break; case CblasTrans: trb_ = 't'; break; case CblasConjTrans: trb_ = 'c'; break; } FORTRAN_DECL(zgemm)(tra_, trb_, m, n, k, (const double *)&alpha, (const double *)A, lda, (const double *)B, ldb, (const double *)&beta, (double *)C, ldc ); #else cblas_zgemm(CblasColMajor, trA, trB, (BLAS_INT)m, (BLAS_INT)n, (BLAS_INT)k, (const BLAS_VOID *)&alpha, (const BLAS_VOID *)A, (BLAS_INT)lda, (const BLAS_VOID *)B, (BLAS_INT)ldb, (const BLAS_VOID *)&beta, C, (BLAS_INT)ldc ); #endif } template<> void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const float &alpha, const float* A, int lda, const float* B, int ldb, const float &beta, float* C, int ldc ) { #ifdef BLAS_FORTRAN unsigned char tra_ = 0; unsigned char trb_ = 0; switch(trA) { case CblasNoTrans: tra_ = 'n'; break; case CblasTrans: tra_ = 't'; break; case CblasConjTrans: tra_ = 'c'; break; } switch(trB) { case CblasNoTrans: trb_ = 'n'; break; case CblasTrans: trb_ = 't'; break; case CblasConjTrans: trb_ = 'c'; break; } FORTRAN_DECL(sgemm)(tra_, trb_, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); #else cblas_sgemm(CblasColMajor, trA, trB, (BLAS_INT)m, (BLAS_INT)n, k, alpha, A, (BLAS_INT)lda, B, (BLAS_INT)ldb, beta, C, (BLAS_INT)ldc ); #endif } template<> void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha, const complex* A, int lda, const complex* B, int ldb, const complex &beta, complex* C, int ldc ) { #ifdef 
BLAS_FORTRAN unsigned char tra_ = 0; unsigned char trb_ = 0; switch(trA) { case CblasNoTrans: tra_ = 'n'; break; case CblasTrans: tra_ = 't'; break; case CblasConjTrans: tra_ = 'c'; break; } switch(trB) { case CblasNoTrans: trb_ = 'n'; break; case CblasTrans: trb_ = 't'; break; case CblasConjTrans: trb_ = 'c'; break; } FORTRAN_DECL(cgemm)(tra_, trb_, m, n, k, (const float *)&alpha, (const float *)A, lda, (const float *)B, ldb, (const float *)&beta, (float *)C, ldc ); #else cblas_cgemm(CblasColMajor, trA, trB, (BLAS_INT)m, (BLAS_INT)n, (BLAS_INT)k, (const BLAS_VOID *)&alpha, (const BLAS_VOID *)A, (BLAS_INT)lda, (const BLAS_VOID *)B, (BLAS_INT)ldb, (const BLAS_VOID *)&beta, C, (BLAS_INT)ldc ); #endif } #endif template void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const T &alpha, const T* A, int lda, const T* B, int ldb, const T &beta, T* C, int ldc ) { const T zero(0.0); const T one(1.0); // based on zgemm bool nota, notb; bool conja, conjb; nota = (trA == CblasNoTrans); notb = (trB == CblasNoTrans); conja = (trA == CblasConjTrans); conjb = (trB == CblasConjTrans); // without checking parameters if ((m == 0) || (n == 0) || (((alpha == zero || (k == 0)) && (beta == one)))) { return; } if (alpha == zero) { if (beta == zero) { for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { C[i + j * ldc] = zero; } // loop 10 : i } // loop 20 : j } else { for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { C[i + j * ldc] *= beta; } // loop 30 : i } // loop 40 : j } return; } if (notb) { if (nota) { // Form C := alpha*A*B + beta*C. 
for (int j = 0; j < n; j++) { if (beta == zero) { for (int i = 0; i < m; i++) { C[i + j * ldc] = zero; } // loop 50 : i } else if (beta != one) { for (int i = 0; i < m; i++) { C[i + j * ldc] *= beta; } // loop 60 : i } for (int l = 0; l < k; l++) { if (B[l + j * ldb] != zero) { T temp = alpha * B[l + j * ldb]; for (int i = 0; i < m; i++) { C[i + j * ldc] += temp * A[i + l * lda]; } // loop 70 : i } } // loop 80 : l } // loop 90 : j } else if (conja) { // Form C := alpha*conjg( A' )*B + beta*C. for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += blas_conj(A[l + i * lda]) * B[l + j * ldb]; } // loop 100 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 110 : i } // loop 120 : j } else { // if (nota) // Form C := alpha*A**T*B + beta*C for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += A[l + i * lda] * B[l + j * ldb]; } // loop 130 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 140 : i } // loop 150 : j } // if (nota) } // if (notb) else { if (nota) { if (conjb) { // Form C := alpha*A*conjg( B' ) + beta*C. 
for (int j = 0; j < n; j++) { if (beta == zero) { for (int i = 0; i < m; i++) { C[i + j * ldc] = zero; } // loop 160 : i } else if (beta != one) { for (int i = 0; i < m; i++) { C[i + j * ldc] *= beta; } // loop 170 : i } for (int l = 0; l < k; l++) { if (B[j + l * ldb] != zero) { T temp = alpha * blas_conj(B[j + l * ldb]); for (int i = 0; i < m; i++) { C[i + j * ldc] += temp * A[i + l * lda]; } // loop 180 : i } } // loop 190 : i } // loop 200 : j } else { // Form C := alpha*A*B**T + beta*C for (int j = 0; j < n; j++) { if (beta == zero) { for (int i = 0; i < m; i++) { C[i + j * ldc] = zero; } //loop 210 : i } else if (beta != one) { for (int i = 0; i < m; i++) { C[i + j * ldc] *= beta; } // loop 220 : i } for (int l = 0; l < k; l++) { if (B[j + l * ldb] != zero) { T temp = alpha * B[j + l * ldb]; for (int i = 0; i < m; i++) { C[i + j * ldc] += temp * A[i + l * lda]; } // loop 230 : i } } // loop 240 : i } // loop 250 : j } } // if (nota) else if (conja) { if (conjb) { // Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. 
for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += blas_conj(A[l + i * lda]) * blas_conj(B[j + l * ldb]); } // loop 260 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 270 : i } // loop 280 : i } else { // Form C := alpha*conjg( A' )*B' + beta*C for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += blas_conj(A[l + i * lda]) * B[j + l * ldb]; } // loop 290 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 300 : i } // loop 310 : i } } else { if (conjb) { // Form C := alpha*A'*conjg( B' ) + beta*C for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += A[l + i * lda] * blas_conj(B[j + l * ldb]); } // loop 320 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 330 : i } // loop 340 : i } else { // Form C := alpha*A**T*B**T + beta*C for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { T temp = zero; for (int l = 0; l < k; l++) { temp += A[l + i * lda] * B[j + l * ldb]; } // loop 350 : l if (beta == zero) { C[i + j * ldc] = alpha * temp; } else { C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc]; } } // loop 360 : i } // loop 370 : i } } // if (nota) } // if (notb) } // explicit instantiation of blas_gemm #ifdef FORCE_EXPLICIT_INSTANTIATION template void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const double &alpha, const double* A, int lda, const double* B, int ldb, const double &beta, double* C, int ldc ); template void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha_, const complex* A, int lda, const complex* B, int ldb, const complex &beta_, complex* C, int ldc ); 
template void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const float &alpha, const float* A, int lda, const float* B, int ldb, const float &beta, float* C, int ldc ); template void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha_, const complex* A, int lda, const complex* B, int ldb, const complex &beta_, complex* C, int ldc ); #endif template void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const quadruple &alpha, const quadruple* A, int lda, const quadruple* B, int ldb, const quadruple &beta, quadruple* C, int ldc ); template void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha_, const complex* A, int lda, const complex* B, int ldb, const complex &beta_, complex* C, int ldc ); #ifndef NO_OCTRUPLE template void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const octruple &alpha, const octruple* A, int lda, const octruple* B, int ldb, const octruple &beta, octruple* C, int ldc ); template void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha_, const complex* A, int lda, const complex* B, int ldb, const complex &beta_, complex* C, int ldc ); #endif // OTHERS template U blas_l2norm(const int n, T *x, const int incX) { U tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(tmp); // works for T = double, quadruple, etc. 
not for complex } template<> double blas_l2norm, double>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(tmp.real()); } template<> float blas_l2norm, float>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(tmp.real()); } template<> quadruple blas_l2norm, quadruple>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return sqrt(tmp.real()); // } #ifndef NO_OCTRUPLE template<> octruple blas_l2norm, octruple>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return sqrt(tmp.real()); // } #endif template double blas_l2norm(const int n, double *x, const int incX); template quadruple blas_l2norm(const int n, quadruple *x, const int incX); template float blas_l2norm(const int n, float *x, const int incX); #ifdef FORCE_EXPLICIT_INSTANTIATION template double blas_l2norm, double>(const int n, complex *x, const int incX); template quadruple blas_l2norm, quadruple>(const int n, complex *x, const int incX); #ifndef NO_OCTRUPLE template octruple blas_l2norm, octruple>(const int n, complex *x, const int incX); #endif #endif #ifndef NO_OCTRUPLE template octruple blas_l2norm(const int n, octruple *x, const int incX); #endif double blas_l2norm_lower_prec(const int n, quadruple *x, const int incX) { quadruple tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(quad2double(tmp)); // } quadruple blas_l2norm_lower_prec(const int n, octruple *x, const int incX) { octruple tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(oct2quad(tmp)); // } double blas_l2norm_lower_prec(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(quad2double(tmp.real())); // } quadruple blas_l2norm_lower_prec(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot(n, x, incX, x, incX); return sqrt(oct2quad(tmp.real())); // } template U 
blas_l2norm2(const int n, T *x, const int incX) { U tmp; tmp = blas_dot(n, x, incX, x, incX); return tmp; } template<> double blas_l2norm2, double>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return tmp.real(); } template<> float blas_l2norm2, float>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return tmp.real(); } template<> quadruple blas_l2norm2, quadruple>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return tmp.real(); // } template<> octruple blas_l2norm2, octruple>(const int n, complex *x, const int incX) { complex tmp; tmp = blas_dot >(n, x, incX, x, incX); return tmp.real(); // } template double blas_l2norm2(const int n, double *x, const int incX); template float blas_l2norm2(const int n, float *x, const int incX); template quadruple blas_l2norm2(const int n, quadruple *x, const int incX); #ifdef FORCE_EXPLICIT_INSTANTIATION template double blas_l2norm2, double>(const int n, complex *x, const int incX); template quadruple blas_l2norm2, quadruple>(const int n, complex *x, const int incX); template octruple blas_l2norm2(const int n, octruple *x, const int incX); template octruple blas_l2norm2, octruple>(const int n, complex *x, const int incX); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/blas.hpp000664 000000 000000 00000050354 14037356732 024270 0ustar00rootroot000000 000000 /*! \file blas.hpp \brief BLAS function interface \author Xavier Juvigny, ONERA \date Jan. 12th 2005 \modification function from NETLIB source with BLAS and CBLAS wrapper \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 16th 2015 \date Jul. 17th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _COMPILER_BLAS_H # define _COMPILER_BLAS_H # include "Compiler/OptionCompiler.hpp" # include "Compiler/arithmetic.hpp" #ifdef BLAS_MKL #define MKL_Complex16 std::complex #include #include typedef MKL_INT BLAS_INT; typedef void BLAS_VOID; #endif #ifdef VECLIB #include typedef int BLAS_INT; typedef void BLAS_VOID; #endif #ifdef SX_ACE_BLAS #include typedef int BLAS_INT; typedef void BLAS_VOID; #endif #ifdef OPENBLAS #include "cblas.h" typedef int BLAS_INT; typedef double BLAS_VOID; #endif #ifdef SUNPERF #define floatcomplex std::complex #define doublecomplex std::complex #define _SUNPERF_COMPLEX #include typedef int BLAS_INT; typedef void BLAS_VOID; #endif #if (defined(BLAS_GENERIC) || defined(BLAS_FORTRAN)) typedef enum {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113 } CBLAS_TRANSPOSE; typedef enum {CblasUpper=121, CblasLower=122} CBLAS_UPLO; typedef enum {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; typedef enum {CblasLeft=141, CblasRight=142} CBLAS_SIDE; #endif using std::complex; template inline U blas_abs(const T& x) {} template<> inline double blas_abs(const double &x) { return fabs(x); } template<> inline double blas_abs, double>(const complex &x) { return abs(x); } template<> inline float blas_abs(const float &x) { return fabsf(x); } template<> inline double blas_abs(const float &x) { return double(fabsf(x)); } template<> inline float blas_abs, float>(const complex &x) { return abs(x); // using class } template<> inline double blas_abs, double>(const complex &x) { return 
double(abs(x)); // using class } template<> inline quadruple blas_abs(const quadruple &x) { quadruple zero(0.0); return (x > zero ? x : (-x)); } template<> inline quadruple blas_abs, quadruple>(const complex &x) { quadruple xx = x.real(); quadruple yy = x.imag(); return sqrt(xx * xx + yy * yy); } #ifndef NO_OCTRUPLE template<> inline octruple blas_abs(const octruple &x) { octruple zero(0.0); return (x > zero ? x : (-x)); } template<> inline octruple blas_abs, octruple>(const complex &x) { octruple xx = x.real(); octruple yy = x.imag(); return sqrt(xx * xx + yy * yy); } #endif template<> inline double blas_abs(const quadruple &x) { quadruple zero(0.0); return quad2double(x > zero ? x : (-x)); } template<> inline double blas_abs, double>(const complex &x) { quadruple xx = x.real(); quadruple yy = x.imag(); return quad2double(sqrt(xx * xx + yy * yy)); } #ifndef NO_OCTRUPLE template<> inline double blas_abs(const octruple &x) { octruple zero(0.0); return oct2double(x > zero ? x : (-x)); } template<> inline double blas_abs, double>(const complex &x) { octruple xx = x.real(); octruple yy = x.imag(); return oct2double(sqrt(xx * xx + yy * yy)); } #endif inline double blas_conj(double x) { return x; } inline float blas_conj(float x) { return x; } inline quadruple blas_conj(const quadruple &x) { return x; } #ifndef NO_OCTRUPLE inline octruple blas_conj(const octruple &x) { return x; } #endif inline complex blas_conj(const complex &x) { return std::conj(x); } inline complex blas_conj(const complex &x) { return std::conj(x); } inline complex blas_conj(const complex &x) { return std::conj(x); } #ifndef NO_OCTRUPLE inline complex blas_conj(const complex &x) { return std::conj(x); } #endif // ====================== Blas subroutine level 1 ========================= template void blas_axpy(const int n, const T &alpha, const T* x, const int incx, T* y, int incy); #ifndef BLAS_GENERIC template<> void blas_axpy(const int n, const double &alpha, const double* x, const int incx, double* 
y, const int incy); template<> void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy); template<> void blas_axpy(const int n, const float &alpha, const float* x, const int incx, float* y, const int incy); template<> void blas_axpy >(const int n, const complex &alpha, const complex* x, const int incx, complex* y, int incy); #endif template void blas_copy(const int n, const T* x, const int incx, T* y, const int incy); #ifndef BLAS_GENERIC template<> void blas_copy(const int n, const double* x, const int incx, double* y, const int incy); template<> void blas_copy >(const int n, const complex* x, const int incx, complex* y, const int incy); template<> void blas_copy(const int n, const float* x, const int incx, float* y, const int incy); template<> void blas_copy >(const int n, const complex* x, const int incx, complex* y, const int incy); #endif template T blas_dot(const int n, const T* x, const int incx, const T* y, const int incy); #ifndef BLAS_GENERIC template<> double blas_dot(const int n, const double* x, const int incx, const double* y, const int incy); template<> complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy); template<> float blas_dot(const int n, const float* x, const int incx, const float* y, const int incy); template<> complex blas_dot >(const int n, const complex* x, const int incx, const complex* y, const int incy); #endif template void blas_scal(const int N, const T &alpha, T *X, const int incX); #ifndef BLAS_GENERIC template<> void blas_scal(const int N, const double &alpha, double *X, const int incX); template<> void blas_scal >(const int N, const complex &alpha, complex *X, const int incX); template<> void blas_scal(const int N, const float &alpha, float *X, const int incX); template<> void blas_scal >(const int N, const complex &alpha, complex *X, const int incX); #endif // T may be std::complex of U template void blas_scal2(const int N, const U 
&alpha, T *X, const int incX); #ifndef BLAS_GENERIC template<> void blas_scal2(const int N, const double &alpha, double *X, const int incX); template<> void blas_scal2, double>(const int N, const double &alpha_, complex *X, const int incX); template<> void blas_scal2(const int N, const float &alpha, float *X, const int incX); template<> void blas_scal2, float>(const int N, const float &alpha_, complex *X, const int incX); template<> void blas_scal2(const int N, const quadruple &alpha, quadruple *X, const int incX); template<> void blas_scal2, quadruple>(const int N, const quadruple &alpha_, complex *X, const int incX); #endif template int blas_iamax(const int n, const T *x, const int incx); #ifndef BLAS_GENERIC template<> int blas_iamax(const int n, const double *x, const int incx); template<> int blas_iamax, double>(const int n, const complex *x, const int incx); template<> int blas_iamax(const int n, const float *x, const int incx); template<> int blas_iamax, float>(const int n, const complex *x, const int incx); #endif // ====================== Blas subroutine level 2 ========================= template void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const T &alpha, const T* A, const int lda, const T* x, const int incx, const T &beta, T* y, const int incy); #ifndef BLAS_GENERIC template<> void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const double &alpha, const double* A, const int lda, const double* x, const int incx, const double &beta, double* y, const int incy); template<> void blas_gemv >(const CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy); template<> void blas_gemv(const CBLAS_TRANSPOSE trA, const int m, const int n, const float &alpha, const float* A, const int lda, const float* x, const int incx, const float &beta, float* y, const int incy); template<> void blas_gemv >(const 
CBLAS_TRANSPOSE trA, const int m, const int n, const complex &alpha, const complex* A, const int lda, const complex* x, const int incx, const complex &beta, complex* y, const int incy); #endif template void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const T *A, const int lda, T *X, const int incX); #ifndef BLAS_GENERIC template<> void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); template<> void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex* A, const int lda, complex *X, const int incX); template<> void blas_trsv(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX); template<> void blas_trsv >(const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int N, const complex* A, const int lda, complex *X, const int incX); #endif template void blas_syr(const CBLAS_UPLO Uplo, const int N, const T &alpha, const T *X, const int incX, T *A, const int lda); #ifndef BLAS_GENERIC template<> void blas_syr(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, double *A, const int lda); template<> void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); template<> void blas_syr(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, float *A, const int lda); template<> void blas_syr >(const CBLAS_UPLO Uplo, const int N, const complex &alpha, const complex *X, const int incX, complex *A, const int lda); #endif template void blas_syrc(const CBLAS_UPLO Uplo, const int N, const T &alpha, const T *X, const int incX, T *A, const int lda); template void blas_syr2(const CBLAS_UPLO Uplo, const 
int N, const T &alpha, const T *X, const int incX, const T *Y, const int incY, T *A, const int lda); #ifndef BLAS_GENERIC template<> void blas_syr2(const CBLAS_UPLO Uplo, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); template<> void blas_syr2(const CBLAS_UPLO Uplo, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); #endif template void blas_ger(const int M, const int N, const T &alpha, const T *X, const int incX, const T *Y, const int incY, T *A, const int lda); #ifndef BLAS_GENERIC template<> void blas_ger(const int M, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); template<> void blas_ger >(const int M, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); template<> void blas_ger(const int M, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); template<> void blas_ger >(const int M, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); #endif template void blas_gerc(const int M, const int N, const T &alpha, const T *X, const int incX, const T *Y, const int incY, T *A, const int lda); #ifndef BLAS_GENERIC template<> void blas_gerc(const int M, const int N, const double &alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); template<> void blas_gerc >(const int M, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); template<> void blas_gerc(const int M, const int N, const float &alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); template<> void 
blas_gerc >(const int M, const int N, const complex &alpha, const complex *X, const int incX, const complex *Y, const int incY, complex *A, const int lda); #endif // ====================== Blas subroutine level 3 ========================= template void blas_gemm( CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const T &alpha, const T* A, int lda, const T* B, int ldb, const T &beta, T* C, int ldc ); #ifndef BLAS_GENERIC template<> void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const double &alpha, const double* A, int lda, const double* B, int ldb, const double &beta, double* C, int ldc ); template<> void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha, const complex* A, int lda, const complex* B, int ldb, const complex &beta, complex* C, int ldc ); template<> void blas_gemm(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const float &alpha, const float* A, int lda, const float* B, int ldb, const float &beta, float* C, int ldc ); template<> void blas_gemm >(CBLAS_TRANSPOSE trA, CBLAS_TRANSPOSE trB, int m, int n, int k, const complex &alpha, const complex* A, int lda, const complex* B, int ldb, const complex &beta, complex* C, int ldc ); #endif template void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const T &alpha, const T *A, const int lda, T *B, const int ldb); #ifndef BLAS_GENERIC template<> void blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const double &alpha, const double *A, const int lda, double *B, const int ldb); template<> void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); template<> void 
blas_trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const float &alpha, const float *A, const int lda, float *B, const int ldb); template<> void blas_trsm >(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const complex &alpha, const complex *A, const int lda, complex *B, const int ldb); #endif //#ifdef BLAS_MKL // ======== for computation of norm and norm^2 template U blas_l2norm(const int n, T *x, const int incX); template<> double blas_l2norm, double>(const int n, complex *x, const int incX); template<> float blas_l2norm, float>(const int n, complex *x, const int incX); template<> quadruple blas_l2norm, quadruple>(const int n, complex *x, const int incX); template U blas_l2norm2(const int n, T *x, const int incX); template<> double blas_l2norm2, double>(const int n, complex *x, const int incX); template<> float blas_l2norm2, float>(const int n, complex *x, const int incX); template<> quadruple blas_l2norm2, quadruple>(const int n, complex *x, const int incX); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/elapsed_time.cpp000664 000000 000000 00000015330 14037356732 025770 0ustar00rootroot000000 000000 /*! \file elapsed_time.cpp \brief time esurment functions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 4th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #include "elapsed_time.hpp" #ifdef CLOCK_GETTIME #ifdef _MSC_VER // added by Yann Collette // From http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows LARGE_INTEGER getFILETIMEoffset() { SYSTEMTIME s; FILETIME f; LARGE_INTEGER t; s.wYear = 1970; s.wMonth = 1; s.wDay = 1; s.wHour = 0; s.wMinute = 0; s.wSecond = 0; s.wMilliseconds = 0; SystemTimeToFileTime(&s, &f); t.QuadPart = f.dwHighDateTime; t.QuadPart <<= 32; t.QuadPart |= f.dwLowDateTime; return (t); } int clock_gettime(int X, struct timeval *tv) { LARGE_INTEGER t; FILETIME f; double microseconds; static LARGE_INTEGER offset; static double frequencyToMicroseconds; static int initialized = 0; static BOOL usePerformanceCounter = 0; if (!initialized) { LARGE_INTEGER performanceFrequency; initialized = 1; usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency); if (usePerformanceCounter) { QueryPerformanceCounter(&offset); frequencyToMicroseconds = (double)performanceFrequency.QuadPart / 1000000.; } else { offset = getFILETIMEoffset(); frequencyToMicroseconds = 10.; } } if (usePerformanceCounter) QueryPerformanceCounter(&t); else { GetSystemTimeAsFileTime(&f); t.QuadPart = f.dwHighDateTime; t.QuadPart <<= 32; t.QuadPart |= f.dwLowDateTime; } t.QuadPart -= offset.QuadPart; microseconds = (double)t.QuadPart / frequencyToMicroseconds; t.QuadPart = microseconds; tv->tv_sec = t.QuadPart / 1000000; tv->tv_usec = t.QuadPart % 1000000; return (0); } void get_realtime(elapsed_t *tm) { clock_gettime(0, (timeval *)tm); } double convert_time(elapsed_t time1, elapsed_t time0) { return ((double)time1.tv_sec - (double)time0.tv_sec + ((double)time1.tv_usec - (double)time0.tv_usec) / 1.0e+6); } int convert_sec(elapsed_t t) { return (int)t.tv_sec; } int convert_microsec(elapsed_t t) { return (int)(t.tv_usec); } #else // _MSC_VER void get_realtime(elapsed_t *tm) { //clock_gettime(CLOCK_REALTIME, tm); clock_gettime(CLOCK_MONOTONIC, tm); } double convert_time(elapsed_t time1, elapsed_t time0) 
{ double t; t = ((double)time1.tv_sec - (double)time0.tv_sec + ((double)time1.tv_nsec - (double)time0.tv_nsec) / 1.0e+9); return t; } int convert_sec(elapsed_t t) { return (int)t.tv_sec; } int convert_microsec(elapsed_t t) { return (int)(t.tv_nsec / 1.0e+3); } #endif // _MSC_VER #else /* #ifdef CLOCK_GETTIME */ #ifdef GETRUSAGE void get_realtime(elapsed_t *tm) { getrusage(RUSAGE_SELF, tm); } double convert_time(elapsed_t time1, elapsed_t time0) { double t; t = ((double)time1.ru_utime.tv_sec - (double)time0.ru_utime.tv_sec + (double)time1.ru_stime.tv_sec - (double)time0.ru_stime.tv_sec + ((double)time1.ru_utime.tv_usec - (double)time0.ru_utime.tv_usec + (double)time1.ru_stime.tv_usec - (double)time0.ru_stime.tv_usec) / 1.0e+6); return t; } int convert_sec(elapsed_t t) { int t0 = (int)(t.ru_utime.tv_sec + t.ru_stime.tv_sec); int t1 = (int)(t.ru_utime.tv_usec + t.ru_stime.tv_usec); return (int)(t0 + t1 / 1000000); } int convert_microsec(elapsed_t t) { int t1 = (int)(t.ru_utime.tv_usec + t.ru_stime.tv_usec); return (int)(t1 % 1000000); } #else #ifdef CLOCK void get_realtime(elapsed_t *tm) { *tm = clock(); } double convert_time(elapsed_t time1, elapsed_t time0) { double t; t = (time1 - time0) / CLOCKS_PER_SEC; return t; } int convert_sec(elapsed_t t) { return (int)(t / CLOCKS_PER_SEC); } int convert_microsec(elapsed_t t) { return (int)((t * 1.0e+3) / CLOCKS_PER_SEC); } #else void get_realtime(elapsed_t *tm) { gettimeofday(tm, (struct timezone *)0); } double convert_time(elapsed_t time1, elapsed_t time0) { double t; t = ((double)time1.tv_sec - (double)time0.tv_sec + ((double)time1.tv_usec - (double)time0.tv_usec) / 1.0e+6); return t; } int convert_sec(elapsed_t t) { return (int)t.tv_sec; } int convert_microsec(elapsed_t t) { return (int)t.tv_usec; } #endif /* #ifdef CLOCK */ #endif /* #ifdef GETRUSAGE */ #endif /* #ifdef CLOCK_GETTIME */ FreeFem-sources-4.9/3rdparty/dissection/src/Compiler/elapsed_time.hpp000664 000000 000000 00000007402 14037356732 025776 
0ustar00rootroot000000 000000 /*! \file elapsed_time.hpp \brief time measurement functions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 4th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _elapsed_time_ #ifdef CLOCK_GETTIME # ifdef _MSC_VER // added by Yann Collette # include typedef struct timeval elapsed_t; #define COPYTIME(a, b) ((a).tv_sec = (b).tv_sec);\ ((a).tv_usec = (b).tv_usec) # else // ! _MSC_VER == Linux #include typedef struct timespec elapsed_t; #define COPYTIME(a, b) ((a).tv_sec = (b).tv_sec);\ ((a).tv_nsec = (b).tv_nsec) #endif #else /* #ifdef CLOCK_GETTIME */ # ifdef GETRUSAGE # include # include typedef struct rusage elapsed_t; #define COPYTIME(a, b) ((a).ru_utime.tv_sec = (b).ru_utime.tv_sec); \ ((a).ru_utime.tv_usec = (b).ru_utime.tv_usec); \ ((a).ru_stime.tv_sec = (b).ru_stime.tv_sec); \ ((a).ru_stime.tv_usec = (b).ru_stime.tv_usec) # else /* #ifdef GETTIMEOFDAY */ #ifdef CLOCK // for NEC SX-ACE #include typedef clock_t elapsed_t; #define COPYTIME(a, b) (a = b); #else #include typedef struct timeval elapsed_t; #define COPYTIME(a, b) ((a).tv_sec = (b).tv_sec); \ ((a).tv_usec = (b).tv_usec) # endif /* #ifdef CLOCK */ # endif /* #ifdef GETRUSAGE */ #endif /* #ifdef CLOCK_GETTIME */ void get_realtime(elapsed_t *tm); double convert_time(elapsed_t time0, elapsed_t time1); int convert_sec(elapsed_t t); int convert_microsec(elapsed_t t); #define _elapsed_time_ #endif FreeFem-sources-4.9/3rdparty/dissection/src/Defs.inc000664 000000 000000 00000023530 14037356732 022434 0ustar00rootroot000000 000000 #! \file DissectionSolver.hpp # \brief task mangemanet of dissection algorithm # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Mar. 
30th 2012 # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. 
# # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # WORK_DIR = $(HOME)/work/Linux PROJ_DIR = $(WORK_DIR)/dissection PROJ_SRC = $(PROJ_DIR)/src PROJ_LIBDIR = $(PROJ_DIR)/lib UNAME := $(shell uname -s) # define BLAS MKL or ATLAS : be careful with no space after the name of variable # QUAD specifies 128bit floating point arithmetic : DD_REAL by qd library # LOND_DOUBLE on Intel specifies 80bit floating point, which may be enough ifeq ($(UNAME), Darwin) EXT_SHAREDLIB = dylib # BLAS = VECLIB BLAS = MKL COMPILER=LLVM QUAD = DD_REAL endif ifeq ($(UNAME), Linux) EXT_SHAREDLIB = so BLAS = MKL # BLAS = GENERIC COMPILER = INTEL17 # QUAD = FLOAT128 QUAD = DD_REAL endif PARALLEL = NONE MAKE = make -j 8 # DEBUGFLAG = -g -O3 DISSECTION = #DISSECTION = NO_METIS #DISSECTION = NO_SCOTCH # COMPILERBASE = $(COMPILER) VER = $(subst INTEL,,$(COMPILER)) ifneq ($(VER), $(COMPILER)) COMPILERBASE=$(subst $(VER),,$(COMPILER)) endif # ifeq ($(COMPILERBASE), INTEL) CXX = icpc -std=gnu++98 -restrict -fPIC -pthread LD = $(CXX) else ifeq ($(COMPILERBASE), GNU) CXX = g++ -std=c++98 -fPIC -pthread -Drestrict=__restrict LD = $(CXX) LDOPT = $(DEBUGFLAG) -shared MAKE_DEP_OPT = -MM DEBUGFLAG += #-pg -fno-omit-frame-pointer ifeq ($(UNAME), Darwin) SHARED_COMPILER = -framework Accelerate else SHARED_COMPILER = endif else ifeq ($(COMPILERBASE), LLVM) ifeq ($(UNAME), Linux) CXX = clang++-3.9 -std=c++03 -pthread -stdlib=libc++ -fPIC -Drestrict=__restrict LD = $(CXX) else CXX = clang -x c++ -std=c++03 -pthread -stdlib=libc++ -fPIC -Drestrict=__restrict LD = clang -fPIC -Drestrict=__restrict endif 
MAKE_DEP_OPT = -MM ifeq ($(UNAME), Darwin) LDOPT = $(DEBUGFLAG) -dynamiclib -install_name '$(PROJ_LIBDIR)/libDissection.dylib' -current_version 1.0 else LDOPT = $(DEBUGFLAG) -shared SHARED_COMPILER = endif VER = 17 else $(error COMPILER should be specified as one of INTEL14, GNU, LLVM) endif endif endif ifeq ($(UNAME), Darwin) # LDOPT = $(DEBUGFLAG) -dynamiclib -install_name '$(PROJ_LIBDIR)/libDissection.dylib' endif ifeq ($(UNAME), Linux) LDOPT = $(DEBUGFLAG) -shared endif MAKE_DEP_OPT = -MM # # metis and scotch are assumed to be compiled as shared library METIS_INC = $(WORK_DIR)/metis-5.1.0/include SCOTCH_INC = $(WORK_DIR)/scotch_6.0.4/include QD_INC = $(WORK_DIR)/include ifeq ($(UNAME), Darwin) LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0/build/Darwin-x86_64/libmetis endif ifeq ($(UNAME), Linux) LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0/build/Linux-x86_64/libmetis endif LIB_DIR_SCOTCH = $(WORK_DIR)/scotch_6.0.4/lib LIB_DIR_QD = $(WORK_DIR)/lib # ifeq ($(BLAS), MKL) # BLAS_DEF = -DBLAS_MKL -DMKL_ILP64 # 64bit (long long) int for CSR data BLAS_DEF = -DBLAS_MKL # 32bit int ifeq ($(VER), 18) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries_2018/linux/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2018/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2018/linux/compiler/lib/intel64 endif else ifeq ($(VER), 17) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries_2017/linux/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2017/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2017/linux/compiler/lib/intel64 endif ifeq ($(UNAME), Darwin) BLAS_INC = /opt/intel/compilers_and_libraries_2017/mac/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2017/mac/mkl/lib LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2017/mac/lib endif else ifeq ($(VER), 16) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries/linux/mkl/include/ LIB_DIR_MKL = 
/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries/linux/compiler/lib/intel64 endif else ifeq ($(VER), 15) BLAS_INC = /opt/intel/composer_xe_2015/mkl/include ifeq ($(UNAME), Linux) LIB_DIR_MKL = /opt/intel/composer_xe_2015/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/composer_xe_2015/compiler/lib/intel64 else ifeq ($(UNAME), Darwin) LIB_DIR_MKL = /opt/intel/composer_xe_2015/mkl/lib LIB_DIR_INTEL = /opt/intel/composer_xe_2015/lib endif endif else ifeq ($(VER), 14) BLAS_INC = /opt/intel/composer_xe_2013_sp1/mkl/include ifeq ($(UNAME), Linux) LIB_DIR_MKL = /opt/intel/composer_xe_2013_sp1/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/composer_xe_2013_sp1/compiler/lib/intel64 else ifeq ($(UNAME), Darwin) LIB_DIR_MKL = /opt/intel/composer_xe_2013_sp1/mkl/lib LIB_DIR_INTEL = /opt/intel/composer_xe_2013_sp1/lib endif endif else $(error MKLverion should be 14, 15, 16, 17 or 18) endif endif endif endif endif # ifeq ($(UNAME), Linux) SHARED_COMPILER = -L$(LIB_DIR_INTEL) -lpthread -lrt SHARED_BLAS = -L$(LIB_DIR_MKL) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # SHARED_BLAS = -L$(LIB_DIR_MKL) -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core ## for 64bit integer else ifeq ($(UNAME), Darwin) SHARED_COMPILER = -L$(LIB_DIR_INTEL) -lc++ -lm #-lintlc -lsvml -lc++ -lm SHARED_BLAS = -Wl,-rpath,$(LIB_DIR_MKL) -L$(LIB_DIR_MKL) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core endif endif # else ifeq ($(BLAS), ATLAS) BLAS_DEF = -DBLAS_ATLAS # for ATLAS BLAS ifeq ($(COMPILER), INTEL) BLAS_INC = /usr/local/atlas/intel/include else BLAS_INC = /usr/local/atlas/include endif else ifeq ($(BLAS), OPENBLAS) BLAS_DEF = -DOPENBLAS OPENBLAS_DIR = $(WORK_DIR)/OpenBLAS-0.2.14 BLAS_INC = $(OPENBLAS_DIR) SHARED_BLAS = -L$(OPENBLAS_DIR) -lopenblas else ifeq ($(BLAS), VECLIB) ifeq ($(UNAME), Darwin) BLAS_DEF = -DVECLIB BLAS_INC = /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers SHARED_COMPILER = -framework 
Accelerate -lc++ -lm endif else ifeq ($(BLAS), GENERIC) BLAS_DEF = -DBLAS_GENERIC BLAS_INC = .. SHARED_BLAS = SHARED_COMPILER = -lc++ -lm else $(error BLAS should be specified as one of MKL, VECLIB, OpenBLAS, GENERIC) endif endif endif endif endif # CCFLAGS = $(DEBUGFLAG) -I$(SCOTCH_INC) $(BLAS_DEF) -I$(BLAS_INC) -I$(PROJ_SRC) # to use pthread.h in case of C++03 CCFLAGS += -DPOSIX_THREADS # -DNO_TO_STRING for C++ compiler without to_string() defined in C++11 ifeq ($(COMPILERBASE), INTEL) CCFLAGS += -DNO_TO_STRING endif ifeq ($(DISSECTION), NO_METIS) CCFLAGS += -DNO_METIS LIB_METIS = else CCFLAGS += -I$(METIS_INC) ifeq ($(UNAME), Linux) LIB_METIS = -Xlinker -rpath=$(LIB_DIR_METIS) -L$(LIB_DIR_METIS) -lmetis else LIB_METIS = -L$(LIB_DIR_METIS) -lmetis endif endif # ifeq ($(QUAD), DD_REAL) CCFLAGS += -D$(QUAD) -I$(QD_INC) ifeq ($(UNAME), Linux) LIB_QD = -Xlinker -rpath=$(LIB_DIR_QD) -L$(LIB_DIR_QD) -lqd else LIB_QD = -L$(LIB_DIR_QD) -lqd endif endif ifeq ($(QUAD), LONG_DOUBLE) CCFLAGS += -D$(QUAD) -DNO_OCTRUPLE endif ifeq ($(QUAD), FLOAT128) CCFLAGS += -DNO_OCTRUPLE LIB_QD = -lquadmath endif # # ifeq ($(UNAME), Darwin) CCFLAGS += #-DGETRUSAGE LIB_SCOTCH = -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr endif ifeq ($(UNAME), Linux) CCFLAGS += -DCLOCK_GETTIME LIB_SCOTCH = -Xlinker -rpath=$(LIB_DIR_SCOTCH) -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/000775 000000 000000 00000000000 14037356732 022310 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_BlasRoutines.cpp000664 000000 000000 00000211701 14037356732 025672 0ustar00rootroot000000 000000 /*! \file C_BlasRoutines.cpp \brief factorization routines LDL^t, LDU, forward/backward substitution \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Driver/C_BlasRoutines.hpp" #include "Driver/C_threads_tasks.hpp" #include "Algebra/VectorArray.hpp" // T may be complex of U that is higher precision than Z template Z matrix_infty_norm_(const int n, T *a, const int lda) { Z err(0.0); for (int i = 0; i < n; i++) { U err_tmp1(0.0); for (int j = 0; j < n; j++) { err_tmp1 += blas_abs(a[i + j * lda]); } Z err_tmp0 = conv_prec(err_tmp1); // accuracy conversion : U to Z err = err > err_tmp0 ? err : err_tmp0; } return err; } template double matrix_infty_norm_(const int n, quadruple *a, const int lda); #ifndef NO_OCTRUPLE template quadruple matrix_infty_norm_(const int n, octruple *a, const int lda); #endif template float matrix_infty_norm_(const int n, double *a, const int lda); template double matrix_infty_norm_, quadruple, double>(const int n, complex *a, const int lda); template float matrix_infty_norm_, double, float>(const int n, complex *a, const int lda); #ifndef NO_OCTRUPLE template quadruple matrix_infty_norm_, octruple, quadruple>(const int n, complex *a, const int lda); #endif // template U matrix_infty_norm(const int n, T *a, const int lda) { fprintf(stderr, "%s %d : general template is not implemented\n", __FILE__, __LINE__); return U(0.0); } template<> double matrix_infty_norm(const int n, quadruple *a, const int lda) { return matrix_infty_norm_(n, a, lda); } template<> float matrix_infty_norm(const int n, double *a, const int lda) { return matrix_infty_norm_(n, a, lda); } #ifndef NO_OCTRUPLE template<> quadruple 
matrix_infty_norm(const int n, octruple *a, const int lda) { return matrix_infty_norm_(n, a, lda); } #endif template<> double matrix_infty_norm, double>(const int n, complex *a, const int lda) { return matrix_infty_norm_, quadruple, double>(n, a, lda); } template<> float matrix_infty_norm, float>(const int n, complex *a, const int lda) { return matrix_infty_norm_, double, float>(n, a, lda); } #ifndef NO_OCTRUPLE template<> quadruple matrix_infty_norm, quadruple>(const int n, complex *a, const int lda) { return matrix_infty_norm_, octruple, quadruple>(n, a, lda); } #endif // #define LDLT_LOWER // *pivot is dealt as "double", which give no defect on accuracy, because // magnitude is important in the value of pivot template bool full_ldlt_permute(int *nn0, const int n0, const int n, T *a, const int lda, double *pivot, int *permute, const double eps, double *fop) { const int lda1 = lda + 1; T alpha; const T one(1.0); const T none(-1.0); bool flag = true; VectorArray col_k(n); VectorArray col_km(n); alpha = none; for (int i = 0; i < n; i++) { permute[i] = i; } const int n1 = n - n0; int k = 0; while (k < n1) { int km = k; double vmax = 0.0; km = blas_iamax((n - k), a + (k * lda1), lda1) + k; vmax = blas_abs(a[km * lda1]); if (vmax < (*pivot * eps)) { // enough in doule precision flag = false; break; } *pivot = vmax; if (km > k) { int itmp = permute[km]; permute[km] = permute[k]; permute[k] = itmp; // swap row/column #ifdef LDLT_LOWER swap_sym_lower(n, a, lda, k, km, col_k.addrCoefs(), col_km.addrCoefs()); #else swap_sym_upper(n, a, lda, k, km, col_k.addrCoefs(), col_km.addrCoefs()); #endif } const T d = one / a[k * lda1]; a[k * lda1] = d; if (k == (n1 - 1)) { k++; break; } alpha = (-d); #ifdef LDLT_LOWER blas_syr(CblasLower, (n - k - 1), alpha, &a[k * lda1 + 1], 1, &a[(k + 1) * lda1], lda); #else blas_syr(CblasUpper, (n - k - 1), alpha, &a[k * lda1 + lda], lda, &a[(k + 1) * lda1], lda); #endif *fop += 3.0 * (double)(n - k - 1) * (double)(n - k) / 2.0; #ifdef LDLT_LOWER 
blas_scal((n - k - 1), d, &a[k * lda1 + 1], 1); #else blas_scal((n - k - 1), d, &a[k * lda1 + lda], lda); #endif *fop += double(n - k - 1); k++; } // while (k < n1) // symmetrize the results : copy lower to upper #ifdef LDLT_LOWER for (int j = 1; j < n1; j++) { for (int i = 0; i < j; i++) { a[i + j * lda] = a[j + i * lda]; } } #else for (int j = 1; j < n1; j++) { for (int i = 0; i < j; i++) { a[j + i * lda] = a[i + j * lda]; } } #endif *nn0 = n - k; return flag; } template bool full_ldlt_permute(int *nn0, const int n0, const int n, double *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute, double>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute(int *nn0, const int n0, const int n, float *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute, float>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute(int *nn0, const int n0, const int n, quadruple *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute, quadruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute(int *nn0, const int n0, const int n, octruple *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldlt_permute, octruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); // template bool full_ldu_permute(int *nn0, const int n0, const int n, T *a, const int lda, double *pivot, int *permute, const double eps, double *fop) { const int lda1 = lda + 1; // T *col_k, *col_km; const T one(1.0); const T 
none(-1.0); bool flag = true; VectorArray col_k(n); VectorArray col_km(n); for (int i = 0; i < n; i++) { permute[i] = i; } const int n1 = n - n0; int k = 0; while (k < n1) { int km = k; double vmax = 0.0; km = blas_iamax((n - k), a + (k * lda1), lda1) + k; vmax = blas_abs(a[km * lda1]); if (vmax < (*pivot * eps)) { flag = false; break; } *pivot = vmax; if (km > k) { int itmp = permute[km]; permute[km] = permute[k]; permute[k] = itmp; // swap row/column swap_unsym(n, a, lda, k, km, col_k.addrCoefs(), col_km.addrCoefs()); } const T d = one / a[k * lda1]; a[k * lda1] = d; if (k == (n1 - 1)) { k++; break; } // alpha = none; blas_scal((n - k - 1), d, &a[k * lda1 + lda], lda); blas_ger((n - k - 1), (n - k - 1), none, &a[k * lda1 + 1], 1, &a[k * lda1 + lda], lda, &a[(k + 1) * lda1], lda); *fop += 3.0 * (double)(n - k - 1) * (double)(n - k) / 2.0; blas_scal((n - k - 1), d, &a[k * lda1 + 1], 1); *fop += 2.0 * double(n - k - 1); k++; } // while (k < n1) *nn0 = n - k; return flag; } template bool full_ldu_permute(int *nn0, const int n0, const int n, double *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute, double>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute(int *nn0, const int n0, const int n, float *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute, float>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute(int *nn0, const int n0, const int n, quadruple *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute, quadruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute(int *nn0, const 
int n0, const int n, octruple *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool full_ldu_permute, octruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute, const double eps, double *fop); // template bool ldu_full_permute(int *nn0, const int n0, const int n, T *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop) { const int lda1 = lda + 1; // T *col_k, *col_km; const T one(1.0); const T none(-1.0); bool flag = true; VectorArray coli(n); VectorArray colj(n); for (int i = 0; i < n; i++) { permute_right[i] = i; permute_left[i] = i; } const int n1 = n - n0; int k = 0; while (k < n1) { int ki, kj, itmp; double vmax = 0.0; for (int i = k; i < n; i++) { for (int j = k; j < n; j++) { double tmp = blas_abs(a[i + j * lda]); if (tmp > vmax) { vmax = tmp; ki = i; kj = j; } } } if (vmax < (*pivot * eps)) { flag = false; break; } *pivot = vmax; if (kj > k) { itmp = permute_right[kj]; permute_right[kj] = permute_right[k]; permute_right[k] = itmp; } if (ki > k) { itmp = permute_left[ki]; permute_left[ki] = permute_left[k]; permute_left[k] = itmp; } // swap row/column swap_full(n, a, lda, k, ki, kj, coli.addrCoefs(), colj.addrCoefs()); const T d = one / a[k * lda1]; a[k * lda1] = d; if (k == (n1 - 1)) { k++; break; } // alpha = none; blas_scal((n - k - 1), d, &a[k * lda1 + lda], lda); blas_ger((n - k - 1), (n - k - 1), none, &a[k * lda1 + 1], 1, &a[k * lda1 + lda], lda, &a[(k + 1) * lda1], lda); *fop += 3.0 * (double)(n - k - 1) * (double)(n - k) / 2.0; blas_scal((n - k - 1), d, &a[k * lda1 + 1], 1); *fop += 2.0 * double(n - k - 1); k++; } // while (k < n1) *nn0 = n - k; return flag; } template bool ldu_full_permute(int *nn0, const int n0, const int n, double *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute, double>(int *nn0, const int n0, const int n, 
complex *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute(int *nn0, const int n0, const int n, quadruple *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute, quadruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute(int *nn0, const int n0, const int n, octruple *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute, octruple>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute(int *nn0, const int n0, const int n, float *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template bool ldu_full_permute, float>(int *nn0, const int n0, const int n, complex *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); // template void swap_sym_lower(const int n, T *a, const int lda, const int k, const int km, T *col_k, T *col_km) { const int lda1 = lda + 1; for (int i = k; i < n; i++) { col_k[i] = a[i + k * lda]; // save lower column k } for (int j = 0; j < k; j++) { col_k[j] = a[k + j * lda]; // save lower row k } for (int i = km; i < n; i++) { col_km[i] = a[i + km * lda]; // save lower column km } for (int j = 0; j < km; j++) { col_km[j] = a[km + j * lda]; // save lower row km } for (int i = (k + 1); i < n; i++) { a[i + k * lda] = col_km[i]; // restore lower column k } for (int i = (km + 1); i < n; i++) { a[i + km * lda] = col_k[i]; // restore lower column km } for (int j = 0; j < k; j++) { a[k + j * lda] = col_km[j]; // restore lower row k } for (int j = 0; j < km; 
j++) { a[km + j * lda] = col_k[j]; // restore lower row km } a[k * lda1] = col_km[km]; a[km * lda1] = col_k[k]; a[km + k * lda] = col_k[km]; } template void swap_sym_lower(const int n, double *a, const int lda, const int k, const int km, double *col_k, double *col_km); template void swap_sym_lower >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km); template void swap_sym_lower(const int n, float *a, const int lda, const int k, const int km, float *col_k, float *col_km); template void swap_sym_lower >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km); template void swap_sym_lower(const int n, quadruple *a, const int lda, const int k, const int km, quadruple *col_k, quadruple *col_km); template void swap_sym_lower >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km); // template void swap_sym_upper(const int n, T *a, const int lda, const int k, const int km, T *col_k, T *col_km) { const int lda1 = lda + 1; for (int i = k; i < n; i++) { col_k[i] = a[k + i * lda]; // save lower column k } for (int j = 0; j < k; j++) { col_k[j] = a[j + k * lda]; // save lower row k } for (int i = km; i < n; i++) { col_km[i] = a[km + i * lda]; // save lower column km } for (int j = 0; j < km; j++) { col_km[j] = a[j + km * lda]; // save lower row km } for (int i = (k + 1); i < n; i++) { a[k + i * lda] = col_km[i]; // restore lower column k } for (int i = (km + 1); i < n; i++) { a[km + i * lda] = col_k[i]; // restore lower column km } for (int j = 0; j < k; j++) { a[j + k * lda] = col_km[j]; // restore lower row k } for (int j = 0; j < km; j++) { a[j + km * lda] = col_k[j]; // restore lower row km } a[k * lda1] = col_km[km]; a[km * lda1] = col_k[k]; a[k + km * lda] = col_k[km]; } template void swap_sym_upper(const int n, double *a, const int lda, const int k, const int km, double *col_k, double *col_km); template void swap_sym_upper >(const 
int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
template void swap_sym_upper(const int n, float *a, const int lda, const int k, const int km, float *col_k, float *col_km);
template void swap_sym_upper >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
template void swap_sym_upper(const int n, quadruple *a, const int lda, const int k, const int km, quadruple *col_k, quadruple *col_km);
template void swap_sym_upper >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
//
// swap_unsym : exchanges indices k and km of the full n x n matrix a, whose
// entry (i, j) is addressed as a[i + j * lda].  The exchange is done in two
// passes, first on the entries a[. + k * lda] / a[. + km * lda], then on the
// entries a[k + . * lda] / a[km + . * lda].
// col_k and col_km are caller-provided scratch buffers of at least n entries;
// on return they hold the values copied during the second pass.
// NOTE(review): the template parameter list is not visible in this copy of
// the file; T is presumably the matrix element type - confirm.
template
void swap_unsym(const int n, T *a, const int lda, const int k, const int km, T *col_k, T *col_km) {
  // pass 1 : copy out a[i + k * lda] and a[i + km * lda] for every i ...
  for (int i = 0; i < n; i++) {
    col_k[i] = a[i + k * lda];     // old column k
    col_km[i] = a[i + km * lda];   // old column km
  }
  // ... and write them back exchanged
  for (int i = 0; i < n; i++) {
    a[i + km * lda] = col_k[i];    // column km <- old column k
    a[i + k * lda] = col_km[i];    // column k  <- old column km
  }
  // pass 2 : same exchange on a[k + i * lda] and a[km + i * lda], read after
  // pass 1 has been applied
  for (int i = 0; i < n; i++) {
    col_k[i] = a[k + i * lda];     // row k
    col_km[i] = a[km + i * lda];   // row km
  }
  for (int i = 0; i < n; i++) {
    a[km + i * lda] = col_k[i];    // row km <- row k
    a[k + i * lda] = col_km[i];    // row k  <- row km
  }
}
template void swap_unsym(const int n, double *a, const int lda, const int k, const int km, double *col_k, double *col_km);
template void swap_unsym >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
template void swap_unsym(const int n, float *a, const int lda, const int k, const int km, float *col_k, float *col_km);
template void swap_unsym >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
template void swap_unsym(const int n, quadruple *a, const int lda, const int k, const int km, quadruple *col_k, quadruple *col_km);
template void swap_unsym >(const int n, complex *a, const int lda, const int k, const int km, complex *col_k, complex *col_km);
//
template
void swap_full(const int n, T *a, const int lda, const int k, const int ki, const int kj, T *coli, T *colj) {
  // Moves entry (ki, kj) of the n x n matrix a (entry (i, j) addressed as
  // a[i + j * lda]) to position (k, k): first a[. + k * lda] and
  // a[. + kj * lda] are exchanged, then a[k + . * lda] and a[ki + . * lda].
  // coli and colj are caller-provided scratch buffers of at least n entries.
  for (int i = 0; i < n; i++) {
    coli[i] = a[i + k * lda];    // old column k
    colj[i] = a[i + kj * lda];   // old column kj
  }
  for (int i = 0; i < n; i++) {
    a[i + kj * lda] = coli[i];   // column kj <- old column k
    a[i + k * lda] = colj[i];    // column k  <- old column kj
  }
  // the rows are read after the column exchange above has been applied
  for (int i = 0; i < n; i++) {
    coli[i] = a[k + i * lda];    // row k
    colj[i] = a[ki + i * lda];   // row ki
  }
  for (int i = 0; i < n; i++) {
    a[ki + i * lda] = coli[i];   // row ki <- row k
    a[k + i * lda] = colj[i];    // row k  <- row ki
  }
}
template void swap_full(const int n, double *a, const int lda, const int k, const int ki, const int kj, double *coli, double *colj);
template void swap_full >(const int n, complex *a, const int lda, const int k, const int ki, const int kj, complex *coli, complex *colj);
template void swap_full(const int n, quadruple *a, const int lda, const int k, const int ki, const int kj, quadruple *coli, quadruple *colj);
template void swap_full >(const int n, complex *a, const int lda, const int k, const int ki, const int kj, complex *coli, complex *colj);
template void swap_full(const int n, float *a, const int lda, const int k, const int ki, const int kj, float *coli, float *colj);
template void swap_full >(const int n, complex *a, const int lda, const int k, const int ki, const int kj, complex *coli, complex *colj);
//
// full_ldlt : in-place LDL^t factorization without pivoting of the n x n
// matrix a (entry (i, j) addressed as a[i + j * lda]).  The elimination works
// on the entries below the diagonal; each diagonal entry is replaced by its
// inverse, and at the end the entries below the diagonal are copied to the
// mirrored positions above it.
// NOTE(review): the last statement reads a[(n - 1) * lda1] unconditionally,
// so n is presumably required to be >= 1 - confirm with the callers.
// NOTE(review): the template parameter list is not visible in this copy of
// the file; T is presumably the matrix element type - confirm.
template
void full_ldlt(const int n, T *a, const int lda) {
  T alpha;
  const T one(1.0);
  const int lda1 = lda + 1;   // distance between consecutive diagonal entries
  for (int k = 0; k < (n - 1); k++) {
    T d = one / a[k * lda1];
    a[k * lda1] = d;   // the diagonal keeps the inverse of the pivot
    alpha = (-d);
    // update of the trailing block with the entries below the pivot, scaled
    // by -d, followed by the scaling of those entries by d
    blas_syr(CblasLower, (n - k - 1), alpha, &a[(k + 1) + k * lda], 1, &a[(k + 1) * lda1], lda);
    blas_scal((n - k - 1), d, &a[(k + 1) + k * lda], 1);
  }
  // symmetrize : copy a[j + i * lda] to a[i + j * lda] for i < j
  for (int j = 1; j < n; j++) {
    for (int i = 0; i < j; i++) {
      a[i + j * lda] = a[j + i * lda];
    }
  }
  // the loop above stops at n - 2 : invert the last diagonal entry here
  a[(n - 1) * lda1] = one / a[(n - 1) * lda1];
}
template void full_ldlt(const int n, double *a, const int lda);
template
void full_ldlt(const int n, quadruple *a, const int lda); template void full_ldlt(const int n, float *a, const int lda); #ifndef NO_OCTRUPLE template void full_ldlt(const int n, octruple *a, const int lda); #endif template void full_ldlt >(const int n, complex *a, const int lda); template void full_ldlt >(const int n, complex *a, const int lda); template void full_ldlt >(const int n, complex *a, const int lda); #ifndef NO_OCTRUPLE template void full_ldlt >(const int n, complex *a, const int lda); #endif // template void full_ldlh(const int n, T *a, const int lda) { T alpha; const T one(1.0); const int lda1 = lda + 1; for (int k = 0; k < (n - 1); k++) { const T d = one / a[k * lda1]; alpha = (-d); a[k * lda1] = d; blas_syrc(CblasLower, (n - k - 1), alpha, &a[(k + 1) + k * lda], 1, &a[(k + 1) * lda1], lda); blas_scal((n - k - 1), d, &a[(k + 1) + k * lda], 1); } // symmetrize for (int j = 1; j < n; j++) { for (int i = 0; i < j; i++) { a[i + j * lda] = std::conj(a[j + i * lda]); } } a[(n - 1) * lda1] = one / a[(n - 1) * lda1]; } template<> void full_ldlh(const int n, double *a, const int lda) { full_ldlt(n, a, lda); } template<> void full_ldlh(const int n, quadruple *a, const int lda) { full_ldlt(n, a, lda); } template<> void full_ldlh(const int n, float *a, const int lda) { full_ldlt(n, a, lda); } #ifndef NO_OCTRUPLE template<> void full_ldlh(const int n, octruple *a, const int lda) { full_ldlt(n, a, lda); } #endif template void full_ldlh >(const int n, complex *a, const int lda); template void full_ldlh >(const int n, complex *a, const int lda); template void full_ldlh >(const int n, complex *a, const int lda); #ifndef NO_OCTRUPLE template void full_ldlh >(const int n, complex *a, const int lda); #endif // template void full_ldu(const int n, T *a, const int lda) { const int lda1 = lda + 1; const T one(1.0); const T none(-1.0); // alpha = none; for (int k = 0; k < (n - 1); k++) { const T d = one / a[k * lda1]; a[k * lda1] = d; blas_scal((n - k - 1), d, &a[k * lda1 + 
lda], lda); blas_ger((n - k - 1), (n - k - 1), none, &a[k * lda1 + 1], 1, &a[k * lda1 + lda], lda, &a[(k + 1) * lda1], lda); blas_scal((n - k - 1), d, &a[k * lda1 + 1], 1); } a[(n - 1) * lda1] = one / a[(n - 1) * lda1]; } template void full_ldu(const int n, double *a, const int lda); template void full_ldu(const int n, quadruple *a, const int lda); template void full_ldu(const int n, float *a, const int lda); #ifndef NO_OCTRUPLE template void full_ldu(const int n, octruple *a, const int lda); #endif template void full_ldu >(const int n, complex *a, const int lda); template void full_ldu >(const int n, complex *a, const int lda); template void full_ldu >(const int n, complex *a, const int lda); #ifndef NO_OCTRUPLE template void full_ldu >(const int n, complex *a, const int lda); #endif // template void FillUpperBlock(const int nrow, T *coef, const int *prow, const int *indcols, const int *indvals, const int *old2new_i, const int *old2new_j, ColumnMatrix &b) { for (int i = 0; i < nrow; i++) { const int ii = old2new_i[i]; for (int k = prow[ii]; k < prow[ii + 1]; k++) { const int jj = old2new_j[indcols[k]]; b(ii, jj) = coef[indvals[k]]; } } } template void FillUpperBlock(const int nnz, double *coef, const int *prow1, const int *indcols1, const int *indvals1, const int *new2old, const int *old_j, ColumnMatrix &b); template void FillUpperBlock >(const int nnz, complex *coef, const int *prow1, const int *indcols1, const int *indvals1, const int *new2old, const int *old_j, ColumnMatrix > &b); template void FillUpperBlock(const int nnz, quadruple *coef, const int *prow1, const int *indcols1, const int *indvals1, const int *new2old, const int *old_j, ColumnMatrix &b); template void FillUpperBlock >(const int nnz, complex *coef, const int *prow1, const int *indcols1, const int *indvals1, const int *new2old, const int *old_j, ColumnMatrix > &b); template void FillUpperBlock(const int nnz, float *coef, const int *prow1, const int *indcols1, const int *indvals1, const int 
*new2old, const int *old_j, ColumnMatrix &b); template void FillUpperBlock >(const int nnz, complex *coef, const int *prow1, const int *indcols1, const int *indvals1, const int *new2old, const int *old_j, ColumnMatrix > &b); // template void full_fw_multiprofile(bool isTransposed, const int nrow, const int n0, const int ncol, T *a, const int lda, T *y, const int ldy, vector &i0, double *fop) { const T one(1.0); const T zero(0.0); int jlast; const int n1 = nrow - n0; list j1; for (int j = 0; j < (ncol - 1); j++) { if (i0[j] < i0[j + 1]) { j1.push_back(j); } } j1.push_back(ncol - 1); jlast = (-1); for (list::const_iterator jt = j1.begin(); jt != j1.end(); ++jt) { const int width = (*jt) - jlast; const int ioffset = i0[(*jt)]; const int height = nrow - ioffset - n0; if (height > 0) { if (width > 1) { blas_trsm(CblasLeft, (isTransposed ? CblasUpper : CblasLower), (isTransposed ? CblasTrans : CblasNoTrans), CblasUnit, height, width, one, a + (ioffset * (nrow + 1)), nrow, y + (ioffset + (jlast + 1) * ldy), ldy); *fop += (double)height * (double)(height - 1) * (double)width; } else { blas_trsv((isTransposed ? CblasUpper : CblasLower), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, height, a + (ioffset * (nrow + 1)), nrow, y + (ioffset + (jlast + 1) * ldy), 1); *fop += (double)height * (double)(height - 1); } } jlast = (*jt); } // loop : jt for (int j = 0; j < ncol; j++) { for (int i = n1; i < nrow; i++) { y[i + j * ldy] = zero; } } } template void full_fw_multiprofile(bool isTransposed, const int nrow, const int n0, const int ncol, double *a, const int lda, double *y, const int ldy, vector &i0, double *fop); template void full_fw_multiprofile >(bool isTransposed, const int nrow, const int n0, const int ncol, complex *a, const int lda, complex *y, const int ldy, vector &i0, double *fop); template void full_fw_multiprofile(bool isTransposed, const int nrow, const int n0, const int ncol, quadruple *a, const int lda, quadruple *y, const int ldy, vector &i0, double *fop); template void full_fw_multiprofile >(bool isTransposed, const int nrow, const int n0, const int ncol, complex *a, const int lda, complex *y, const int ldy, vector &i0, double *fop); template void full_fw_multiprofile(bool isTransposed, const int nrow, const int n0, const int ncol, float *a, const int lda, float *y, const int ldy, vector &i0, double *fop); template void full_fw_multiprofile >(bool isTransposed, const int nrow, const int n0, const int ncol, complex *a, const int lda, complex *y, const int ldy, vector &i0, double *fop); // template void full_fw_multi(bool isTransposed, const int nrow, const int n0, T *a, const int lda, const int ncol, T *y, const int ldy, double *fop) { const T one(1.0); const T zero(0.0); const int n1 = nrow - n0; // alpha = 1.0 // symmetric matrix should have upper part by symmeterization : LDL^t blas_trsm(CblasLeft, (isTransposed ? CblasUpper : CblasLower), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, n1, ncol, one, a, lda, y, ldy); *fop += (double)nrow * (double)(nrow - 1) * (double)ncol; for (int j = 0; j < ncol; j++) { for (int i = n1; i < nrow; i++) { y[i + j * ldy] = zero; } } } template void full_fw_multi(bool isTransposed, const int nrow, const int n0, double *a, const int lda, const int ncol, double *y, const int ldy, double *fop); template void full_fw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); template void full_fw_multi(bool isTransposed, const int nrow, const int n0, quadruple *a, const int lda, const int ncol, quadruple *y, const int ldy, double *fop); template void full_fw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); template void full_fw_multi(bool isTransposed, const int nrow, const int n0, float *a, const int lda, const int ncol, float *y, const int ldy, double *fop); template void full_fw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); // template void full_fw_single(bool isTransposed, const int nrow, const int n0, T *a, const int lda, T *y, double *fop) { const T zero(0.0); const int n1 = nrow - n0; blas_trsv((isTransposed ? CblasUpper : CblasLower), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, n1, a, lda, y, 1); *fop += (double)nrow * (double)(nrow - 1); for (int i = n1; i < nrow; i++) { y[i] = zero; } } template void full_fw_single(bool isTransposed, const int nrow, const int n0, double *a, const int lda, double *y, double *fop); template void full_fw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); template void full_fw_single(bool isTransposed, const int nrow, const int n0, quadruple *a, const int lda, quadruple *y, double *fop); template void full_fw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); template void full_fw_single(bool isTransposed, const int nrow, const int n0, float *a, const int lda, float *y, double *fop); template void full_fw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); // template void full_bw_single(bool isTransposed, const int nrow, const int n0, T *a, const int lda, T *y, double *fop) { const T zero(0.0); const int n1 = nrow - n0; blas_trsv((isTransposed ? CblasLower : CblasUpper), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, n1, a, lda, y, 1); *fop += (double)nrow * (double)(nrow - 1); for (int i = n1; i < nrow; i++) { y[i] = zero; } } template void full_bw_single(bool isTransposed, const int nrow, const int n0, double *a, const int lda, double *y, double *fop); template void full_bw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); template void full_bw_single(bool isTransposed, const int nrow, const int n0, quadruple *a, const int lda, quadruple *y, double *fop); template void full_bw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); template void full_bw_single(bool isTransposed, const int nrow, const int n0, float *a, const int lda, float *y, double *fop); template void full_bw_single >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, complex *y, double *fop); // template void full_bw_multi(bool isTransposed, const int nrow, const int n0, T *a, const int lda, const int ncol, T *y, const int ldy, double *fop) { const T one(1.0); const T zero(0.0); const int n1 = nrow - n0; // alpha = 1.0 // symmetric matrix should have upper part by symmeterization : LDL^t blas_trsm(CblasLeft, (isTransposed ? CblasLower : CblasUpper), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, n1, ncol, one, a, lda, y, ldy); *fop += (double)nrow * (double)(nrow - 1) * (double)ncol; for (int j = 0; j < ncol; j++) { for (int i = n1; i < nrow; i++) { y[i + j * ldy] = zero; } } } template void full_bw_multi(bool isTransposed, const int nrow, const int n0, double *a, const int lda, const int ncol, double *y, const int ldy, double *fop); template void full_bw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); template void full_bw_multi(bool isTransposed, const int nrow, const int n0, quadruple *a, const int lda, const int ncol, quadruple *y, const int ldy, double *fop); template void full_bw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); template void full_bw_multi(bool isTransposed, const int nrow, const int n0, float *a, const int lda, const int ncol, float *y, const int ldy, double *fop); template void full_bw_multi >(bool isTransposed, const int nrow, const int n0, complex *a, const int lda, const int ncol, complex *y, const int ldy, double *fop); // template void SparseSchur(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix &upper, ColumnMatrix &lower, ColumnMatrix &diag, double *fop) { const T zero(0.0); const T none(-1.0); // alpha = -1, beta = 0 if (isSym) { C_gemm_symm(dim2, dim1, none, lower.addrCoefs(), dim1, upper.addrCoefs(), dim1, zero, diag.addrCoefs(), dim2); // symmetrize : upper -> lower for (int i = 0; i < dim2; i++) { for (int j = 0; j < i; j++) { diag(i, j) = diag(j, i); } } } else { blas_gemm(CblasTrans, CblasNoTrans, dim2, dim2, dim1, none, lower.addrCoefs(), dim1, upper.addrCoefs(), dim1, zero, diag.addrCoefs(), dim2); *fop = (double)dim2 * (double)dim2 * (double)dim1; } } template void SparseSchur(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix &upper, ColumnMatrix 
&lower, ColumnMatrix &diag, double *fop); template void SparseSchur >(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix > &upper, ColumnMatrix > &lower, ColumnMatrix > &diag, double *fop); template void SparseSchur(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix &upper, ColumnMatrix &lower, ColumnMatrix &diag, double *fop); template void SparseSchur >(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix > &upper, ColumnMatrix > &lower, ColumnMatrix > &diag, double *fop); template void SparseSchur(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix &upper, ColumnMatrix &lower, ColumnMatrix &diag, double *fop); template void SparseSchur >(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix > &upper, ColumnMatrix > &lower, ColumnMatrix > &diag, double *fop); // template void full_fwbw_single(const bool isTrans, const int n, const int n0, T *a, const int lda, T *z) { const T zero(0.0); const int n1 = n - n0; for (int i = n1; i < n; i++) { z[i] = zero; } blas_trsv((isTrans ? CblasUpper : CblasLower), (isTrans ? CblasTrans : CblasNoTrans), CblasUnit, n1, a, lda, z, 1); for (int i = 0; i < n1; i++) { z[i] *= a[i * (n + 1)]; } blas_trsv((isTrans ? CblasLower : CblasUpper), (isTrans ? 
CblasTrans : CblasNoTrans), CblasUnit, n1, a, lda, z, 1); } template void full_fwbw_single(const bool isTrans, const int n, const int n0, double *a, const int lda, double *z); template void full_fwbw_single >(const bool isTrans, const int n, const int n0, complex *a, const int lda, complex *z); template void full_fwbw_single(const bool isTrans, const int n, const int n0, quadruple *a, const int lda, quadruple *z); template void full_fwbw_single >(const bool isTrans, const int n, const int n0, complex *a, const int lda, complex *z); template void full_fwbw_single(const bool isTrans, const int n, const int n0, float *a, const int lda, float *z); template void full_fwbw_single >(const bool isTrans, const int n, const int n0, complex *a, const int lda, complex *z); // template void full_fwbw_multi(const bool isTrans, const int n, const int n0, T *a, const int lda, const int m, T *x, const int ldx) { const T one(1.0); const T zero(0.0); const int n1 = n - n0; for (int j = 0; j < m; j++) { for (int i = n1; i < n; i++) { x[i + j * ldx] = zero; } } // alpha = 1.0 blas_trsm(CblasLeft, (isTrans ? CblasUpper : CblasLower), (isTrans ? CblasTrans : CblasNoTrans), CblasUnit, n1, m, one, a, lda, x, ldx); for (int j = 0; j < m; j++) { for (int i = 0; i < n1; i++) { // x[i + j * ldx] *= a[i * (n + 1)]; // 3 Nov.2015 x[i + j * ldx] *= a[i * (lda + 1)]; // 3 Nov.2015 } } blas_trsm(CblasLeft, (isTrans ? CblasLower : CblasUpper), (isTrans ? 
CblasTrans: CblasNoTrans), CblasUnit, n1, m, one, a, lda, x, ldx); } template void full_fwbw_multi(const bool isTrans, const int n, const int n0, double *a, const int lda, const int m, double *x, const int ldx); template void full_fwbw_multi >(const bool isTrans, const int n, const int n0, complex *a, const int lda, const int m, complex *x, const int ldx); template void full_fwbw_multi(const bool isTrans, const int n, const int n0, quadruple *a, const int lda, const int m, quadruple *x, const int ldx); template void full_fwbw_multi >(const bool isTrans, const int n, const int n0, complex *a, const int lda, const int m, complex *x, const int ldx); template void full_fwbw_multi(const bool isTrans, const int n, const int n0, float *a, const int lda, const int m, float *x, const int ldx); template void full_fwbw_multi >(const bool isTrans, const int n, const int n0, complex *a, const int lda, const int m, complex *x, const int ldx); // template void full_fwbw_part(const int n, T *a, const int lda, T*x) { int ifirst; const T zero(0.0); ifirst = n; for (int i = 0; i < n; i++) { if (x[i] != zero) { ifirst = i; break; } } if (ifirst == n) { return; } blas_trsv(CblasLower, CblasNoTrans, CblasUnit, (n - ifirst), &a[ifirst * (lda + 1)], lda, &x[ifirst], 1); for (int i = ifirst; i < n; i++) { x[i] *= a[i * (lda + 1)]; } blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, n, a, lda, x, 1); } template void full_fwbw_part(const int n, double *a, const int lda, double *x); template void full_fwbw_part(const int n, quadruple *a, const int lda, quadruple *x); template void full_fwbw_part(const int n, float *a, const int lda, float *x); #ifndef NO_OCTRUPLE template void full_fwbw_part(const int n, octruple *a, const int lda, octruple *x); #endif template void full_fwbw_part >(const int n, complex *a, const int lda, complex *x); template void full_fwbw_part >(const int n, complex *a, const int lda, complex *x); template void full_fwbw_part >(const int n, complex *a, const int lda, complex 
*x); #ifndef NO_OCTRUPLE template void full_fwbw_part >(const int n, complex *a, const int lda, complex *x); #endif // template void SchurProfileSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, T *s, const int size_b1, double *fop) { const T one(1.0); const T zero(0.0); #if 1 const int num_block = ncol / size_b1 + ((ncol % size_b1) == 0 ? 0 : 1); for (int j = 0; j < num_block; j++) { const int jj = j * size_b1; for (int i = 0; i < j; i++) { const int ii = i * size_b1; const int nrowb = std::min(size_b1, ncol - ii); const int ncolb = std::min(size_b1, ncol - jj); const int ioffset = std::max(i0[ii], i0[jj]); //const int ioffset = 0; const int nnrow = nrow - ioffset; // alpha = 1 // beta = 0 blas_gemm(CblasTrans, CblasNoTrans, nrowb, ncolb, nnrow, one, c.addrCoefs() + (ioffset + ii * nrow), nrow, b.addrCoefs() + (ioffset + jj * nrow), nrow, zero, s + (ii + jj * ncol), ncol); *fop += (double)nrowb * (double)ncolb * (double) nnrow; } { const int nrowb = std::min(size_b1, ncol - jj); const int ioffset = i0[jj]; //const int ioffset = 0; const int nnrow = nrow - ioffset; // alpha = 1 // beta = 0 if (nnrow == 0) { // clear upper entries for (int j = 0; j < nrowb; j++) { for (int i = 0; i <= j; i++) { s[i + j * ncol + jj * (ncol + 1)] = zero; } } } else { C_gemm_symm(nrowb, nnrow, one, c.addrCoefs() + (ioffset + jj * nrow), nrow, b.addrCoefs() + (ioffset + jj * nrow), nrow, zero, s + (jj * (ncol + 1)), ncol); } *fop += (double)nrowb * (double)nrowb * (double) nnrow / 2.0; } } // loop : j #else // debug : compute all entries // alpha = 1 // beta = 0 // blas_gemm(CblasTrans, CblasNoTrans, // ncol, ncol, C_gemm_symm(ncol, nrow, one, c.addrCoefs(), nrow, b.addrCoefs(), nrow, zero, s, ncol); *fop += (double)ncol * (double)ncol * (double) nrow; #endif } template void SchurProfileSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, double *s, const int size_b1, double *fop); template void SchurProfileSym >(const int 
nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); template void SchurProfileSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, quadruple *s, const int size_b1, double *fop); template void SchurProfileSym >(const int nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); template void SchurProfileSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, float *s, const int size_b1, double *fop); template void SchurProfileSym >(const int nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); // template void SchurProfileUnSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, T* s, const int size_b1, double *fop) { const T one(1.0); const T zero(0.0); const int num_block = ncol / size_b1 + (ncol % size_b1 == 0 ? 0 : 1); for (int j = 0; j < num_block; j++) { const int jj = j * size_b1; for (int i = 0; i < num_block; i++) { const int ii = i * size_b1; const int nrowb = std::min(size_b1, ncol - ii); const int ncolb = std::min(size_b1, ncol - jj); const int ioffset = std::max(i0[ii], i0[jj]); const int nnrow = nrow - ioffset; // alpha=1 // beta=0 blas_gemm(CblasTrans, CblasNoTrans, nrowb, ncolb, nnrow, one, c.addrCoefs() + (ioffset + ii * nrow), nrow, b.addrCoefs() + (ioffset + jj * nrow), nrow, zero, s + (ii + jj * ncol), ncol); *fop += (double)nrowb * (double)ncolb * (double) nnrow; } } } template void SchurProfileUnSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, double *s, const int size_b1, double *fop); template void SchurProfileUnSym >(const int nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); template void SchurProfileUnSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix 
&c, quadruple *s, const int size_b1, double *fop); template void SchurProfileUnSym >(const int nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); template void SchurProfileUnSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, float *s, const int size_b1, double *fop); template void SchurProfileUnSym >(const int nrow, const int ncol, vector &i0, ColumnMatrix > &b, ColumnMatrix > &c, complex *s, const int size_b1, double *fop); // // Z for perturbation has lower precision than T (real or complex valued) template void full_fwbw_perturb_single(const int n, T *a, const int lda, T *a_fact, T *x, const int dim_augkern, const Z &eps, bool flag_sym) { // T alpha, beta; // T *upper, *lower, *schur; const T one(1.0); const T none(-1.0); const T Teps = conv_prec(eps); int n1; if (dim_augkern < n) { n1 = n - dim_augkern; } else { n1 = 0; } if (n1 > 0) { ColumnMatrix upper(dim_augkern, n1); // new T[dim_augkern * n1]; ColumnMatrix lower(dim_augkern, n1); // lower = new T[dim_augkern * n1]; ColumnMatrix schur(n1, n1); // = new T[n1 * n1]; // factorization with perturbation should be shared with both sing and multi for (int j = 0; j < n1; j++) { for (int i = 0; i < dim_augkern; i++) { upper(i, j) = a[i + (j + dim_augkern) * lda]; } for (int i = 0; i < n1; i++) { schur(i, j) = a[i + dim_augkern + (j + dim_augkern) * lda]; } } { // alpha = one; blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, upper.addrCoefs(), dim_augkern); for (int i = 0; i < dim_augkern; i++) { for (int j = 0; j < n1; j++) { upper(i, j) *= a_fact[i * (lda + 1)]; } } // alphe = one; blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, upper.addrCoefs(), dim_augkern); // emulating machine eplison in double for (int j = 0; j < n1; j++) { upper((dim_augkern - 1), j) += Teps; } } if (!flag_sym) { for (int j = 0; j < n1; j++) { for (int i = 
0; i < dim_augkern; i++) { lower(i, j) = a[(j + dim_augkern) + i * lda]; } } // alpha = one; blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, lower.addrCoefs(), dim_augkern); for (int i = 0; i < dim_augkern; i++) { for (int j = 0; j < n1; j++) { lower(i, j) *= a_fact[i * (lda + 1)]; } } // alpha = one; blas_trsm(CblasLeft, CblasLower, CblasTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, lower.addrCoefs(), dim_augkern); for (int j = 0; j < n1; j++) { lower((dim_augkern - 1), j) += Teps; } } // alpha = none; // beta = one; blas_gemm(CblasNoTrans, CblasNoTrans, n1, n1, dim_augkern, none, &a[dim_augkern], lda, upper.addrCoefs(), dim_augkern, one, schur.addrCoefs(), n1); if (flag_sym) { full_ldlt(n1, schur.addrCoefs(), n1); } else { full_ldu(n1, schur.addrCoefs(), n1); } // alpha = none; // beta = one; if (flag_sym) { blas_gemv(CblasTrans, dim_augkern, n1, none, upper.addrCoefs(), dim_augkern, x, 1, one, &x[dim_augkern], 1); } else { blas_gemv(CblasTrans, dim_augkern, n1, none, lower.addrCoefs(), dim_augkern, x, 1, one, &x[dim_augkern], 1); } // forward blas_trsv(CblasLower, CblasNoTrans, CblasUnit, dim_augkern, a_fact, lda, x, 1); for (int j = 0; j < dim_augkern; j++) { x[j] *= a_fact[j * (lda + 1)]; } // backward blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, dim_augkern, a_fact, lda, x, 1); // forward blas_trsv(CblasLower, CblasNoTrans, CblasUnit, dim_augkern, schur.addrCoefs(), n1, &x[dim_augkern], 1); // diagonal divide for (int j = 0; j < n1; j++) { x[j + dim_augkern] *= schur(j, j); //[j * (n1 + 1)]; } // backward blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, dim_augkern, schur.addrCoefs(), n1, &x[dim_augkern], 1); // alpha = none; // beta = one; blas_gemv(CblasNoTrans, dim_augkern, n1, none, upper.addrCoefs(), dim_augkern, &x[dim_augkern], 1, one, x, 1); } else { // forward blas_trsv(CblasLower, CblasNoTrans, CblasUnit, n, a_fact, lda, x, 1); for (int j = 0; j < n; j++) { x[j] *= a_fact[j * (lda + 1)]; } x[(n - 1)] 
+= Teps; // backward blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, n, a_fact, lda, x, 1); } } template void full_fwbw_perturb_single(const int n, quadruple *a, const int lda, quadruple *a_fact, quadruple *x, const int dim_augkern, const double &eps, bool flag_sym); template void full_fwbw_perturb_single(const int n, double *a, const int lda, double *a_fact, double *x, const int dim_augkern, const float &eps, bool flag_sym); #ifndef NO_OCTRUPLE template void full_fwbw_perturb_single(const int n, octruple *a, const int lda, octruple *a_fact, octruple *x, const int dim_augkern, const quadruple &eps, bool flag_sym); #endif template void full_fwbw_perturb_single, double>(const int n, complex *a, const int lda, complex *a_fact, complex *x, const int dim_augkern, const double &eps, bool flag_sym); template void full_fwbw_perturb_single, float>(const int n, complex *a, const int lda, complex *a_fact, complex *x, const int dim_augkern, const float &eps, bool flag_sym); #ifndef NO_OCTRUPLE template void full_fwbw_perturb_single, quadruple>(const int n, complex *a, const int lda, complex *a_fact, complex *x, const int dim_augkern, const quadruple &eps, bool flag_sym); #endif // // Z for perturbation has lower precision than T (real or complex valued) template void full_fwbw_perturb_multi(const int n, const int m, T *a, const int lda, T *a_fact, T *x, const int dim_augkern, const Z &eps, bool flag_sym) { // T alpha, beta; // T *upper, *lower, *schur, *v; const T one(1.0); const T none(-1.0); const T Teps = conv_prec(eps); if (dim_augkern >= n) { // forward // alpha = one; blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n, m, one, a_fact, lda, x, lda); for (int j = 0; j < m; j++) { for (int i= 0; i < n; i++) { x[i + j * lda] *= a_fact[i * (lda + 1)]; } x[(n - 1) + j * lda] += Teps; } // backward blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, n, m, one, a_fact, lda, x, lda); } else { const int n1 = n - dim_augkern; ColumnMatrix upper(dim_augkern, n1); 
ColumnMatrix lower(dim_augkern, n1); ColumnMatrix schur(n1, n1); // factorization with perturbation should be shared with both sing and multi for (int j = 0; j < n1; j++) { for (int i = 0; i < dim_augkern; i++) { upper(i, j) = a[i + (j + dim_augkern) * lda]; } for (int i = 0; i < n1; i++) { schur(i, j) = a[i + dim_augkern + (j + dim_augkern) * lda]; } } { // alpha = one; blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, upper.addrCoefs(), dim_augkern); for (int i = 0; i < dim_augkern; i++) { for (int j = 0; j < n1; j++) { upper(i, j) *= a_fact[i * (lda + 1)]; } } // alpha = one; blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, upper.addrCoefs(), dim_augkern); for (int j = 0; j < n1; j++) { upper((dim_augkern - 1), j) += Teps; } } if (!flag_sym) { for (int j = 0; j < n1; j++) { for (int i = 0; i < dim_augkern; i++) { lower(i, j) = a[(j + dim_augkern) + i * lda]; } } // alpha = one; blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, lower.addrCoefs(), dim_augkern); for (int i = 0; i < dim_augkern; i++) { for (int j = 0; j < n1; j++) { lower(i, j) *= a_fact[i * (lda + 1)]; } } // alpha = one; blas_trsm(CblasLeft, CblasLower, CblasTrans, CblasUnit, dim_augkern, n1, one, a_fact, lda, lower.addrCoefs(), dim_augkern); for (int j = 0; j < n1; j++) { lower((dim_augkern - 1), j) += Teps; } } // alpha = none; // beta = one; blas_gemm(CblasNoTrans, CblasNoTrans, n1, n1, dim_augkern, none, &a[dim_augkern], lda, upper.addrCoefs(), dim_augkern, one, schur.addrCoefs(), n1); if (flag_sym) { full_ldlt(n1, schur.addrCoefs(), n1); } else { full_ldu(n1, schur.addrCoefs(), n1); } // alpha = none; // beta = one; if (flag_sym) { blas_gemm(CblasTrans, CblasNoTrans, n1, m, dim_augkern, none, upper.addrCoefs(), dim_augkern, x, lda, one, &x[dim_augkern], lda); } else { blas_gemm(CblasTrans, CblasNoTrans, n1, m, dim_augkern, none, lower.addrCoefs(), dim_augkern, x, 
lda, one, &x[dim_augkern], lda); } // alpha = one; // forward blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, dim_augkern, m, one, a_fact, lda, x, lda); // diagonal divide for (int j = 0; j < m; j++) { for (int i = 0; i < dim_augkern; i++) { x[i + j * lda] *= a_fact[i * (lda + 1)]; } } // backward blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, dim_augkern, m, one, a_fact, lda, x, lda); // forward blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n1, m, one, schur.addrCoefs(), n1, &x[dim_augkern], lda); for (int j = 0; j < m; j++) { for (int i = 0; i < n1; i++) { x[i + dim_augkern + j * lda] *= schur(i, i);// } } // backward blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, n1, m, one, schur.addrCoefs(), n1, &x[dim_augkern], lda); // alpha = none; // beta = one; blas_gemm(CblasNoTrans, CblasNoTrans, dim_augkern, m, n1, none, upper.addrCoefs(), dim_augkern, &x[dim_augkern], lda, one, x, lda); } } template void full_fwbw_perturb_multi(const int n, const int m, quadruple *a, const int lda, quadruple *a_fact, quadruple *x, const int dim_augkern, const double &eps, bool flag_sym); template void full_fwbw_perturb_multi(const int n, const int m, double *a, const int lda, double *a_fact, double *x, const int dim_augkern, const float &eps, bool flag_sym); #ifndef NO_OCTRUPLE template void full_fwbw_perturb_multi(const int n, const int m, octruple *a, const int lda, octruple *a_fact, octruple *x, const int dim_augkern, const quadruple &eps, bool flag_sym); #endif template void full_fwbw_perturb_multi, double>(const int n, const int m, complex *a, const int lda, complex *a_fact, complex *x, const int dim_augkern, const double &eps, bool flag_sym); template void full_fwbw_perturb_multi, float>(const int n, const int m, complex *a, const int lda, complex *a_fact, complex *x, const int dim_augkern, const float &eps, bool flag_sym); #ifndef NO_OCTRUPLE template void full_fwbw_perturb_multi, quadruple>(const int n, const int m, complex *a, const 
int lda, complex *a_fact, complex *x, const int dim_augkern, const quadruple &eps, bool flag_sym); #endif template void full_sym_2x2BK(int n, T *a, T *dd1, int *pivot_width, int *permute) { fprintf(stderr, "%s %d : general template is not implomented\n", __FILE__, __LINE__); } template void full_sym_2x2BK(int n, double *a, double *dd1, int *pivot_width, int *permute); template void full_sym_2x2BK >(int n, complex *a, complex *dd1, int *pivot_width, int *permute); template void full_sym_2x2BK(int n, quadruple *a, quadruple *dd1, int *pivot_width, int *permute); template void full_sym_2x2BK >(int n, complex *a, complex *dd1, int *pivot_width, int *permute); template void full_sym_2x2BK(int n, float *a, float *dd1, int *pivot_width, int *permute); template void full_sym_2x2BK >(int n, complex *a, complex *dd1, int *pivot_width, int *permute); // template void C_gemm_symm(const int ncol, const int nrow, const T &alpha, const T *a, const int lda, const T *b, const int ldb, const T &beta, T *c, const int ldc) { // #define DGEMM_FOR_SYMM #ifdef DGEMM_FOR_SYMM blas_gemm(CblasTrans, CblasNoTrans, ncol, ncol, nrow, alpha, a, lda, b, ldb, beta, c, ldc); #else int n1, n2, n11, n12, n21, n22, n3; n1 = ncol / 2; n2 = ncol - n1; n11 = n1 / 2; n12 = n1 - n11; n21 = n2 / 2; n22 = n2 - n21; n3 = n1 + n21; if (ncol < SIZE_DGEMM_SYMM_DTRSV) { for (int j = 0; j < ncol; j++) { blas_gemv(CblasTrans, nrow, (j + 1), alpha, a, lda, b + (ldb * j), 1, beta, c + (ldc * j), 1); } } else { C_gemm_symm(n11, nrow, alpha, a, lda, b, ldb, beta, c, ldc); C_gemm_symm(n12, nrow, alpha, a + (lda * n11), lda, b + (ldb * n11), ldb, beta, c + (ldc + 1) * n11, ldc); C_gemm_symm(n21, nrow, alpha, a + lda * n1, lda, b + ldb * n1, ldb, beta, c + (ldc + 1) * n1, ldc); C_gemm_symm(n22, nrow, alpha, a + lda * n3, lda, b + ldb * n3, ldb, beta, c + (ldc + 1) * n3, ldc); blas_gemm(CblasTrans, CblasNoTrans, n11, n12, nrow, alpha, a, lda, b + (ldb * n11), ldb, beta, c + (ldc * n11), ldc); blas_gemm(CblasTrans, 
CblasNoTrans, n21, n22, nrow, alpha, a + (lda * n1), lda, b + (ldb * n3), ldb, beta, c + (n1 + ldc * n3), ldc); blas_gemm(CblasTrans, CblasNoTrans, n1, n2, nrow, alpha, a, lda, b + (ldb * n1), ldb, beta, c + (ldc * n1), ldc); } #endif } template void C_gemm_symm(const int ncol, const int nrow, const double &alpha, const double *a, const int lda, const double *b, const int ldb, const double &beta, double *c, const int ldc); template void C_gemm_symm >(const int ncol, const int nrow, const complex &alpha, const complex *a, const int lda, const complex *b, const int ldb, const complex &beta, complex *c, const int ldc); template void C_gemm_symm(const int ncol, const int nrow, const quadruple &alpha, const quadruple *a, const int lda, const quadruple *b, const int ldb, const quadruple &beta, quadruple *c, const int ldc); template void C_gemm_symm >(const int ncol, const int nrow, const complex &alpha, const complex *a, const int lda, const complex *b, const int ldb, const complex &beta, complex *c, const int ldc); template void C_gemm_symm(const int ncol, const int nrow, const float &alpha, const float *a, const int lda, const float *b, const int ldb, const float &beta, float *c, const int ldc); template void C_gemm_symm >(const int ncol, const int nrow, const complex &alpha, const complex *a, const int lda, const complex *b, const int ldb, const complex &beta, complex *c, const int ldc); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_BlasRoutines.hpp000664 000000 000000 00000017634 14037356732 025710 0ustar00rootroot000000 000000 /*! \file C_BlasRoutines.hpp \brief factorization routines LDL^t, LDU, forward/backward substitution \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // # ifndef _DRIVER_C_BLASROUTINES_ # define _DRIVER_C_BLASROUTINES_ #include #include #include "Compiler/blas.hpp" #include "Algebra/ColumnMatrix.hpp" using std::vector; using std::list; template Z matrix_infty_norm_(const int n, T *a, const int lda); template U matrix_infty_norm(const int n, T *a, const int lda); template<> double matrix_infty_norm(const int n, quadruple *a, const int lda); template<> double matrix_infty_norm, double>(const int n, complex *a, const int lda); #ifndef NO_OCTRUPLE template<> quadruple matrix_infty_norm(const int n, octruple *a, const int lda); template<> quadruple matrix_infty_norm, quadruple>(const int n, complex *a, const int lda); #endif template<> float matrix_infty_norm(const int n, double *a, const int lda); template<> float matrix_infty_norm, float>(const int n, complex *a, const int lda); template void full_ldlt(const int n, T *a, const int lda); template void full_ldlh(const int n, T *a, const int lda); template<> void full_ldlh(const int n, double *a, const int lda); template<> void full_ldlh(const int n, quadruple *a, const int lda); template<> void full_ldlh(const int n, float *a, const int lda); template void full_ldu(const int n, T *a, const int lda); template void FillUpperBlock(const int nrow, T *coef, const int *prow, const int *indcols, const int *indvals, const int *old2new_i, const int *old2new_j, ColumnMatrix &b); template bool full_ldlt_permute(int *n0, const int nn0, const int n, T *a, const int lda, double *pivot, int *permute, const 
double eps, double *fop); template bool full_ldu_permute(int *n0, const int nn0, const int n, T *a, const int lda, double *pivot, int *permute, const double eps, double *fop); template bool ldu_full_permute(int *nn0, const int n0, const int n, T *a, const int lda, double *pivot, int *permute_right, int *permute_left, const double eps, double *fop); template void full_fw_multiprofile(bool isTransposed, const int nrow, const int n0, const int ncol, T *a, const int lda, T *y, const int ldy, vector &i0, double *fop); template void full_fw_single(bool isTransposed, const int nrow, const int n0, T *a, const int lda, T *x, double *fop); template void full_bw_single(bool isTransposed, const int nrow, const int n0, T *a, const int lda, T *x, double *fop); template void full_fw_multi(bool isTransposed, const int nrow, const int n0, T *a, const int lda, const int ncol, T *x, const int ldy, double *fop); template void full_bw_multi(bool isTransposed, const int nrow, const int n0, T *a, const int lda, const int ncol, T *x, const int ldy, double *fop); template void SparseSchur(const bool isSym, const int dim2, const int dim1, vector& i0, ColumnMatrix &upper, ColumnMatrix &lower, ColumnMatrix &diag, double *fop); template void full_fwbw_single(const bool isTrans, const int n, const int n0, T *a, const int lda, T *z); template void full_fwbw_multi(const bool isTrans, const int n, const int n0, T *a, const int lda, const int m, T *x, const int ldx); template void full_fwbw_part(const int n, T *a, const int lda, T*x); template void SchurProfileSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, T* s, const int size_b1, double *fop); template void SchurProfileUnSym(const int nrow, const int ncol, vector &i0, ColumnMatrix &b, ColumnMatrix &c, T* s, const int size_b1, double *fop); template void swap_sym_lower(const int n, T *a, const int lda, const int k, const int km, T *col_k, T *col_km); template void swap_sym_upper(const int n, T *a, const int lda, 
const int k, const int km, T *col_k, T *col_km); template void swap_unsym(const int n, T *a, const int lda, const int k, const int km, T *col_k, T *col_km); template void swap_full(const int n, T *a, const int lda, const int k, const int ki, const int kj, T *coli, T *colj); // routines for C_KernDetect.cpp template void full_fwbw_perturb_single(const int n, T *a, const int lda, T *a_fact, T *x, const int dim_augkern, const Z &eps, bool flag_sym); template void full_fwbw_perturb_multi(const int n, const int m, T *a, const int lda, T *a_fact, T *x, const int dim_augkern, const Z &eps, bool flag_sym); // template void full_sym_2x2BK(int n, T *a, T *dd1, int *pivot_width, int *permute); template void C_gemm_symm(const int ncol, const int nrow, const T &alpha, const T *a, const int lda, const T *b, const int ldb, const T &beta, T *c, const int ldc); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_DFullLDLt.cpp000664 000000 000000 00000066270 14037356732 025017 0ustar00rootroot000000 000000 /*! \file C_DFullLDLt.cpp \brief block factorization routines LDL^t, LDU \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Apr. 22th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #include "Driver/C_threads_tasks.hpp" #include "Driver/C_KernDetect.hpp" #include "Driver/C_BlasRoutines.hpp" #include "Driver/DissectionDefault.hpp" #include "Compiler/arithmetic.hpp" #include "Algebra/VectorArray.hpp" #include "Compiler/DissectionIO.hpp" #include #ifdef BLAS_MKL #define MKL_DOMATCOPY #endif template void C_dupdateb_Schur_diag(void *arg_) { C_dupdateb_Schur_arg *arg = (C_dupdateb_Schur_arg *)arg_; const int task_position = arg->task_position; const int id_block = arg->id_block; const int id_level = arg->id_level; const int n = arg->n; const int nrow = arg->nrow; // size of diag. block whose matrix is factorized const int ncol = arg->ncol; // const int i1_block = arg->i1_block; const int ii_block = arg->ii_block; SquareBlockMatrix &D = *(arg->D); const int n0 = D.dim_kern_block(id_block); const T none(-1.0); const T one(1.0); if ((task_position % 2 == 1) && (id_level == 0)) { T *a = arg->a->addrCoefs(); //T *a = *(arg->a); D.copyBlockToArray(arg->ii_block, arg->ii_block, a, n); } if (arg->isSym) { // alpha=-1 // beta=1 C_gemm_symm(ncol, (nrow - n0), // nrow : zero padding is better ? none, // alpha D.addrCoefBlock(ii_block, i1_block), // lower transposed D.nrowBlock(ii_block, i1_block), D.addrCoefBlock(i1_block, ii_block), // upper D.nrowBlock(i1_block, ii_block), one, // beta, D.addrCoefBlock(ii_block, ii_block), // Schur D.nrowBlock(ii_block, ii_block)); } else { blas_gemm(CblasTrans, CblasNoTrans, ncol, ncol, (nrow - n0), // nrow : zero padding is better ? 
none, // alpha, D.addrCoefBlock(ii_block, i1_block), // lower transposed D.nrowBlock(ii_block, i1_block), D.addrCoefBlock(i1_block, ii_block), // upper D.nrowBlock(i1_block, ii_block), one, // beta, D.addrCoefBlock(ii_block, ii_block), // Schur D.nrowBlock(ii_block, ii_block)); } } template void C_dupdateb_Schur_diag(void *arg_); template void C_dupdateb_Schur_diag(void *arg_); template void C_dupdateb_Schur_diag >(void *arg_); template void C_dupdateb_Schur_diag >(void *arg_); template void C_dupdateb_Schur_diag(void *arg_); template void C_dupdateb_Schur_diag >(void *arg_); // template void C_dupdateb_Schur_offdiag(void *arg_) { C_dupdateb_Schur_arg *arg = (C_dupdateb_Schur_arg *)arg_; const int task_position = arg->task_position; const int id_block = arg->id_block; const int id_level = arg->id_level; const int n = arg->n; const int nrow = arg->nrow; const int ncol = arg->ncol; const int i1_block = arg->i1_block; const int ii_block = arg->ii_block; const int jj_block = arg->jj_block; SquareBlockMatrix &D = *(arg->D); const int n0 = D.dim_kern_block(id_block); const T none(-1.0); const T one(1.0); if ((task_position % 2 == 1) && (id_level == 0)) { T *a = arg->a->addrCoefs(); //T *a = *(arg->a); D.copyBlockToArray(arg->ii_block, arg->jj_block, a, n); if (!arg->isSym) { D.copyBlockToArray(arg->jj_block, arg->ii_block, a, n); } } blas_gemm(CblasTrans, CblasNoTrans, nrow, ncol, (nrow - n0), // none, // alpha, D.addrCoefBlock(ii_block, i1_block), // lower D.nrowBlock(ii_block, i1_block), D.addrCoefBlock(i1_block, jj_block), // upper D.nrowBlock(i1_block, jj_block), one, // beta, D.addrCoefBlock(ii_block, jj_block), // Schur D.nrowBlock(ii_block, jj_block)); if (!arg->isSym) { // S(jj,ii)-=L(jj)*U(ii) <==> S(jj,ii)^T-=L(jj)^T*U(ii)^T blas_gemm(CblasTrans, CblasNoTrans, nrow, // row of S^T ncol, (nrow - n0), // none, // alpha, D.addrCoefBlock(i1_block, ii_block), // upper^T D.nrowBlock(i1_block, ii_block), D.addrCoefBlock(jj_block, i1_block), // lower^T 
D.nrowBlock(jj_block, i1_block), one, // beta, D.addrCoefBlock(jj_block, ii_block), // Schur D.nrowBlock(jj_block, ii_block)); } } template void C_dupdateb_Schur_offdiag(void *arg_); template void C_dupdateb_Schur_offdiag(void *arg_); template void C_dupdateb_Schur_offdiag >(void *arg_); template void C_dupdateb_Schur_offdiag >(void *arg_); template void C_dupdateb_Schur_offdiag(void *arg_); template void C_dupdateb_Schur_offdiag >(void *arg_); // template void C_dfull_gauss_b(void *arg_) { const T zero(0.0); C_dfull_gauss_arg *arg = (C_dfull_gauss_arg *)arg_; const int task_position = arg->task_position; const int id_block = arg->id_block; const int id_level = arg->id_level; const int n = arg->n; const bool verbose = arg->verbose; FILE *fp = *(arg->fp); // int *nnn0 = arg->n0; int nn0; const int nrow = arg->nrow; const int i1_block = arg->i1_block; SquareBlockMatrix &D = *(arg->D); const int i1 = D.IndexBlock(i1_block); int *permute_block = new int[D.nrowBlock(i1_block)]; // block_size()] ; const int aug_dim = *(arg->aug_dim); const U eps_machine = *(arg->eps_machine); const double eps_piv = *(arg->eps_piv); // get smaller pivot of two children T *a_diag = D.addrCoefBlock(arg->i1_block, arg->i1_block); int nrow_block = D.nrowBlock(arg->i1_block, arg->i1_block); vector& permute = D.getPermute(); double pivot, pivot0, pivot1; vector& singIdx0 = D.getSingIdx0(); if (task_position % 2 == 1) { // copy the whole matrix for kernel detection and refactorization // if (*(arg->kernel_detection) || (id_level == 0)) { // 30 Jul.2014 ColumnMatrix &a = *(arg->a); a.init(n, n); // symmetrizie the first diagonal block : copy upper to lower if (arg->isSym) { for (int i = 0; i < nrow_block; i++) { for (int j = 0; j < i; j++) { // access lower a_diag[i + j * nrow_block] = a_diag[j + i * nrow_block]; } } } D.copyBlockToArray(arg->i1_block, arg->i1_block, a.addrCoefs(), n); // D.copyToArrayFull(a, n); } // if (id_level == 0) singIdx0.clear(); singIdx0.reserve(n); // singIdx0.resize(n, 
-1); D.set_dim_kern(0); // counting total number of null pivots among blocks // initialize permute[] for (int i = 0; i < n; i++) { permute[i] = i; } pivot0 = *(arg->pivot0); pivot1 = *(arg->pivot1); pivot = (pivot0 < pivot1 ? pivot0 : pivot1); *(arg->quit) = false; // default is to continue queue for refactorization } // if (task_position % 2 == 1) else { pivot = *(arg->pivot); } const int n0 = 0; double fop; if ((task_position % 2 == 1) && (nrow > aug_dim)) { bool flag_repeat_piv = true; // bool repeat_first = true; double eps_piv1 = eps_piv; int count_repeat = 0; ColumnMatrix a_diag1(nrow_block, nrow_block); while (flag_repeat_piv) { if (count_repeat == 0) { blas_copy((nrow_block * nrow_block), a_diag, 1, a_diag1.addrCoefs(), 1); } else { blas_copy((nrow_block * nrow_block), a_diag1.addrCoefs(), 1, a_diag, 1); } // if (count_repeat > 0) if (arg->isSym) { full_ldlt_permute(&nn0, n0, nrow, a_diag, nrow_block, &pivot, permute_block, eps_piv1, &fop); } else { full_ldu_permute(&nn0, n0, nrow, a_diag, nrow_block, &pivot, permute_block, eps_piv1, &fop); } // if (arg->isSym) if (((nrow - nn0) >= aug_dim) || (eps_piv1 < TOL_PIVOT)) { flag_repeat_piv = false; } else { eps_piv1 /= 10.0; count_repeat++; } } // while (flag_repeat_piv) if (eps_piv1 < TOL_PIVOT) { D.set_pivrelaxed(); } a_diag1.free(); if (count_repeat > 0) { diss_printf(verbose, fp, "%s %d : eps_piv = %g pivot = %g n0 = %d count_repeat = %d\n", __FILE__, __LINE__, eps_piv1, pivot, n0, count_repeat); } } // if ((id_level == 0) && (task_position % 2 == 1)) else { if (arg->isSym) { full_ldlt_permute(&nn0, n0, nrow, a_diag, nrow_block, &pivot, permute_block, eps_piv, &fop); } // if (arg->isSym) else { full_ldu_permute(&nn0, n0, nrow, a_diag, nrow_block, &pivot, permute_block, eps_piv, &fop); } } if (nn0 > 0) { diss_printf(verbose, fp, "%s %d : nd = %d : level = %d block = %d null = %d / %d\n", __FILE__, __LINE__, arg->nb, arg->id_level, arg->id_block, nn0, nrow); } // permute_block is defined by Fortran array, i.e. 
takes index starting 1 for (int i = i1; i < i1 + nrow; i++) { permute[i] = permute_block[i - i1] + i1; } // store singular nodes int sing_max = D.dim_kern(); // D.sing_max(); { int itmp = i1 + nrow - nn0; for (int i = 0; i < nn0; i++, itmp++, sing_max++) { // singIdx0[sing_max] = itmp; singIdx0.push_back(itmp); } D.set_dim_kern(sing_max); // total number of null pivots D.set_dim_kern_block(id_block, nn0); // #null pivots candidate in the block } if (task_position / 2 == 1) { { // scope for list tmp_idx list tmp_idx; for (int i = 0; i < sing_max; i++) { for (int j = -1; j <= 1; j++) { const int itmp = singIdx0[i] + j; if (itmp >= 0 && itmp < n) { tmp_idx.push_back(itmp); } } } tmp_idx.sort(); tmp_idx.unique(); if (sing_max > 0) { diss_printf(verbose, fp, "%s %d : %d : %g\n", __FILE__, __LINE__, arg->nb, pivot); } for (list::const_iterator it = tmp_idx.begin(); it != tmp_idx.end(); ++it) { bool dispflag = false; for (int j = 0; j < sing_max; j++) { if ((*it) == singIdx0[j]) { dispflag = true; } } diss_printf(verbose, fp, "%d %s %s\n", (*it), (dispflag ? 
"*" :":"), tostring(D.diag(*it)).c_str()); } } // nullifying diagonal entries for (int m = 0; m < sing_max; m++) { const int ii = singIdx0[m]; D.diag(ii) = zero; } bool refactorize = false; // default value // check dim of factorized matrix int dim_kern = 0; bool flagKernelDetect = true; if (sing_max > 0) { diss_printf(verbose, fp, "%s %d : C_dfull_gauss_b : id_level %d : nb = %d : %d / %d ", __FILE__, __LINE__, id_level, arg->nb, arg->id_block, arg->num_block); diss_printf(verbose, fp, "task_position = %d, i1 = %d , nrow = %d\n", task_position, i1, nrow); diss_printf(verbose, fp, "%s %d : sing_max = %d : ", __FILE__, __LINE__, sing_max); for (int i = 0; i < sing_max; i++) { diss_printf(verbose, fp, "%d ", singIdx0[i]); } diss_printf(verbose, fp, "\n"); if ((task_position == 3) && (sing_max + aug_dim) > nrow) { // if (task_position == 3) { *(arg->quit) = true; // refactorize is only applied for the root matrix diss_printf(verbose, fp, "%s %d : nonsingular part is too small %d : %d - %d : %d\n", __FILE__, __LINE__, aug_dim, nrow, nn0, sing_max); flagKernelDetect = false; dim_kern = sing_max; // nullify rows with all suspicious pivots D.set_KernelDetected(false); // kernel is unknown singIdx0.resize(sing_max); } } // if (sing_max > 0) else { flagKernelDetect = false; singIdx0.resize(0); dim_kern = 0; D.set_KernelDetected(true); } if (flagKernelDetect) { if (*(arg->kernel_detection) || (id_level == 0)) { singIdx0.resize(sing_max); dim_kern = dimKernDense(singIdx0, n, aug_dim, eps_machine, eps_piv, D, arg->a->addrCoefs(), //*(arg->a), refactorize, false, // isFullPermute arg->isSym, verbose, fp); diss_printf(verbose, fp, "%s %d : dim_kern = %d : singIdx0 = [", __FILE__, __LINE__, dim_kern); for (int i = 0; i < singIdx0.size(); i++) { diss_printf(verbose, fp, "%d ", singIdx0[i]); } diss_printf(verbose, fp, "]\n"); if (dim_kern == (-1)) { // refactorization happens in only the last level refactorize = true; D.set_KernelDetected(false); } else if (dim_kern != 
singIdx0.size()) { // 20 Feb.2017 dim_kern = sing_max; // nullify rows with all suspicious pivots D.set_KernelDetected(false); // kernel is unknown singIdx0.resize(sing_max); } else { D.set_KernelDetected(true); } } //else if (*(arg->kernel_detection) || (id_level == 0)) { else { diss_printf(verbose, fp, "%s %d : kernel detection will be done in the last\n", __FILE__, __LINE__); dim_kern = sing_max; // nullify rows with all suspicious pivots D.set_KernelDetected(false); // kernel is unknown singIdx0.resize(sing_max); } //else if (*(arg->kernel_detection) || (id_level == 0)) { } // if (flagKernelDetect) // if (sing_max > 0) if(arg->isSym) { D.freeLowerBlocks(); } if (refactorize){ //reset permutation for refactorization for (int i = 0; i < n; i++) { permute[i] = i; } diss_printf(verbose, fp, "%s %d : refactorization with whole diagonal pivots starts\n", __FILE__, __LINE__); *(arg->quit) = false; } else { // if (!refactorize) if (*(arg->kernel_detection) || (id_level == 0)) { // 30 Jul.2014 arg->a->free(); } if(id_level == 0) { *(arg->quit) = true; // refactorize is only applied for the root matrix } D.getSingIdx().resize(sing_max); for (int i = 0; i < sing_max; i++) { D.getSingIdx()[i] = permute[singIdx0[i]]; } D.set_lastPivot(pivot); D.set_dim_kern(dim_kern); D.set_rank(n - dim_kern); // } // if (refactorize) } // if (task_position / 2 == 1) else { if (nn0 > 0) { // nullifying diangol entries for (int ii = (i1 + nrow - nn0); ii < (i1 + nrow); ii++) { D.diag(ii) = zero; // a[ii * (n + 1)] = 0.0; } } // if (nn0 > 0) } // if (task_position / 2 == 1) *(arg->pivot) = pivot; // for passing information to other task delete [] permute_block; } template void C_dfull_gauss_b(void *arg_); template void C_dfull_gauss_b, double>(void *arg_); template void C_dfull_gauss_b(void *arg_); template void C_dfull_gauss_b, quadruple>(void *arg_); template void C_dfull_gauss_b(void *arg_); template void C_dfull_gauss_b, float>(void *arg_); // template void C_dinvDL_timesU(void *arg_) { 
const T one(1.0); C_dinvDL_timesU_arg *arg = (C_dinvDL_timesU_arg *)arg_; const int task_position = arg->task_position; const int id_level = arg->id_level; const int id_block = arg->id_block; const int n = arg->n; const int nrow = arg->nrow; const int ncol = arg->ncol; SquareBlockMatrix &D = *(arg->D); const int i1 = D.IndexBlock(arg->i1_block); T *a_diag = D.addrCoefBlock(arg->i1_block, arg->i1_block); T *a_upper = D.addrCoefBlock(arg->i1_block, arg->jj_block); const int nrow_block_diag = D.nrowBlock(arg->i1_block, arg->i1_block); const int nrow_block_upper= D.nrowBlock(arg->i1_block, arg->jj_block); if (arg->isSym) { D.allocateBlock(arg->jj_block, arg->i1_block); } T *a_lower = D.addrCoefBlock(arg->jj_block, arg->i1_block); const int nrow_block_lower = D.nrowBlock(arg->jj_block, arg->i1_block); vector& permute = D.getPermute(); const int n0 = D.dim_kern_block(id_block); // assuming to be allocated in the cache memory VectorArray a_tmp(nrow); if ((task_position % 2 == 1) && (id_level == 0)) { // T *a = *(arg->a); ColumnMatrix &a = *(arg->a); // copy a_upper D.copyBlockToArray(arg->i1_block, arg->jj_block, a.addrCoefs(), n); if (!arg->isSym) { // copy a_lower D.copyBlockToArray(arg->jj_block, arg->i1_block, a.addrCoefs(), n); } } { // copy upper to upper with permutation for (int j = 0; j < ncol; j++) { const int jn = j * nrow; for (int i = 0; i < nrow; i++) { const int ip = permute[i1 + i] - i1; a_tmp[i] = a_upper[ip + jn]; } blas_copy(nrow, a_tmp.addrCoefs(), 1, a_upper + jn, 1); } } if (!arg->isSym) { // copy transposed lower to transposed lower with permutation for (int j = 0; j < ncol; j++) { const int jn = j * nrow; for (int i = 0; i < nrow; i++) { const int ip = permute[i1 + i] - i1; a_tmp[i] = a_lower[ip + jn]; } blas_copy(nrow, a_tmp.addrCoefs(), 1, a_lower + jn, 1); } } blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, (nrow - n0), // n0 : dim of suspicious pivots ncol, one, // alpha, a_diag, nrow_block_diag, // 3 Dec. 
2015 a_upper, nrow_block_upper); // Dec. 2015 if (arg->isSym) { for (int j = 0; j < ncol; j++) { const int jn = j * nrow; blas_copy(nrow, a_upper + jn, 1, a_lower + jn, 1); } } else { // (A_{jj i1} U_{i1 i1}^-1)^T = U_{i1 i1}^{-T} A_{jj i1}^{T} blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, (nrow - n0), // n0 : dim of suspicious pivots ncol, one, // alpha, a_diag, nrow_block_diag, // 3 Dec.2015 a_lower, nrow_block_lower); // 3 Dec.2015 } for (int i = 0; i < nrow; i++) { const T aa = D.diag(i1 + i); for (int j = 0; j < ncol; j++) { a_upper[i + j * nrow] *= aa; } } } template void C_dinvDL_timesU(void *arg_); template void C_dinvDL_timesU(void *arg_); template void C_dinvDL_timesU >(void *arg_); template void C_dinvDL_timesU >(void *arg_); template void C_dinvDL_timesU(void *arg_); template void C_dinvDL_timesU >(void *arg_); // template void C_gauss_whole_pivot(void *arg_) { const T zero(0.0); const T one(1.0); C_dfull_gauss_arg *arg = (C_dfull_gauss_arg *)arg_; const int task_position = arg->task_position; const int n = arg->n; SquareBlockMatrix &D = *(arg->D); ColumnMatrix &a = *(arg->a); const double eps_piv = *(arg->eps_piv); double pivot = *(arg->pivot); // double *a_sym = D.addrCoefs(); vector& permute = D.getPermute(); vector& singIdx0 = D.getSingIdx0(); const int aug_dim = *(arg->aug_dim); const U eps_machine = *(arg->eps_machine); const bool verbose = arg->verbose; FILE *fp = *(arg->fp); int n1, dim_kern; if (task_position % 2 == 1) { ColumnMatrix aa(n, n); aa.copy(a); // blas_copy((n * n), a.addrCoefs(), 1, aa, 1); if (arg->isSym) { // copy upper to lower for rank-1 update procedure by dsyr('L', ) for (int j = 0; j < n; j++) { for (int i = 0; i < j; i++) { // aa[j + i * n] = aa[i + j * n]; aa(j, i) = aa(i, j); } } } const double pivot0 = *(arg->pivot0); const double pivot1 = *(arg->pivot1); pivot = (pivot0 < pivot1 ? 
pivot0 : pivot1); int n0 = 0; diss_printf(verbose, fp, "%s %d : C_gauss_whole_pivot : serial factroization : n = %d\n", __FILE__, __LINE__, n); int count_repeat = 0; bool flag_repeat_piv = true; double eps_piv1 = eps_piv; while (flag_repeat_piv) { if (count_repeat > 0) { aa.copy(a); // blas_copy((n * n), a.addrCoefs(), 1, aa.addrCoefs(), 1); if (arg->isSym) { // copy upper to lower for rank-1 update procedure by dsyr('L', ) for (int j = 0; j < n; j++) { for (int i = 0; i < j; i++) { aa[j + i * n] = aa[i + j * n]; } } } } // if (count_repeat > 0) n0 = 0; full_gauss3(&n0, aa.addrCoefs(), n, &pivot, &permute[0], arg->isSym, eps_piv1, verbose, fp); diss_printf(verbose, fp, "%s %d : C_gauss_whole_pivot : pivot = %g n0 = %d\n", __FILE__, __LINE__, pivot, n0); if (((n - n0) >= aug_dim) || (eps_piv1 < TOL_PIVOT)) { flag_repeat_piv = false; } else { eps_piv1 /= 10.0; count_repeat++; } } // while (flag_repeat_piv) diss_printf(verbose, fp, "%s %d : eps_pvi = %g pivot = %g n0 = %d count_repeat = %d\n", __FILE__, __LINE__, eps_piv1, pivot, n0, count_repeat); // D.unsetBlocked(); // D.copyFromArray(aa.addrCoefs(), n); // modification of lower blocks by removing D^-1 in the same mannar of // factorization by blocks if (!arg->isSym) { const int num_block = D.num_blocks(); for (int k = 0; k < num_block; k++) { for (int m = (k + 1); m < num_block; m++) { T *lower = D.addrCoefBlock(m, k); const int nrow = D.nrowBlock(m, k); const int ncol = D.ncolBlock(m, k); for (int i = 0; i < nrow; i++) { const T aa = one / D.addrCoefBlock(k, k)[i * (nrow + 1)]; for (int j = 0; j < ncol; j++) { lower[i + j * nrow] *= aa; } } } } } // dim_kern = 0; if (n0 > 0) { singIdx0.resize(n0); // singular nodes are continuously located from the last for (int i = 0; i < n0; i++) { singIdx0[i] = n - n0 + i; } dim_kern = dimKernDense(singIdx0, n, aug_dim, eps_machine, eps_piv, D, a.addrCoefs(), false, // refactorize true, // isFullPermute arg->isSym, verbose, fp); dim_kern = n0; diss_printf(verbose, fp, "%s %d : 
C_gauss_whole_pivot : dim_kern = %d\n", __FILE__, __LINE__, dim_kern); if (dim_kern == (-1)) { diss_printf(verbose, fp, "%s %d : strict diagonal pivot is not enough!\n", __FILE__, __LINE__); dim_kern = n0; D.set_KernelDetected(false); } else { n0 = dim_kern; D.set_KernelDetected(true); } } // if (n0 > 0) aa.free(); // need to move after copy to SquareBlockMatrix D(,) D.unsetBlocked(); // if (n0 > 0) { n1 = n - n0; // n - dim_kern; ==> regular part needs to be restored // nullify lower part corresponding to the suspicious pivots for (int j = 0; j < n; j++) { for (int i = n1; i < n; i++){ a[i + j * n] = zero; } } if (!arg->isSym) { for (int j = 0; j < n; j++) { for (int i = n1; i < n; i++){ a[j + i * n] = zero; } } } D.getSingIdx().resize(n0); for (int i = 0; i < n0; i++) { D.getSingIdx()[i] = permute[singIdx0[i]]; } // D.set_lastPivot(pivot); D.set_dim_kern(dim_kern); D.set_rank(n - dim_kern); } // if (n0 > 0) else { singIdx0.resize(0); D.getSingIdx().resize(0); D.set_KernelDetected(true); D.set_lastPivot(pivot); D.set_dim_kern(0); D.set_rank(n); } // if (n0 > 0) { int dim_kern1 = dim_kern; for (int k = (D.num_blocks() - 1); k >= 0 ; k--) { int nrow_local = D.nrowBlock(k); int dim_kern2 = nrow_local < dim_kern1 ? nrow_local : dim_kern1; D.set_dim_kern_block(k, dim_kern2); if (dim_kern1 > nrow_local) { dim_kern1 -= nrow_local; } else { dim_kern1 = 0; } } } #ifdef DEBUG_MATRIX_DFULLLDLT cout << arg->task_name << "n= " << n << endl; for (int i = 0; i < n; i++) { cout << " " << permute[i]; } cout << endl; for (int i = 0; i < ((n * (n + 1)) / 2); i++) { cout << " " << a_sym[i]; } cout << endl; #endif } // if (task_position % 2 == 1) *(arg->quit) = true; arg->a->free(); #ifdef DEBUG_MEMORY_ALLOC cerr << "C_gauss_whole_pivot : memory deallocate." 
<< endl; // cout << "pivot " << pivot << endl; #endif *(arg->pivot) = pivot; // for passing information to other task } template void C_gauss_whole_pivot(void *arg_); template void C_gauss_whole_pivot, double>(void *arg_); template void C_gauss_whole_pivot(void *arg_); template void C_gauss_whole_pivot, quadruple>(void *arg_); template void C_gauss_whole_pivot(void *arg_); template void C_gauss_whole_pivot, float>(void *arg_); // template void C_dupdateb_Schur_offdiag_t(void *arg_) { C_dupdateb_Schur_arg *arg = (C_dupdateb_Schur_arg *)arg_; const int nrow = arg->nrow; const int ncol = arg->ncol; const int b_size = arg->b_size; SquareBlockMatrix &D = *(arg->D); const int i1 = D.IndexBlock(arg->i1_block); // arg->i1_block * SIZE_B1; const int ii = D.IndexBlock(arg->ii_block); // arg->ii_block * SIZE_B1; const int jj = D.IndexBlock(arg->jj_block); // arg->jj_block * SIZE_B1; FILE *fp = *(arg->fp); const bool verbose = arg->verbose; diss_printf(verbose, fp, "C_dupdate_Schur_offdiag_t : i1=%d ii=%d jj=%d nrow=%d ncol=%d b_size=%d\n", i1, ii, jj, nrow, ncol, b_size); diss_printf(verbose, fp, "no computation\n"); } template void C_dupdateb_Schur_offdiag_t(void *arg_); template void C_dupdateb_Schur_offdiag_t(void *arg_); template void C_dupdateb_Schur_offdiag_t >(void *arg_); template void C_dupdateb_Schur_offdiag_t >(void *arg_); template void C_dupdateb_Schur_offdiag_t(void *arg_); template void C_dupdateb_Schur_offdiag_t >(void *arg_); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_Dsub.cpp000664 000000 000000 00000437516 14037356732 024173 0ustar00rootroot000000 000000 /*! \file C_Dsub.cpp \brief routines for substiution of off-diagonal matrix with strip \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Compiler/OptionLibrary.hpp" #include #include "Compiler/blas.hpp" #include "Driver/C_threads_tasks.hpp" #include "Driver/C_Dsub.hpp" #include "Compiler/DissectionIO.hpp" template void C_Dsub_task_exec(void *arg_) { list* >*arg = (list* >*)arg_; int k = 0; for (typename list* >::const_iterator it = arg->begin(); it != arg->end(); ++it, k++) { (*it)->func(*it); } } template void C_Dsub_task_exec(void *arg_); template void C_Dsub_task_exec(void *arg_); template void C_Dsub_task_exec >(void *arg_); template void C_Dsub_task_exec >(void *arg_); template void C_Dsub_task_exec(void *arg_); template void C_Dsub_task_exec >(void *arg_); // #define DEBUG_BLOCKSUBT template void dsub_sym2sym_diag(C_Dsub_task *arg) { const int ir_bgn = arg->ir_bgn; const int ir_end = arg->ir_end; const int ir_bgn_src = arg->ir_bgn_src; T *dst_pt = arg->dst_mtrx->addrCoefBlock(arg->ir_block, arg->jc_block); const int dst_row = arg->dst_mtrx->nrowBlock(arg->ir_block, arg->jc_block); SquareBlockMatrix *src_mtrx = arg->src_pt; const int nrow = ir_end - ir_bgn; const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src); const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1); const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src); const T none(-1.0); if ((iblock0 + 1) == iblock1) { const int ir_mid_src = src_mtrx->IndexBlock(iblock1); // iblock1 * SIZE_B1; const int nrow0 = ir_mid_src - ir_bgn_src; const int nrow1 = nrow - nrow0; const int ir_mid = ir_bgn + nrow0; { int jj, j0, j1; const int nrow_src = 
src_mtrx->nrowBlock(iblock0, iblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, iblock0); for (j1 = nrow_src_offset * (nrow_src + 1), j0 = ir_bgn * (dst_row + 1), //D(ir_bgn,ir_bgn)-(ir_mid,ir_mid) jj = ir_bgn; jj < ir_mid; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow0, none, src_pt + j1, 1, dst_pt + j0, 1); } } { int jj, j0, j1, j2, j3, j4; const int nrow_src1 = src_mtrx->nrowBlock(iblock1, iblock1); const int nrow_src0 = src_mtrx->nrowBlock(iblock0, iblock1); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, iblock1); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, iblock1); for (j4 = 0, j3 = nrow_src_offset, j0 = ir_bgn + ir_mid * dst_row, // (ir_bgn,ir_mid)-(ir_mid,ir_end) j1 = ir_mid * (dst_row + 1), //D(ir_mid,ir_mid)-(ir_end,ir_end) j2 = ir_mid + ir_bgn * dst_row, //T(ir_bgn,ir_mid)-(ir_mid,ir_end) jj = ir_mid; jj < ir_end; jj++, // driving index j0 += dst_row, j1 += dst_row, j2++, j3 += nrow_src0, j4 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j3, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j4, 1, dst_pt + j1, 1); // lower part of Schur complement matrix // blas_axpy(nrow0, none, src_pt0 + j3, 1, dst_pt + j2, dst_row); } } } else if (iblock0 == iblock1) { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, iblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, iblock0); for (j1 = nrow_src_offset * (nrow_src + 1), j0 = ir_bgn * (dst_row + 1), //D(ir_bgn,ir_bgn)-(ir_end,ir_end) jj = ir_bgn; jj < ir_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } // loop : j0, j1 } else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in diagonal %d %d\n", __FILE__, __LINE__, iblock0, iblock1); } } template void dsub_sym2sym_diag(C_Dsub_task *arg); template void dsub_sym2sym_diag(C_Dsub_task *arg); template void dsub_sym2sym_diag >(C_Dsub_task > *arg); template void dsub_sym2sym_diag >(C_Dsub_task > *arg); template void 
dsub_sym2sym_diag(C_Dsub_task *arg); template void dsub_sym2sym_diag >(C_Dsub_task > *arg); template void dsub_sym2sym_diag >(C_Dsub_task > *arg); template void dsub_sym2sym_diag(C_Dsub_task *arg); template void dsub_sym2sym_diag >(C_Dsub_task > *arg); // end of template function dsub_sym2sym_diag
// dsub_unsym2unsym_diag : updates a square diagonal range of a destination
// block with the matching range of the source block matrix.
// Every update is a blas_axpy() call whose coefficient is `none` = T(-1.0),
// whose source operand points into a block of arg->src_pt and whose
// destination operand points into the destination block
// (arg->ir_block, arg->jc_block) of arg->dst_mtrx.
// NOTE(review): presumably blas_axpy follows the BLAS axpy convention
// (y += alpha * x), which would make each call "dst -= src" -- confirm
// against Compiler/blas.hpp.
//
// Inputs read from *arg : ir_bgn, ir_end (destination range, ir_end
// exclusive), ir_bgn_src (first index in the source), ir_block, jc_block,
// dst_mtrx, src_pt, and, in the error branch only, verbose and fp.
//
// Three cases, selected by the source block indices of the first and of the
// last index of the range:
//   * the range crosses from one source block into the next one,
//   * the range lies inside a single source block,
//   * anything else : a diagnostic is printed and nothing is updated.
// NOTE(review): the offsets below look like column-major addressing with
// dst_row as leading dimension of the destination -- TODO confirm.
// NOTE(review): the template parameter lists ("<...>") appear to be missing
// from this copy of the file -- confirm against the upstream sources.
template void dsub_unsym2unsym_diag(C_Dsub_task *arg) {
  const int ir_bgn = arg->ir_bgn;
  const int ir_end = arg->ir_end;
  const int ir_bgn_src = arg->ir_bgn_src;
  T *dst_pt = arg->dst_mtrx->addrCoefBlock(arg->ir_block, arg->jc_block);
  const int dst_row = arg->dst_mtrx->nrowBlock(arg->ir_block, arg->jc_block);
  SquareBlockMatrix *src_mtrx = arg->src_pt;
  const int nrow = ir_end - ir_bgn;
  // source block holding the first / the last index of the range
  const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src);
  const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1);
  // offset of the first index, as returned by BlockOffset()
  const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src);
  // block copy : lower triangle to lower is enough
  const T none(-1.0);
  if ((iblock0 + 1) == iblock1) {
    // the range is split at the first index of source block iblock1 :
    // nrow0 indices fall before that split point, nrow1 after it
    const int ir_mid_src = src_mtrx->IndexBlock(iblock1);
    const int nrow0 = ir_mid_src - ir_bgn_src;
    const int nrow1 = nrow - nrow0;
    const int ir_mid = ir_bgn + nrow0;
    {
      // first part : source block (iblock0, iblock0), nrow0 entries per call
      int jj, j0, j1;
      const int nrow_src = src_mtrx->nrowBlock(iblock0, iblock0);
      T *src_pt = src_mtrx->addrCoefBlock(iblock0, iblock0);
      for (j1 = nrow_src_offset * (nrow_src + 1),
           j0 = ir_bgn * (dst_row + 1), //D(ir_bgn,ir_bgn)-(ir_mid,ir_mid)
           jj = ir_bgn; jj < ir_mid; jj++, // driving index
           j0 += dst_row, j1 += nrow_src) {
        blas_axpy(nrow0, none, src_pt + j1, 1, dst_pt + j0, 1);
      }
    }
    {
      // second part : for every jj in [ir_mid, ir_end) three updates are
      // made, reading source blocks (iblock0, iblock1), (iblock1, iblock1)
      // and (iblock1, iblock0)
      int jj, j0, j1, j2, j3, j4;
      const int nrow_src1 = src_mtrx->nrowBlock(iblock1, iblock1);
      const int nrow_src0 = src_mtrx->nrowBlock(iblock0, iblock1);
      T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, iblock1);
      T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, iblock1);
      // lower part of Schur complement matrix
      T *src_pt2 = src_mtrx->addrCoefBlock(iblock1, iblock0);
      for (j4 = 0,
           j3 = nrow_src_offset,
           j0 = ir_bgn + ir_mid * dst_row, // (ir_bgn,ir_mid)-(ir_mid,ir_end)
           j1 = ir_mid * (dst_row + 1), //D(ir_mid,ir_mid)-(ir_end,ir_end)
           j2 = ir_mid + ir_bgn * dst_row, //T(ir_bgn,ir_mid)-(ir_mid,ir_end)
           jj = ir_mid; jj < ir_end; jj++, // driving index
           j0 += dst_row, j1 += dst_row, j2++, j3 += nrow_src0, j4 += nrow_src1) {
        blas_axpy(nrow0, none, src_pt0 + j3, 1, dst_pt + j0, 1);
        blas_axpy(nrow1, none, src_pt1 + j4, 1, dst_pt + j1, 1);
        // lower part of Schur complement matrix
        // (destination is walked with increment dst_row, starting at j2,
        //  while j2 itself advances by one per iteration)
        blas_axpy(nrow0, none, src_pt2 + j3, 1, dst_pt + j2, dst_row);
      }
    }
  }
  else if (iblock0 == iblock1) {
    // whole range inside source block (iblock0, iblock0) : one update of
    // nrow entries per jj
    int jj, j0, j1;
    const int nrow_src = src_mtrx->nrowBlock(iblock0, iblock0);
    T *src_pt = src_mtrx->addrCoefBlock(iblock0, iblock0);
    for (j1 = nrow_src_offset * (nrow_src + 1),
         j0 = ir_bgn * (dst_row + 1), //D(ir_bgn,ir_bgn)-(ir_end,ir_end)
         jj = ir_bgn; jj < ir_end; jj++, // driving index
         j0 += dst_row, j1 += nrow_src) {
      blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1);
    } // loop : j0, j1
  }
  else {
    // the range covers neither one block nor two consecutive blocks :
    // report it, no update is performed
    diss_printf(arg->verbose, arg->fp,
                "%s %d : bad block indices in diagonal %d %d\n",
                __FILE__, __LINE__, iblock0, iblock1);
  }
}

template void dsub_unsym2unsym_diag(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag >(C_Dsub_task >*arg);
template void dsub_unsym2unsym_diag >(C_Dsub_task >*arg);
template void dsub_unsym2unsym_diag(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag >(C_Dsub_task >*arg);
// end of template function dsub_unsym2unsym_diag template void dsub_sym2sym(C_Dsub_task *arg) { const int ir_bgn = arg->ir_bgn; const int ir_end = arg->ir_end; const int jc_bgn = arg->jc_bgn; const int jc_end = arg->jc_end; const int ir_bgn_src = arg->ir_bgn_src; const int jc_bgn_src = arg->jc_bgn_src; T *dst_pt = arg->dst_mtrx->addrCoefBlock(arg->ir_block, arg->jc_block); const int dst_row = arg->dst_mtrx->nrowBlock(arg->ir_block, arg->jc_block); SquareBlockMatrix *src_mtrx = arg->src_pt; const int nrow = ir_end - ir_bgn; const int ncol = jc_end - jc_bgn; const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src); const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1); const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src); const int jblock0 = src_mtrx->BlockIndex(jc_bgn_src); const
int jblock1 = src_mtrx->BlockIndex(jc_bgn_src + ncol - 1); const int ncol_src_offset = src_mtrx->BlockOffset(jc_bgn_src); const T none(-1.0); if ((iblock0 + 1) == iblock1) { // decomposition is defined by src block with SIZE_B1 const int ir_mid_src = src_mtrx->IndexBlock(iblock1); // * SIZE_B1; const int nrow0 = ir_mid_src - ir_bgn_src; const int nrow1 = nrow - nrow0; const int ir_mid = ir_bgn + nrow0; if ((jblock0 + 1) == jblock1) { const int jc_mid_src = src_mtrx->IndexBlock(jblock1); // * SIZE_B1; const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); // distnation : transpose of // distnation : transpose of for (j3 = ncol_src_offset * nrow_src1, j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_mid,jc_mid) j1 = ir_mid + jc_bgn * dst_row, // (ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock1); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock1); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock1); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock1); // distnation : transpose of // distnation : transpose of for (j3 = 0, j2 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_mid,jc_end) j1 = ir_mid + jc_mid * dst_row, // (ir_mid,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 
1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } } // if ((jblock0 + 1) == jblock1) { else if (jblock0 == jblock1) { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); // distnation : transpose of // distnation : transpose of for (j3 = ncol_src_offset * nrow_src1, j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_mid,jc_end) j1 = ir_mid + jc_bgn * dst_row, // (ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in column : %d %d\n", __FILE__, __LINE__, jblock0, jblock1); } } else if (iblock0 == iblock1) { if ((jblock0 + 1) == jblock1) { const int jc_mid_src = src_mtrx->IndexBlock(jblock1);// * SIZE_B1; const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); for (j1 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock1); // distnation : transpose of for (j1 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { 
blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } } else { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); // distnation : transpose of for (j1 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } } // if ((jblock0 + 1) == jblock1) else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in row : %d %d\n", __FILE__, __LINE__, iblock0, iblock1); } } template void dsub_sym2sym(C_Dsub_task *arg); template void dsub_sym2sym(C_Dsub_task *arg); template void dsub_sym2sym >(C_Dsub_task > *arg); template void dsub_sym2sym >(C_Dsub_task > *arg); template void dsub_sym2sym(C_Dsub_task *arg); template void dsub_sym2sym >(C_Dsub_task > *arg); // end of template function dsub_sym2sym template void dsub_unsym2unsym(C_Dsub_task *arg) { const int ir_bgn = arg->ir_bgn; const int ir_end = arg->ir_end; const int jc_bgn = arg->jc_bgn; const int jc_end = arg->jc_end; const int ir_bgn_src = arg->ir_bgn_src; const int jc_bgn_src = arg->jc_bgn_src; T *dst_pt = arg->dst_mtrx->addrCoefBlock(arg->ir_block, arg->jc_block); const int dst_row = arg->dst_mtrx->nrowBlock(arg->ir_block, arg->jc_block); T *dst_pt2 = arg->dst_mtrx->addrCoefBlock(arg->jc_block, arg->ir_block); SquareBlockMatrix *src_mtrx = arg->src_pt; const int nrow = ir_end - ir_bgn; const int ncol = jc_end - jc_bgn; const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src); const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1); const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src); const int jblock0 = src_mtrx->BlockIndex(jc_bgn_src); const int jblock1 = src_mtrx->BlockIndex(jc_bgn_src + ncol - 1); const int ncol_src_offset = src_mtrx->BlockOffset(jc_bgn_src); const T none(-1.0); if ((iblock0 + 1) == iblock1) { // 
decomposition is defined by src block with SIZE_B1 const int ir_mid_src = src_mtrx->IndexBlock(iblock1);// * SIZE_B1; const int nrow0 = ir_mid_src - ir_bgn_src; const int nrow1 = nrow - nrow0; const int ir_mid = ir_bgn + nrow0; if ((jblock0 + 1) == jblock1) { const int jc_mid_src = src_mtrx->IndexBlock(jblock1);// * SIZE_B1; const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); for (j3 = ncol_src_offset * nrow_src1, j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_mid,jc_mid) j1 = ir_mid + jc_bgn * dst_row, //(ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } // lower part of Schur complement matrix { int jj, j0, j2; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); { for (j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_mid,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j0 += dst_row, j2 += nrow_src0) { blas_axpy(nrow0, none, src_pt2 + j2, 1, dst_pt2 + j0, 1); } } } { int jj, j1, j3; const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt3 = src_mtrx->addrCoefBlock(jblock0, iblock1); if (iblock1 == jblock0) { for (j3 = ncol_src_offset, j1 = ir_mid + jc_bgn * dst_row, //(ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j1 += dst_row, j3++) { blas_axpy(nrow1, none, src_pt3 + j3, nrow_src1, dst_pt2 + j1, 1); } } else { for (j3 = 
ncol_src_offset * nrow_src1, j1 = ir_mid + jc_bgn * dst_row, //(ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j1 += dst_row, j3 += nrow_src1) { blas_axpy(nrow1, none, src_pt3 + j3, 1, dst_pt2 + j1, 1); } } } { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock1); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock1); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock1); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock1); for (j3 = 0, j2 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, //(ir_bgn,jc_mid)-(ir_mid,jc_end) j1 = ir_mid + jc_mid * dst_row, //(ir_mid,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, //driving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } { int jj, j0, j2; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock1); T *src_pt2 = src_mtrx->addrCoefBlock(jblock1, iblock0); { for (j2 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, //(ir_bgn,jc_mid)-(ir_mid,jc_end) jj = jc_mid; jj < jc_end; jj++, //driving index j0 += dst_row, j2 += nrow_src0) { blas_axpy(nrow0, none, src_pt2 + j2, 1, dst_pt2 + j0, 1); } } } { int jj, j1, j3; const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock1); T *src_pt3 = src_mtrx->addrCoefBlock(jblock1, iblock1); { for (j3 = 0, j1 = ir_mid + jc_mid * dst_row, //(ir_mid,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, //driving index j1 += dst_row, j3 += nrow_src1) { blas_axpy(nrow1, none, src_pt3 + j3, 1, dst_pt2 + j1, 1); } } } } // if ((jblock0 + 1) == jblock1) { else if (jblock0 == jblock1) { { int jj, j0, j1, j2, j3; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); for (j3 = ncol_src_offset * 
nrow_src1, j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_mid,jc_end) j1 = ir_mid + jc_bgn * dst_row, // (ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += dst_row, j2 += nrow_src0, j3 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j2, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j3, 1, dst_pt + j1, 1); } } { int jj, j0, j2; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); { for (j2 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_mid,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j2 += nrow_src0) { blas_axpy(nrow0, none, src_pt2 + j2, 1, dst_pt2 + j0, 1); } } } { int jj, j1, j3; const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt3 = src_mtrx->addrCoefBlock(jblock0, iblock1); if (iblock1 == jblock0) { for (j3 = ncol_src_offset, // transposed j1 = ir_mid + jc_bgn * dst_row, //(ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j1 += dst_row, j3++) { blas_axpy(nrow1, none, src_pt3 + j3, nrow_src1, dst_pt2 + j1, 1); } } else { for (j3 = ncol_src_offset * nrow_src1, j1 = ir_mid + jc_bgn * dst_row, //(ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j1 += dst_row, j3 += nrow_src1) { blas_axpy(nrow1, none, src_pt3 + j3, 1, dst_pt2 + j1, 1); } } } } else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in column : %d %d\n", __FILE__, __LINE__, jblock0, jblock1); } } else if (iblock0 == iblock1) { if ((jblock0 + 1) == jblock1) { const int jc_mid_src = src_mtrx->IndexBlock(jblock1);// * SIZE_B1; const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); for (j1 = 
nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); if (iblock0 == jblock0) { for (j1 = ncol_src_offset + nrow_src_offset * nrow_src, // transposed j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j0 += dst_row, j1++) { blas_axpy(nrow, none, src_pt2 + j1, nrow_src, dst_pt2 + j0, 1); } } else { for (j1 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt2 + j1, 1, dst_pt2 + j0, 1); } } } { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock1); for (j1 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1); T *src_pt2 = src_mtrx->addrCoefBlock(jblock1, iblock0); { for (j1 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, //(ir_bgn,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt2 + j1, 1, dst_pt2 + j0, 1); } } } } else { { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); for (j1 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; 
jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1); } } { int jj, j0, j1; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); if (iblock0 == jblock0) { for (j1 = ncol_src_offset + nrow_src_offset * nrow_src, // transposed j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1++) { blas_axpy(nrow, none, src_pt2 + j1, nrow_src, dst_pt2 + j0, 1); } } else { for (j1 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += nrow_src) { blas_axpy(nrow, none, src_pt2 + j1, 1, dst_pt2 + j0, 1); } } } } } // if ((jblock0 + 1) == jblock1) else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in row : %d %d\n", __FILE__, __LINE__, iblock0, iblock1); } } template void dsub_unsym2unsym(C_Dsub_task *arg); template void dsub_unsym2unsym(C_Dsub_task *arg); template void dsub_unsym2unsym >(C_Dsub_task > *arg); template void dsub_unsym2unsym >(C_Dsub_task > *arg); template void dsub_unsym2unsym(C_Dsub_task *arg); template void dsub_unsym2unsym >(C_Dsub_task > *arg); // end of template function dsub_unsym2unsym template void dsub_unsym2diag(C_Dsub_task *arg) { // destination is inside of a diagonal block : ir_block == jc_block const int ir_bgn = arg->ir_bgn; const int ir_end = arg->ir_end; const int jc_bgn = arg->jc_bgn; const int jc_end = arg->jc_end; const int ir_bgn_src = arg->ir_bgn_src; const int jc_bgn_src = arg->jc_bgn_src; T *dst_pt = arg->dst_mtrx->addrCoefBlock(arg->ir_block, arg->jc_block); const int dst_row = arg->dst_mtrx->nrowBlock(arg->ir_block, arg->jc_block); SquareBlockMatrix *src_mtrx = arg->src_pt; const int nrow = ir_end - ir_bgn; const int ncol = jc_end - jc_bgn; const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src); const int 
iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1); const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src); const int jblock0 = src_mtrx->BlockIndex(jc_bgn_src); const int jblock1 = src_mtrx->BlockIndex(jc_bgn_src + ncol - 1); const int ncol_src_offset = src_mtrx->BlockOffset(jc_bgn_src); const T none(-1.0); if ((iblock0 + 1) == iblock1) { // decomposition is defined by src block with SIZE_B1 const int ir_mid_src = src_mtrx->IndexBlock(iblock1); const int nrow0 = ir_mid_src - ir_bgn_src; const int nrow1 = nrow - nrow0; const int ir_mid = ir_bgn + nrow0; if ((jblock0 + 1) == jblock1) { const int jc_mid_src = src_mtrx->IndexBlock(jblock1); const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1, j2, j3, j4, j5; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); // lower part of Schur complement matrix for (j5 = ncol_src_offset * nrow_src1, j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_mid,jc_mid) j1 = ir_mid + jc_bgn * dst_row, // (ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j0 += dst_row, j1 += dst_row, j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j4, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j5, 1, dst_pt + j1, 1); } // lower part of Schur complement matrix T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); T *src_pt3 = src_mtrx->addrCoefBlock(jblock0, iblock1); if (iblock1 == jblock0) { for (j5 = ncol_src_offset, // transposted j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j2 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_mid,jc_mid) j3 = jc_bgn + ir_mid * dst_row,//(ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j2++, j3++, j4 += nrow_src0, j5++) { 
blas_axpy(nrow0, none, src_pt2 + j4, 1, // transposed dst_pt + j2, dst_row); blas_axpy(nrow1, none, src_pt3 + j5, nrow_src1, // both in the dst_pt + j3, dst_row); // diagonal } } else { for (j5 = ncol_src_offset * nrow_src1, j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j2 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_mid,jc_mid) j3 = jc_bgn + ir_mid * dst_row,//(ir_mid,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // drinving index j2++, j3++, j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt2 + j4, 1, // transposed dst_pt + j2, dst_row); blas_axpy(nrow1, none, src_pt3 + j5, 1, // transposed dst_pt + j3, dst_row); } } } { int jj, j0, j1, j2, j3, j4, j5; const int nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock1); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock1); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock1); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock1); for (j5 = 0, j4 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_mid,jc_end) j1 = ir_mid + jc_mid * dst_row, // (ir_mid,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += dst_row , j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j4, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j5, 1, dst_pt + j1, 1); } // lower part of Schur complement matrix T *src_pt2 = src_mtrx->addrCoefBlock(jblock1, iblock0); T *src_pt3 = src_mtrx->addrCoefBlock(jblock1, iblock1); { for (j5 = 0, j4 = nrow_src_offset, j2 = jc_mid + ir_bgn * dst_row,//(ir_bgn,jc_mid)-(ir_mid,jc_end) j3 = jc_mid + ir_mid * dst_row,//(ir_mid,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j2++, j3++, j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt2 + j4, 1, dst_pt + j2, dst_row); blas_axpy(nrow1, none, src_pt3 + j5, 1, dst_pt + j3, dst_row); } } } } // if ((jblock0 + 1) == jblock1) { else if (jblock0 == jblock1) { int jj, j0, j1, j2, j3, j4, j5; const int 
nrow_src0 = src_mtrx->nrowBlock(iblock0, jblock0); const int nrow_src1 = src_mtrx->nrowBlock(iblock1, jblock0); T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0); T *src_pt1 = src_mtrx->addrCoefBlock(iblock1, jblock0); for (j5 = ncol_src_offset * nrow_src1, j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_mid,jc_end) j1 = ir_mid + jc_bgn * dst_row, // (ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j0 += dst_row, j1 += dst_row, j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt0 + j4, 1, dst_pt + j0, 1); blas_axpy(nrow1, none, src_pt1 + j5, 1, dst_pt + j1, 1); } // lower part of Schur complement matrix T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); T *src_pt3 = src_mtrx->addrCoefBlock(jblock0, iblock1); if (iblock1 == jblock0) { for (j5 = ncol_src_offset, j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j2 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_mid,jc_end) j3 = jc_bgn + ir_mid * dst_row,//(ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j2++, j3++, j4 += nrow_src0, j5++) { blas_axpy(nrow0, none, src_pt2 + j4, 1, // transposed dst_pt + j2, dst_row); blas_axpy(nrow1, none, src_pt3 + j5, nrow_src1, // both in the dst_pt + j3, dst_row); // diagonal } } else { for (j5 = ncol_src_offset * nrow_src1, j4 = nrow_src_offset + ncol_src_offset * nrow_src0, j2 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_mid,jc_end) j3 = jc_bgn + ir_mid * dst_row,//(ir_mid,jc_bgn)-(ir_end,jc_end) jj = jc_bgn; jj < jc_end; jj++, // driving index j2++, j3++, j4 += nrow_src0, j5 += nrow_src1) { blas_axpy(nrow0, none, src_pt2 + j4, 1, dst_pt + j2, dst_row); blas_axpy(nrow1, none, src_pt3 + j5, 1, dst_pt + j3, dst_row); } } } else { diss_printf(arg->verbose, arg->fp, "%s %d : bad block indices in column : %d %d\n", __FILE__, __LINE__, jblock0, jblock1); } } else if (iblock0 == iblock1) { if ((jblock0 + 1) == jblock1) { const int 
jc_mid_src = src_mtrx->IndexBlock(jblock1); const int ncol0 = jc_mid_src - jc_bgn_src; const int jc_mid = jc_bgn + ncol0; { int jj, j0, j1, j2; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); for (j2 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j0 += dst_row, j2 += nrow_src) { blas_axpy(nrow, none, src_pt + j2, 1, dst_pt + j0, 1); } // lower part of Schur complement matrix T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0); if (iblock0 == jblock0) { for (j2 = ncol_src_offset + nrow_src_offset * nrow_src, j1 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j1++, j2++) { blas_axpy(nrow, none, src_pt2 + j2, nrow_src, dst_pt + j1, dst_row); } } else { for (j2 = nrow_src_offset + ncol_src_offset * nrow_src, j1 = jc_bgn + ir_bgn * dst_row,//(ir_bgn,jc_bgn)-(ir_end,jc_mid) jj = jc_bgn; jj < jc_mid; jj++, // driving index j1++, j2 += nrow_src) { blas_axpy(nrow, none, src_pt2 + j2, 1, dst_pt + j1, dst_row); } } } { int jj, j0, j1, j2; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock1); T *src_pt2 = src_mtrx->addrCoefBlock(jblock1, iblock0); for (j2 = nrow_src_offset, j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_end,jc_end) j1 = jc_mid + ir_bgn * dst_row,//T(ir_bgn,jc_mid)-(ir_end,jc_end) jj = jc_mid; jj < jc_end; jj++, // driving index j0 += dst_row, j1++, j2 += nrow_src) { blas_axpy(nrow, none, src_pt + j2, 1, dst_pt + j0, 1); // lower part of Schur complement matrix blas_axpy(nrow, none, src_pt2 + j2, 1, dst_pt + j1, dst_row); } } } else { int jj, j0, j1, j2; const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0); for (j2 = nrow_src_offset + ncol_src_offset * nrow_src, j0 = ir_bgn + jc_bgn * 
dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end)
           jj = jc_bgn; jj < jc_end; jj++, // driving index
           j0 += dst_row, j2 += nrow_src) {
        blas_axpy(nrow, none, src_pt + j2, 1, dst_pt + j0, 1);
      }
      T *src_pt2 = src_mtrx->addrCoefBlock(jblock0, iblock0);
      if (iblock0 == jblock0) {
        for (j2 = ncol_src_offset + nrow_src_offset * nrow_src, //trans
             j1 = jc_bgn + ir_bgn * dst_row, //T(ir_bgn,jc_bgn)-(ir_end,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j1++, j2++) {
          blas_axpy(nrow, none, src_pt2 + j2, nrow_src, dst_pt + j1, dst_row);
        }
      }
      else {
        for (j2 = nrow_src_offset + ncol_src_offset * nrow_src,
             j1 = jc_bgn + ir_bgn * dst_row, //T(ir_bgn,jc_bgn)-(ir_end,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j1++, j2 += nrow_src) {
          blas_axpy(nrow, none, src_pt2 + j2, 1, dst_pt + j1, dst_row);
        }
      }
    }
  } // if ((jblock0 + 1) == jblock1)
  else {
    diss_printf(arg->verbose, arg->fp,
                "%s %d : bad block indices in row : %d %d\n",
                __FILE__, __LINE__, iblock0, iblock1);
  }
}

template void dsub_unsym2diag(C_Dsub_task *arg);
template void dsub_unsym2diag(C_Dsub_task *arg);
template void dsub_unsym2diag >(C_Dsub_task > *arg);
template void dsub_unsym2diag >(C_Dsub_task > *arg);
template void dsub_unsym2diag(C_Dsub_task *arg);
template void dsub_unsym2diag >(C_Dsub_task > *arg);
// end of template function dsub_unsym2diag

// dsub_sym2rct : updates a window of one destination block with a window of
// the source block matrix arg->src_pt, scaled by -1.
//
// Destination : block (arg->ir_block, arg->jc_block) of arg->dst_pt, rows
//   [ir_bgn, ir_end) and columns [jc_bgn, jc_end).  Entries are addressed as
//   row + column * dst_row, where dst_row = arg->dst_pt->nrowBlock(ir_block).
// Source : the window starts at (ir_bgn_src, jc_bgn_src) of arg->src_pt.  It
//   may span several row blocks (iblock0 .. iblock1) but at most two adjacent
//   column blocks (jblock0 and jblock0 + 1); any other column layout only
//   prints a "bad block indices" message.
// Each destination column is updated by one blas_axpy call per source block,
// with coefficient none = -1 and unit stride on both sides.
// NOTE(review): assumes blas_axpy(n, a, x, incx, y, incy) has the usual BLAS
// axpy meaning y += a * x -- confirm against its declaration.
template void dsub_sym2rct(C_Dsub_task *arg)
{
  const int ir_bgn = arg->ir_bgn;
  const int ir_end = arg->ir_end;
  const int jc_bgn = arg->jc_bgn;
  const int jc_end = arg->jc_end;
  const int ir_bgn_src = arg->ir_bgn_src;
  const int jc_bgn_src = arg->jc_bgn_src;
  // const int dst_row = arg->dst_row;
  T *dst_pt = arg->dst_pt->addrCoefBlock(arg->ir_block, arg->jc_block);
  const int dst_row = arg->dst_pt->nrowBlock(arg->ir_block);
  SquareBlockMatrix *src_mtrx = arg->src_pt;

  const int nrow = ir_end - ir_bgn;
  const int ncol = jc_end - jc_bgn;
  // first / last source block touched by the window, and the offset of the
  // window start inside the first block, in rows and then in columns
  const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src);
  const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1);
  const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src);
  const int jblock0 = src_mtrx->BlockIndex(jc_bgn_src);
  const int jblock1 = src_mtrx->BlockIndex(jc_bgn_src + ncol - 1);
  const int ncol_src_offset = src_mtrx->BlockOffset(jc_bgn_src);
  const T none(-1.0);

  if (iblock0 < iblock1) { // decomposition is defined by src block with SIZE_B1
    if ((jblock0 + 1) == jblock1) {
      // columns are split at jc_mid, the destination column that corresponds
      // to the first column of source block jblock1
      const int jc_mid = jc_bgn + (src_mtrx->IndexBlock(jblock1) - jc_bgn_src);
      int ir_start = ir_bgn;
      { // first row block : starts nrow_src_offset rows into the block
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); // SIZE_B1?
        const int nnrow = nrow_src - nrow_src_offset;
        T *src_pt;
        src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0);
        for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_mid)
             jj = jc_bgn; jj < jc_mid; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        src_pt = src_mtrx->addrCoefBlock(iblock0, jblock1);
        for (j1 = nrow_src_offset,
             j0 = ir_start + jc_mid * dst_row, // ( ,jc_mid)-( +nnrow,jc_end)
             jj = jc_mid; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        ir_start += nnrow;
      }
      // interior row blocks : all nrow_src rows of each block are used
      for (int iblock = iblock0 + 1; iblock < iblock1; iblock++) {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock, jblock0); // SIZE_B1?
        T *src_pt;
        src_pt = src_mtrx->addrCoefBlock(iblock, jblock0);
        for (j1 = ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +SIZE_B1,jc_mid)
             jj = jc_bgn; jj < jc_mid; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow_src, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        src_pt = src_mtrx->addrCoefBlock(iblock, jblock1);
        for (j1 = 0,
             j0 = ir_start + jc_mid * dst_row, // ( ,jc_mid)-( +SIZE_B1,jc_end)
             jj = jc_mid; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow_src, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        ir_start += nrow_src;
      }
      { // last row block : only the leading nnrow rows belong to the window
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock1, jblock0);
        const int nnrow = ir_bgn_src + nrow - src_mtrx->IndexBlock(iblock1);
        T *src_pt;
        src_pt = src_mtrx->addrCoefBlock(iblock1, jblock0);
        for (j1 = ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_mid)
             jj = jc_bgn; jj < jc_mid; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        src_pt = src_mtrx->addrCoefBlock(iblock1, jblock1);
        for (j1 = 0,
             j0 = ir_start + jc_mid * dst_row, // ( ,jc_mid)-( +nnrow,jc_end)
             jj = jc_mid; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
      }
    }
    else if (jblock0 == jblock1) {
      // single column block : same three row stages, no column split
      int ir_start = ir_bgn;
      {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
        const int nnrow = nrow_src - nrow_src_offset;
        T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0);
        for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        ir_start += nnrow;
      }
      for (int iblock = iblock0 + 1; iblock < iblock1; iblock++) {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock, jblock0);
        T *src_pt = src_mtrx->addrCoefBlock(iblock, jblock0);
        for (j1 = ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +SIZE_B1,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow_src, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
        ir_start += nrow_src;
      }
      {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock1, jblock0);
        // const int itmp = (ir_bgn_src + nrow) % SIZE_B1;
        // const int nnrow = (itmp == 0) ? SIZE_B1 : itmp;
        const int nnrow = ir_bgn_src + nrow - src_mtrx->IndexBlock(iblock1);
        T *src_pt = src_mtrx->addrCoefBlock(iblock1, jblock0);
        for (j1 = ncol_src_offset * nrow_src,
             j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nnrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
      }
    }
    else {
      diss_printf(arg->verbose, arg->fp,
                  "%s %d : bad block indices in column : %d %d\n",
                  __FILE__, __LINE__, jblock0, jblock1);
    }
  } // if (iblock0 < iblock1)
  else {
    // the whole row range lies inside one source row block (iblock0)
    if ((jblock0 + 1) == jblock1) {
      const int jc_mid_src = src_mtrx->IndexBlock(jblock1);
      const int ncol0 = jc_mid_src - jc_bgn_src;
      const int jc_mid = jc_bgn + ncol0;
      {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
        T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0);
        for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
             j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_mid)
             jj = jc_bgn; jj < jc_mid; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
      }
      {
        int jj, j0, j1;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1);
        T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock1);
        for (j1 = nrow_src_offset,
             j0 = ir_bgn + jc_mid * dst_row, // (ir_bgn,jc_mid)-(ir_end,jc_end)
             jj = jc_mid; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1);
        }
      }
    }
    else if (jblock0 == jblock1) {
      int jj, j0, j1;
      const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
      T *src_pt = src_mtrx->addrCoefBlock(iblock0, jblock0);
      for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
           j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end)
           jj = jc_bgn; jj < jc_end; jj++, // driving index
           j0 += dst_row, j1 += nrow_src) {
        blas_axpy(nrow, none, src_pt + j1, 1, dst_pt + j0, 1);
      }
    }
    else {
      diss_printf(arg->verbose, arg->fp,
                  "%s %d : bad block indices in column : %d %d\n",
                  __FILE__, __LINE__, jblock0, jblock1);
    }
  } // if (iblock0 < iblock1)
}

template void dsub_sym2rct(C_Dsub_task *arg);
template void dsub_sym2rct(C_Dsub_task *arg);
template void dsub_sym2rct >(C_Dsub_task > *arg);
template void dsub_sym2rct >(C_Dsub_task > *arg);
template void dsub_sym2rct(C_Dsub_task *arg);
template void dsub_sym2rct >(C_Dsub_task > *arg);
// end of template function dsub_sym2rct

// dsub_unsym2rct : same window walk as dsub_sym2rct, but every step issues
// two blas_axpy calls with coefficient -1 :
//   - into arg->dst_pt  from source block (iblock, jblock), unit stride;
//   - into arg->dst_pt2 from source block (jblock, iblock).  When that block
//     is a diagonal block (jblock == iblock) it is read with stride nrow_src
//     starting from a transposed offset; otherwise it is read with unit
//     stride at the same offset as the first call.
// Both destinations use block (arg->ir_block, arg->jc_block) and the same
// leading dimension dst_row.
template void dsub_unsym2rct(C_Dsub_task *arg)
{
  const int ir_bgn = arg->ir_bgn;
  const int ir_end = arg->ir_end;
  const int jc_bgn = arg->jc_bgn;
  const int jc_end = arg->jc_end;
  const int ir_bgn_src = arg->ir_bgn_src;
  const int jc_bgn_src = arg->jc_bgn_src;
  // const int dst_row = arg->dst_row;
  T *dst_pt = arg->dst_pt->addrCoefBlock(arg->ir_block, arg->jc_block);
  T *dst_pt2 = arg->dst_pt2->addrCoefBlock(arg->ir_block, arg->jc_block);
  const int dst_row = arg->dst_pt->nrowBlock(arg->ir_block);
  SquareBlockMatrix *src_mtrx = arg->src_pt;

  const int nrow = ir_end - ir_bgn;
  const int ncol = jc_end - jc_bgn;
  const int iblock0 = src_mtrx->BlockIndex(ir_bgn_src);
  const int iblock1 = src_mtrx->BlockIndex(ir_bgn_src + nrow - 1);
  const int nrow_src_offset = src_mtrx->BlockOffset(ir_bgn_src);
  const int jblock0 = src_mtrx->BlockIndex(jc_bgn_src);
  const int jblock1 = src_mtrx->BlockIndex(jc_bgn_src + ncol - 1);
  const int ncol_src_offset = src_mtrx->BlockOffset(jc_bgn_src);
  const T none(-1.0);

  if (iblock0 < iblock1) { // decomposition is defined by src block with SIZE_B1
    if ((jblock0 + 1) == jblock1) {
const int jc_mid = jc_bgn + (src_mtrx->IndexBlock(jblock1) - jc_bgn_src);
      int ir_start = ir_bgn;
      { // first row block : starts nrow_src_offset rows into the block
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0); // SIZE_B1?
        const int nnrow = nrow_src - nrow_src_offset;
        T *src_pt0, *src_pt1;
        src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0);
        src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock0);
        if (jblock0 == iblock0) {
          // diagonal block : second operand is read with stride nrow_src
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j2 = ncol_src_offset + nrow_src_offset * nrow_src, // transposed
               j0 = ir_start + jc_bgn * dst_row,//( ,jc_bgn)-( +nnrow,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row,//( ,jc_bgn)-( +nnrow,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock1);
        src_pt1 = src_mtrx->addrCoefBlock(jblock1, iblock0);
        if (jblock1 == iblock0) {
          for (j1 = nrow_src_offset,
               j2 = nrow_src_offset * nrow_src, // transposed
               j0 = ir_start + jc_mid * dst_row,//( ,jc_mid)-( +nnrow,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = nrow_src_offset,
               j0 = ir_start + jc_mid * dst_row,//( ,jc_mid)-( +nnrow,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        ir_start += nnrow;
      }
      // interior row blocks : all nrow_src rows of each block are used
      for (int iblock = iblock0 + 1; iblock < iblock1; iblock++) {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock, jblock0); // SIZE_B1?
        T *src_pt0, *src_pt1;
        src_pt0 = src_mtrx->addrCoefBlock(iblock, jblock0);
        src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock);
        if (jblock0 == iblock) {
          for (j1 = ncol_src_offset * nrow_src,
               j2 = ncol_src_offset, // transposed
               j0 = ir_start + jc_bgn * dst_row,//( ,jc_bgn)-( +SIZE_B1,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row,//( ,jc_bgn)-( +SIZE_B1,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        src_pt0 = src_mtrx->addrCoefBlock(iblock, jblock1);
        src_pt1 = src_mtrx->addrCoefBlock(jblock1, iblock);
        if (jblock1 == iblock) {
          for (j1 = 0,
               j2 = 0,
               j0 = ir_start + jc_mid * dst_row,//( ,jc_mid)-( +SIZE_B1,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = 0,
               j0 = ir_start + jc_mid * dst_row,//( ,jc_mid)-( +SIZE_B1,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        ir_start += nrow_src;
      }
      { // last row block : only the leading nnrow rows belong to the window
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock1, jblock0);
        // const int itmp = (ir_bgn_src + nrow) % SIZE_B1;
        // const int nnrow = (itmp == 0) ? SIZE_B1 : itmp;
        const int nnrow = ir_bgn_src + nrow - src_mtrx->IndexBlock(iblock1);
        T *src_pt0, *src_pt1;
        src_pt0 = src_mtrx->addrCoefBlock(iblock1, jblock0);
        src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock1);
        if (jblock0 == iblock1) {
          for (j1 = ncol_src_offset * nrow_src,
               j2 = ncol_src_offset, // transposed
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        src_pt0 = src_mtrx->addrCoefBlock(iblock1, jblock1);
        src_pt1 = src_mtrx->addrCoefBlock(jblock1, iblock1);
        if (jblock1 == iblock1) {
          for (j2 = 0,
               j1 = 0,
               j0 = ir_start + jc_mid * dst_row, // ( ,jc_mid)-( +nnrow,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = 0,
               j0 = ir_start + jc_mid * dst_row, // ( ,jc_mid)-( +nnrow,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
      }
    }
    else if (jblock0 == jblock1) {
      // single column block : same three row stages, no column split
      int ir_start = ir_bgn;
      {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
        const int nnrow = nrow_src - nrow_src_offset;
        T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0);
        T *src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock0);
        if (jblock0 == iblock0) {
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j2 = ncol_src_offset + nrow_src_offset * nrow_src, // transposed
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        ir_start += nnrow;
      }
      for (int iblock = iblock0 + 1; iblock < iblock1; iblock++) {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock, jblock0);
        T *src_pt0 = src_mtrx->addrCoefBlock(iblock, jblock0);
        T *src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock);
        if (jblock0 == iblock) {
          for (j1 = ncol_src_offset * nrow_src,
               j2 = ncol_src_offset, // transposed
               j0 = ir_start + jc_bgn * dst_row,//( ,jc_bgn)-( +SIZE_B1,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +SIZE_B1,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nrow_src, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow_src, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
        ir_start += nrow_src;
      }
      {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock1, jblock0);
        // const int itmp = (ir_bgn_src + nrow) % SIZE_B1;
        // const int nnrow = (itmp == 0) ? SIZE_B1 : itmp;
        const int nnrow = ir_bgn_src + nrow - src_mtrx->IndexBlock(iblock1);
        T *src_pt0 = src_mtrx->addrCoefBlock(iblock1, jblock0);
        T *src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock1);
        if (jblock0 == iblock1) {
          for (j1 = ncol_src_offset * nrow_src,
               j2 = ncol_src_offset, // transposed
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = ncol_src_offset * nrow_src,
               j0 = ir_start + jc_bgn * dst_row, // ( ,jc_bgn)-( +nnrow,jc_end)
               jj = jc_bgn; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nnrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nnrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
      }
    }
    else {
      diss_printf(arg->verbose, arg->fp,
                  "%s %d : bad block indices in column : %d %d\n",
                  __FILE__, __LINE__, jblock0, jblock1);
    }
  }
  else {
    // the whole row range lies inside one source row block (iblock0)
    if ((jblock0 + 1) == jblock1) {
      const int jc_mid_src = src_mtrx->IndexBlock(jblock1);
      const int ncol0 = jc_mid_src - jc_bgn_src;
      const int jc_mid = jc_bgn + ncol0;
      {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
        T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0);
        T *src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock0);
        if (jblock0 == iblock0) {
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j2 = ncol_src_offset + nrow_src_offset * nrow_src, //transposed
               j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else {
          for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
               j0 = ir_bgn + jc_bgn * dst_row, //(ir_bgn,jc_bgn)-(ir_end,jc_mid)
               jj = jc_bgn; jj < jc_mid; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
      }
      {
        int jj, j0, j1, j2;
        const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock1);
        T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock1);
        T *src_pt1 = src_mtrx->addrCoefBlock(jblock1, iblock0);
        if (jblock1 == iblock0) {
          for (j1 = nrow_src_offset,
               j2 = nrow_src_offset * nrow_src, // transposed
               j0 = ir_bgn + jc_mid * dst_row, //(ir_bgn,jc_mid)-(ir_end,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src, j2++) {
            blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
          }
        }
        else { // destination :
          for (j1 = nrow_src_offset,
               j0 = ir_bgn + jc_mid * dst_row, //(ir_bgn,jc_mid)-(ir_end,jc_end)
               jj = jc_mid; jj < jc_end; jj++, // driving index
               j0 += dst_row, j1 += nrow_src) {
            blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
            blas_axpy(nrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
          }
        }
      }
    }
    else if (jblock0 == jblock1) {
      int jj, j0, j1, j2;
      const int nrow_src = src_mtrx->nrowBlock(iblock0, jblock0);
      T *src_pt0 = src_mtrx->addrCoefBlock(iblock0, jblock0);
      T *src_pt1 = src_mtrx->addrCoefBlock(jblock0, iblock0);
      if (jblock0 == iblock0) {
        for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
             j2 = ncol_src_offset + nrow_src_offset * nrow_src,
             j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src, j2++) {
          blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
          blas_axpy(nrow, none, src_pt1 + j2, nrow_src, dst_pt2 + j0, 1);
        }
      }
      else {
        for (j1 = nrow_src_offset + ncol_src_offset * nrow_src,
             j0 = ir_bgn + jc_bgn * dst_row, // (ir_bgn,jc_bgn)-(ir_end,jc_end)
             jj = jc_bgn; jj < jc_end; jj++, // driving index
             j0 += dst_row, j1 += nrow_src) {
          blas_axpy(nrow, none, src_pt0 + j1, 1, dst_pt + j0, 1);
          blas_axpy(nrow, none, src_pt1 + j1, 1, dst_pt2 + j0, 1);
        }
      }
    }
    else {
diss_printf(arg->verbose, arg->fp,
                  "%s %d : bad block indices in column : %d %d\n",
                  __FILE__, __LINE__, jblock0, jblock1);
    }
  } // if (iblock0 < iblock1)
}

template void dsub_unsym2rct(C_Dsub_task *arg);
template void dsub_unsym2rct(C_Dsub_task *arg);
template void dsub_unsym2rct >(C_Dsub_task > *arg);
template void dsub_unsym2rct >(C_Dsub_task > *arg);
// end of template function dsub_unsym2rct
template void dsub_unsym2rct(C_Dsub_task *arg);
template void dsub_unsym2rct >(C_Dsub_task > *arg);

// dsub_sym2sym_diag_two : runs dsub_sym2sym_diag twice on the same
// destination (arg->dst_mtrx, block arg->ir_block / arg->jc_block, rows
// arg->ir_bgn .. arg->ir_end) :
//   1. with source arg->src_pt  starting at row arg->ir_bgn_src,
//   2. with source arg->src_pt2 starting at row arg->ir_bgn_src2.
// Returns without doing anything when arg->isSkip is set.
// Each pass builds a temporary C_Dsub_task on the heap, passes it to
// dsub_sym2sym_diag and deletes it.  Column arguments are passed as -1 and
// the rectangular destinations as NULL.
template void dsub_sym2sym_diag_two(C_Dsub_task *arg)
{
  if (arg->isSkip) {
    return;
  }
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        (-1), // jc_bgn
                        (-1), // jc_end
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        (-1), // jc_bgn_src
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2sym_diag,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_sym2sym_diag(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        (-1), // jc_bgn
                        (-1), // jc_end
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        (-1), // jc_bgn_src
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2sym_diag,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_sym2sym_diag(tmp);
  delete tmp;
}

template void dsub_sym2sym_diag_two(C_Dsub_task *arg);
template void dsub_sym2sym_diag_two(C_Dsub_task *arg);
template void dsub_sym2sym_diag_two >(C_Dsub_task > *arg);
template void dsub_sym2sym_diag_two >(C_Dsub_task > *arg);
template void dsub_sym2sym_diag_two(C_Dsub_task *arg);
template void dsub_sym2sym_diag_two
>(C_Dsub_task > *arg);
// end of template function dsub_sym2sym_diag_two

// dsub_unsym2unsym_diag_two : runs dsub_unsym2unsym_diag twice on the same
// destination (arg->dst_mtrx, block arg->ir_block / arg->jc_block, rows
// arg->ir_bgn .. arg->ir_end) :
//   1. with source arg->src_pt  starting at row arg->ir_bgn_src,
//   2. with source arg->src_pt2 starting at row arg->ir_bgn_src2.
// Returns without doing anything when arg->isSkip is set.
// Each pass uses a heap-allocated temporary C_Dsub_task that is deleted
// right after the call.
template void dsub_unsym2unsym_diag_two(C_Dsub_task *arg)
{
  if (arg->isSkip) {
    return;
  }
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        (-1), // jc_bgn
                        (-1), // jc_end
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        (-1), // jc_bgn_src
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym_diag,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2unsym_diag(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        (-1), // jc_bgn
                        (-1), // jc_end
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        (-1), // jc_bgn_src
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym_diag,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2unsym_diag(tmp);
  delete tmp;
}

template void dsub_unsym2unsym_diag_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag_two >(C_Dsub_task > *arg);
template void dsub_unsym2unsym_diag_two >(C_Dsub_task > *arg);
template void dsub_unsym2unsym_diag_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_diag_two >(C_Dsub_task > *arg);
// end of template function dsub_unsym2unsym_diag_two

// dsub_sym2sym_two : runs dsub_sym2sym twice on the same destination window
// (arg->dst_mtrx, block arg->ir_block / arg->jc_block, rows arg->ir_bgn ..
// arg->ir_end, columns arg->jc_bgn .. arg->jc_end) :
//   1. with source arg->src_pt  at (arg->ir_bgn_src,  arg->jc_bgn_src),
//   2. with source arg->src_pt2 at (arg->ir_bgn_src2, arg->jc_bgn_src2).
// Returns without doing anything when arg->isSkip is set.
template void dsub_sym2sym_two(C_Dsub_task *arg)
{
  if (arg->isSkip) {
    return;
  }
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2sym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_sym2sym(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src2,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2sym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_sym2sym(tmp);
  delete tmp;
}

template void dsub_sym2sym_two(C_Dsub_task *arg);
template void dsub_sym2sym_two(C_Dsub_task *arg);
template void dsub_sym2sym_two >(C_Dsub_task > *arg);
template void dsub_sym2sym_two >(C_Dsub_task > *arg);
template void dsub_sym2sym_two(C_Dsub_task *arg);
template void dsub_sym2sym_two >(C_Dsub_task > *arg);
// end of template function dsub_sym2sym_two

// dsub_unsym2unsym_two : runs dsub_unsym2unsym twice on the same destination
// window of arg->dst_mtrx, first with source arg->src_pt at
// (arg->ir_bgn_src, arg->jc_bgn_src), then with source arg->src_pt2 at
// (arg->ir_bgn_src2, arg->jc_bgn_src2).
// Returns without doing anything when arg->isSkip is set.
template void dsub_unsym2unsym_two(C_Dsub_task *arg)
{
  if (arg->isSkip) {
    return;
  }
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2unsym(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src2,
                        (-1), // jc_bgn_src2
arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2unsym(tmp);
  delete tmp;
}

template void dsub_unsym2unsym_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_two >(C_Dsub_task > *arg);
template void dsub_unsym2unsym_two >(C_Dsub_task > *arg);
template void dsub_unsym2unsym_two(C_Dsub_task *arg);
template void dsub_unsym2unsym_two >(C_Dsub_task > *arg);
// end of template function dsub_unsym2unsym_two

// dsub_unsym2diag_two : runs dsub_unsym2diag twice on the same destination
// window of arg->dst_mtrx, first with source arg->src_pt at
// (arg->ir_bgn_src, arg->jc_bgn_src), then with source arg->src_pt2 at
// (arg->ir_bgn_src2, arg->jc_bgn_src2).
// Returns without doing anything when arg->isSkip is set.
// NOTE(review): both temporary tasks are constructed with dsub_unsym2unsym
// as their function argument, while the function actually invoked on them is
// dsub_unsym2diag.  Harmless only if that stored function is never used on
// these temporaries -- confirm.
template void dsub_unsym2diag_two(C_Dsub_task *arg)
{
  if (arg->isSkip) {
    return;
  }
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2diag(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src2,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        arg->dst_mtrx,
                        (RectBlockMatrix *)NULL, // dst_pt
                        arg->ir_block,
                        arg->jc_block,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2unsym,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2diag(tmp);
  delete tmp;
}

template void dsub_unsym2diag_two(C_Dsub_task *arg);
template void dsub_unsym2diag_two(C_Dsub_task *arg);
template void
dsub_unsym2diag_two >(C_Dsub_task > *arg);
template void dsub_unsym2diag_two >(C_Dsub_task > *arg);
template void dsub_unsym2diag_two(C_Dsub_task *arg);
template void dsub_unsym2diag_two >(C_Dsub_task > *arg);
// end of template function dsub_unsym2diag_two

// dsub_sym2rct_two : runs dsub_sym2rct twice on the same destination window
// of arg->dst_pt (block arg->ir_block / arg->jc_block), first with source
// arg->src_pt at (arg->ir_bgn_src, arg->jc_bgn_src), then with source
// arg->src_pt2 at (arg->ir_bgn_src2, arg->jc_bgn_src2).
// The square destination (dst_mtrx slot) and dst_pt2 are passed as NULL.
// NOTE(review): unlike dsub_sym2sym_two and the other square-destination
// *_two wrappers in this file, there is no early return on arg->isSkip here
// -- confirm that this is intentional.
template void dsub_sym2rct_two(C_Dsub_task *arg)
{
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        (SquareBlockMatrix*)NULL, // dst_mtrx
                        arg->dst_pt,
                        arg->ir_block, // 0,
                        arg->jc_block, // 0,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2rct,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  // tmp->verbose = false;
  dsub_sym2rct(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src2,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        (SquareBlockMatrix*)NULL, // dst_mtrx
                        arg->dst_pt,
                        arg->ir_block, //0,
                        arg->jc_block, //0,
                        (RectBlockMatrix *)NULL, // dst_pt2
                        (-1),
                        (-1),
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_sym2rct,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  // tmp->verbose = false;
  dsub_sym2rct(tmp);
  delete tmp;
}

template void dsub_sym2rct_two(C_Dsub_task *arg);
template void dsub_sym2rct_two(C_Dsub_task *arg);
template void dsub_sym2rct_two >(C_Dsub_task > *arg);
template void dsub_sym2rct_two >(C_Dsub_task > *arg);
template void dsub_sym2rct_two(C_Dsub_task *arg);
template void dsub_sym2rct_two >(C_Dsub_task > *arg);
// end of template function dsub_sym2rct_two

// dsub_unsym2rct_two : runs dsub_unsym2rct twice on the same destination
// windows of arg->dst_pt and arg->dst_pt2 (block arg->ir_block /
// arg->jc_block), first with source arg->src_pt at
// (arg->ir_bgn_src, arg->jc_bgn_src), then with source arg->src_pt2 at
// (arg->ir_bgn_src2, arg->jc_bgn_src2).
// The square destination (dst_mtrx slot) is passed as NULL.
// NOTE(review): as in dsub_sym2rct_two, arg->isSkip is not checked here
// -- confirm that this is intentional.
template void dsub_unsym2rct_two(C_Dsub_task *arg)
{
  C_Dsub_task *tmp;
  // first pass : arg->src_pt
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        (SquareBlockMatrix*)NULL, // dst_mtrx
                        arg->dst_pt,
                        arg->ir_block,//0,
                        arg->jc_block, //0,
                        arg->dst_pt2,
                        0,
                        0,
                        arg->src_pt,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2rct,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2rct(tmp);
  delete tmp;
  // second pass : arg->src_pt2, passed in the first-source slots
  tmp = new C_Dsub_task(arg->atomic_size,
                        arg->atomic_id,
                        arg->ir_bgn,
                        arg->ir_end,
                        arg->jc_bgn,
                        arg->jc_end,
                        arg->ir_bgn_src2,
                        (-1), // ir_bgn_src2
                        arg->jc_bgn_src2,
                        (-1), // jc_bgn_src2
                        arg->dst_row,
                        (SquareBlockMatrix*)NULL, // dst_mtrx
                        arg->dst_pt,
                        arg->ir_block, //0,
                        arg->jc_block,//0,
                        arg->dst_pt2,
                        0,
                        0,
                        arg->src_pt2,
                        (SquareBlockMatrix*)NULL, // src_pt2
                        dsub_unsym2rct,
                        false, // dummy
                        *(arg->ops_complexity),
                        arg->father_id,
                        arg->level,
                        arg->verbose,
                        arg->fp);
  dsub_unsym2rct(tmp);
  delete tmp;
}

template void dsub_unsym2rct_two(C_Dsub_task *arg);
template void dsub_unsym2rct_two(C_Dsub_task *arg);
template void dsub_unsym2rct_two >(C_Dsub_task > *arg);
template void dsub_unsym2rct_two >(C_Dsub_task > *arg);
template void dsub_unsym2rct_two(C_Dsub_task *arg);
template void dsub_unsym2rct_two >(C_Dsub_task > *arg);
// end of template function dsub_unsym2rct_two

//#define DEBUG_QUEUE_GENERATION2
//#define DEBUG_QUEUE_GENERATION
//#define DEBUG_PREPARE_THREAD
//#define DEBUG_PREPARE_THREAD_DEBUG
template void C_Dsub_queue(bool isSym,
                           int father_id,
                           bool skip_flag,
                           vector& queue,
                           list > &child_contrib,
                           vector* tasks_p, // _tasks_DSymmGEMM
                           vector* tasks_p_indcol,
                           const bool tasks_p_flag,
                           vector& tasks_q, // _tasks_DfillSymm
                           vector* tasks_r, // _tasks_SparseLocalSchur
                           vector& tasks_s, // _tasks_DSub[level + 1][(*it)]
                           vector* tasks_d, // _tasks_deallocateLocalSchur
                           vector* tasks_d_indcol,
                           int level,
                           const bool direct_flag,
                           const bool verbose,
                           FILE *fp)
{
  // list & child_contrib = child_contribs[*it];
  const int diag_size = child_contrib.front().diag_size;
  const int father_row = child_contrib.front().father_row;
const int size_res = diag_size % SIZE_B1; // const int size_res2 = offdiag_size % SIZE_B1; int num_block; num_block = diag_size / SIZE_B1 + (size_res != 0); // const int num_block2 = (offdiag_size / SIZE_B1 + (size_res2 != 0)); const int num_block2 = child_contrib.front().father_offdiag_pt->num_blocks_c(); const int block_diag_size = (num_block * (num_block + 1)) / 2; const int block_offdiag_size = num_block * num_block2; // vector* tasks_p = ((tasks_p_ == NULL) ? NULL : // (tasks_p_->size() > 0 ? tasks_p_ : NULL)); if (diag_size == 0) { queue.resize(1); int nb = father_id + 1; string task_name = ("i dummy : " + to_string(level) + " : " +to_string(nb)); C_dummy_arg *arg = new C_dummy_arg(verbose, &fp, nb); // *(arg->ops_complexity) = (-1L); queue[0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); queue[0]->parents->clear(); diss_printf(verbose, fp, "%s %d : %s\n", __FILE__, __LINE__, task_name.c_str()); return; } #ifdef DEBUG_PREPARE_THREAD cout << "father = " << (father_id + 1) // selfIndex^-1 << " # of child = " << child_contrib.size() << " [ "; #endif for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { #ifdef DEBUG_PREPARE_THREAD cout << (*jt).child_id << " "; #endif } #ifdef DEBUG_PREPARE_THREAD cout << "]" << endl; #endif #ifdef DEBUG_STRIPES for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { cout << "child = " << (*jt).child_id << endl; cout << "diag = "; for (list ::const_iterator kt = (*jt).diag_strip.begin(); kt != (*jt).diag_strip.end(); ++kt) { cout << "[ " << (*kt).begin_dst << " , " << (*kt).begin_src << " , " << (*kt).width << " ] "; } cout << endl; cout << "offdiag = "; for (list ::const_iterator kt = (*jt).offdiag_strip.begin(); kt != (*jt).offdiag_strip.end(); ++kt) { cout << "[ " << (*kt).begin_dst << " , " << (*kt).begin_src << " , " << (*kt).width << " ] "; } cout << endl; } // loop: jt 
#endif // if (child_contrib.size() == 2 && direct_flag) { // vector&queue = tasks_C_DSUB[*it]; // queue.resize(block_diag_size + num_block2); // vector*>&C_task_arg = lists_C_DSUB[*it]; // C_task_arg.resize(block_diag_size + num_block2); #ifdef DEBUG_PREPARE_THREAD_DEBUG cout << "2 children resize = " << block_diag_size + num_block2 << " "; #endif child_contribution child0 = child_contrib.front(); child_contribution child1 = child_contrib.back(); if ((child0.child_pt->dimension() == 0) || (child1.child_pt->dimension() == 0)) { diss_printf(verbose, fp , "%s %d : %d %d %d : %d %d %d\n", __FILE__, __LINE__, child0.child_id, (int)child0.diag_strip.size(), (int)child0.offdiag_strip.size(), child1.child_id, (int)child1.diag_strip.size(), (int)child1.offdiag_strip.size()); } list *strips_r, *strips_c; strips_r = new list [2]; strips_c = new list [2]; list strips_r01, strips_c01; int *child_id = new int[2]; child_id[0] = child0.child_id; child_id[1] = child1.child_id; // double ***child_pt = new double**[2]; SquareBlockMatrix** child_pt = new SquareBlockMatrix*[2]; child_pt[0] = child0.child_pt; child_pt[1] = child1.child_pt; SquareBlockMatrix *father_diag_pt = child0.father_diag_pt; RectBlockMatrix *father_offdiag_pt = child0.father_offdiag_pt; RectBlockMatrix *father_offdia2_pt = child0.father_offdiag_unsym_pt; diss_printf(verbose, fp, "%s %d : father id = %d\n", __FILE__, __LINE__, (father_id + 1)); if (child0.diag_strip.size() > 1 || child1.diag_strip.size() > 1) { #ifdef DEBUG_PREPARE_THREAD diss_printf(verbose, fp, "%s %d : row block is not continous\n", __FILE__, __LINE__); #endif combine_two_strips(strips_r[0], strips_r[1], strips_r01, child0.diag_strip, child1.diag_strip, child0.diag_size); } else { if ((child0.diag_strip.size() == 0) && (child1.diag_strip.size() == 0)) { diss_printf(verbose, fp, "%s %d : both rows are null : father = %d\n", __FILE__, __LINE__, (father_id + 1)); strips_r[0].clear(); strips_r[1].clear(); strips_r01.clear(); } if 
((child0.diag_strip.size() == 0) || (child1.diag_strip.size() == 0)) { diss_printf(verbose, fp, "%s %d : one of rows is null / ", __FILE__, __LINE__); if (child0.diag_strip.size() == 0) { diss_printf(verbose, fp, " r0 null / "); strips_r[0].clear(); strips_r[1] = child1.diag_strip; } else { diss_printf(verbose, fp, "%d : %d / ", child0.diag_strip.front().begin_dst, child0.diag_strip.front().width); } if (child1.diag_strip.size() == 0) { diss_printf(verbose, fp, " r1 null\n"); strips_r[0] = child0.diag_strip; strips_r[1].clear(); } else { diss_printf(verbose, fp, "%d : %d\n", child1.diag_strip.front().begin_dst, child1.diag_strip.front().width); } strips_r01.clear(); } // (child0.diag_strip.size() == 0) || (child1.diag_strip.size() == 0) else { if ((child0.diag_strip.front().begin_dst != child1.diag_strip.front().begin_dst) || (child0.diag_strip.front().width != child1.diag_strip.front().width)) { diss_printf(verbose, fp, "%s %d : row blocks are not same / ", __FILE__, __LINE__); diss_printf(verbose, fp, "%d : %d : %d / %d : %d : %d\n", (int)child0.diag_strip.size(), child0.diag_strip.front().begin_dst, child0.diag_strip.front().width, (int)child1.diag_strip.size(), child1.diag_strip.front().begin_dst, child1.diag_strip.front().width); split_two_strips(strips_r[0], strips_r[1], strips_r01, child0.diag_strip.front(), child1.diag_strip.front()); diss_printf(verbose, fp, "%s %d : split of strips r0 ", __FILE__, __LINE__); for (list::const_iterator kt = strips_r[0].begin(); kt != strips_r[0].end(); ++kt) { diss_printf(verbose, fp, "%d : %d : %d ", (*kt).begin_dst, (*kt).begin_src, (*kt).width); } diss_printf(verbose, fp, " r1 "); for (list::const_iterator kt = strips_r[1].begin(); kt != strips_r[1].end(); ++kt) { diss_printf(verbose, fp, "%d : %d : %d ", (*kt).begin_dst, (*kt).begin_src, (*kt).width); } diss_printf(verbose, fp, "r01 "); for (list::const_iterator kt = strips_r01.begin(); kt != strips_r01.end(); ++kt) { diss_printf(verbose, fp, "%d : %d : %d : %d ", 
(*kt).begin_dst, (*kt).begin_src0, (*kt).begin_src1, (*kt).width); } diss_printf(verbose, fp, "\n"); } else { strips_r[0].clear(); strips_r[1].clear(); copy_two_strips(strips_r01, child0.diag_strip, child1.diag_strip); diss_printf(verbose, fp, "%s %d : r01 ", __FILE__, __LINE__); for (list::const_iterator kt = strips_r01.begin(); kt != strips_r01.end(); ++kt) { diss_printf(verbose, fp, "%d : %d : %d : %d ", (*kt).begin_dst, (*kt).begin_src0, (*kt).begin_src1, (*kt).width); } diss_printf(verbose, fp, "\n"); } }// (child0.diag_strip.size() == 0) || (child1.diag_strip.size() == 0) } // (child0.diag_strip.size() > 1 || child1.diag_strip.size() > 1) if ((child0.offdiag_strip.size() > 0) && (child1.offdiag_strip.size() > 0)) { combine_two_strips(strips_c[0], strips_c[1], strips_c01, child0.offdiag_strip, child1.offdiag_strip, child0.offdiag_size); } else if (child1.offdiag_strip.size() == 0) { strips_c[0]= child0.offdiag_strip; strips_c[1].clear(); strips_c01.clear(); } else { // (child0.offdiag_strip.size() == 0) strips_c[1]= child1.offdiag_strip; strips_c[0].clear(); strips_c01.clear(); } if ((child0.child_pt->dimension() == 0) || (child1.child_pt->dimension() == 0)) { diss_printf(verbose, fp, "%s %d : %d %d %d\n", __FILE__, __LINE__, (int)strips_c[0].size(), (int)strips_c[1].size(), (int)strips_c01.size()); } queue.resize(block_diag_size + num_block2); // queue.resize(block_diag_size + block_offdiag_size); diss_printf(verbose, fp, "%s %d : C_Dsub with father id = %d %d %d ", __FILE__, __LINE__, (father_id + 1), num_block, num_block2); if (skip_flag) { diss_printf(verbose, fp, "skipped\n"); } else { diss_printf(verbose, fp, "\n"); } int kk = 0; for (int kc0 = 0; kc0 < num_block; kc0++) { const int kc = kc0 * SIZE_B1; const int kc_end = (kc0 == num_block - 1) ? diag_size : (kc + SIZE_B1); for (int kr0 = 0; kr0 <= kc0; kr0++) { // running over upper blocks const int kr = kr0 * SIZE_B1; const int kr_end = (kr0 == num_block - 1) ? 
diag_size : (kr + SIZE_B1); list *> *C_task_arg = new list *>; // C_task_arg[kk] = new list; long *ops_sum = new long; *ops_sum = 0L; list* parents_r = new list[2]; list* parents_c = new list[2]; // (child0 + child01 + child1) * (child0 + child01 + child1) // = child0 * child0 + child0 * child01 // + child01 * child0 + child01 * child01 + child01 * child1 // + child1 * child01 + child1 * child1 // // diagonal contribution // tasks are stored in each queue _c_dsub_arg[*it][ ] // ---- child_ll * (child_ll + child01) ---- ll = 0, 1 for (int ll = 0; ll < 2; ll++) { for (list ::const_iterator mt = strips_r[ll].begin(); mt != strips_r[ll].end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; // continue loop : mt : strips_r[ll] } if (r_bgn_dst >= kr_end) { break; // break loop : mt : strips_r[ll] } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int ic_bgn = imax(kc, r_bgn_dst); const int ic_end = imin(kc_end, r_end_dst); // -- child_ll * child_ll if (kr == kc) { const long ops = ((long)(ir_end - ir_bgn) * ((long)(ir_end - ir_bgn) + 1L) / 2L); // diagonal : half size const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); if (ir_bgn < ir_end) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (-1), // jc_bgn (-1), // jc_end ir_bgn_src, (-1), // ir_bgn_src2 (-1), // jc_bgn_src (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kr0, (RectBlockMatrix *)NULL, // dist_pt2 (-1), (-1), child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 (isSym ? 
dsub_sym2sym_diag : dsub_unsym2unsym_diag), false, // skip_flag ops, father_id, level, verbose, fp); tmp->child0_id = child_id[0]; tmp->child1_id = child_id[1]; C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); } // if (ir_bgn < ir_end) } // if (kr == kc) else { // kr < kc const long ops = ((long)(ir_end - ir_bgn) * (long)(ic_end - ic_bgn)); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int ic_bgn_src = ((*mt).begin_src + (ic_bgn - r_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int ic_end_src = ic_bgn_src + (ic_end - ic_bgn); if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (ic_bgn - kc), (ic_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 ic_bgn_src, (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dist_pt2 (-1), (-1), child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? 
dsub_sym2sym : dsub_unsym2unsym, false, // skip_flag ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], ic_bgn_src, ic_end_src, child_pt[ll]); //SIZE_B1); } // if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) } // if (kc == kr) // off-diagonal blocks list ::const_iterator nt = mt; ++nt; for (; nt != strips_r[ll].end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn)); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int jc_bgn_src = ((*nt).begin_src + (jc_bgn - c_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? dsub_sym2sym : ((kr0 == kc0) ? 
dsub_unsym2diag :dsub_unsym2unsym), false, // skip_flag ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt // -- child_ll * child01 for (list ::const_iterator nt = strips_r01.begin(); nt != strips_r01.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_bgn_dst < r_bgn_dst) { continue; } if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn)); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int jc_bgn_src = ((ll == 0 ? (*nt).begin_src0 : (*nt).begin_src1) + (jc_bgn - c_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 jc_bgn_src, // (*nt).begin_src[ll] + ... (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? dsub_sym2sym : ((kr0 == kc0) ? 
dsub_unsym2diag :dsub_unsym2unsym), // dsub_unsym2unsym, bug : 17 Jan.2016 found false, // skip_flag ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end))v } // loop : nt } // loop : mt } // loop : ll // ---- child01 * (child0 + child01 + child1) ---- // -- child01 * child01 for (list ::const_iterator mt = strips_r01.begin(); mt != strips_r01.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; } if (r_bgn_dst >= kr_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int ic_bgn = imax(kc, r_bgn_dst); const int ic_end = imin(kc_end, r_end_dst); if (kr == kc) { const long ops = ((long)(ir_end - ir_bgn) * ((long)(ir_end - ir_bgn) + 1L)); // diagonal : half size int ir_bgn_srcs[2], ir_end_srcs[2]; ir_bgn_srcs[0] = (*mt).begin_src0 + (ir_bgn - r_bgn_dst); ir_bgn_srcs[1] = (*mt).begin_src1 + (ir_bgn - r_bgn_dst); ir_end_srcs[0] = ir_bgn_srcs[0] + (ir_end - ir_bgn); ir_end_srcs[1] = ir_bgn_srcs[1] + (ir_end - ir_bgn); if (ir_bgn < ir_end) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (-1), // jc_bgn (-1), // jc_end ir_bgn_srcs[0], ir_bgn_srcs[1], (-1), // jc_bgn_src (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kr0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[0], child_pt[1], (isSym ? dsub_sym2sym_diag_two : dsub_unsym2unsym_diag_two), skip_flag, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += skip_flag ? 
0L : ops; for (int ll = 0; ll < 2; ll++) { update_parents_list(parents_r[ll], ir_bgn_srcs[ll], ir_end_srcs[ll], child_pt[ll]); } } // if (ir_bgn < ir_end) } // kr != kc else { const long ops = ((long)(ir_end - ir_bgn) * (long)(ic_end - ic_bgn) * 2L); int ir_bgn_srcs[2], ir_end_srcs[2]; int ic_bgn_srcs[2], ic_end_srcs[2]; ir_bgn_srcs[0] = (*mt).begin_src0 + (ir_bgn - r_bgn_dst); ir_bgn_srcs[1] = (*mt).begin_src1 + (ir_bgn - r_bgn_dst); ic_bgn_srcs[0] = (*mt).begin_src0 + (ic_bgn - r_bgn_dst); ic_bgn_srcs[1] = (*mt).begin_src1 + (ic_bgn - r_bgn_dst); ir_end_srcs[0] = ir_bgn_srcs[0] + (ir_end - ir_bgn); ir_end_srcs[1] = ir_bgn_srcs[1] + (ir_end - ir_bgn); ic_end_srcs[0] = ic_bgn_srcs[0] + (ic_end - ic_bgn); ic_end_srcs[1] = ic_bgn_srcs[1] + (ic_end - ic_bgn); if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (ic_bgn - kc), (ic_end - kc), ir_bgn_srcs[0], ir_bgn_srcs[1], ic_bgn_srcs[0], ic_bgn_srcs[1], father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[0], child_pt[1], isSym ? dsub_sym2sym_two : dsub_unsym2unsym_two, skip_flag, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += skip_flag ? 
0L : ops; for (int ll = 0; ll < 2; ll++) { update_parents_list(parents_r[ll], ir_bgn_srcs[ll], ir_end_srcs[ll], child_pt[ll]); update_parents_list(parents_c[ll], ic_bgn_srcs[ll], ic_end_srcs[ll], child_pt[ll]); } } // if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) } // if (kr == kc) // off-diagonal blocks list ::const_iterator nt = mt; ++nt; for (; nt != strips_r01.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn) * 2L); int ir_bgn_srcs[2], ir_end_srcs[2]; int jc_bgn_srcs[2], jc_end_srcs[2]; ir_bgn_srcs[0] = (*mt).begin_src0 + (ir_bgn - r_bgn_dst); ir_bgn_srcs[1] = (*mt).begin_src1 + (ir_bgn - r_bgn_dst); jc_bgn_srcs[0] = (*nt).begin_src0 + (jc_bgn - c_bgn_dst); jc_bgn_srcs[1] = (*nt).begin_src1 + (jc_bgn - c_bgn_dst); ir_end_srcs[0] = ir_bgn_srcs[0] + (ir_end - ir_bgn); ir_end_srcs[1] = ir_bgn_srcs[1] + (ir_end - ir_bgn); jc_end_srcs[0] = jc_bgn_srcs[0] + (jc_end - jc_bgn); jc_end_srcs[1] = jc_bgn_srcs[1] + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)){ C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_srcs[0], ir_bgn_srcs[1], jc_bgn_srcs[0], jc_bgn_srcs[1], father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[0], child_pt[1], isSym ? dsub_sym2sym_two : ((kr0 == kc0) ? dsub_unsym2diag_two : dsub_unsym2unsym_two), skip_flag, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += skip_flag ? 
0L : ops; for (int ll = 0; ll < 2; ll++) { update_parents_list(parents_r[ll], ir_bgn_srcs[ll], ir_end_srcs[ll], child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_srcs[ll], jc_end_srcs[ll], child_pt[ll]); } } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { } // loop : nt // -- child01 * child_ll for (int ll = 0; ll < 2; ll++) { for (list ::const_iterator nt = strips_r[ll].begin(); nt != strips_r[ll].end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_bgn_dst < r_bgn_dst) { continue; } if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn)); const int ir_bgn_src = ((ll == 0 ? (*mt).begin_src0 : (*mt).begin_src1) + (ir_bgn - r_bgn_dst)); const int jc_bgn_src = // ((*nt).begin_dst + (jc_bgn - c_bgn_dst)); (*nt).begin_src + (jc_bgn - c_bgn_dst); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, // ((*mt).begin_dst[ll] + ... (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? dsub_sym2sym : ((kr0 == kc0) ? 
dsub_unsym2diag : dsub_unsym2unsym), false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : ll } // loop : mt string task_name = ("g " + to_string(kr0) + " " + to_string(kc0) + " : " + to_string(level) + " : " + to_string(father_id + 1)); // selfIndex^-1 queue[kk] = new C_task(C_DSUB, task_name, //task_name.str(), // task_name_cstr, (void *)C_task_arg, C_Dsub_task_exec, 1, 0, ops_sum); queue[kk]->parallel_max = block_diag_size; queue[kk]->parallel_id = kk; // added to manage parents of tasks_p and tasks_q if (tasks_r != NULL) { for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { queue[kk]->parents->push_back(tasks_r[(*jt).child_id][0]); } } if (tasks_q.size() > 0) { // queue[kk]->parents->push_back(tasks_q[father_id][0]); // diag queue[kk]->parents->push_back(tasks_q[0]); // diag } if (tasks_s.size() > 0) { const int itmp = (kc0 * (kc0 + 1)) / 2 + kr0; queue[kk]->parents->push_back(tasks_s[itmp]); } // if (tasks_p != NULL) { #ifdef DEBUG_QUEUE_GENERATION cout << "++ diag two children " << task_name.str().c_str() << " "; list *> *task_tmp = (list *> *)queue[kk]->func_arg; cout << "task size = " << task_tmp->size() << " : " << endl; for (list *>::const_iterator jt = task_tmp->begin(); jt != task_tmp->end(); jt++) { cout << (*jt)->atomic_id << " / " << (*jt)->atomic_size << " : " << (*jt)->ir_bgn << " / " << (*jt)->ir_end << " : " << (*jt)->jc_bgn << " / " << (*jt)->jc_end << endl; } #endif if (!skip_flag) { for (int ll = 0; ll < 2; ll++) { for (list::const_iterator mt = parents_r[ll].begin(); mt != parents_r[ll].end(); ++mt) { for (list::const_iterator nt = mt; nt != parents_r[ll].end(); ++nt) { // running upper const int idx = (isSym ? 
(((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector &tasks_tmq = tasks_d[child_id[ll]]; vector &indcolq = tasks_d_indcol[child_id[ll]]; if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); } if (!isSym && ((*nt) > (*mt))) { if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } for (list::const_iterator nt = parents_c[ll].begin(); nt != parents_c[ll].end(); ++nt) { if ((*nt) > (*mt)) { const int idx = (isSym ? (((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector &tasks_tmq = tasks_d[child_id[ll]]; vector &indcolq = tasks_d_indcol[child_id[ll]]; if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; // if (indcolp.size() > idx) { queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { // if (indcolq.size() > idx) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); if (!isSym) { if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; // if (indcolp.size() > (idx + 1)) { queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } } if (tasks_tmq.size() > 0) { // if (indcolq.size() > (idx + 1)) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } } // loop : nt } // loop : } // loop : ll } // if (!skip_flag) else { // verify skip if (verbose) { for (int ll = 0; ll < 2; ll++) { for (list::const_iterator mt = parents_r[ll].begin(); mt != parents_r[ll].end(); ++mt) { if ((*mt) > child_pt[ll]->num_blocks0()) { diss_printf(verbose, stderr, "%s %d : %d incorrect 
skip : %d %d\n", __FILE__, __LINE__, ll, (*mt), child_pt[ll]->num_blocks0()); } } for (list::const_iterator mt = parents_c[ll].begin(); mt != parents_c[ll].end(); ++mt) { if ((*mt) > child_pt[ll]->num_blocks0()) { diss_printf(verbose, stderr, "%s %d : %d incorrect skip : %d %d\n", __FILE__, __LINE__, ll, (*mt), child_pt[ll]->num_blocks0()); } } } } // if (verbose) } queue[kk]->parents->sort(compare_task_name); queue[kk]->parents->unique(); EraseNullParents(queue[kk]); for (int m = 0; m < 2; m++) { parents_r[m].clear(); parents_c[m].clear(); } delete [] parents_r; delete [] parents_c; kk++; } // loop : kr0 } // loop : kc0 // offdiagonal contribution for (int kc0 = 0; kc0 < num_block2; kc0++) { // const int kc = kc0 * SIZE_B1; // const int kc_end = ((kc0 == num_block2 - 1) ? offdiag_size : // (kc + SIZE_B1)); const int kc = father_offdiag_pt->IndexBlock_c(kc0); const int kc_end = father_offdiag_pt->IndexBlock_c(kc0 + 1); list *> *C_task_arg = new list *>; list* parents_r = new list[2]; list* parents_c = new list[2]; long *ops_sum = new long; *ops_sum = 0L; // ---- child_ll * (child_ll + child01) ---- for (int ll = 0; ll < 2; ll++) { for (int kr0 = 0; kr0 < num_block; kr0++) { // 02 Jul.2014 : Atsushi const int kr = kr0 * SIZE_B1; const int kr_end = (kr0 == num_block - 1) ? 
diag_size : (kr + SIZE_B1); for (list ::const_iterator mt = strips_r[ll].begin(); mt != strips_r[ll].end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; // of loop mt } if (r_bgn_dst >= kr_end ) { break; // of loop mt } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); // -- child_ll * child_ll for (list ::const_iterator nt = strips_c[ll].begin(); nt != strips_c[ll].end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(*mt).width * (long)(jc_end - jc_bgn)); const int ir_bgn_src = (*mt).begin_src + (ir_bgn - r_bgn_dst); const int jc_bgn_src = (*nt).begin_src + (jc_bgn - c_bgn_dst); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, // (*mt).begin_src, (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, (SquareBlockMatrix*)NULL, // father_pt father_offdiag_pt, kr0, // ir_block kc0, // jc_block isSym ? (RectBlockMatrix *)NULL : father_offdia2_pt, 0, // withou block 0, // withou block child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? 
dsub_sym2rct : dsub_unsym2rct, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt // -- child_ll * child01 for (list ::const_iterator nt = strips_c01.begin(); nt != strips_c01.end(); ++nt) { const int c_bgn_dst= (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(*mt).width * (long)(jc_end - jc_bgn)); const int ir_bgn_src = (*mt).begin_src + (ir_bgn - r_bgn_dst); const int jc_bgn_src = ((ll == 0 ? (*nt).begin_src0 : (*nt).begin_src1) + (jc_bgn - c_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 jc_bgn_src, // (*nt)->begin_src[ll] + ... (-1), // jc_bgn_src2 father_row, (SquareBlockMatrix*)NULL, // father_pt father_offdiag_pt, kr0, // ir_block kc0, // jc_block isSym ? (RectBlockMatrix *)NULL : father_offdia2_pt, 0, // withou block 0, // withou block child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? 
dsub_sym2rct : dsub_unsym2rct, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : mt } // loop kr0 } // loop : ll // ---- child01 * (child0 + child01 + child1) ---- for (int kr0 = 0; kr0 < num_block; kr0++) { // 02 Jul.2014 : Atsushi const int kr = kr0 * SIZE_B1; const int kr_end = (kr0 == num_block - 1) ? diag_size : (kr + SIZE_B1); for (list ::const_iterator mt = strips_r01.begin(); mt != strips_r01.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; } if (r_bgn_dst >= kr_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); // -- child01 * child_ll for (int ll = 0; ll < 2; ll++) { for (list ::const_iterator nt = strips_c[ll].begin(); nt != strips_c[ll].end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(*mt).width * (long)(jc_end - jc_bgn)); const int ir_bgn_src = (ll == 0 ? (*mt).begin_src0 : (*mt).begin_src1) + (ir_bgn - r_bgn_dst); const int jc_bgn_src = ((*nt).begin_src + (jc_bgn - c_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, // ((*mt).begin_src[ll] + ... (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, (SquareBlockMatrix*)NULL, // father_pt father_offdiag_pt, kr0, // ir_block kc0, // jc_block isSym ? 
(RectBlockMatrix *)NULL : father_offdia2_pt, 0, // withou block 0, // withou block child_pt[ll], (SquareBlockMatrix*)NULL, // src_pt2 isSym ? dsub_sym2rct: dsub_unsym2rct, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt[ll]); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : ll // -- child01 * child01 for (list ::const_iterator nt = strips_c01.begin(); nt != strips_c01.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); const long ops = ((long)(*mt).width * (long)(jc_end - jc_bgn) * 2L); int ir_bgn_srcs[2], ir_end_srcs[2]; int jc_bgn_srcs[2], jc_end_srcs[2]; ir_bgn_srcs[0] = (*mt).begin_src0 + (ir_bgn - r_bgn_dst); ir_bgn_srcs[1] = (*mt).begin_src1 + (ir_bgn - r_bgn_dst); jc_bgn_srcs[0] = (*nt).begin_src0 + (jc_bgn - c_bgn_dst); jc_bgn_srcs[1] = (*nt).begin_src1 + (jc_bgn - c_bgn_dst); ir_end_srcs[0] = ir_bgn_srcs[0] + (ir_end - ir_bgn); ir_end_srcs[1] = ir_bgn_srcs[1] + (ir_end - ir_bgn); jc_end_srcs[0] = jc_bgn_srcs[0] + (jc_end - jc_bgn); jc_end_srcs[1] = jc_bgn_srcs[1] + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(1, 0, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_srcs[0], ir_bgn_srcs[1], jc_bgn_srcs[0], jc_bgn_srcs[1], father_row, (SquareBlockMatrix*)NULL, // father_pt father_offdiag_pt, kr0, // ir_block kc0, // jc_block isSym ? (RectBlockMatrix *)NULL : father_offdia2_pt, 0, // withou block 0, // withou block child_pt[0], child_pt[1], isSym ? 
dsub_sym2rct_two : dsub_unsym2rct_two, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; for (int ll = 0; ll < 2; ll++) { update_parents_list(parents_r[ll], ir_bgn_srcs[ll], ir_end_srcs[ll], child_pt[ll]); update_parents_list(parents_c[ll], jc_bgn_srcs[ll], jc_end_srcs[ll], child_pt[ll]); } // loop : ll } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : mt } // loop : kr0 string task_name = ("h " + to_string(kc0) + " : " + to_string(level) + " : " + to_string(father_id + 1)); // selfIndex^-1 queue[kk] = new C_task(C_DSUB, task_name, //task_name.str(), (void *)C_task_arg, C_Dsub_task_exec, 1, 0, ops_sum); queue[kk]->parallel_max = num_block2; queue[kk]->parallel_id = kc0; // added to manage parents of tasks_p and tasks_q if (tasks_r != NULL) { for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { queue[kk]->parents->push_back(tasks_r[(*jt).child_id][0]); } } if (tasks_q.size() > 0) { // queue[kk]->parents->push_back(tasks_q[father_id][1]); // offdiag queue[kk]->parents->push_back(tasks_q[1]); // offdiag } if (tasks_s.size() > 0) { // offiag for (int i = 0; i < num_block; i++) { const int itmp = kc0 * num_block + i + tasks_s[0]->parallel_max; queue[kk]->parents->push_back(tasks_s[itmp]); } } // if (tasks_p != NULL) { for (int ll = 0; ll < 2; ll++) { for (list::const_iterator mt = parents_r[ll].begin(); mt != parents_r[ll].end(); ++mt) { for (list::const_iterator nt = parents_c[ll].begin(); nt != parents_c[ll].end(); ++nt) { if ((*nt) >= (*mt)) { // upper block const int idx = (isSym ? 
(((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector &tasks_tmq = tasks_d[child_id[ll]]; vector &indcolq = tasks_d_indcol[child_id[ll]]; if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); } if (!isSym && ((*nt) > (*mt))) { if (tasks_p_flag) { vector &tasks_tmp = tasks_p[child_id[ll]]; vector &indcolp = tasks_p_indcol[child_id[ll]]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } } // loop : nt } // loop : mt } // loop : ll // }// if (tasks_p != NULL) { queue[kk]->parents->sort(compare_task_name); queue[kk]->parents->unique(); EraseNullParents(queue[kk]); for (int m = 0; m < 2; m++) { parents_r[m].clear(); parents_c[m].clear(); } delete [] parents_r; delete [] parents_c; kk++; } // loop : kc0 delete [] strips_r; delete [] strips_c; delete [] child_pt; delete [] child_id; #ifdef DEBUG_QUEUE_GENERATION2 cout << "father = " << (father_id + 1) << " contrib = 2 : queue size = " << queue.size() << endl; for (vector::const_iterator kt = queue.begin(); kt != queue.end(); ++kt) { cout << (*kt)->task_name << " :: " << (*kt)->parents->size() << " : " ; for (list::const_iterator jt = (*kt)->parents->begin(); jt != (*kt)->parents->end(); ++jt) { cout << (*jt)->task_name << " / "; } cout << endl; } // loop : kt #endif #ifdef DEBUG_PREPARE_THREAD_DEBUG cout << "kk = " << kk << endl; #endif } // if (child_contrib.size() == 2) { else { queue.resize(block_diag_size + block_offdiag_size); #ifdef DEBUG_PREPARE_THREAD_DEBUG cout << "resize = " << block_diag_size + block_offdiag_size << " "; #endif int kk = 0; // -- diagonal for (int kc0 = 0; kc0 < num_block; kc0++) { const int kc = kc0 * SIZE_B1; const int kc_end = (kc0 == (num_block - 1)) ? 
diag_size : (kc + SIZE_B1); for (int kr0 = 0; kr0 <= kc0; kr0++) { const int kr = kr0 * SIZE_B1; const int kr_end = (kr0 == (num_block - 1)) ? diag_size : (kr + SIZE_B1); list *> *C_task_arg = new list *>; // C_task_arg[kk] = new list; list* parents_r = new list[child_contrib.size()]; list* parents_c = new list[child_contrib.size()]; long *ops_sum = new long; *ops_sum = 0L; int count_atomics = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr || r_end_dst < kc) { continue; } if (r_bgn_dst >= kr_end || r_bgn_dst >= kc_end) { break; } count_atomics++; } for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; } if (r_bgn_dst >= kr_end ) { break; } list ::const_iterator nt = mt; ++nt; for (; nt != (*jt).diag_strip.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } count_atomics++; } // loop : nt } // loop : mt } // loop : jt int atomic_id = 0; int ll = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt, ll++) { SquareBlockMatrix* child_pt = (*jt).child_pt; SquareBlockMatrix* father_diag_pt = (*jt).father_diag_pt; for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; } if (r_bgn_dst >= kr_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int ic_bgn = imax(kc, r_bgn_dst); const int ic_end 
= imin(kc_end, r_end_dst); if (kr == kc) { const long ops = ((long)(ir_end - ir_bgn) * ((long)(ir_end - ir_bgn) + 1L) / 2L); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); if (ir_bgn < ir_end) { C_Dsub_task *tmp = new C_Dsub_task(count_atomics, atomic_id++, (ir_bgn - kr), (ir_end - kr), (-1), // jc_bgn (-1), // jc_end ir_bgn_src, (-1), // ir_bgn_src2 (-1), // jc_bgn_src (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kr0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt, (SquareBlockMatrix*)NULL, // src_pt2 (isSym ? dsub_sym2sym_diag : dsub_unsym2unsym_diag), false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt); } // if (ir_bgn < ir_end) } else { // kc > kr const long ops = ((long)(ir_end - ir_bgn) * (long)(ic_end - ic_bgn)); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int ic_bgn_src = ((*mt).begin_src + (ic_bgn - r_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int ic_end_src = ic_bgn_src + (ic_end - ic_bgn); if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) { C_Dsub_task *tmp = new C_Dsub_task(count_atomics, atomic_id++, (ir_bgn - kr), (ir_end - kr), (ic_bgn - kc), (ic_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 ic_bgn_src, (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt, (SquareBlockMatrix*)NULL, // src_pt2 isSym ? 
dsub_sym2sym : dsub_unsym2unsym, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt); update_parents_list(parents_c[ll], ic_bgn_src, ic_end_src, child_pt); } // if ((ir_bgn < ir_end) && (ic_bgn < ic_end)) } // if (kr == kc) } // loop mt // for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; } if (r_bgn_dst >= kr_end ) { break; } list ::const_iterator nt = mt; ++nt; for (; nt != (*jt).diag_strip.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); // index runs upper diagonal block ( mt < nt ) => (i < j) const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn)); const int ir_bgn_src = ((*mt).begin_src + (ir_bgn - r_bgn_dst)); const int jc_bgn_src = ((*nt).begin_src + (jc_bgn - c_bgn_dst)); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(count_atomics, atomic_id++, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, father_diag_pt, (RectBlockMatrix *)NULL, // dst_pt kr0, kc0, (RectBlockMatrix *)NULL, // dst_pt2 (-1), (-1), child_pt, (SquareBlockMatrix*)NULL, // src_pt2 isSym ? dsub_sym2sym : ((kr0 == kc0) ? 
dsub_unsym2diag : dsub_unsym2unsym), false, ops, father_id, level, verbose, fp); // C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : mt } // loop : jt string task_name = ("i " + to_string(kr0) + " " + to_string(kc0) + " : " + to_string(level) + " : " + to_string(father_id + 1)); // selfIndex^-1 queue[kk] = new C_task(C_DSUB, task_name, (void *)C_task_arg, C_Dsub_task_exec, 1, 0, ops_sum); queue[kk]->parallel_max = block_diag_size; // block_total_size; queue[kk]->parallel_id = kk; // added to manage parents of tasks_p and tasks_q if (tasks_r != NULL) { for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { queue[kk]->parents->push_back(tasks_r[(*jt).child_id][0]); } } if (tasks_q.size() > 0) { // queue[kk]->parents->push_back(tasks_q[father_id][0]); // diag queue[kk]->parents->push_back(tasks_q[0]); // diag } if (tasks_s.size() > 0) { const int itmp = ((kc0 * (kc0 + 1)) / 2) + kr0; // diag queue[kk]->parents->push_back(tasks_s[itmp]); } // if (tasks_p != NULL) { ll = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt, ll++) { for (list::const_iterator mt = parents_r[ll].begin(); mt != parents_r[ll].end(); ++mt) { for (list::const_iterator nt = mt; nt != parents_r[ll].end(); ++nt) { const int idx = (isSym ? 
(((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector &tasks_tmq = tasks_d[(*jt).child_id]; vector &indcolq = tasks_d_indcol[(*jt).child_id]; if (tasks_p_flag) { vector &tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); } if (!isSym && ((*nt) > (*mt))) { if (tasks_p_flag) { vector &tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } for (list::const_iterator nt = parents_c[ll].begin(); nt != parents_c[ll].end(); ++nt) { if ((*nt) > (*mt)) { // upper block const int idx = (isSym ? (((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector &tasks_tmq = tasks_d[(*jt).child_id]; vector &indcolq = tasks_d_indcol[(*jt).child_id]; if (tasks_p_flag) { vector &tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); } if (!isSym) { if (tasks_p_flag) { vector &tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } } // loop : nt } // loop : mt } // loop : jt queue[kk]->parents->sort(compare_task_name); queue[kk]->parents->unique(); EraseNullParents(queue[kk]); for (int m = 0; m < child_contrib.size(); m++) { parents_r[m].clear(); parents_c[m].clear(); } delete [] parents_r; delete [] parents_c; kk++; } // loop : kc } // loop : kr // -- offdiagonal for (int kc0 = 0; kc0 < num_block2; kc0++) { const int kc = 
child_contrib.front().father_offdiag_pt->IndexBlock_c(kc0); const int kc_end = child_contrib.front().father_offdiag_pt->IndexBlock_c(kc0 + 1); for (int kr0 = 0; kr0 < num_block; kr0++) { const int kr = kr0 * SIZE_B1; const int kr_end = (kr0 == (num_block - 1)) ? diag_size : (kr + SIZE_B1); list *> *C_task_arg = new list *>; // C_task_arg[kk] = new list; list* parents_r = new list[child_contrib.size()]; list* parents_c = new list[child_contrib.size()]; long *ops_sum = new long; *ops_sum = 0L; int count_atomics = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; // of loop mt } if (r_bgn_dst >= kr_end ) { break; // of loop mt } for (list ::const_iterator nt = (*jt).offdiag_strip.begin(); nt != (*jt).offdiag_strip.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } count_atomics++; } } } // loop : jt int atomic_id = 0; int ll = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt, ll++) { const int father_row = (*jt).father_row; SquareBlockMatrix *child_pt = (*jt).child_pt; RectBlockMatrix *father_offdiag_pt = (*jt).father_offdiag_pt; RectBlockMatrix *father_offdia2_pt = (*jt).father_offdiag_unsym_pt; for (list ::const_iterator mt = (*jt).diag_strip.begin(); mt != (*jt).diag_strip.end(); ++mt) { const int r_bgn_dst = (*mt).begin_dst; const int r_end_dst = r_bgn_dst + (*mt).width; if (r_end_dst < kr) { continue; // of loop mt } if (r_bgn_dst >= kr_end ) { break; // of loop mt } const int ir_bgn = imax(kr, r_bgn_dst); const int ir_end = imin(kr_end, r_end_dst); for (list ::const_iterator nt = (*jt).offdiag_strip.begin(); nt != 
(*jt).offdiag_strip.end(); ++nt) { const int c_bgn_dst = (*nt).begin_dst; const int c_end_dst = c_bgn_dst + (*nt).width; if (c_end_dst < kc) { continue; } if (c_bgn_dst >= kc_end) { break; } const int jc_bgn = imax(kc, c_bgn_dst); const int jc_end = imin(kc_end, c_end_dst); // // dsub_sym2rct(ir_bgn, ir_end, jc_bgn, jc_end, // ir_bgn_src, jc_bgn_src, // src_pt, src_pt) const long ops = ((long)(ir_end - ir_bgn) * (long)(jc_end - jc_bgn)); const int ir_bgn_src =(*mt).begin_src + (ir_bgn - r_bgn_dst); const int jc_bgn_src =(*nt).begin_src + (jc_bgn - c_bgn_dst); const int ir_end_src = ir_bgn_src + (ir_end - ir_bgn); const int jc_end_src = jc_bgn_src + (jc_end - jc_bgn); if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) { C_Dsub_task *tmp = new C_Dsub_task(count_atomics, atomic_id++, (ir_bgn - kr), (ir_end - kr), (jc_bgn - kc), (jc_end - kc), ir_bgn_src, (-1), // ir_bgn_src2 jc_bgn_src, (-1), // jc_bgn_src2 father_row, (SquareBlockMatrix*)NULL, // father_pt father_offdiag_pt, kr0, // ir_block kc0, // jc_block isSym ? (RectBlockMatrix *)NULL : father_offdia2_pt, 0, // without block 0, // without block child_pt, (SquareBlockMatrix*)NULL, // src_pt2 isSym ? 
dsub_sym2rct : dsub_unsym2rct, false, ops, father_id, level, verbose, fp); C_task_arg->push_back(tmp); *ops_sum += ops; update_parents_list(parents_r[ll], ir_bgn_src, ir_end_src, child_pt); update_parents_list(parents_c[ll], jc_bgn_src, jc_end_src, child_pt); } // if ((ir_bgn < ir_end) && (jc_bgn < jc_end)) } // loop : nt } // loop : mt } // loop : jt string task_name = ("j " + to_string(kr0) + " " + to_string(kc0) + " : " + to_string(level) + " : " + to_string(father_id + 1)); // selfIndex^-1 queue[kk] = new C_task(C_DSUB, task_name, // task_name.str(), (void *)C_task_arg, C_Dsub_task_exec, 1, 0, ops_sum); queue[kk]->parallel_max = block_offdiag_size; // block_total_size; queue[kk]->parallel_id = kk - block_diag_size; // added to manage parents of tasks_p and tasks_q if (tasks_r != NULL) { for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt) { queue[kk]->parents->push_back(tasks_r[(*jt).child_id][0]); } } if (tasks_q.size() > 0) { // queue[kk]->parents->push_back(tasks_q[father_id][1]); // offdiag queue[kk]->parents->push_back(tasks_q[1]); // offdiag } if (tasks_s.size() > 0) { // offiag const int itmp = kc0 * num_block + kr0 + tasks_s[0]->parallel_max; queue[kk]->parents->push_back(tasks_s[itmp]); } // if (tasks_p != NULL) { ll = 0; for (typename list >::const_iterator jt = child_contrib.begin(); jt != child_contrib.end(); ++jt, ll++) { for (list::const_iterator mt = parents_r[ll].begin(); mt != parents_r[ll].end(); ++mt) { for (list::const_iterator nt = parents_c[ll].begin(); nt != parents_c[ll].end(); ++nt) { // ?? offdiag : to be debugged : 27 Mar.2012 Atsushi if ((*nt) >= (*mt)) { // upper block const int idx = (isSym? 
(((*nt) * ((*nt) + 1)) / 2 + (*mt)) : ((*nt) * (*nt) + 2 * (*mt))); vector&tasks_tmq = tasks_d[(*jt).child_id]; vector &indcolq = tasks_d_indcol[(*jt).child_id]; if (tasks_p_flag) { vector&tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx]]->parents->push_back(queue[kk]); } if (!isSym && ((*nt) > (*mt))) { if (tasks_p_flag) { vector&tasks_tmp = tasks_p[(*jt).child_id]; vector &indcolp = tasks_p_indcol[(*jt).child_id]; queue[kk]->parents->push_back(tasks_tmp[indcolp[idx + 1]]); } if (tasks_tmq.size() > 0) { tasks_tmq[indcolq[idx + 1]]->parents->push_back(queue[kk]); } } } } // loop : nt } // loop : mt } // loop : jt // } // if (tasks_p != NULL) { queue[kk]->parents->sort(compare_task_name); queue[kk]->parents->unique(); EraseNullParents(queue[kk]); for (int m = 0; m < child_contrib.size(); m++) { parents_r[m].clear(); parents_c[m].clear(); } delete [] parents_r; delete [] parents_c; kk++; } // loop : kr0 } // loop : kc0 } // else (child_contrib.size() == 2) } template void C_Dsub_queue(bool isSym, int father_id, bool skip_flag, vector& queue, list > &child_contrib, vector* tasks_p, vector* tasks_p_indcol, const bool tasks_p_flag, vector& tasks_q, vector* tasks_r, vector& tasks_s, vector* tasks_d, vector* tasks_d_indcol, int level, const bool direct_flag, const bool verbose, FILE *fp); template void C_Dsub_queue(bool isSym, int father_id, bool skip_flag, vector& queue, list > &child_contrib, vector* tasks_p, vector* tasks_p_indcol, const bool tasks_p_flag, vector& tasks_q, vector* tasks_r, vector& tasks_s, vector* tasks_d, vector* tasks_d_indcol, int level, const bool direct_flag, const bool verbose, FILE *fp); template void C_Dsub_queue >(bool isSym, int father_id, bool skip_flag, vector& queue, list > > &child_contrib, vector* tasks_p, vector* tasks_p_indcol, const bool tasks_p_flag, vector& tasks_q, vector* tasks_r, 
vector& tasks_s,
			   vector* tasks_d,
			   vector* tasks_d_indcol,
			   int level,
			   const bool direct_flag,
			   const bool verbose,
			   FILE *fp);

template
void C_Dsub_queue >(bool isSym,
		    int father_id,
		    bool skip_flag,
		    vector& queue,
		    list > > &child_contrib,
		    vector* tasks_p,
		    vector* tasks_p_indcol,
		    const bool tasks_p_flag,
		    vector& tasks_q,
		    vector* tasks_r,
		    vector& tasks_s,
		    vector* tasks_d,
		    vector* tasks_d_indcol,
		    int level,
		    const bool direct_flag,
		    const bool verbose,
		    FILE *fp);

template
void C_Dsub_queue(bool isSym,
		  int father_id,
		  bool skip_flag,
		  vector& queue,
		  list > &child_contrib,
		  vector* tasks_p,
		  vector* tasks_p_indcol,
		  const bool tasks_p_flag,
		  vector& tasks_q,
		  vector* tasks_r,
		  vector& tasks_s,
		  vector* tasks_d,
		  vector* tasks_d_indcol,
		  int level,
		  const bool direct_flag,
		  const bool verbose,
		  FILE *fp);

template
void C_Dsub_queue >(bool isSym,
		    int father_id,
		    bool skip_flag,
		    vector& queue,
		    list > > &child_contrib,
		    vector* tasks_p,
		    vector* tasks_p_indcol,
		    const bool tasks_p_flag,
		    vector& tasks_q,
		    vector* tasks_r,
		    vector& tasks_s,
		    vector* tasks_d,
		    vector* tasks_d_indcol,
		    int level,
		    const bool direct_flag,
		    const bool verbose,
		    FILE *fp);
//

// update_parents_list
//
// Appends to `parents` every block index m with
//   mtrx->BlockIndex(begin) <= m <= mtrx->BlockIndex(end - 1)
// that is not already stored in the list.  Existing entries are kept and new
// ones are pushed at the back, so the list holds each index at most once in
// order of first insertion.
//
//   parents : in/out list of block indices collected so far
//   begin   : first position of the range handed to BlockIndex()
//   end     : the last position looked up is (end - 1), i.e. the range is
//             treated as half-open [begin, end)
//   mtrx    : matrix object used only for its BlockIndex() mapping
//
// If BlockIndex(end - 1) is smaller than BlockIndex(begin) the loop body never
// runs and `parents` is left untouched.
// NOTE(review): the template parameter list and the element type of `list`
// are missing in this copy of the file (the explicit instantiations below
// show the same loss) - restore them from the upstream source before building.
template
void update_parents_list(list & parents,
			 const int begin, const int end,
			 SquareBlockMatrix* mtrx) // const int size_block)
{
  const int ibgn = mtrx->BlockIndex(begin);
  const int iend = mtrx->BlockIndex(end - 1);   // last position is inclusive
  for (int m = ibgn; m <= iend; m++) {
    // linear search : flag stays true only when m is not yet in the list
    bool flag = true;
    for (list::const_iterator it = parents.begin(); it != parents.end();
	 ++it) {
      if (*it == m) {
	flag = false;
	break;
      }
    }
    if (flag) {
      parents.push_back(m);
    }
  }
}

// explicit instantiations of update_parents_list
template
void update_parents_list(list & parents,
			 const int begin, const int end,
			 SquareBlockMatrix* mtrx);

template
void update_parents_list(list & parents,
			 const int begin, const int end,
			 SquareBlockMatrix* mtrx);

template
void update_parents_list >(list & parents,
			   const int begin, const int end,
			   SquareBlockMatrix >* mtrx);

template
void update_parents_list >(list & parents,
			   const int begin, const int end,
			   SquareBlockMatrix >*
mtrx); template void update_parents_list(list & parents, const int begin, const int end, SquareBlockMatrix* mtrx); template void update_parents_list >(list & parents, const int begin, const int end, SquareBlockMatrix >* mtrx); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_Dsub.hpp000664 000000 000000 00000010434 14037356732 024162 0ustar00rootroot000000 000000 /*! \file C_Dsub.hpp \brief routines for substiution of off-diagonal matrix with strips \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. 
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Compiler/blas.hpp" #include "Driver/C_threads_tasks.hpp" template void C_Dsub_task_exec(void *arg_); template void dsub_sym2sym_diag(C_Dsub_task *arg); template void dsub_unsym2unsym_diag(C_Dsub_task *arg); template void dsub_sym2sym(C_Dsub_task *arg); template void dsub_unsym2unsym(C_Dsub_task *arg); template void dsub_unsym2diag(C_Dsub_task *arg); template void dsub_sym2rct(C_Dsub_task *arg); template void dsub_unsym2rct(C_Dsub_task *arg); template void dsub_sym2sym_diag_two(C_Dsub_task *arg); template void dsub_unsym2unsym_diag_two(C_Dsub_task *arg); template void dsub_sym2sym_two(C_Dsub_task *arg); template void dsub_unsym2unsym_two(C_Dsub_task *arg); template void dsub_unsym2diag_two(C_Dsub_task *arg); template void dsub_sym2rct_two(C_Dsub_task *arg); template void dsub_unsym2rct_two(C_Dsub_task *arg); template void C_Dsub_queue(bool isSym, int father_id, bool skip_flag, vector& queue, list > &child_contrib, vector* tasks_p, // _tasks_DSymmGEMM vector* tasks_p_indcol, const bool tasks_p_flag, vector& tasks_q, // _tasks_DfillSymm vector* tasks_r, // 
_tasks_SparseLocalSchur vector& tasks_s, // _tasks_DSub[level + 1][(*it)] vector* tasks_d, // _tasks_deallocateLocalSchur vector* tasks_d_indcol, int level, const bool direct_flag, const bool verbose, FILE *fp); template void update_parents_list(list & parents, const int begin, const int end, SquareBlockMatrix* mtrx); FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_KernDetect.cpp000664 000000 000000 00000133325 14037356732 025315 0ustar00rootroot000000 000000 /*! \file C_KernDetect.cpp \brief Kernel detection algorithm : symm <= DOI: 10.1002/nme.4729 / unsymm \author Atsushi. Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection. If not, see <http://www.gnu.org/licenses/>.
//

#include "Compiler/OptionLibrary.hpp"
#include "Driver/C_KernDetect.hpp"
#include "Algebra/ColumnMatrix.hpp"
#include "Algebra/VectorArray.hpp"
#include "Compiler/DissectionIO.hpp"

// NOTE(review): in this copy of the file every template parameter list and
// every explicit template argument list has been lost (e.g. `template bool`,
// `ColumnMatrix a_q`, `complex *x`).  The code below is kept token for token;
// restore the lists from the upstream Dissection sources before compiling.

// T may be std::complex of U and W is in higher precision than T
//
// check_kern
//
// Tests whether n0 is an acceptable kernel dimension for the n x n matrix
// obtained from a_ini by the symmetric permutation `permute`
// (entry (i, j) is read from a_ini[permute[i] + permute[j] * lda]).
//
// Steps visible in the body:
//   1. copy the permuted matrix into a_fq, keep an unfactorized duplicate in
//      a_q, and pass a_fq to full_ldlt() (flag_sym) or full_ldu() (otherwise);
//   2. build n0 columns in nsp from the last n0 columns of a_q restricted to
//      the first n1 = n - n0 rows, with -1 at row (n1 + j) of column j, and
//      hand them to full_fwbw_perturb_multi();
//   3. form the n0 x n0 matrix proj of blas_dot() products of those columns
//      (upper part filled with blas_conj of the lower part) and pass it to
//      full_ldlh();
//   4. for each candidate dimension m = n0 - 1, n0, n0 + 1 compute the largest
//      column residual (see loop below) and store it in errors[m - n0 + 1].
//
// Returns true exactly when
//   errors[0] > eps_param, errors[1] < eps_param and errors[2] > eps_param.
//
//   n0          : kernel dimension to be tested
//   lda         : leading dimension used to index a_ini
//   n           : size of the matrix that is examined
//   a_ini       : input matrix, read only through the permuted index
//   permute     : permutation applied to both rows and columns
//   dim_augkern : forwarded unchanged to full_fwbw_perturb_multi()
//   eps         : forwarded unchanged to full_fwbw_perturb_multi()
//   eps_param   : threshold the three residuals are compared against
//   flag_sym    : selects full_ldlt() instead of full_ldu() and is forwarded
//                 to full_fwbw_perturb_multi()
//   errors      : output, at least 3 entries are written
//   verbose, fp : not referenced in this body
//
// NOTE(review): k = n - m equals n + 1 for n0 == 0 and is negative for
// n0 == n; nsp2 is only n x n, so callers presumably guarantee
// 1 <= n0 <= n - 1 - confirm.
// NOTE(review): Uzero is declared but never used.
template
bool check_kern(const int n0, const int lda, const int n, W *a_ini,
		int *permute, const int dim_augkern, const U &eps,
		const U &eps_param, const bool flag_sym, U *errors,
		const bool verbose, FILE *fp)
{
  bool flag;
  //  W *a_q, *a_fq, *proj, *nsp, *nsp2;
  //  W *v, *alpha;
  const W zero(0.0);
  const W one(1.0);
  const W none(-1.0);
  const U Uzero(0.0);

  ColumnMatrix a_q(n, n);      // unfactorized permuted matrix
  ColumnMatrix a_fq(n, n);     // permuted matrix handed to the factorization
  ColumnMatrix proj(n, n);     // only the leading n0 x n0 part is used
  ColumnMatrix nsp(n, n);      // only the first n0 columns are used
  ColumnMatrix nsp2(n,n);
  VectorArray v(n);
  VectorArray alpha(n);

  for (int j = 0; j < n; j++) {
    for (int i = 0; i < n; i++) {
      const int ij = permute[i] + permute[j] * lda;
      a_fq(i, j) = a_ini[ij];
    }
  }
  // duplicate of a_fq
  a_q.copy(a_fq);
  if (flag_sym) {
    full_ldlt(n, a_fq.addrCoefs(), n);
  }
  else {
    full_ldu(n, a_fq.addrCoefs(), n);
  }
  int n1 = n - n0;
  for (int j = 0; j < n0; j++) {
    for (int i = 0; i < n1; i++) {
      nsp(i, j) = a_q(i, (j + n1));  // nsp[i + j * n] = a_q[i + (j + n1) * n];
    }
    for (int i = n1; i < n; i++) {
      nsp(i, j) = zero;              // nsp[i + j * n] = zero;
    }
    nsp((n1 + j), j) = none;
  }
  full_fwbw_perturb_multi(n1, n0,
			  a_q.addrCoefs(), n, a_fq.addrCoefs(),
			  nsp.addrCoefs(),
			  dim_augkern, eps, flag_sym);
  // compute projection matrix
  for (int i = 0; i < n0; i++) {
    for (int j = 0; j <= i; j++) {
      proj(i, j) = blas_dot(n, nsp.addrCoefs() + (i * n), 1,
			    nsp.addrCoefs() + (j * n), 1);  // lower
      proj(j, i) = blas_conj(proj(i, j));                   // upper
    }
  }
  // Hermite symmetric with complex inner product
  full_ldlh(n0, proj.addrCoefs(), n);

  // three candidate kernel dimensions : n0 - 1, n0, n0 + 1
  for (int m = (n0 - 1); m <= (n0 + 1); m++) {
    int k = n - m;              // size of the leading block for this candidate
    U res_err = U(0.0);         // running maximum of the column residuals
    for (int j = 0; j < n; j++) {
      for (int i = 0; i < k; i++) {
	nsp2(i, j) = a_q(i, j);      // nsp2[i + j * n] = a_q[i + j * n];
      }
    }
    full_fwbw_perturb_multi(k, n,
			    a_q.addrCoefs(), n, a_fq.addrCoefs(),
			    nsp2.addrCoefs(),
			    dim_augkern, eps, flag_sym);
    // rows k..n-1 are cleared, then the identity is subtracted
    for (int j = 0; j < n; j++) {
      for (int i = k; i < n; i++) {
	nsp2(i, j) = zero;           // nsp2[i + j * n] = zero;
      }
      nsp2(j, j) -= one;             // nsp2[j + j * n] -= one;
    }
    // first k columns : plain l2 norm of the column
    for (int j = 0; j < k; j++) {
      for (int i = 0; i < n; i++) {
	v[i] = nsp2(i, j);           // v[i] = nsp2[i + j * n];
      }
      U res = conv_prec(blas_l2norm(n, v.addrCoefs(), 1));
      res_err = res_err > res ? res_err : res;
    }
    // remaining columns : norm after removing the component expressed through
    // the n0 columns of nsp (two gemv calls around a solve with proj)
    for (int j = k; j < n; j++) {
      for (int i = 0; i < n; i++) {
	v[i] = nsp2(i, j);           // v[i] = nsp2[i + j * n];
      }
      // alpha = 1; beta = 0;
      blas_gemv(CblasTrans, n, n0, one, nsp.addrCoefs(), n,
		v.addrCoefs(), 1, zero, alpha.addrCoefs(), 1);
      full_fwbw_part(n0, proj.addrCoefs(), n, alpha.addrCoefs());
      // alpha = -1; beta = 1;
      blas_gemv(CblasNoTrans, n, n0, none, nsp.addrCoefs(), n,
		alpha.addrCoefs(), 1, one, v.addrCoefs(), 1);
      U res = conv_prec(blas_l2norm(n, v.addrCoefs(), 1));
      res_err = res_err > res ? res_err : res;
    }
    errors[m - n0 + 1] = res_err;   // m = n0 - 1, n0, n0 + 1  ->  index 0, 1, 2
  }
  // accepted only when the residual is small for n0 and large for both
  // neighbouring dimensions
  flag = false;
  if ((errors[0] > eps_param) && (errors[1] < eps_param) &&
      (errors[2] > eps_param)) {
    flag = true;
  }
  return flag;
}

// explicit instantiations of check_kern
template
bool check_kern(const int n0, const int lda, const int n,
		quadruple *a_ini, int *permute,
		const int dim_augkern, const double &eps,
		const double &eps_param, const bool flag_sym,
		double *errors,
		const bool verbose, FILE *fp);

template
bool check_kern, double, complex, quadruple>(const int n0, const int lda, const int n,
		complex *a_ini, int *permute,
		const int dim_augkern, const double &eps,
		const double &eps_param, const bool flag_sym,
		double *errors,
		const bool verbose, FILE *fp);
#ifndef NO_OCTRUPLE
template
bool check_kern(const int n0, const int lda, const int n,
		octruple *a_ini, int *permute,
		const int dim_augkern, const quadruple &eps,
		const quadruple &eps_param, const bool flag_sym,
		quadruple *errors,
		const bool verbose, FILE *fp);

template
bool check_kern, quadruple, complex, octruple>(const int n0, const int lda, const int n,
		complex *a_ini, int *permute,
		const int dim_augkern, const
		quadruple &eps,
		const quadruple &eps_param, const bool flag_sym,
		quadruple *errors,
		const bool verbose, FILE *fp);
#endif
//

// T may be std::complex of U and W is in higher precision than T
//
// check_matrixerr
//
// Returns matrix_infty_norm() of the leading k x k block of nsp2 after the
// following sequence:
//   - nsp2 is filled with ones, then its leading k x k block is overwritten
//     with the permuted entries a[permute[i] + permute[j] * lda];
//   - nsp3 and nsp4 receive copies of nsp2; nsp3 is passed to full_ldlt()
//     (flag_sym) or full_ldu() (otherwise) with size k;
//   - full_fwbw_perturb_multi(k, k, nsp4, n, nsp3, nsp2, ...) is called;
//   - one is subtracted from the first k diagonal entries of nsp2.
// Presumably this measures how far (factorized inverse) * (matrix) is from
// the identity - confirm against full_fwbw_perturb_multi().
//
//   lda         : leading dimension used to index a
//   n           : leading dimension / size of the work matrices
//   a           : input matrix, read only through the permuted index
//   dim_augkern : forwarded unchanged to full_fwbw_perturb_multi()
//   k           : size of the leading block that is examined
//   permute     : permutation applied to rows and columns
//   eps         : forwarded unchanged to full_fwbw_perturb_multi()
//   flag_sym    : selects full_ldlt() instead of full_ldu()
//
// NOTE(review): Uzero and v are declared but never used.
template
U check_matrixerr(const int lda, const int n,
		  W *a,
		  const int dim_augkern, const int k,
		  int *permute,
		  const U &eps,
		  const bool flag_sym)
{
  U error;
  const U Uzero(0.0);
  const W one(1.0);

  ColumnMatrix nsp2(n, n);
  ColumnMatrix nsp3(n, n);
  ColumnMatrix nsp4(n, n);
  VectorArray v(n);

  for (int i = 0; i < (n * n); i++) {
    nsp2.addrCoefs()[i] = one;
  }
  // permutation is given
  for (int j = 0; j < k; j++) {
    for (int i = 0; i < k; i++) {
      const int ij0 = permute[i] + permute[j] * lda;
      nsp2(i, j) = W(a[ij0]);  // const int ij1 = i + j * n;
    }
  }
  nsp3.copy(nsp2);
  nsp4.copy(nsp2);
  if (flag_sym) {
    full_ldlt(k, nsp3.addrCoefs(), n); // factorization is done in
  }                                    // higher accuracy
  else {
    full_ldu(k, nsp3.addrCoefs(), n);
  }
  full_fwbw_perturb_multi(k, k,
			  nsp4.addrCoefs(), n, nsp3.addrCoefs(),
			  nsp2.addrCoefs(),
			  dim_augkern, eps, flag_sym);
  for (int i = 0; i < k; i++) {
    nsp2(i, i) -= one;  // nsp2[i + i * n] -= one;
  }
  error = matrix_infty_norm(k, nsp2.addrCoefs(), n);

  return error;
}

// explicit instantiations of check_matrixerr
template
double check_matrixerr(const int lda, const int n,
		       quadruple *a,
		       const int dim_augkern, const int k,
		       int *permute,
		       const double &eps,
		       const bool flag_sym);

template
double check_matrixerr, double, complex, quadruple>(const int lda, const int n,
		       complex *a,
		       const int dim_augkern, const int k,
		       int *permute,
		       const double &eps,
		       const bool flag_sym);
#ifndef NO_OCTRUPLE
template
quadruple check_matrixerr(const int lda, const int n,
			  octruple *a,
			  const int dim_augkern, const int k,
			  int *permute,
			  const quadruple &eps,
			  const bool flag_sym);

template
quadruple check_matrixerr, quadruple, complex, octruple>(const int lda, const int n,
			  complex *a,
			  const int dim_augkern, const int k,
			  int *permute,
			  const quadruple &eps,
			  const bool flag_sym);
#endif
//

// verify_kernels
//
// For each of the last n0 columns j = n1 .. n - 1 (n1 = n - n0) builds the
// vector
//        [ result of full_fwbw_part(n1, a_fact, lda, .) on a_ini(0:n1, j) ]
//   v =  [ zeros, with -1 at position j                                    ]
// multiplies it by the full n x n matrix copied from a_ini (blas_gemv into w)
// and stores blas_l2norm(w) in errors[j - n1].
//
//   n0      : number of kernel vectors that are checked
//   n       : size of the matrix
//   a_ini   : original matrix, leading dimension lda
//   a_fact  : factorized matrix handed to full_fwbw_part(); const is cast
//             away for that call
//   lda     : leading dimension of a_ini and a_fact
//   eps     : not referenced in this body
//   errors  : output, n0 entries are written
//   verbose, fp : only referenced inside the disabled `#if 0` dump
template
void verify_kernels(const int n0, const int n, const T *a_ini,
		    const T *a_fact, const int lda, const double eps,
		    U *errors,
		    const bool verbose, FILE *fp)
{
  const T one(1.0);
  const T zero(0.0);
  const T none(-1.0);

  ColumnMatrix a_p(n, n);     // dense copy of a_ini with leading dimension n
  const int n1 = n - n0;
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n ; j++) {
      //      const int ij0 = permute[i] + permute[j] * n;
      const int ij0 = i + j * lda;
      a_p(i, j) = a_ini[ij0];  // const int ij1 = i + j * n;
    }
  }
  // compute [A_11^-1 A_12]
  //         [     -I     ]
  VectorArray v(n);
  VectorArray w(n);
  for (int j = n1; j < n; j++) {
    for (int i = 0; i < n1; i++) {
      //      const int ij0 = permute[i] + permute[j] * n;
      const int ij0 = i + j * lda;
      v[i] = a_ini[ij0];
    }
    full_fwbw_part(n1, (T *)a_fact, lda, v.addrCoefs()); //cast : 22 May 2018
    for (int i = n1; i < n; i++) {
      v[i] = zero;
    }
    v[j] = none;
    // w = a_p * v
    blas_gemv(CblasNoTrans, n, n, one, a_p.addrCoefs(), n,
	      v.addrCoefs(), 1, zero,
	      w.addrCoefs(), 1);
#if 0
    diss_printf(verbose, fp, "%s %d verify_kernels %d\n", __FILE__, __LINE__, n);
    for (int i = 0; i < n; i++) {
      diss_printf(verbose, fp, "%d %s %s\n", i, tostring(v[i]).c_str(), tostring(w[i]).c_str());
    }
#endif
    errors[j - n1] = blas_l2norm(n, w.addrCoefs(), 1);
  }
}

// explicit instantiations of verify_kernels
template
void verify_kernels(const int n0, const int n,
		    const double *a_ini,
		    const double *a_fact, const int lda,
		    const double eps,
		    double *errors,
		    const bool verbose, FILE *fp);

template
void verify_kernels, double>(const int n0, const int n,
		    const complex *a_ini,
		    const complex *a_fact, const int lda,
		    const double eps,
		    double *errors,
		    const bool verbose, FILE *fp);

template
void verify_kernels(const int n0, const int n,
		    const quadruple *a_ini,
		    const quadruple *a_fact, const int lda,
		    const double eps,
		    quadruple *errors,
		    const bool verbose, FILE *fp);

template
void verify_kernels, quadruple>(const int n0, const int n,
		    const complex *a_ini,
		    const complex *a_fact, const int lda,
		    const double eps,
		    quadruple *errors,
		    const bool verbose, FILE *fp);
//

// HouseholderVector_complex
//
// Complex-valued counterpart of HouseholderVector(): from x[0 .. n-1] it
// writes a vector v and a scalar *gamma.
//   - s = blas_l2norm2(n - 1, &x[1], 1)   (presumably the squared 2-norm of
//     the tail of x - confirm against blas_l2norm2);
//   - v[1 .. n-1] is a copy of x[1 .. n-1];
//   - if s == 0 : v[0] = 1 and *gamma = 0;
//   - otherwise v[0] is set from the phase factor alpha of x[0], |x[0]| and
//     sqrt(s), *gamma = 2 |v0|^2 / (s + |v0|^2), and the whole vector is
//     scaled by 1 / v[0] so that v[0] becomes one.
//
//   n     : length of x and v
//   x     : input vector (not modified)
//   v     : output vector, length n
//   gamma : output scalar
//
// NOTE(review): if atan2 here is the standard library function its result
// lies in [-pi, pi], so the test `x0arg < pi * onehalf` can never fail and
// arguments in [-pi, -pi / 2] take the else branch - confirm that this is
// the intended split.
template
void HouseholderVector_complex(int n, T *x, T *v, T *gamma)
{
  const U zero(0.0);
  const U one(1.0);
  const U two(2.0);
  const U onehalf(1.5);
  const T czero(zero, zero);
  const T cone(one, zero);
  const T ctwo(two, zero);
  const U pi(M_PI);

  U s = blas_l2norm2((n - 1), &x[1], 1);
  v[0] = cone;
  for (int i = 1; i < n; i++) {
    v[i] = x[i];
  }
  if (s == zero) {
    *gamma = czero;
  }
  else {
    //    U x0arg = std::arg(x[0]);
    U x0arg = atan2(x[0].imag(), x[0].real());
    const T alpha = complex(cos(x0arg), sin(x0arg));   // unit phase of x[0]
    U xabs = sqrt(x[0].real() * x[0].real() + x[0].imag() * x[0].imag());
    if ((x0arg >= pi / two) && (x0arg < pi * onehalf)) {
      v[0] = alpha * (xabs + sqrt(s));
    }
    else {
      v[0] = alpha * (-s) / (xabs + sqrt(s));
    }
    const U v0r(v[0].real());
    const U v0i(v[0].imag());
    const U v0sq = v0r * v0r + v0i * v0i;               // |v[0]|^2
    *gamma = ctwo * v0sq / (s + v0sq);
    T z = one / v[0];
    for (int i = 0; i < n; i++) {                       // normalize : v[0] = 1
      v[i] *= z;
    }
  }
}

// explicit instantiations of HouseholderVector_complex
template
void HouseholderVector_complex, double>(int n, complex *x,
					 complex *v,
					 complex *gamma);

template
void HouseholderVector_complex, quadruple>(int n, complex *x,
					    complex *v,
					    complex *gamma);

template
void HouseholderVector_complex, float>(int n, complex *x,
					complex *v,
					complex *gamma);
//

// HouseholderVector
//
// Real-valued Householder vector: from x[0 .. n-1] it writes v and *gamma.
//   - s = blas_l2norm2(n - 1, &x[1], 1)   (presumably the squared 2-norm of
//     the tail of x - confirm against blas_l2norm2);
//   - v[1 .. n-1] is a copy of x[1 .. n-1];
//   - if s == 0 : v[0] = 1 and *gamma = 0;
//   - otherwise, with z = sqrt(x[0]^2 + s), v[0] = x[0] - z when x[0] <= 0
//     and v[0] = -s / (x[0] + z) when x[0] > 0 (the two expressions are
//     algebraically equal; the second avoids the subtraction),
//     *gamma = 2 v[0]^2 / (s + v[0]^2), and v is scaled by 1 / v[0].
//
//   n     : length of x and v
//   x     : input vector (not modified)
//   v     : output vector, length n, v[0] == 1 on return
//   gamma : output scalar
template
void HouseholderVector(int n, T *x, T *v, T *gamma)
{
  const T one(1.0);
  const T zero(0.0);
  const T two(2.0);

  const T s = blas_l2norm2((n - 1), &x[1], 1);
  v[0] = one;
  for (int i = 1; i < n; i++) {
    v[i] = x[i];
  }
  if (s == zero) {
    *gamma = zero;
  }
  else {
    T z = sqrt(x[0] * x[0] + s);
    if (x[0] <= zero) {
      v[0] = x[0] - z;
    }
    else {
      v[0] = (-s) / (x[0] + z);
    }
    *gamma = two * v[0] * v[0] / (s + v[0] * v[0]);
    z = one / v[0];                 // z is reused as the scaling factor
    for (int i = 0; i < n; i++) {
      v[i] *= z;
    }
  }
}

// Specializations for complex element types : each one forwards to
// HouseholderVector_complex() with the matching real type as second argument.
template<>
void HouseholderVector >(int n, complex *x,
			 complex *v,
			 complex *gamma)
{
  HouseholderVector_complex, double>(n, x, v, gamma);
}

template<>
void HouseholderVector >(int n, complex *x,
			 complex *v,
			 complex *gamma)
{
  HouseholderVector_complex, quadruple>(n, x, v, gamma);
}

template<>
void HouseholderVector >(int n, complex *x,
			 complex *v,
			 complex *gamma)
{
  HouseholderVector_complex, octruple>(n, x, v, gamma);
}

template<>
void
HouseholderVector >(int n, complex *x, complex *v, complex *gamma) { HouseholderVector_complex, float>(n, x, v, gamma); } template void HouseholderVector(int n, double *x, double *v, double *gamma); template void HouseholderVector(int n, quadruple *x, quadruple *v, quadruple *gamma); template void HouseholderVector(int n, float *x, float *v, float *gamma); #if 0 template void HouseholderVector >(int n, complex *x, complex *v, complex *gamma); template void HouseholderVector >(int n, complex *x, complex *v, complex *gamma); #endif // template void HouseholderReflection(int n, T *a, int lda, T *v, T *w, const T &gamma) { const T one(1.0); const T zero(0.0); blas_gemv(CblasConjTrans, n, n, one, a, lda, v, 1, zero, w, 1); T ngamma = (-gamma); blas_gerc(n, n, ngamma, v, 1, w, 1, a, lda); } template void HouseholderReflection(int n, double *a, int lda, double *v, double *w, const double &gamma); template void HouseholderReflection >(int n, complex *a, int lda, complex *v, complex *w, const complex &gamma); template void HouseholderReflection(int n, quadruple *a, int lda, quadruple *v, quadruple *w, const quadruple &gamma); template void HouseholderReflection >(int n, complex *a, int lda, complex *v, complex *w, const complex &gamma); template void HouseholderReflection(int n, float *a, int lda, float *v, float *w, const float &gamma); template void HouseholderReflection >(int n, complex *a, int lda, complex *v, complex *w, const complex &gamma); // // T may be complex of U template int hqr_pivot(const int n, T *a, int *permute) { const T Tzero(0.0); const U Uzero(0.0); int n0, k; VectorArray cc(n); VectorArray col(n); VectorArray v(n); VectorArray w(n); for (int i = 0 ; i < n; i++) { cc[i] = blas_l2norm2(n, &a[i * n], 1); // norm2 returns double value permute[i] = i; } k = 0; { U tmp(0.0); for (int i = 0; i < n; i++) { if (cc[i] > tmp) { tmp = cc[i]; k = i; // find the first entry that attains the maximum value } } // loop : i } n0 = 0; for (int m = 0; m < n; m++) { if 
(k > m) { // swap k-th and m-th columns of A[] int kk = permute[m]; permute[m] = permute[k]; permute[k] = kk; for (int i = 0; i < n; i++) { col[i] = a[i + m * n]; } for (int i = 0; i < n; i++) { a[i + m * n] = a[i + k * n]; } for (int i = 0; i < n; i++) { a[i + k * n] = col[i]; } U c = cc[m]; cc[m] = cc[k]; cc[k] = c; } // if (k > m) int nm = n - m; T gamma; HouseholderVector(nm, &a[m + m * n], v.addrCoefs(), &gamma); HouseholderReflection(nm, &a[m + m * n], n, v.addrCoefs(), w.addrCoefs(), gamma); for (int i = (m + 1); i < n; i++) { a[i + m * n] = Tzero; // = v[i] : to keep Householder matrix } for (int i = (m + 1); i < n; i++) { cc[i] = blas_l2norm2((n - m - 1), &a[m + 1 + i * n], 1); } U tt(0.0); for (int i = (m + 1); i < n; i++) { if (cc[i] > tt) { tt = cc[i]; k = i; // find the first entry that attains the maximum value } } // loop : i if (tt == Uzero) { n0 = n - (m + 1); break; } } // loop : m return (n - n0); } template int hqr_pivot(const int n, double *a, int *permute); template int hqr_pivot, double>(const int n, complex *a, int *permute); template int hqr_pivot(const int n, quadruple *a, int *permute); template int hqr_pivot, quadruple>(const int n, complex *a, int *permute); template int hqr_pivot(const int n, float *a, int *permute); template int hqr_pivot, float>(const int n, complex *a, int *permute); // template bool ComputeDimKernel_(int *n0, bool *flag_unsym_permute, const T *a_, const int n, const bool sym_flag, const int dim_augkern, const U eps_machine, // for perturbation const double eps_piv, const bool verbose, FILE *fp) { const W zero(0.0); const Y Yzero(0.0); const Y Yone(1.0); const U Uzero(0.0); Y Yeps_machine; Yeps_machine = conv_prec(eps_machine); // dimension of the image of the matrix a is at least one int nn0, n1, n2; int n_dim = n + 1; int *permute = new int[n_dim]; ColumnMatrix aq0(n_dim, n_dim); ColumnMatrix ad0(n, n); ColumnMatrix a1(n_dim, n_dim); ColumnMatrix aq_fact(n_dim, n_dim); ColumnMatrix ad_fact(n, n); VectorArray 
aa_diag(n_dim); // VectorArray diag_scale(n_dim); VectorArray rr(n_dim - 1); int *permute_d = new int[n_dim]; int *permute_q = new int[n_dim]; int *permute_right = new int[n_dim]; int *permute_left = new int[n_dim]; bool flag_tmp; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { a1(i, j) = conv_prec(a_[i + j * n]); } } if (sym_flag) { flag_tmp = false; for (int i = 0; i < n; i++) { permute_left[i] = permute_right[i] = i; } } else { double pivot = 1.0; double fop; double epspiv = todouble(eps_machine); ldu_full_permute(&nn0, 0, n, a1.addrCoefs(), n_dim, &pivot, permute_right, permute_left, epspiv, &fop); flag_tmp = false; for (int i = 0; i < n; i++) { if (permute_left[i] != permute_right[i]) { flag_tmp = true; break; } } } diss_printf(verbose, fp, "%s %d : %s pivots\n", __FILE__, __LINE__, flag_tmp ? "full" : "symmetric "); for (int i = 0; i < n; i++) { diss_printf(verbose, fp, "%d : %d %d %s\n", i, permute_left[i], permute_right[i], tostring(a1(i, i)).c_str()); // tostring(tolower(diag_scale[i])).c_str()); } for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { a1(i, j) = conv_prec(a_[permute_left[i] + permute_right[j] * n]); } } diss_printf(verbose, fp, "%s %d : permuted Schur complement %d\n", __FILE__, __LINE__, n); for (int i = 0; i < n; i++) { diss_printf(verbose, fp, "%d ", i); for (int j = 0; j < n; j++) { diss_printf(verbose, fp, "%s ", tostring(conv_prec(a1(i, j))).c_str()); } diss_printf(verbose, fp, "\n"); } for (int i = 0; i < n; i++) { W tmp = zero; for (int j = 0; j < n; j++) { tmp += a1(i, j) + (random_bool() ? Yeps_machine : Yzero); } a1(i, n) = tmp; a1(n, n) += tmp + (random_bool() ? Yeps_machine : Yzero); } for (int j = 0; j < n; j++) { W tmp = zero; for (int i = 0; i < n; i++) { tmp += a1(i, j) + (random_bool() ? 
Yeps_machine : Yzero); } a1(n, j) = tmp; } // column and row of inflated matrix are scaled Y diag_scalen = sqrt(Yone / blas_abs(a1(n, n))); for (int i = 0; i < n; i++) { a1(i, n) *= diag_scalen; a1(n, i) *= diag_scalen; } a1(n, n) *= (diag_scalen * diag_scalen); diss_printf(verbose, fp, "%s %d : the original Schur complement %d\n", __FILE__, __LINE__, n); for (int i = 0; i < n; i++) { diss_printf(verbose, fp, "%d ", i); for (int j = 0; j < n; j++) { diss_printf(verbose, fp, "%s ", tostring(conv_prec(a_[i + j * n])).c_str()); } diss_printf(verbose, fp, "\n"); } diss_printf(verbose, fp, "%s %d : the scaled last Schur complement %d\n", __FILE__, __LINE__, n_dim); for (int i = 0; i < n_dim; i++) { diss_printf(verbose, fp, "%d ", i); for (int j = 0; j < n_dim; j++) { diss_printf(verbose, fp, "%s ", tostring(conv_prec(a1(i,j))).c_str()); } diss_printf(verbose, fp, "\n"); } aq0.copy(a1); aq_fact.copy(a1); n1 = hqr_pivot(n_dim, aq0.addrCoefs(), permute); diss_printf(verbose, fp, "%s %d dimension of the image deteced by d_hqr_pivot() is %d\n", __FILE__, __LINE__, n1); diss_printf(verbose, fp, "%s %d QR\n", __FILE__, __LINE__); for (int i = 0; i < n_dim; i++) { diss_printf(verbose, fp, "%d : %d ", i, permute[i]); for (int j = 0; j < n_dim; j++) { diss_printf(verbose, fp, "%s ", tostring(conv_prec(aq0(i, j))).c_str()); } diss_printf(verbose, fp, "\n"); } for (int i = 0; i < n1; i++) { aa_diag[i] = aq0(i, i); } list pos_gap; vector kernel_dim; pos_gap.push_back(dim_augkern); for (int i = 0; i < (n1 - 1); i++) { rr[i] = todouble(blas_abs(aa_diag[i + 1] / aa_diag[i])); } // find largest gap inside of invertible part double aug_diff = 1.0; for (int i = 0; i < (dim_augkern - 1); i++) { if (aug_diff > rr[i]) { aug_diff = rr[i]; } } diss_printf(verbose, fp, "%s %d : aug_diff = %s esp = %.12e\n", __FILE__, __LINE__, tostring(aug_diff).c_str(), eps_piv); for (int i = (dim_augkern - 1); i < (n1 - 1); i++) { if (rr[i] < (aug_diff * eps_piv)) { diss_printf(verbose, fp, "rr[%d] : %s\n", 
i, tostring(rr[i]).c_str()); pos_gap.push_back(i + 1); } } const bool flag_perturb = ((rr[dim_augkern - 1] < aug_diff) && (blas_abs(aa_diag[n1 - 1]) > Yeps_machine * Y(eps_piv))); const U eps_perturb = flag_perturb ? eps_machine : Uzero; diss_printf(verbose, fp, "%s %d : rr[%d] = %s => perturb = %s %s\n", __FILE__, __LINE__, (dim_augkern - 1), tostring(rr[dim_augkern - 1]).c_str(), tostring(eps_perturb).c_str(), tostring(blas_abs(aa_diag[n1 - 1])).c_str() ); int n_dim1; if (flag_perturb) { // condition number of the invertible part is moderate n2 = n1; // and the diagonal entry becomes etreamly small // n1 may be less than n_dim when a diagnal of QR == 0 for (int i = 0; i < (n1 - 1); i++) { if (rr[i] < (todouble(eps_machine) / sqrt(eps_piv))) { n2 = i + 1; break; } } n_dim1 = n2 > dim_augkern ? n2 : n_dim; pos_gap.push_back(n2 - 1); // matrix being inflated } else { n_dim1 = n_dim; pos_gap.push_back(n); // matrix being inflated : n = n_dim - 1 } rr.free(); pos_gap.sort(); pos_gap.unique(); for (list::const_iterator it = pos_gap.begin(); it != pos_gap.end(); ++it) { kernel_dim.push_back(n_dim1 - (*it)); } diss_printf(verbose, fp, "%s %d : kernel_dim = %d : ", __FILE__, __LINE__, (int)kernel_dim.size()); for (vector::const_iterator it = kernel_dim.begin(); it != kernel_dim.end(); ++it) { diss_printf(verbose, fp, "%d ", *it); } diss_printf(verbose, fp, "\n"); int flag; flag = VerifyDimKernel(&nn0, permute_q, n_dim1, aq_fact.addrCoefs(), kernel_dim, sym_flag, dim_augkern, eps_perturb, verbose, fp); if (flag == false) { nn0 = 1; // iflated matrix is generated as singular } // nn0 += (n_dim - n2) - 1; // nn0--; // detection of kernel is done for dim_augkern > 0 and at least // if (nn0 > 0) { { const double eps_machine_double = todouble(eps_machine); U *errors_d = new U[nn0 + 1]; list nnn; if (nn0 > 2) { nnn.push_back(nn0 - 2); } nnn.push_back(nn0 - 1); // nnn.push_back(nn0); for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { ad_fact(i, j) = a_[i + j * n]; } } { 
double pivot_ref = 0.0; for (int i = 0; i < n ; i++) { double tmp = blas_abs(a_[i + i * n]); pivot_ref = pivot_ref > tmp ? pivot_ref : tmp; } int nnn0; double fop; if (sym_flag) { full_ldlt_permute(&nnn0, 0, n, ad_fact.addrCoefs(), n, &pivot_ref, permute_d, eps_machine_double, &fop); } else { full_ldu_permute(&nnn0, 0, n, ad_fact.addrCoefs(), n, &pivot_ref, permute_d, eps_machine_double, &fop); } } for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { ad0(i, j) = a_[permute_d[i] + permute_d[j] * n]; } } diss_printf(verbose, fp, "%s %d : verify errors : size = %d : ", __FILE__, __LINE__, nnn.size()); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { diss_printf(verbose, fp, "%d ", (*it)); } diss_printf(verbose, fp, "\n"); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { const int nn = (*it); if (nn > 0) { verify_kernels(nn, n, ad0.addrCoefs(), ad_fact.addrCoefs(), n, eps_machine_double, errors_d, verbose, fp); diss_printf(verbose, fp, "errors of %d kernels\n", nn); for (int i = 0; i < nn; i++) { diss_printf(verbose, fp, "%d : %s\n", i, tostring(errors_d[i]).c_str()); } } } if (flag_tmp) { for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { ad_fact(i, j) = a_[permute_left[i] + permute_right[j] * n]; } } { double pivot_ref = 0.0; for (int i = 0; i < n ; i++) { double tmp = blas_abs(a_[i + i * n]); pivot_ref = pivot_ref > tmp ? 
pivot_ref : tmp; } int nnn0; double fop; if (sym_flag) { full_ldlt_permute(&nnn0, 0, n, ad_fact.addrCoefs(), n, &pivot_ref, permute_d, eps_machine_double, &fop); } else { full_ldu_permute(&nnn0, 0, n, ad_fact.addrCoefs(), n, &pivot_ref, permute_d, eps_machine_double, &fop); } } for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { ad0(i, j) = a_[permute_d[permute_left[i]] + permute_d[permute_right[j]] * n]; } } diss_printf(verbose, fp, "%s %d : verify errors : size = %d : ", __FILE__, __LINE__, nnn.size()); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { diss_printf(verbose, fp, "%d ", (*it)); } diss_printf(verbose, fp, "\n"); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { const int nn = (*it); if (nn > 0) { verify_kernels(nn, n, ad0.addrCoefs(), ad_fact.addrCoefs(), n, eps_machine_double, errors_d, verbose, fp); diss_printf(verbose, fp, "errors of %d kernels\n", nn); for (int i = 0; i < nn; i++) { diss_printf(verbose, fp, "%d : %s\n", i, tostring(errors_d[i]).c_str()); } } } } // if (flag_tmp) // nnn.clear(); Y *errors_q = new Y[nn0 + 1]; for (int j = 0; j < n_dim; j++) { for (int i = 0; i < n_dim; i++) { aq0(i, j) = a1(permute_q[i], permute_q[j]); } } for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { const int nn = (*it) + 1; verify_kernels(nn, n_dim, aq0.addrCoefs(), aq_fact.addrCoefs(), n_dim, eps_machine_double, errors_q, verbose, fp); diss_printf(verbose, fp, "errors of %d kernels\n", nn); for (int i = 0; i < nn; i++) { diss_printf(verbose, fp, "%d : %s\n", i, tostring(errors_q[i]).c_str()); } } for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { aq_fact(i, j) = conv_prec(a_[i + j * n]); } } { double pivot_ref = 0.0; for (int i = 0; i < n ; i++) { double tmp = blas_abs(a_[i + i * n]); pivot_ref = pivot_ref > tmp ? 
pivot_ref : tmp; } int nnn0; double fop; if (sym_flag) { full_ldlt_permute(&nnn0, 0, n, aq_fact.addrCoefs(), n_dim, &pivot_ref, permute_q, eps_machine_double, &fop); } else { full_ldu_permute(&nnn0, 0, n, aq_fact.addrCoefs(), n_dim, &pivot_ref, permute_q, eps_machine_double, &fop); } } for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { aq0(i, j) = conv_prec(a_[permute_q[i] + permute_q[j] * n]); } } diss_printf(verbose, fp, "%s %d : verify errors : size = %d : ", __FILE__, __LINE__, nnn.size()); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { diss_printf(verbose, fp, "%d ", (*it)); } diss_printf(verbose, fp, "\n"); for (list::const_iterator it = nnn.begin(); it != nnn.end(); ++it) { const int nn = (*it); if (nn > 0) { verify_kernels(nn, n, aq0.addrCoefs(), aq_fact.addrCoefs(), n_dim, eps_machine_double, errors_q, verbose, fp); diss_printf(verbose, fp, "errors of %d kernels\n", nn); for (int i = 0; i < nn; i++) { diss_printf(verbose, fp, "%d : %s\n", i, tostring(errors_q[i]).c_str()); } } } nnn.clear(); delete [] errors_q; delete [] errors_d; } // if (nn0 > 0) delete [] permute; delete [] permute_d; delete [] permute_q; delete [] permute_left; delete [] permute_right; *n0 = nn0 + (n_dim - n_dim1) - 1; diss_printf(verbose, fp, "%s %d : dimension of the kernel is %d\n", __FILE__, __LINE__, *n0); *flag_unsym_permute = flag_tmp; return flag; } template bool ComputeDimKernel_(int *n0, bool *flag_unsym_permute, const double *a_, const int n, const bool sym_flag, const int dim_augkern, const double eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool ComputeDimKernel_, double, complex, quadruple>(int *n0, bool *flag_unsym_permute, const complex *a_, const int n, const bool sym_flag, const int dim_augkern, const double eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool ComputeDimKernel_(int *n0, bool *flag_unsym_permute, const quadruple *a_, const int n, const bool sym_flag, const int 
dim_augkern, const quadruple eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool ComputeDimKernel_, quadruple, complex, octruple>(int *n0, bool *flag_unsym_permute, const complex *a_, const int n, const bool sym_flag, const int dim_augkern, const quadruple eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool ComputeDimKernel(int *n0, bool *flag_unsym_permute, const T *a_, const int n, const bool sym_flag, const int dim_augkern, const U eps_machine, // for perturbation const double eps_piv, const bool verbose, FILE *fp) { fprintf(stderr, "%s %d : specialized template not implemented\n", __FILE__, __LINE__); } template<> bool ComputeDimKernel(int *n0, bool *flag_unsym_permute, const double *a_, const int n, const bool sym_flag, const int dim_augkern, const double eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return ComputeDimKernel_(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } template<> bool ComputeDimKernel, double>(int *n0, bool *flag_unsym_permute, const complex *a_, const int n, const bool sym_flag, const int dim_augkern, const double eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return ComputeDimKernel_, double, complex, quadruple>(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } template<> bool ComputeDimKernel(int *n0, bool *flag_unsym_permute, const quadruple *a_, const int n, const bool sym_flag, const int dim_augkern, const quadruple eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return ComputeDimKernel_(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } template<> bool ComputeDimKernel, quadruple>(int *n0, bool *flag_unsym_permute, const complex *a_, const int n, const bool sym_flag, const int dim_augkern, const quadruple eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return 
ComputeDimKernel_, quadruple, complex, octruple>(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } template<> bool ComputeDimKernel(int *n0, bool *flag_unsym_permute, const float *a_, const int n, const bool sym_flag, const int dim_augkern, const float eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return ComputeDimKernel_(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } template<> bool ComputeDimKernel, float>(int *n0, bool *flag_unsym_permute, const complex *a_, const int n, const bool sym_flag, const int dim_augkern, const float eps_machine, const double eps_piv, const bool verbose, FILE *fp) { return ComputeDimKernel_, float, complex, double>(n0, flag_unsym_permute, a_, n, sym_flag, dim_augkern, eps_machine, eps_piv, verbose, fp); } // template bool VerifyDimKernel(int *nn0_, int *permute_q, int n_dim, W* a_fact, vector &kernel_dim, const bool sym_flag, const int dim_augkern, const U eps_machine, const bool verbose, FILE *fp) { U Uzero(0.0); U Uone(1.0); int nn, nn0; int k; double pivot_ref, pivot_ref_q; // int *permute_q = new int[n_dim]; ColumnMatrix a2(n_dim, n_dim); U *errors = new U[6]; blas_copy((n_dim * n_dim), a_fact, 1, a2.addrCoefs(), 1); // for (int i = 0; i < (n_dim * n_dim); i++) { // a2[i] = a_fact[i]; // } pivot_ref = 0.0; k = 0; for (int i = 0; i < n_dim; i++) { double dtmp = blas_abs(a_fact[k]); // accuracy? : 14 Jul.2015 Atsushi k += (n_dim + 1); pivot_ref = (pivot_ref < dtmp ? 
dtmp : pivot_ref); } pivot_ref_q = pivot_ref; diss_printf(verbose, fp, "pivot_ref_q = %.12e\n", pivot_ref_q); const int nn1 = 1; // matrix has at least one dimensional kernel // qfull_sym_gauss_part is only used to get permute_q[] if (sym_flag) { double fop; const double eps0 = todouble(eps_machine); full_ldlt_permute(&nn, nn1, n_dim, a_fact, n_dim, &pivot_ref_q, permute_q, eps0, &fop); } else { double fop; const double eps0 = todouble(eps_machine); full_ldu_permute(&nn, nn1, n_dim, a_fact, n_dim, &pivot_ref_q, permute_q, eps0, &fop); } // question: eps_machine is ok? : 06 Jan.2013 // => jump between diagonal is within double precision : 27 Jan.2013 diss_printf(verbose, fp, "%s %d : permutation : ", __FILE__, __LINE__); for (int i = 0; i < n_dim; i++) { diss_printf(verbose, fp, "%d ", permute_q[i]); } diss_printf(verbose, fp, "\n"); vector dims(n_dim); for (int i = 0; i < n_dim; i++) { dims[i] = i + 1; } vector errors_image(dims.size()); U error_max, error_min, Ueps; error_min = Uone / eps_machine; error_max = Uzero; Ueps = machine_epsilon(); for (int i = 0; i < dims.size(); i++) { errors_image[i] = check_matrixerr(n_dim, n_dim, a2.addrCoefs(), dim_augkern, dims[i], permute_q, eps_machine, sym_flag); } for (int i = 0; i < dims.size(); i++) { diss_printf(verbose, fp, "%d : %s\n", dims[i], tostring(errors_image[i]).c_str()); } U err_image = errors_image[0]; for (int i = 1; i < dims.size(); i++) { if (dims[i] > dim_augkern) { break; } err_image = err_image < errors_image[i] ? 
errors_image[i] : err_image; } int count_err_image_updated = 0; int dim_image = dim_augkern; diss_printf(verbose, fp, "err_image = %s ", tostring(err_image).c_str()); U eps_param0 = sqrt(err_image * errors_image.back()); bool flag = false; bool flag0 = false; diss_printf(verbose, fp, "eps_param0 = %s\n", tostring(eps_param0).c_str()); for (vector::const_iterator it = kernel_dim.begin(); it != kernel_dim.end(); ++it) { nn0 = *it; flag0 = check_kern(nn0, n_dim, n_dim, a2.addrCoefs(), permute_q, dim_augkern, eps_machine, eps_param0, sym_flag, errors, verbose, fp); diss_printf(verbose, fp, "%d : %s / %s / %s\n", nn0, tostring(errors[0]).c_str(), tostring(errors[1]).c_str(), tostring(errors[2]).c_str()); // if (nn0 > 1) { if (!flag0 && (nn0 > 1)) { diss_printf(verbose, fp, "first trial by error from image %d %s -> %s fails\n", n_dim, tostring(errors_image.back()).c_str(), tostring(eps_param0).c_str()); flag0 = check_kern((nn0 - 1), n_dim, n_dim, a2.addrCoefs(), permute_q, dim_augkern, eps_machine, eps_param0, sym_flag, &errors[3], verbose, fp); diss_printf(verbose, fp, "%d : %s / %s / %s\n", (nn0 - 1), tostring(errors[3]).c_str(), tostring(errors[4]).c_str(), tostring(errors[5]).c_str()); diss_printf(verbose, fp, "diff = %s\n", tostring(errors[0] - errors[4]).c_str()); // criteria of equality U xtmp = ((fabs(errors[0]) > fabs(errors[4])) ? 
fabs(errors[0]) : fabs(errors[4])); xtmp = sqrt(xtmp * eps_machine); U eps_param1 = (errors[3] + errors[4]) / U(2); eps_param1 = sqrt(eps_param1 * err_image); eps_param0 = sqrt(err_image * errors_image.back()); diss_printf(verbose, fp, "%s + %s = %s / %s\n", tostring(errors[3]).c_str(), tostring(errors[4]).c_str(), tostring(eps_param1).c_str(), tostring(eps_param0).c_str()); if (errors[3] < eps_param0) { diss_printf(verbose, fp, "not satisfies the condition %s < %s\n", tostring(eps_param0).c_str(), tostring(errors[3]).c_str()); int itmp = n_dim - nn0; U err_image_tmp; err_image_tmp = check_matrixerr(n_dim, n_dim, a2.addrCoefs(), dim_augkern, itmp, permute_q, eps_machine, sym_flag); diss_printf(verbose, fp, "part of %d (%d - %d) is regular, err_image=%s/%s\n", itmp, n_dim, nn0, tostring(err_image_tmp).c_str(), tostring(err_image).c_str()); if (err_image < err_image_tmp) { dim_image = itmp; count_err_image_updated++; err_image = err_image_tmp; } continue; } if ((errors[0] > eps_param1) && (errors[1] < eps_param1)) { if ((fabs(errors[0] - errors[4]) < xtmp)) { diss_printf(verbose, fp, "found with %d dim. %d updated\n", dim_image, count_err_image_updated); } else{ diss_printf(verbose, fp, "found with %d dim. 
%d updated, needs be refactorized\n", dim_image, count_err_image_updated); } flag = true; break; } } // if (nn0 > 0) else { if (flag0) { diss_printf(verbose, fp, "found\n"); flag = true; break; } } } // loop : it if (!flag) { diss_printf(verbose, fp, "kernel detection routine does not work : "); if (kernel_dim.size() == 1) { diss_printf(verbose, fp, "detection by Householder = %d\n", nn0); nn0 = 0; // n0 = kernel_dim.front(); } else { diss_printf(verbose, fp, "unclear : "); nn0 = kernel_dim.front(); for (vector::const_iterator it = kernel_dim.begin(); it != kernel_dim.end(); it++) { diss_printf(verbose, fp, "%d ", (*it)); if (((*it) + dim_augkern) != n_dim) { nn0 = (*it); diss_printf(verbose, fp, " / "); } else { diss_printf(verbose, fp, "+ %d = %d /", dim_augkern, n_dim); } } diss_printf(verbose, fp, "\n"); diss_printf(verbose, fp, "detection by Householder = %d\n", nn0); } } else { if ((kernel_dim.size() == 1) && (kernel_dim.front() != nn0)) { if (nn0 != 1) { diss_printf(verbose, fp, "strange_matrix\n"); nn0 = 0; flag = false; } } } // delete [] a2; delete [] errors; // delete [] permute_q; *nn0_ = nn0; return flag; } template bool VerifyDimKernel(int *nn0_, int *permute_q, int n_dim, quadruple* a_fact, vector& kernel_dim, const bool sym_flag, const int dim_augkern, const double eps_machine, const bool verbose, FILE *fp); template bool VerifyDimKernel, double, complex, quadruple>(int *nn0_, int *permute_q, int n_dim, complex* a_fact, vector& kernel_dim, const bool sym_flag, const int dim_augkern, const double eps_machine, const bool verbose, FILE *fp); #ifndef NO_OCTRUPLE template bool VerifyDimKernel(int *nn0_, int *permute_q, int n_dim, octruple* a_fact, vector& kernel_dim, const bool sym_flag, const int dim_augkern, const quadruple eps_machine, const bool verbose, FILE *fp); template bool VerifyDimKernel, quadruple, complex, octruple>(int *nn0_, int *permute_q, int n_dim, complex* a_fact, vector& kernel_dim, const bool sym_flag, const int dim_augkern, const 
quadruple eps_machine, const bool verbose, FILE *fp); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_KernDetect.hpp000664 000000 000000 00000012431 14037356732 025314 0ustar00rootroot000000 000000 /*! \file C_KernDetect.hpp \brief Kernel detection algorithm : symm <= DOI: 10.1002/nme.4729 / unsymm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. 
The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _DRIVER_C_KERNDETECT #define _DRIVER_C_KERNDETECT #include "Compiler/arithmetic.hpp" #include "Compiler/blas.hpp" #include "Driver/C_BlasRoutines.hpp" template void copy_matrix_permute_(const int lda, int n, U *b, T *a, int *permute); template void copy_matrix_permute_complex_(const int lda, int n, complex *b, complex *a, int *permute); template bool check_kern(const int n0, const int lda, const int n, T *a_ini, int *permute, const int dim_augkern, const U &eps, const U &eps_param, const bool flag_sym, U *errors, const bool verbose, FILE *fp); //template template bool check_kern_(const int n0, const int lda, const int n, T *a_ini, int *permute, const int dim_augkern, const U &eps, const U &eps_param, const bool flag_sym, U *errors, const bool verbose, FILE *fp); template U check_matrixerr(const int lda, const int n, T *a, const int dim_augkern, const int k, int *permute, const U &eps, const bool flag_sym); template U check_matrixerr_(const int lda, const int n, T *a, const int dim_augkern, const int k, int *permute, const U &eps, const bool flag_sym); template void 
verify_kernels(const int n0, const int n, const T *a_ini, const int lda, const bool isSym, const double eps, U *errors); template int hqr_pivot(const int n, T *a, int *permute); template void HouseholderVector(int n, T *x, T *v, T *gamma); template void HouseholderVector_complex(int n, T *x, T *v, T *gamma); template void HouseholderReflection(int n, T *a, int lda, T *v, T *w, const T &gamma); template bool ComputeDimKernel(int *n0, bool *flag_unsym_permute, const T *a_, const int n, const bool sym_flag, const int dim_augkern, const U eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool ComputeDimKernel_(int *n0, bool *flag_unsym_permute, const T *a_, const int n, const bool sym_flag, const int dim_augkern, const U eps_machine, const double eps_piv, const bool verbose, FILE *fp); template bool VerifyDimKernel(int *nn0_, int *permute_q, int n_dim, W* a_fact, vector &kernel_dim, const bool sym_flag, const int dim_augkern, const U eps_machine, const bool verbose, FILE *fp); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_threads_tasks.cpp000664 000000 000000 00000332305 14037356732 026123 0ustar00rootroot000000 000000 /*! \file C_threads_tasks.hpp \brief tasks executed asynchronously with threads \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Apr. 22th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection.  If not, see <http://www.gnu.org/licenses/>.
// #include "Compiler/OptionLibrary.hpp" #include #include #include // for isnan() #include "Compiler/blas.hpp" #include "Driver/C_threads_tasks.hpp" #include "Driver/C_KernDetect.hpp" #include "Algebra/VectorArray.hpp" #include "Compiler/DissectionIO.hpp" // #define DEBUG_SVD #ifdef DEBUG_SVD #include "mkl_lapack.h" #endif void C_dummy(void *arg_) { }; //#define DEBUG_ERASENULLPARENTS int EraseNullParents(vector &queue) { int count = 0; for (vector::const_iterator it = queue.begin(); it != queue.end(); ++it) { for (list::iterator jt = (*it)->parents->begin(); jt != (*it)->parents->end(); ) { if (*(*jt)->ops_complexity == 0L) { count++; jt = (*it)->parents->erase(jt); } else { ++jt; } } } return count; } int EraseNullParents(C_task * task) { int count = 0; for (list::iterator jt = task->parents->begin(); jt != task->parents->end(); ) { if (*(*jt)->ops_complexity == 0L) { count++; jt = task->parents->erase(jt); } else { ++jt; } } return count; } template void C_SparseSymbFact(void *arg_) { C_SparseSymbFact_arg *arg = (C_SparseSymbFact_arg *)arg_; TridiagBlockMatrix** tridiag = arg->tridiag; const int colors = arg->colors; int *color_mask = arg->color_mask; const int nrow = arg->nrow; const int nnz0 = arg->csr_diag->nnz; int* prow0 = arg->csr_diag->ptRows; int* indcols0 = arg->csr_diag->indCols; int* indvals0 = arg->csr_diag->indVals; // bool verbose = arg->verbose; // FILE *fp = *(arg->fp); for (int i = 0; i < colors; i++) { tridiag[i]->SymbolicFact((i + 1), colors, color_mask, nrow, nnz0, prow0, indcols0, indvals0); // } } template void C_SparseSymbFact(void *arg_); template void C_SparseSymbFact(void *arg_); template void C_SparseSymbFact, double>(void *arg_); template void C_SparseSymbFact, quadruple>(void *arg_); template void C_SparseSymbFact(void *arg_); template void C_SparseSymbFact, float>(void *arg_); // void c_getrealtime_(uint64_t &tmprofiles, const int &m) { elapsed_t *tmprfs = (elapsed_t *)tmprofiles; get_realtime(&tmprfs[m]); } void 
c_fileout_(uint64_t &fp_prt, char *strgs, const int &force_stderr) { if (force_stderr == 1) { fprintf(stderr, "%s\n", strgs); } else { FILE *fp = (FILE *)fp_prt; fprintf(fp, "%s\n", strgs); } } int compare_source_dist_index(const void *_a, const void *_b) { source_dist_index *a = (source_dist_index *)_a; source_dist_index *b = (source_dist_index *)_b; if (a->global_i == b->global_i) { return (int)(a->global_j - b->global_j); } else { return (int)(a->global_i - b->global_i); } } int convert_array2strip(list &strips, const vector& array) { const int size = array.size(); strips.clear(); int in_strip = 0; int begin_dst, width; for (int i = 0; i < size; i++) { if ((in_strip == 0) && (array[i] != (-1))) { in_strip = 1; begin_dst = i; } if (i < (size - 1)) { if ((in_strip == 1) && ((array[i + 1] == (-1)) || (array[i + 1] != (array[i] + 1)))) { in_strip = 0; width = (i + 1) - begin_dst; // constructor strips.push_back(index_strip(begin_dst, array[begin_dst], // begin_src width)); if (array[i + 1] != (array[i] + 1)) { begin_dst = i + 1; } } } } if (in_strip == 1) { width = size - begin_dst; strips.push_back(index_strip(begin_dst, array[begin_dst], // begin_src width)); } return strips.size(); } int combine_two_strips(list &stripsa, list &stripsb, list &stripsc, list &strips0, list &strips1, const int size) { stripsa.clear(); stripsb.clear(); stripsc.clear(); if (strips0.size() == 0 && strips1.size() == 0) { return 0; } if (strips0.size() == 0) { stripsa.clear(); stripsb.clear(); for (list::const_iterator it = strips1.begin(); it != strips1.end(); ++it) { stripsc.push_back(index_strip2((*it).begin_dst, -1, (*it).begin_src, (*it).width)); } return strips1.size(); } if (strips1.size() == 0) { stripsa.clear(); stripsb.clear(); for (list::const_iterator it = strips0.begin(); it != strips0.end(); ++it) { stripsc.push_back(index_strip2((*it).begin_dst, (*it).begin_src, -1, (*it).width)); } return strips0.size(); } vector ary0(size, (-1)); vector ary1(size, (-1)); int in_strip0, 
in_strip; int begin_dst = (-1), width; for (list::const_iterator it = strips0.begin(); it != strips0.end(); ++it) { int i0 = (*it).begin_dst; int i1 = (*it).begin_src; for (int i = 0; i < (*it).width; i++) { ary0[i0++] = i1++; } } for (list::const_iterator it = strips1.begin(); it != strips1.end(); ++it) { int i0 = (*it).begin_dst; int i1 = (*it).begin_src; for (int i = 0; i < (*it).width; i++) { ary1[i0++] = i1++; } } in_strip0 = in_strip = 0; for (int i = 0; i < size; i++) { // save current status on strip in_strip0 = in_strip; if ((in_strip == 0 || in_strip == 2) && (ary0[i] != (-1))) { in_strip += 1; } if ((in_strip == 0 || in_strip == 1) && (ary1[i] != (-1))) { in_strip += 2; } if (in_strip > in_strip0) { // strip0 or strip1 -> union of strips0 and strips1 if (in_strip0 > 0) { width = i - begin_dst; switch (in_strip0) { case 1: stripsa.push_back(index_strip(begin_dst, ary0[begin_dst], width)); break; case 2: stripsb.push_back(index_strip(begin_dst, ary1[begin_dst], width)); break; } } begin_dst = i; } // save current status on strip in_strip0 = in_strip; // if (i < (size - 1)) { // decreasing into strip0 or strip1 or null if ((in_strip == 1 || in_strip == 3) && ((ary0[i + 1] == (-1)) || (ary0[i + 1] != (ary0[i] + 1)))) { in_strip -= 1; } if ((in_strip == 2 || in_strip == 3) && ((ary1[i + 1] == (-1)) || (ary1[i + 1] != (ary1[i] + 1)))) { in_strip -= 2; } if (in_strip0 > in_strip) { // output union of strip0 and strip1, strip0 or strip1 width = (i + 1) - begin_dst; switch (in_strip0) { case 1: stripsa.push_back(index_strip(begin_dst, ary0[begin_dst], width)); break; case 2: stripsb.push_back(index_strip(begin_dst, ary1[begin_dst], width)); break; case 3: stripsc.push_back(index_strip2(begin_dst, ary0[begin_dst], ary1[begin_dst], width)); break; } // strip0 or strip1 if (in_strip > 0) { begin_dst = i + 1; } in_strip0 = in_strip; } // if (in_strip0 > in_strip) { } // if (i < (size - 1)) } // loop : i if (in_strip > 0) { width = size - begin_dst; switch (in_strip0) 
{ case 1: stripsa.push_back(index_strip(begin_dst, ary0[begin_dst], width)); break; case 2: stripsb.push_back(index_strip(begin_dst, ary1[begin_dst], width)); break; case 3: stripsc.push_back(index_strip2(begin_dst, ary0[begin_dst], ary1[begin_dst], width)); break; } } return (stripsa.size() + stripsb.size() + stripsc.size()); } void copy_two_strips(list &strips2, list &strips0, list &strips1) { strips2.clear(); list ::const_iterator mt0 = strips0.begin(); list ::const_iterator mt1 = strips1.begin(); for ( ; ((mt0 != strips0.end()) && (mt1 != strips1.end())); ++mt0, ++mt1) { strips2.push_back(index_strip2((*mt0).begin_dst, (*mt0).begin_src, (*mt1).begin_src, (*mt0).width)); } } void split_two_strips(list &strips0, list &strips1, list &strips2, index_strip strip0, index_strip strip1) { // assumption : union of strip0 and strip1 is not empty // strips0 = strip0 \setminus strip1 // strips1 = strip1 \setminus strip0 // strips01 = strip0 \cap strip1 strips0.clear(); strips1.clear(); strips2.clear(); int begin_dst = 0; int width = 0; int begin_src0 = strip0.begin_src; int begin_src1 = strip1.begin_src; int end_dst0 = strip0.begin_dst + strip0.width; int end_dst1 = strip1.begin_dst + strip1.width; if (strip0.begin_dst < strip1.begin_dst) { width = strip1.begin_dst - strip0.begin_dst; strips0.push_back(index_strip(strip0.begin_dst, strip0.begin_src, width)); begin_src0 += width; } else if (strip0.begin_dst > strip1.begin_dst) { width = strip0.begin_dst - strip1.begin_dst; strips1.push_back(index_strip(strip1.begin_dst, strip1.begin_src, width)); begin_src1 += width; } begin_dst += width; const int end_dst = end_dst0 < end_dst1 ? 
end_dst0 : end_dst1; const int width2 = end_dst - begin_dst; strips2.push_back(index_strip2(begin_dst, begin_src0, begin_src1, width2)); if (end_dst0 < end_dst1) { strips1.push_back(index_strip(end_dst, // 27 Nov.2016 bug found begin_src1 + width2, // begin_src0 + width end_dst1 - end_dst0)); } else if (end_dst0 > end_dst1) { strips0.push_back(index_strip(end_dst, begin_src0 + width2, // begin_src1 + width end_dst0 - end_dst1)); } } void print_strips(const char *name, list &strips, FILE *fp) { fprintf(fp, "%s : ", name); for (list ::const_iterator mt = strips.begin(); mt != strips.end(); ++mt) { fprintf(fp, "[ %d %d %d ] ", (*mt).begin_dst, (*mt).begin_src, (*mt).width); } fprintf(fp, "\n"); } void print_strips(const char *name, list &strips, FILE *fp) { fprintf(fp, "%s : ", name); for (list ::const_iterator mt = strips.begin(); mt != strips.end(); ++mt) { fprintf(fp, "[ %d %d %d %d ] ", (*mt).begin_dst, (*mt).begin_src0, (*mt).begin_src1, (*mt).width); } fprintf(fp, "\n"); } bool C_task_seq_complexity_smaller(C_task_seq *a, C_task_seq *b) { if (a->ops_complexity < b->ops_complexity) { return true; } else { return false; } } bool C_task_seq_complexity_greater(C_task_seq *a, C_task_seq *b) { if (a->ops_complexity > b->ops_complexity) { return true; } else { return false; } } bool C_task_seq_beginidx_smaller(C_task_seq *a, C_task_seq *b) { if (a->begin < b->begin) { return true; } else { return false; } } void assign_tasks_statically(list *queue_static, list &queue_dynamic, long *nops_sum, list &task_seq_tmp, const int task_id, const char *task_name_, const int level, const int phase, const long nops_block_total, int num_threads) { const long nops_per_thread = nops_block_total / (long)num_threads; #ifdef DEBUG_PREPARE_THREAD cout << "nops_bock_total = " << nops_block_total << " nops_per_thread = " << nops_per_thread << endl; #endif list::const_iterator it; int task_begin; vector > queue_divided(num_threads); vector excluding_list(num_threads, 0); long *nops_sum0 = 
new long[num_threads]; // for debugging #ifdef DEBUG_PREPARE_THREAD cout << "excluded indices "; #endif int excluded = 0; for (int i = 0; i < num_threads; i++) { nops_sum0[i] = nops_sum[i]; // for debugging if (nops_sum[i] >= nops_per_thread) { excluding_list[i] = 1; excluded++; #ifdef DEBUG_PREPARE_THREAD cout << i << " "; #endif } } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif if (excluded == num_threads) { for (int i = 0; i < num_threads; i++) { excluding_list[i] = 0; } } int ll = (-1); while(excluding_list[++ll] == 1) { #ifdef DEBUG_PREPARE_THREAD if (nops_sum[ll] > 0) { cout << "+ll = " << ll << " start = " << nops_sum[ll] << " " ; } #endif } it = task_seq_tmp.begin(); task_begin = (*it)->begin; int flag = 0; for ( ; it != task_seq_tmp.end(); ++it) { long nops = 0L; for (int i = (*it)->begin; i < (*it)->end; i++) { flag = 0; vector& jt = *((*it)->queue); const long ltmp = *(jt[i]->ops_complexity); int ii = 0; // initializing if ((nops_sum[ll] < nops_per_thread) && ((nops_sum[ll] + (ltmp / 2UL)) >= nops_per_thread)) { if (ll == (num_threads - 1)) { // include blocks into the last thread nops_sum[ll] += ltmp; nops += ltmp; #if 1 flag = (-2); ii = i; #else flag = 0; #endif } else { ii = i; flag = 1; } } else { nops_sum[ll] += ltmp; nops += ltmp; if (nops_sum[ll] > nops_per_thread) { if (ll == (num_threads - 1)) { // include blocks into the last thread #if 1 flag = (-2); ii = i + 1; #else flag = 0; #endif } else { ii = i + 1; flag = (-1); } } } // 7 May 2014 : to accpet atomic_size > 1 if (ii < (*it)->queue->size()) { ii -= (*(*it)->queue)[ii]->atomic_id; } if (flag != 0) { if (nops > 0L) { // to avoid zero addition when nops_sum[ll] exceed // the nops_per_thread at the beginning string task_name = (task_name_ + to_string(level) + " : " + (*it)->task_name + " :: " + to_string(task_begin) + " : " + to_string(ii)); // char *task_name_cstr = new char[task_name.str().size() + 1]; // strcpy(task_name_cstr, task_name.str().c_str()); // cout << "-ll = " << ll << " 
nops_sum = " << nops_sum[ll] << " " // << task_name_cstr << endl; C_task_seq* tmp = new C_task_seq(task_id, task_name, (-1), // mutex_id TASK_SINGLE, 1, level, phase, (*it)->queue, task_begin, ii, nops); queue_divided[ll].push_back(tmp); } #if 1 if (flag == (-2)) { task_begin = ii; break; // loop : i } #endif while(excluding_list[++ll] == 1) { // cout << "+ll = " << ll << " start = " << nops_sum[ll] << " " ; } if (ii < (*it)->end) { // flag == 1 task_begin = ii; } else { list::const_iterator kt = it; ++kt; if (kt != task_seq_tmp.end()) { // for the next queue task_begin = (*kt)->begin; } } if (flag == 1) { nops = ltmp; nops_sum[ll] += ltmp; } else { nops = 0L; } } } // loop : i #if 1 if (flag == (-2)) { // special case moving static to dynamic break; // loop : it } #endif if (flag >= 0) { string task_name = (task_name_ + to_string(level) + " : " + (*it)->task_name + " :: " + to_string(task_begin) + " : " + to_string((*it)->end)); C_task_seq* tmp = new C_task_seq(task_id, task_name, // task_name_cstr, (-1), // mutex_id TASK_SINGLE, 1, level, phase, (*it)->queue, task_begin, (*it)->end, nops); queue_divided[ll].push_back(tmp); list::const_iterator jt = it; ++jt; if (jt != task_seq_tmp.end()) { // for the next queue task_begin = (*jt)->begin; } } } // loop : it #if 1 if (flag == (-2)) { // task_begin -= (*(*it)->queue)[task_begin]->atomic_id; long nops = 0L; for (int i = task_begin; i < (*it)->end; i++) { nops += *((*(*it)->queue)[i]->ops_complexity); } string task_name = (task_name_ + to_string(level) + " : " + (*it)->task_name + " :: " + to_string(task_begin) + " : " + to_string((*it)->end)); C_task_seq* tmp = new C_task_seq(task_id, task_name, 0, // mutex_id TASK_PARALLEL, num_threads, level, phase, (*it)->queue, task_begin, (*it)->end, nops); queue_dynamic.push_back(tmp); ++it; for ( ; it != task_seq_tmp.end(); ++it) { for (int i = (*it)->begin; i < (*it)->end; i++) { nops += *((*(*it)->queue)[i]->ops_complexity); } string task_name = (task_name_ + 
to_string(level) + " : " + (*it)->task_name + " :: " + to_string((*it)->begin) + " : " + to_string((*it)->end)); C_task_seq* tmp = new C_task_seq(task_id, task_name, 0, // mutex_id TASK_PARALLEL, num_threads, level, phase, (*it)->queue, (*it)->begin, (*it)->end, nops); queue_dynamic.push_back(tmp); } // loop : it } // flag == (-2) #endif #ifdef DEBUG_PREPARE_THREAD cout << "-- assigned task -- per thread = " << nops_per_thread << " -- "; for (list::const_iterator it = task_seq_tmp.begin(); it != task_seq_tmp.end(); ++it) { cout << (*it)->task_name << " ; " << (*it)->begin << " : " << (*it)->end << " / " ; } cout << "-- " << endl; #endif for (ll = 0; ll < num_threads; ll++) { #ifdef DEBUG_PREPARE_THREAD cout << "thread_id " << ll << " nops_sum0 = " << nops_sum0[ll] << " : nops_sum = " << nops_sum[ll] << " : "; #endif queue_divided[ll].sort(C_task_seq_beginidx_smaller); for (list::const_iterator it = queue_divided[ll].begin(); it != queue_divided[ll].end(); ++it) { #ifdef DEBUG_PREPARE_THREAD cout << (*it)->task_name << " "; #endif queue_static[ll % num_threads].push_back(*it); } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif } // loop : ll for (list::iterator it = task_seq_tmp.begin(); it != task_seq_tmp.end(); ++it) { delete (*it); // delete [] (*it)->task_name; (*it) = NULL; } task_seq_tmp.clear(); // unbalance in assigned tasks is to be reduced by greedy execution but // unbalance from the beginning is passed to the next block for (int i = 0; i < num_threads; i++) { if (excluding_list[i]) { nops_sum[i] -= nops_per_thread; } else { nops_sum[i] = 0L; } } delete [] nops_sum0; } template int dimKernDense(vector &singIdx, const int n, const int aug_dim, const U eps_machine, const double eps_piv, SquareBlockMatrix &D, T *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp) { // output == (-1) : refactorized / >= 0 : dim of the kernel // check dim of factorized matrix int sing_max = singIdx.size(); int aug_max = sing_max 
+ aug_dim; // map aug_ind; list aug_ind; vector aug_ind0, aug_ind1; aug_ind0.resize(aug_max); aug_ind1.resize(aug_max); VectorArray a_diag(aug_max); const T zero(0.0); const T one(1.0); const T none(-1.0); diss_printf(verbose, fp, "%s %d : Schur complement form %d x %d : block size = %d\n", __FILE__, __LINE__, n, aug_max, D.block_size()); RectBlockMatrix b(n, aug_max, D.block_size()); RectBlockMatrix c(n, aug_max, D.block_size()); ColumnMatrix s; s.init(aug_max, aug_max); int *permute_d = new int[sing_max]; vector &permute = D.getPermute(); for (int k = 0; k < sing_max; k++) { aug_ind.push_back(singIdx[k]); } int k0 = (n - 1); int k1 = sing_max; while ((k1 < aug_max) && k0 >= 0) { bool flag = true; for (int i = 0; i < sing_max; i++) { if (singIdx[i] == k0) { flag = false; break; } } if (flag) { aug_ind.push_back(k0); k1++; } k0--; } // while aug_ind.sort(); { // for scope of int i int i = 0; for (list::const_iterator it = aug_ind.begin(); it != aug_ind.end(); ++it, i++) { aug_ind0[i] = (*it); aug_ind1[i] = permute[(*it)]; } } aug_ind.clear(); diss_printf(verbose, fp, "%s %d : suspicious dimension of pivots %d + %d\n", __FILE__, __LINE__, sing_max, aug_dim); for (int i = 0; i < aug_max; i++) { diss_printf(verbose, fp, "%d : %d %d %s\n", i, aug_ind0[i], aug_ind1[i], tostring(D.diag(aug_ind0[i])).c_str()); } // save diagonal entries coressponding to nullification for (int i = 0; i < aug_max; i++) { a_diag[i] = D.diag(aug_ind0[i]); // a[aug_ind0[j] * (n + 1)]; } if (isSym) { for (int j = 0; j < aug_max; j++) { D.diag(aug_ind0[j]) = zero; // nullifying diagonals for (int i = 0; i < n; i++) { // access upper for symmetric matrix const int ii = i > aug_ind1[j] ? aug_ind1[j] : i; const int jj = i > aug_ind1[j] ? 
i : aug_ind1[j]; b(i, j) = a[ii + jj * n]; } // loop : i } // loop : j } else { for (int j = 0; j < aug_max; j++) { D.diag(aug_ind0[j]) = zero; for (int i = 0; i < n; i++) { const int jj = aug_ind1[j]; b(i, j) = a[i + jj * n]; c(i, j) = a[jj + i * n]; } } } for (int j = 0; j < aug_max; j++) { for (int i = 0; i < aug_max; i++) { s(i, j) = b(aug_ind1[i], j); } } DTRSMScale_arg *tmp_arg = new DTRSMScale_arg(isSym, &D, &b, &c, n, // nrow aug_max, // ncol (-1), // kblock 0, // lblock (-1), // mblock &aug_ind0, //singval, (!isFullPermute), verbose, &fp, -1); //dummy C_DTRSMScale_solve(tmp_arg); delete tmp_arg; for (int i = 0; i < b.num_blocks_c(); i++) { for (int j = 0; j < b.num_blocks_c(); j++) { for (int k = 0; k < b.num_blocks_r(); k++) { int nrow = c.nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, c.ncolBlock(i), b.ncolBlock(j), nrow, none, // alpha = -1 c.addrCoefBlock(k, i), nrow, b.addrCoefBlock(k, j), nrow, one, // beta = 1 s.addrCoefs() + (c.IndexBlock_r(i) + b.IndexBlock_c(j) * aug_max), aug_max); } } // loop : i } // loop : j // symmetrize if (isSym) { for (int j = 0; j < aug_max; j++) { for (int i = 0; i < j; i++) { s[j + i * aug_max] = s[i + j * aug_max]; } } } ColumnMatrix ss(aug_max, aug_max); ss.copy(s); bool flagShrinkSchur = false; int nn0 = sing_max; int *permute1 = new int[aug_max]; #if 0 { double pivot = 1.0; double fop; int n0; n0 = 0; if (isSym) { full_ldlt_permute(&nn0, n0, aug_max, ss.addrCoefs(), aug_max, &pivot, permute1, eps_piv, &fop); } // if (arg->isSym) else { full_ldu_permute(&nn0, n0, aug_max, ss.addrCoefs(), aug_max, &pivot, permute1, eps_piv, &fop); } diss_printf(verbose, fp, "%s %d : %d -> %d ", __FILE__, __LINE__, sing_max, nn0); if (nn0 < sing_max) { flagShrinkSchur = true; diss_printf(verbose, fp, "schrink the Schur complement by recursive computing\n"); for (int i = 0; i < aug_max; i++) { diss_printf(verbose, fp, "%d ", permute1[i]); } diss_printf(verbose, fp, "\n"); } else { diss_printf(verbose, fp, "\n"); } } #endif int 
aug_max1 = aug_max; if (flagShrinkSchur) { aug_max1 = aug_dim + nn0; int aug_max0 = aug_max - aug_max1; ColumnMatrix sc(aug_max1, aug_max1); for (int j = 0; j < aug_max1; j++) { const int jj = permute1[aug_max - aug_max1 + j]; for (int i = 0; i < aug_max1; i++) { const int ii = permute1[aug_max - aug_max1 + i]; sc(i, j) = s(ii, jj); } } ColumnMatrix upper(aug_max0, aug_max1); ColumnMatrix lower(aug_max0, aug_max1); if (!isSym) { for (int j = 0; j < aug_max1; j++) { const int jj = permute1[aug_max - aug_max1 + j]; for (int i = 0; i < aug_max0; i++) { const int ii = permute1[i]; upper(i, j) = s(ii, jj); lower(i, j) = s(jj, ii); } } blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, aug_max0, aug_max1, one, ss.addrCoefs(), aug_max, upper.addrCoefs(), aug_max0); for (int i = 0; i < aug_max0; i++) { const T stmp = ss(i, i); for (int j = 0; j < aug_max1; j++) { upper(i, j) *= stmp; } } blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, aug_max0, aug_max1, one, ss.addrCoefs(), aug_max, lower.addrCoefs(), aug_max0); } else { for (int j = 0; j < aug_max1; j++) { const int jj = permute1[aug_max - aug_max1 + j]; for (int i = 0; i < aug_max0; i++) { const int ii = permute1[i]; lower(i, j) = s(ii, jj); // copy from upper } } blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, aug_max0, aug_max1, one, ss.addrCoefs(), aug_max, lower.addrCoefs(), aug_max0); for (int i = 0; i < aug_max0; i++) { const T stmp = ss(i, i); for (int j = 0; j < aug_max1; j++) { upper(i, j) = lower(i, j) * stmp; } } } blas_gemm(CblasTrans, CblasNoTrans, aug_max1, aug_max1, aug_max0, none, lower.addrCoefs(), aug_max0, upper.addrCoefs(), aug_max0, one, sc.addrCoefs(), aug_max1); upper.free(); lower.free(); s.free(); s.init(aug_max1, aug_max1); s.copy(sc); // blas_copy((aug_max1 * aug_max1), sc, 1, s, 1); sc.free(); sing_max = nn0; } // if (flagShrinkSchur) { ss.free(); delete [] permute1; int n2; bool flag, flag_unsym_pivot; flag = ComputeDimKernel(&n2, &flag_unsym_pivot, s.addrCoefs(), 
aug_max1, isSym, aug_dim, eps_machine, eps_piv, verbose, fp); diss_printf(verbose, fp, "%s %d : detected dim. of the kernel = %d %s\n", __FILE__, __LINE__, n2, flag_unsym_pivot ? "full" : "symmetric"); // // restore stored diagonal entries nullified for augmented dimenison for (int i = 0; i < aug_max; i++) { D.diag(aug_ind0[i]) = a_diag[i]; } if (flag_unsym_pivot) { return 0; // force kernel detection failure : 15 May 2018 } if ((n2 != sing_max)) { if (refactorize) { diss_printf(verbose, fp, "%s %d : sing_max = %d n2 = %d -> refactorized\n", __FILE__, __LINE__, sing_max, n2); return (-1); } } // if (n2 != sing_max) aug_ind0.clear(); aug_ind1.clear(); a_diag.free(); b.free(); c.free(); s.free(); delete [] permute_d; return n2; } template int dimKernDense(vector &singIdx, const int n, const int aug_dim, const double eps_machine, const double eps_piv, SquareBlockMatrix &D, double *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp); template int dimKernDense(vector &singIdx, const int n, const int aug_dim, const quadruple eps_machine, const double eps_piv, SquareBlockMatrix &D, quadruple *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp); template int dimKernDense, double>(vector &singIdx, const int n, const int aug_dim, const double eps_mahcine, const double eps_piv, SquareBlockMatrix > &D, complex *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp); template int dimKernDense, quadruple>(vector &singIdx, const int n, const int aug_dim, const quadruple eps_machine, const double eps_piv, SquareBlockMatrix > &D, complex *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp); template int dimKernDense(vector &singIdx, const int n, const int aug_dim, const float eps_machine, const double eps_piv, SquareBlockMatrix &D, float *a, const bool refactorize, const bool isFullPermute, 
const bool isSym, const bool verbose, FILE *fp); template int dimKernDense, float>(vector &singIdx, const int n, const int aug_dim, const float eps_mahcine, const double eps_piv, SquareBlockMatrix > &D, complex *a, const bool refactorize, const bool isFullPermute, const bool isSym, const bool verbose, FILE *fp); // template void calc_relative_norm(double *norm_l2, double *norm_infty, const T *v, const T *u, const int dim) { fprintf(stderr, "%s %d : specialized template is not yet defined.\n", __FILE__, __LINE__); } template<> void calc_relative_norm(double *norm_l2, double *norm_infty, const double *v, const double *u, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ytmp1, ytmp0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { ytmp1 = v[i]; ytmp0 = u[i]; if (ytmp1 < 0.0) { ytmp1 *= (-1.0); } if (ytmp0 < 0.0) { ytmp0 *= (-1.0); } xtmp0 = (ytmp0 > xtmp0) ? ytmp0 : xtmp0; ztmp0 += ytmp0 * ytmp0; xtmp1 = (ytmp1 > xtmp1) ? ytmp1 : xtmp1; ztmp1 += ytmp1 * ytmp1; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_norm(double *norm_l2, double *norm_infty, const quadruple *v, const quadruple *u, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ytmp1, ytmp0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { ytmp1 = quad2double(v[i]); ytmp0 = quad2double(u[i]); if (ytmp1 < 0.0) { ytmp1 *= (-1.0); } if (ytmp0 < 0.0) { ytmp0 *= (-1.0); } xtmp0 = (ytmp0 > xtmp0) ? ytmp0 : xtmp0; ztmp0 += ytmp0 * ytmp0; xtmp1 = (ytmp1 > xtmp1) ? ytmp1 : xtmp1; ztmp1 += ytmp1 * ytmp1; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_norm >(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { const double ytmp1 = std::abs(v[i]); const double ytmp0 = std::abs(u[i]); xtmp0 = ytmp0 > xtmp0 ? 
ytmp0 : xtmp0; ztmp0 += std::real(u[i] * std::conj(u[i])); xtmp1 = ytmp1 > xtmp1 ? ytmp1 : xtmp1; ztmp1 += std::real(v[i] * std::conj(v[i])); } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_norm >(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { quadruple vr = v[i].real(); quadruple vi = v[i].imag(); quadruple ur = u[i].real(); quadruple ui = u[i].imag(); const double ytmp1 = quad2double(sqrt(vr * vr + vi * vi)); const double ytmp0 = quad2double(sqrt(ur * ur + ui * ui)); xtmp0 = ytmp0 > xtmp0 ? ytmp0 : xtmp0; ztmp0 += quad2double(ur * ur + ui * ui); xtmp1 = ytmp1 > xtmp1 ? ytmp1 : xtmp1; ztmp1 += quad2double(vr * vr + vi * vi); } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template void calc_relative_norm(double *norm_l2, double *norm_infty, const float *v, const float *u, const int dim); template void calc_relative_norm >(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const int dim); // template void calc_relative_normscaled(double *norm_l2, double *norm_infty, const T *v, const T *u, const Z *w, const int dim) { fprintf(stderr, "%s %d : specialized template is not yet defined.\n", __FILE__, __LINE__); } template<> void calc_relative_normscaled(double *norm_l2, double *norm_infty, const double *v, const double *u, const double *w, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ytmp1, ytmp0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { ytmp0 = u[i] / w[i]; ytmp1 = v[i] * w[i]; if (ytmp1 < 0.0) { ytmp1 *= (-1.0); } if (ytmp0 < 0.0) { ytmp0 *= (-1.0); } xtmp0 = (ytmp0 > xtmp0) ? ytmp0 : xtmp0; ztmp0 += ytmp0 * ytmp0; xtmp1 = (ytmp1 > xtmp1) ? 
ytmp1 : xtmp1; ztmp1 += ytmp1 * ytmp1; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_normscaled(double *norm_l2, double *norm_infty, const quadruple *v, const quadruple *u, const quadruple *w, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ytmp1, ytmp0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { ytmp0 = quad2double(u[i] / w[i]); ytmp1 = quad2double(v[i] * w[i]); if (ytmp1 < 0.0) { ytmp1 *= (-1.0); } if (ytmp0 < 0.0) { ytmp0 *= (-1.0); } xtmp0 = (ytmp0 > xtmp0) ? ytmp0 : xtmp0; ztmp0 += ytmp0 * ytmp0; xtmp1 = (ytmp1 > xtmp1) ? ytmp1 : xtmp1; ztmp1 += ytmp1 * ytmp1; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_normscaled, double>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const double *w, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { const double ytmp0 = std::abs(u[i]) / w[i]; const double ytmp1 = std::abs(v[i]) * w[i]; const double ww = w[i] * w[i]; xtmp0 = ytmp0 > xtmp0 ? ytmp0 : xtmp0; ztmp0 += std::real(u[i] * std::conj(u[i])) / ww; xtmp1 = ytmp1 > xtmp1 ? ytmp1 : xtmp1; ztmp1 += std::real(v[i] * std::conj(v[i])) * ww; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } template<> void calc_relative_normscaled, quadruple>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const quadruple *w, const int dim) { double xtmp1 = 0.0, xtmp0 = 0.0; double ztmp1 = 0.0, ztmp0 = 0.0; for (int i = 0; i < dim; i++) { quadruple vr = v[i].real(); quadruple vi = v[i].imag(); quadruple ur = u[i].real(); quadruple ui = u[i].imag(); const double ytmp0 = quad2double(sqrt(ur * ur + ui * ui) / w[i]); const double ytmp1 = quad2double(sqrt(vr * vr + vi * vi) * w[i]); const double ww = quad2double(w[i] * w[i]); xtmp0 = ytmp0 > xtmp0 ? ytmp0 : xtmp0; ztmp0 += quad2double(ur * ur + ui * ui) / ww; xtmp1 = ytmp1 > xtmp1 ? 
ytmp1 : xtmp1; ztmp1 += quad2double(vr * vr + vi * vi) * ww; } *norm_l2 = sqrt(ztmp1 / ztmp0); *norm_infty = xtmp1 / xtmp0; } // template void calc_relative_normscaled(double *norm_l2, double *norm_infty, const float *v, const float *u, const float *w, const int dim); template void calc_relative_normscaled, float>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const float *w, const int dim); // int CSR_sym2unsym(CSR_indirect *unsym, const int *ptSymRows, const int *indSymCols, const int *map_eqn, const int *remap_eqn, const int dim, const bool upper_flag, bool verbose, FILE *fp) { int* nbIndPerRow = new int[dim]; // memset(nbIndPerRow, 0, dim*sizeof(int)); for (int i = 0; i < dim; i++) { nbIndPerRow[i] = 0; } for (int i = 0; i < dim; i++) { const int ii = remap_eqn[i]; nbIndPerRow[i] += ptSymRows[ii + 1] - ptSymRows[ii]; int ibegin = ptSymRows[ii] + (upper_flag ? 1 : 0); int iend = ptSymRows[ii + 1] + (upper_flag ? 0 : (-1)); for (int kk = ibegin; kk < iend; kk++) { nbIndPerRow[map_eqn[indSymCols[kk]]]++; // 3 Jan.2014 } } // Build unsym->ptRows array : // ................... 
unsym->ptRows[0] = 0; for (int i = 0; i < dim; i++) { unsym->ptRows[i + 1] = unsym->ptRows[i] + nbIndPerRow[i]; } // CHECK(unsym->ptRows[dim] == (2 * nz - dim), // "error in sym2unsym() : Wrong number of non zeros elemnts in ptRows !"); // Allocate and fill indices columns : // memset(nbIndPerRow, 0, (dim * sizeof(int))); for (int i = 0; i < dim; i++) { nbIndPerRow[i] = 0; } // for upper case, nbIndPerRow[i] keeps entries added by transposed operation // but for lower case counts all nonzero entries in progress for (int i = 0; i < dim; i++) { int itmp = unsym->ptRows[i] + nbIndPerRow[i]; const int ii = remap_eqn[i]; for (int kk = ptSymRows[ii]; kk < ptSymRows[ii + 1]; kk++) { unsym->indCols[itmp] = map_eqn[indSymCols[kk]]; unsym->indVals[itmp] = kk; //map_indcols[kk]; itmp++; } // loop : kk if (!upper_flag) { nbIndPerRow[i] = itmp - unsym->ptRows[i]; } // memcpy(indCols + (ptRows[i] + nbIndPerRow[i]), // indSymCols + ptSymRows[i], // (ptSymRows[i + 1] - ptSymRows[i]) * sizeof(int)); int ibegin = ptSymRows[ii] + (upper_flag ? 1 : 0); int iend = ptSymRows[ii + 1] + (upper_flag ? 
0 : (-1)); for (int kk = ibegin; kk < iend; kk++) { const int j = map_eqn[indSymCols[kk]]; const int jtmp = unsym->ptRows[j] + nbIndPerRow[j]; unsym->indCols[jtmp] = i; unsym->indVals[jtmp] = kk; //map_indcols[kk]; nbIndPerRow[j]++; } // loop : kk } // loop : i delete [] nbIndPerRow; return unsym->ptRows[dim]; } bool CSR_unsym2unsym(CSR_indirect *unsym, const int *ptUnSymRows, const int *indUnSymCols, const int *map_eqn, const int *remap_eqn, //const int *map_indcols, const int dim, const bool verbose, FILE *fp) { int* nbIndPerRow = new int[dim]; for (int i = 0; i < dim; i++) { const int ii = remap_eqn[i]; nbIndPerRow[i] = ptUnSymRows[ii + 1] - ptUnSymRows[ii]; } unsym->ptRows[0] = 0; for (int i = 0; i < dim; i++) { unsym->ptRows[i + 1] = unsym->ptRows[i] + nbIndPerRow[i]; } for (int i = 0; i < dim; i++) { // running over new index const int ii = remap_eqn[i]; // access to the original data of node int itmp = unsym->ptRows[i]; for (int kk = ptUnSymRows[ii]; kk < ptUnSymRows[ii + 1]; kk++, itmp++) { const int jj = indUnSymCols[kk]; const int j = map_eqn[jj]; unsym->indCols[itmp] = j; // access to the new data unsym->indVals[itmp] = kk; // access to the orignal data of unsym->indCols bool found = false; for (int mm = ptUnSymRows[jj]; mm < ptUnSymRows[jj + 1]; mm++) { if (indUnSymCols[mm] == ii) { // based on the original CSR data unsym->indVals_unsym[itmp] = mm; // map_indcols[mm]; found = true; break; } } if (!found) { diss_printf(verbose, fp, "%s %d : symmetric place of (%d, %d) -> (%d %d) not found\n", __FILE__, __LINE__, ii, jj, i, j); return false; } } } delete [] nbIndPerRow; return true; } void swap_queues_n(vector &queue, vector &queue_index, const int ii, const int jj, const int n, vector &tmp, vector &tmp_index) { for (int l = 0; l < n; l++) { tmp[l] = queue[ii * n + l]; tmp_index[l] = queue_index[ii * n + l]; } for (int l = 0; l < n; l++) { queue[ii * n + l] = queue[(ii + 1) * n + l]; queue_index[ii * n + l] = queue_index[(ii + 1) * n + l]; } for (int l = 
0; l < n; l++) { queue[jj * n + l] = tmp[l]; queue_index[jj * n + l] = tmp_index[l]; } } bool compare_task_name(C_task *first, C_task *second) { // 'g', 'h', 'i', 'j' should be in higher order if (first->task_name[0] >= 'h') { if (second->task_name[0] >= 'h') { return (strcmp(first->task_name, second->task_name) >=0 ? false : true); } else { return true; } } if (second->task_name[0] >= 'h') { return false; } return (strcmp(first->task_name, second->task_name) >=0 ? false : true); } // ===================================================================== template int count_diag_negative_real(SquareBlockMatrix& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const T xtmp = Diag.diag(i); if (xtmp < T(0.0)) { count++; } } return count; } template int count_diag_negative_complex(SquareBlockMatrix >& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const complex xtmp = Diag.diag(i); if ((xtmp.real() < T(0.0)) && xtmp.imag() == T(0.0)) { count++; } } return count; } template int count_diag_negative(SquareBlockMatrix& Diag) { fprintf(stderr, "%s %d : specialized template is not yet defined.\n", __FILE__, __LINE__); return (-1); } template<> int count_diag_negative(SquareBlockMatrix& Diag) { return count_diag_negative_real(Diag); } template<> int count_diag_negative(SquareBlockMatrix& Diag) { return count_diag_negative_real(Diag); } template<> int count_diag_negative >(SquareBlockMatrix >& Diag) { return count_diag_negative_complex(Diag); } template<> int count_diag_negative >(SquareBlockMatrix >& Diag) { return count_diag_negative_complex(Diag); } template<> int count_diag_negative(SquareBlockMatrix& Diag) { return count_diag_negative_real(Diag); } template<> int count_diag_negative >(SquareBlockMatrix >& Diag) { return count_diag_negative_complex(Diag); } // template int count_diag_negative(SubSquareMatrix& Diag) { fprintf(stderr, "%s %d : specialized template is not yet 
defined.\n", __FILE__, __LINE__); return (-1); } template<> int count_diag_negative(SubSquareMatrix& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const double xtmp = Diag(i, i); if (xtmp < 0.0) { count++; } } return count; } template<> int count_diag_negative(SubSquareMatrix& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const double xtmp = quad2double(Diag(i, i)); if (xtmp < 0.0) { count++; } } return count; } template<> int count_diag_negative >(SubSquareMatrix >& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const complex &xtmp = Diag(i, i); if (quad2double(xtmp.imag()) < 0.0 && quad2double(xtmp.real()) == 0.0) { count++; } } return count; } template<> int count_diag_negative >(SubSquareMatrix >& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const complex xtmp = Diag(i, i); if (xtmp.imag() < 0.0 && xtmp.real() == 0.0) { count++; } } return count; } template<> int count_diag_negative(SubSquareMatrix& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const float xtmp = Diag(i, i); if (xtmp < 0.0) { count++; } } return count; } template<> int count_diag_negative >(SubSquareMatrix >& Diag) { int count = 0; const int dim_diag = Diag.dimension(); for (int i = 0; i < dim_diag; i++) { const complex xtmp = Diag(i, i); if (xtmp.imag() < 0.0 && xtmp.real() == 0.0) { count++; } } return count; } // template void C_SparseNumFact(void *arg_) { C_SparseNumFact_arg *arg = (C_SparseNumFact_arg *)arg_; TridiagBlockMatrix **tridiag = arg->tridiag; int colors = arg->colors; int nrow = arg->nrow; double eps_pivot = *(arg->eps_pivot); const bool kernel_detection = *(arg->kernel_detection); const bool higher_precision = *(arg->higher_precision); const int dim_aug_kern = *(arg->dim_aug_kern); const U eps_machine = 
*(arg->eps_machine); T *coefs = arg->coefs; double *pivot = arg->pivot; // all diagonal entries of sparse matrices // equal to 1, set at the initialization SquareBlockMatrix& D = *(arg->D); vector &list_sing = D.getSingIdx(); double nopd; const bool verbose = arg->verbose; FILE *fp = *(arg->fp); elapsed_t t0, t1; get_realtime(&t0); int nsing = 0; bool detected = true; nopd = 0.0; *pivot = 1.0; // matrix is scaled so that the diagonal entries take 1 for (int i = 0; i < colors; i++) { double pivot1; double nopd1; tridiag[i]->NumericFact(coefs, eps_pivot, &pivot1, kernel_detection, higher_precision, dim_aug_kern, eps_machine, &nopd1); nopd += nopd1; // tridiag[i0->SingularNode(list_sing); if (!tridiag[i]->detected()) { detected = false; // tridiag[i]->nsing() == (-1); } nsing += tridiag[i]->nsing(); *pivot = *pivot < pivot1 ? *pivot : pivot1; } list_sing.resize(nsing); { int j = 0; for (int i = 0; i < colors; i++) { vector list_sing_tmp; tridiag[i]->SingularNode(list_sing_tmp); for (vector::const_iterator it = list_sing_tmp.begin(); it != list_sing_tmp.end(); ++it, j++) { list_sing[j] = (*it); } } } get_realtime(&t1); diss_printf(verbose, fp, "%s %d : %d : pivot = %g n0 = %d detected = %s\n", __FILE__, __LINE__, arg->nb, *pivot, nsing, detected ? "true" : "false"); D.set_lastPivot(*pivot); D.set_KernelDetected(detected); D.set_rank(nrow - nsing); //*(arg->pivot) = pivot; *(arg->nopd) = (long)nopd; // ?? uninitialized ?? 
} template void C_SparseNumFact(void *arg_); template void C_SparseNumFact(void *arg_); template void C_SparseNumFact, double>(void *arg_); template void C_SparseNumFact, quadruple>(void *arg_); template void C_SparseNumFact(void *arg_); template void C_SparseNumFact, float>(void *arg_); // template void C_SparseLocalSchur(void *arg_) { C_SparseNumFact_arg *arg = (C_SparseNumFact_arg *)arg_; TridiagBlockMatrix **tridiag = arg->tridiag; int colors = arg->colors; int *color_mask = arg->color_mask; int nrow = arg->nrow; int ncol = arg->ncol; T *coefs = arg->coefs; int *prow1 = arg->csr_offdiag->ptRows; int *indcols1 = arg->csr_offdiag->indCols; int *indvals1 = arg->csr_offdiag->indVals; int *indvals2 = arg->isSym ? (int *)NULL : arg->csr_offdiag->indVals_unsym; elapsed_t *tt = arg->tt; double *nops = new double[3]; // SquareBlockMatrix &localSchur = *(arg->localSchur); localSchur.allocate(); double nopsum = 0.0; for (int i = 0; i < colors; i++) { tridiag[i]->ComputeSchur(nrow, color_mask, ncol, prow1, indcols1, indvals1, indvals2, coefs, SIZE_B1, localSchur, nops, tt); // get the excat flop by direct counting nopsum += nops[0] + nops[2]; } *(arg->nops) = (long)nopsum; delete [] nops; } template void C_SparseLocalSchur(void *arg_); template void C_SparseLocalSchur(void *arg_); template void C_SparseLocalSchur, double>(void *arg_); template void C_SparseLocalSchur, quadruple>(void *arg_); template void C_SparseLocalSchur(void *arg_); template void C_SparseLocalSchur, float>(void *arg_); // template<> void dump_matrix(FILE *fp, const int nrow, double *a) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : ", i); for (int j = i; j < nrow; j++) { fprintf(fp, "%g ", a[i + nrow * j]); } fprintf(fp, "\n"); } } template void dump_matrix(FILE *fp, const int nrow, T *a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template void dump_matrix(FILE *fp, const int nrow, quadruple *a); template void dump_matrix >(FILE *fp, const int nrow, complex 
*a); template void dump_matrix >(FILE *fp, const int nrow, complex *a); template void dump_matrix >(FILE *fp, const int nrow, complex *a); template<> void dump_matrix(FILE *fp, const int nrow, const int ncol, double *a) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < ncol; j++) { fprintf(fp, "%g ", a[i + nrow * j]); } fprintf(fp, "\n"); } } template<> void dump_matrix(FILE *fp, const int nrow, const int ncol, float *a) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < ncol; j++) { fprintf(fp, "%g ", a[i + nrow * j]); } fprintf(fp, "\n"); } } template void dump_matrix(FILE *fp, const int nrow, const int ncol, T *a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template void dump_matrix(FILE *fp, const int nrow, const int ncol, quadruple *a); template void dump_matrix >(FILE *fp, const int nrow, const int ncol, complex *a); template void dump_matrix >(FILE *fp, const int nrow, const int ncol, complex *a); template void dump_matrix >(FILE *fp, const int nrow, const int ncol, complex *a); template<> void dump_matrix(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, double *a) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < ncol; j++) { fprintf(fp, "%g ", a[kk + i + nn * j]); } fprintf(fp, "\n"); } } template<> void dump_matrix(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, float *a) { for (int i = 0; i < nrow; i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < ncol; j++) { fprintf(fp, "%g ", a[kk + i + nn * j]); } fprintf(fp, "\n"); } } template void dump_matrix(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, T *a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template void dump_matrix(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, quadruple *a); template void dump_matrix >(FILE *fp, const int kk, const int nrow, const 
int ncol, const int nn, complex *a); template void dump_matrix >(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, complex *a); template void dump_matrix >(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, complex *a); template<> void dump_matrix(FILE *fp, RectBlockMatrix &a) { for (int i = 0; i < a.dimension_r(); i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < a.dimension_c(); j++) { fprintf(fp, "%g ", a(i, j)); } fprintf(fp, "\n"); } } template<> void dump_matrix(FILE *fp, RectBlockMatrix &a) { for (int i = 0; i < a.dimension_r(); i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < a.dimension_c(); j++) { fprintf(fp, "%g ", a(i, j)); } fprintf(fp, "\n"); } } template void dump_matrix(FILE *fp, RectBlockMatrix &a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template<> void dump_matrix(FILE *fp, SquareBlockMatrix &a) { // if (a.isSym()) { if (1) { // 16 Jan.debug for (int i = 0; i < a.dimension(); i++) { fprintf(fp, "%d : ", i); for (int j = i; j < a.dimension() ; j++) { fprintf(fp, "%16.8e ", a(i, j)); } fprintf(fp, "\n"); } } else { for (int i = 0; i < a.dimension(); i++) { fprintf(fp, "%d : ", i); for (int j = 0; j < a.dimension(); j++) { fprintf(fp, "%16.8e ", a(i, j)); } fprintf(fp, "\n"); } } } template void dump_matrix(FILE *fp, RectBlockMatrix &a); template void dump_matrix >(FILE *fp, RectBlockMatrix > &a); template void dump_matrix >(FILE *fp, RectBlockMatrix > &a); template void dump_matrix >(FILE *fp, RectBlockMatrix > &a); // template void dump_matrix(FILE *fp, SquareBlockMatrix &a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template void dump_matrix(FILE *fp, SquareBlockMatrix &a); template void dump_matrix >(FILE *fp, SquareBlockMatrix > &a); template void dump_matrix >(FILE *fp, SquareBlockMatrix > &a); template void dump_matrix >(FILE *fp, SquareBlockMatrix > &a); // template<> void dump_matrix(FILE *fp, const int nrow, const int 
nnz, int *prow, int *indcols, int *indvals, double *a) { // verify_nan(fp, nnz, a); for (int i = 0; i < nrow; i++) { if (prow[i + 1] > prow[i]) { fprintf(fp, "%d : ", i); for (int j = prow[i]; j < prow[i + 1]; j++) { fprintf(fp, "%d:%d:%g ", indcols[j], indvals[j], a[indvals[j]]); } fprintf(fp, "\n"); } } } template void dump_matrix(FILE *fp, const int nrow, const int nnz, int *prow, int *indcols, int *indvals, T *a) { fprintf(stderr, "%s %d : general case is not defined\n", __FILE__, __LINE__); } template void dump_matrix >(FILE *fp, const int nrow, const int nnz, int *prow, int *indcols, int *indvals, complex *a); // template void C_FillMatrix_diag(void *arg_) { C_FillMatrix_arg *arg = (C_FillMatrix_arg *)arg_; SquareBlockMatrix& D = *arg->D; CSR_indirect csr_diag = *arg->csr_diag; T *coefs = arg->coefs; D.ZeroClear(); for (int i = 0; i < csr_diag.n; i++) { for (int k = csr_diag.ptRows[i]; k < csr_diag.ptRows[i + 1]; k++) { const int j = csr_diag.indCols[k]; D(i, j) = coefs[csr_diag.indVals[k]]; } } // } template void C_FillMatrix_diag(void *arg_); template void C_FillMatrix_diag(void *arg_); template void C_FillMatrix_diag >(void *arg_); template void C_FillMatrix_diag >(void *arg_); template void C_FillMatrix_diag(void *arg_); template void C_FillMatrix_diag >(void *arg_); // template void C_FillMatrix_offdiag(void *arg_) { C_FillMatrix_arg *arg = (C_FillMatrix_arg *)arg_; RectBlockMatrix& upper = *arg->upper; CSR_indirect csr_offdiag = *arg->csr_offdiag; T *coefs = arg->coefs; upper.ZeroClear(); for (int i = 0; i < csr_offdiag.n; i++) { for (int k = csr_offdiag.ptRows[i]; k < csr_offdiag.ptRows[i + 1]; k++) { const int j = csr_offdiag.indCols[k]; // access to (i, j) = i + j * n is not efficient upper(i, j) = coefs[csr_offdiag.indVals[k]]; } } if (!arg->isSym) { RectBlockMatrix& lower = *arg->lower; lower.ZeroClear(); for (int i = 0; i < csr_offdiag.n; i++) { for (int k = csr_offdiag.ptRows[i]; k < csr_offdiag.ptRows[i + 1]; k++) { const int j = 
csr_offdiag.indCols[k]; // access to (i, j) = i + j * n is not efficient lower(i, j) = coefs[csr_offdiag.indVals_unsym[k]]; } } } } template void C_FillMatrix_offdiag(void *arg_); template void C_FillMatrix_offdiag(void *arg_); template void C_FillMatrix_offdiag >(void *arg_); template void C_FillMatrix_offdiag >(void *arg_); template void C_FillMatrix_offdiag(void *arg_); template void C_FillMatrix_offdiag >(void *arg_); // template void DSchurGEMM_diag(void *arg_) { const T zero(0.0); const T one(1.0); DSchurGEMM_arg *arg = (DSchurGEMM_arg *)arg_; SquareBlockMatrix *localSchur = arg->localSchur; localSchur->allocateBlock(arg->i_block, arg->i_block); T *s = localSchur->addrCoefBlock(arg->i_block, arg->i_block); const int nrow = localSchur->nrowBlock(arg->i_block, arg->i_block); // alpha = 1; // to replace by symmetric dgemm which compute only upper part of the matrix if (arg->isSym) { for (int k = 0; k < arg->lower->num_blocks_r(); k++) { const T beta = ((k == 0) ? zero : one); const T *ptLt = arg->lower->addrCoefBlock(k, arg->i_block); const T *ptU = arg->upper->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower->nrowBlock(k); C_gemm_symm(nrow, // arg->block_nrow, nnrow, // arg->nrow, one, // alpha ptLt, nnrow, // arg->nrow, ptU, // ptL, // arg->ptL, nnrow, // arg->nrow, beta, s, // arg->s + (arg->i_row + arg->i_row * arg->ncol), nrow); } // loop : k } else { for (int k = 0; k < arg->lower->num_blocks_r(); k++) { const T beta = ((k == 0) ? 
zero : one); const T *ptLt = arg->lower->addrCoefBlock(k, arg->i_block); const T *ptU = arg->upper->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, // arg->block_nrow, arg->block_nrow, nrow, nrow, nnrow, // arg->nrow, one, // alpha ptLt, // ptUt, // arg->ptUt, nnrow, // arg->nrow, ptU, // ptL, // arg->ptL, nnrow, // arg->nrow, beta, s, // arg->s + (arg->i_row + arg->i_row * arg->ncol), nrow); } // loop : k } } template void DSchurGEMM_diag(void *arg_); template void DSchurGEMM_diag(void *arg_); template void DSchurGEMM_diag >(void *arg_); template void DSchurGEMM_diag >(void *arg_); template void DSchurGEMM_diag(void *arg_); template void DSchurGEMM_diag >(void *arg_); // template void DSchurGEMM_diag_two(void *arg_) { DSchurGEMM_two_arg *arg = (DSchurGEMM_two_arg *)arg_; if (arg->isSkip) { return; } SquareBlockMatrix *fatherDiag = arg->localSchur; T *s = fatherDiag->addrCoefBlock(arg->i_block, arg->i_block); const int nrow = fatherDiag->nrowBlock(arg->i_block, arg->i_block); const T none(-1.0); const T one(1.0); // to replace by symmetric dgemm which compute only upper part of the matrix if (arg->isSym) { for (int k = 0; k < arg->lower0->num_blocks_r(); k++) { const T *ptLt0 = arg->lower0->addrCoefBlock(k, arg->i_block); const T *ptU0 = arg->upper0->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower0->nrowBlock(k); C_gemm_symm(nrow, // arg->block_nrow, nnrow, // arg->nrow0, none, // alpha ptLt0, nnrow, // arg->nrow0, ptU0, nnrow, // arg->nrow0, one, // beta s, nrow); } for (int k = 0; k < arg->lower1->num_blocks_r(); k++) { const T *ptLt1 = arg->lower1->addrCoefBlock(k, arg->i_block); const T *ptU1 = arg->upper1->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower1->nrowBlock(k); C_gemm_symm(nrow, // arg->block_nrow, nnrow, // arg->nrow1, none, // alpha ptLt1, nnrow, //arg->nrow1, ptU1, nnrow, // arg->nrow1, one, // beta s, nrow); } } else { for (int k = 0; k < arg->lower0->num_blocks_r(); k++) { 
const T *ptLt0 = arg->lower0->addrCoefBlock(k, arg->i_block); const T *ptU0 = arg->upper0->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower0->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, // arg->block_nrow, arg->block_nrow, nrow, nrow, nnrow, // arg->nrow0, none, // alpha ptLt0, nnrow, // arg->nrow0, ptU0, nnrow, // arg->nrow0, one, // beta, s, nrow); } for (int k = 0; k < arg->lower1->num_blocks_r(); k++) { const T *ptLt1 = arg->lower1->addrCoefBlock(k, arg->i_block); const T *ptU1 = arg->upper1->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower1->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, //arg->block_nrow, arg->block_nrow, nrow, nrow, nnrow, // arg->nrow1, none, // alpha ptLt1, nnrow, // arg->nrow1, ptU1, nnrow, //arg->nrow1, one, // beta, s, nrow); } } } template void DSchurGEMM_diag_two(void *arg_); template void DSchurGEMM_diag_two(void *arg_); template void DSchurGEMM_diag_two >(void *arg_); template void DSchurGEMM_diag_two >(void *arg_); template void DSchurGEMM_diag_two(void *arg_); template void DSchurGEMM_diag_two >(void *arg_); // template void DSchurGEMM_offdiag(void *arg_) { const T zero(0.0); const T one(1.0); DSchurGEMM_arg *arg = (DSchurGEMM_arg *)arg_; SquareBlockMatrix *localSchur = arg->localSchur; localSchur->allocateBlock(arg->i_block, arg->j_block); T *s = localSchur->addrCoefBlock(arg->i_block, arg->j_block); const int nrow = localSchur->nrowBlock(arg->i_block, arg->j_block); const int ncol = localSchur->ncolBlock(arg->i_block, arg->j_block); // alpha = 1 // block_nrow * block_ncol : S(i,j) = L(i)^T U(j) if (arg->isTrans) { for (int k = 0; k < arg->lower->num_blocks_r(); k++) { const T beta = ((k == 0) ? 
zero : one); const T *ptLt = arg->lower->addrCoefBlock(k, arg->j_block); const T *ptU = arg->upper->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, nrow, ncol, nnrow, //arg->nrow, one, // alpha, ptLt, nnrow, // arg->nrow, ptU, nnrow, // arg->nrow, beta, s, nrow); } } else { for (int k = 0; k < arg->lower->num_blocks_r(); k++) { const T beta = ((k == 0) ? zero : one); const T *ptLt = arg->lower->addrCoefBlock(k, arg->i_block); const T *ptU = arg->upper->addrCoefBlock(k, arg->j_block); int nnrow = arg->lower->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, nrow, ncol, nnrow, // arg->nrow, one, // alpha ptLt, nnrow, // arg->nrow, ptU, nnrow, // arg->nrow, beta, s, nrow); } } } template void DSchurGEMM_offdiag(void *arg_); template void DSchurGEMM_offdiag(void *arg_); template void DSchurGEMM_offdiag >(void *arg_); template void DSchurGEMM_offdiag >(void *arg_); template void DSchurGEMM_offdiag(void *arg_); template void DSchurGEMM_offdiag >(void *arg_); // template void DSchurGEMM_offdiag_two(void *arg_) { DSchurGEMM_two_arg *arg = (DSchurGEMM_two_arg *)arg_; if (arg->isSkip) { return; } SquareBlockMatrix *fatherDiag = arg->localSchur; T *s = fatherDiag->addrCoefBlock(arg->i_block, arg->j_block); const int nrow = fatherDiag->nrowBlock(arg->i_block, arg->j_block); const int ncol = fatherDiag->ncolBlock(arg->i_block, arg->j_block); const T none(-1.0); const T one(1.0); // block_nrow * block_ncol : S(i,j) = L(i)^T U(j) if (arg->isTrans) { for (int k = 0; k < arg->lower0->num_blocks_r(); k++) { const T *ptLt0 = arg->lower0->addrCoefBlock(k, arg->j_block); const T *ptU0 = arg->upper0->addrCoefBlock(k, arg->i_block); int nnrow = arg->lower0->nrowBlock(k); blas_gemm(CblasTrans, CblasNoTrans, nrow, ncol, nnrow, // arg->nrow0, none, //alpha, ptLt0, nnrow, // arg->nrow0, ptU0, nnrow, // arg->nrow0, one, //beta, s, nrow); } for (int k = 0; k < arg->lower1->num_blocks_r(); k++) { const T *ptLt1 = 
// Entry point taking an opaque pointer to a DTRSMScale_arg.
//
// Works on block (kblock, lblock) of arg->upper, using the diagonal block
// (kblock, kblock) of arg->LDLt:
//   1. if arg->localPermute is set, the rows of every column are gathered
//      through the permutation returned by arg->LDLt->getPermute();
//   2. rows whose global index appears in *arg->singLocNodes0 and falls in
//      [kk, kk + nrow) are set to zero and counted in nn0;
//   3. a unit lower triangular solve (blas_trsm) is applied to the leading
//      (nrow - nn0) rows;
//   4. row i is multiplied by the diagonal entry LDLt[i * (nrow + 1)].
// In the symmetric case (arg->isSym) steps 1-3 are carried out in the newly
// allocated block of arg->lower and step 4 writes lower * diagonal into
// upper; otherwise everything is done in place in upper.
//
// NOTE(review): the template argument lists are missing from this copy of
// the source (template header, DTRSMScale_arg, vector, VectorArray) --
// restore them from the upstream file before compiling.
// NOTE(review): in the symmetric branch `lower` is only loaded from `upper`
// when arg->localPermute is set; otherwise the solve runs on whatever
// allocateBlock() left in the block -- confirm that this path is either
// unused or initialised elsewhere.
// NOTE(review): solving only the leading (nrow - nn0) rows presumably relies
// on the singular rows being the trailing ones of the block -- confirm.
template
void C_DTRSMScale_diag_upper(void *arg_)
{
  DTRSMScale_arg *arg = (DTRSMScale_arg *)arg_;
  int ncol = arg->ncol;
  vector &permute = arg->LDLt->getPermute();
  const int k = arg->kblock;
  const int kk = arg->LDLt->IndexBlock(k);   // offset subtracted from global indices below
  vector* singLocNodes0 = arg->singLocNodes0;
  const int n0 = singLocNodes0->size();
  T *LDLt = arg->LDLt->addrCoefBlock(k, k);
  T *upper = arg->upper->addrCoefBlock(k, arg->lblock);
  //  T *lower;
  const int nrow = arg->LDLt->nrowBlock(k, k);
  const T one(1.0);
  const T zero(0.0);
  int nn0 = 0;                               // number of nullified rows in this block
  if (arg->isSym) {
    arg->lower->allocateBlock(k, arg->lblock);
    T *lower = arg->lower->addrCoefBlock(k, arg->lblock);
    if (arg->localPermute) {
      // gather the permuted rows of upper, column by column, into lower
      VectorArray xtmp(arg->upper->nrowBlock(k));
      for (int j = 0; j < ncol; j++) {
        int i, ii;
        int jnrow = j * nrow;
        for (ii = kk, i = 0; i < nrow; i++, ii++) {
          const int ip = permute[ii] - kk;
          xtmp[i] = upper[ip + jnrow];
        }
        blas_copy(nrow, xtmp.addrCoefs(), 1, lower + jnrow, 1);
      } // loop : j
      xtmp.free();
    }
    if (n0 > 0) {
      // zero the rows listed as singular that belong to this block
      for (vector::const_iterator it = singLocNodes0->begin();
           it != singLocNodes0->end(); ++it) {
        if (((*it) >= kk) && ((*it) < (kk + nrow))) {
          nn0++;
          int itmp = (*it) - kk;
          for (int j = 0; j < ncol; j++, itmp += nrow) {
            lower[itmp] = zero;
          }
        }
      } // loop : it
    }
    blas_trsm(CblasLeft,
              CblasLower, CblasNoTrans, CblasUnit,
              (nrow - nn0), // skip computation nullified entries
              ncol,
              one, //alpha,
              LDLt,
              nrow, //
              lower,
              nrow); //lower + kk, n
    // upper(i, j) = lower(i, j) * LDLt(i, i)
    for (int j = 0; j < ncol; j++) {
      const int jnrow = j * nrow;
      for (int i = 0; i < nrow; i++) {
        upper[i + jnrow] = lower[i + jnrow] * LDLt[i * (nrow + 1)];
      }
    }
  }   // if (arg->isSym)
  else {
    //    lower = arg->lower->addrCoefBlock(k, arg->lblock);
    if (arg->localPermute) {
      // permute the rows of upper in place, one column at a time
      VectorArray xtmp(arg->upper->nrowBlock(k));
      for (int j = 0; j < ncol; j++) {
        int i, ii;
        int jnrow = j * nrow;
        for (ii = kk, i = 0; i < nrow; i++, ii++) {
          const int ip = permute[ii] - kk;
          xtmp[i] = upper[ip + jnrow];
        }
        blas_copy(nrow, xtmp.addrCoefs(), 1, upper + jnrow, 1);
      } // loop : j
      xtmp.free();
    }
    if (n0 > 0) {
      // zero the rows listed as singular that belong to this block
      for (vector::const_iterator it = singLocNodes0->begin();
           it != singLocNodes0->end(); ++it) {
        if (((*it) >= kk) && ((*it) < (kk + nrow))) {
          int itmp = (*it) - kk;
          nn0++;
          for (int j = 0; j < ncol; j++, itmp += nrow) {
            upper[itmp] = zero;
          }
        }
      } // loop : it
    }
    blas_trsm(CblasLeft,
              CblasLower, CblasNoTrans, CblasUnit,
              (nrow - nn0), // skip computation nullified entries
              ncol,
              one, //alpha,
              LDLt,
              nrow,
              upper,
              nrow); // upper + kk, n);
    // upper(i, j) *= LDLt(i, i)
    for (int j = 0; j < ncol; j++) {
      const int jnrow = j * nrow;
      for (int i = 0; i < nrow; i++) {
        upper[i + jnrow] *= LDLt[i * (nrow + 1)];
      }
    }
  }  // else if (arg->isSym)
}
// Explicit instantiations of C_DTRSMScale_diag_upper.
// NOTE(review): the template argument lists of these six declarations (and
// the template parameter lists / template arguments further below) are absent
// in this copy of the file -- presumably everything between '<' and '>' was
// lost in extraction.  Restore them from the upstream sources before compiling.
template void C_DTRSMScale_diag_upper(void *arg_);
template void C_DTRSMScale_diag_upper(void *arg_);
template void C_DTRSMScale_diag_upper >(void *arg_);
template void C_DTRSMScale_diag_upper >(void *arg_);
template void C_DTRSMScale_diag_upper(void *arg_);
template void C_DTRSMScale_diag_upper >(void *arg_);
//

// Off-diagonal update of the "upper" right-hand-side block for the block pair
// (k = arg->kblock, m = arg->mblock) and the column block arg->lblock.
//
// arg_ is a type-erased pointer to a DTRSMScale_arg.
//
// Steps visible in the code:
//  1. count in nn0 the entries of arg->singLocNodes0 that fall into the index
//     range [kk, kk + nrow) of diagonal block k;
//  2. issue one blas_gemm call whose output is block (m, lblock) of
//     arg->upper, with alpha = -1, beta = 1 and inner dimension (nrow - nn0):
//       - symmetric case  : A = LDLt block (k, m), B = arg->lower block k,
//       - otherwise       : A = LDLt block (m, k), B = arg->upper block k.
// Presumably blas_gemm follows the CBLAS gemm argument order
// (C = alpha * op(A) * op(B) + beta * C) -- confirm against Compiler/blas.hpp.
template
void C_DTRSMScale_offdiag_upper(void *arg_)
{
  const T none(-1.0);
  const T one(1.0);
  DTRSMScale_arg *arg = (DTRSMScale_arg *)arg_;
  int ncol = arg->ncol;
  const int k = arg->kblock;
  const int m = arg->mblock;
  vector* singLocNodes0 = arg->singLocNodes0;
  const int n0 = singLocNodes0->size();
  const int kk = arg->LDLt->IndexBlock(k);       // first index of block k
  const int nrow = arg->LDLt->nrowBlock(k, k);
  const int nrow1 = arg->LDLt->ncolBlock(k, m);
  //
  int nn0 = 0;  // number of singular nodes located inside block k
  if (n0 > 0) {
    for (vector::const_iterator it = singLocNodes0->begin();
         it != singLocNodes0->end(); ++it) {
      if (((*it) >= kk) && ((*it) < (kk + nrow))) {
        nn0++;
      }
    }  // loop : it
  }
  if (arg->isSym) {
    blas_gemm(CblasTrans, CblasNoTrans,
              nrow1, ncol,       // based on row/column sizes of "upper"
              (nrow - nn0),      // skip computation nullified entries
              none,              // alpha,
              arg->LDLt->addrCoefBlock(k, m),   // transposed upper
              arg->LDLt->nrowBlock(k, m),
              arg->lower->addrCoefBlock(k, arg->lblock),
              arg->lower->nrowBlock(k),
              one,               // beta,
              arg->upper->addrCoefBlock(m, arg->lblock),
              arg->upper->nrowBlock(m));
  }
  else {
    blas_gemm(CblasTrans, CblasNoTrans,
              nrow1, ncol,       // based on row/column sizes of "upper"
              (nrow - nn0),      // skip computation nullified entries
              none,              // alpha,
              arg->LDLt->addrCoefBlock(m, k),   // transposed lower
              arg->LDLt->nrowBlock(m, k),
              arg->upper->addrCoefBlock(k, arg->lblock),
              arg->upper->nrowBlock(k),
              one,               // beta,
              arg->upper->addrCoefBlock(m, arg->lblock),
              arg->upper->nrowBlock(m));
  }
}

// Explicit instantiations of C_DTRSMScale_offdiag_upper (argument lists lost,
// see the note at the top of this span).
template void C_DTRSMScale_offdiag_upper(void *arg_);
template void C_DTRSMScale_offdiag_upper(void *arg_);
template void C_DTRSMScale_offdiag_upper >(void *arg_);
template void C_DTRSMScale_offdiag_upper >(void *arg_);
template void  // this declaration continues on the following source line
C_DTRSMScale_offdiag_upper(void *arg_);  // completes the declaration begun on the previous source line
template void C_DTRSMScale_offdiag_upper >(void *arg_);
//

// NOTE(review): throughout this span the template parameter lists and the
// template arguments (e.g. of DTRSMScale_arg, vector, VectorArray and of the
// explicit instantiations) are absent -- presumably everything between '<'
// and '>' was lost in extraction.  Restore from upstream before compiling.

// Triangular solve on the "lower" right-hand-side block belonging to the
// diagonal block k = arg->kblock and the column block arg->lblock.
//
// arg_ is a type-erased pointer to a DTRSMScale_arg.
//
// Steps visible in the code:
//  1. if arg->localPermute is set, the rows of every column of the block are
//     reordered with the permutation returned by arg->LDLt->getPermute(),
//     taken relative to the block offset kk;
//  2. rows that correspond to entries of arg->singLocNodes0 inside
//     [kk, kk + nrow) are set to zero and counted in nn0;
//  3. one blas_trsm (Left, Upper, Trans, Unit) with the diagonal block (k, k)
//     of LDLt is applied to the first (nrow - nn0) rows, in place.
template
void C_DTRSMScale_diag_lower(void *arg_)
{
  const T one(1.0);
  const T zero(0.0);
  DTRSMScale_arg *arg = (DTRSMScale_arg *)arg_;
  int ncol = arg->ncol;
  vector &permute = arg->LDLt->getPermute();
  const int k = arg->kblock;
  T *lower = arg->lower->addrCoefBlock(k, arg->lblock);
  const int nrow = arg->LDLt->nrowBlock(k, k);
  const int kk = arg->LDLt->IndexBlock(k);
  vector* singLocNodes0 = arg->singLocNodes0;
  const int n0 = singLocNodes0->size();
  // direct use of lower block matrix
  if (arg->localPermute) {
    VectorArray xtmp(arg->lower->nrowBlock(k));   // SIZE_B1];
    for (int j = 0; j < ncol; j++) {
      int i, ii;
      int jnrow = j * nrow;   // offset of column j inside the block
      for (ii = kk, i = 0; i < nrow; i++, ii++) {
        const int ip = permute[ii] - kk;
        xtmp[i] = lower[ip + jnrow];
      }
      blas_copy(nrow, xtmp.addrCoefs(), 1, lower + jnrow, 1);
    }  // loop : j
    xtmp.free();
  }
  // nullifying rows corresponding to singular nodes
  int nn0 = 0;
  if (n0 > 0) {
    for (vector::const_iterator it = singLocNodes0->begin();
         it != singLocNodes0->end(); ++it) {
      if (((*it) >= kk) && ((*it) < (kk + nrow))) {
        nn0++;
        int itmp = (*it) - kk;
        for (int j = 0; j < ncol; j++, itmp += nrow) {
          lower[itmp] = zero;
        }
      }
    }  // loop : it
  }  // if (n0 > 0)
  blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit,
            (nrow - nn0),   // skip computation nullified entries
            ncol,
            one,            // alpha,
            arg->LDLt->addrCoefBlock(k, k),
            arg->LDLt->nrowBlock(k, k),
            lower, nrow);
}

template void C_DTRSMScale_diag_lower(void *arg_);
template void C_DTRSMScale_diag_lower >(void *arg_);
template void C_DTRSMScale_diag_lower(void *arg_);
template void C_DTRSMScale_diag_lower >(void *arg_);
template void C_DTRSMScale_diag_lower(void *arg_);
template void C_DTRSMScale_diag_lower >(void *arg_);
//

// Off-diagonal update of the "lower" right-hand-side block for the block pair
// (k = arg->kblock, m = arg->mblock) and the column block arg->lblock.
//
// Counts in nn0 the singular nodes lying in [kk, kk + nrow) and then issues a
// single blas_gemm with alpha = -1, beta = 1 and inner dimension (nrow - nn0):
// A = LDLt block (k, m) (transposed), B = arg->lower block k, output is
// arg->lower block m.
template
void C_DTRSMScale_offdiag_lower(void *arg_)
{
  DTRSMScale_arg *arg = (DTRSMScale_arg *)arg_;
  int ncol = arg->ncol;
  const int k = arg->kblock;
  const int m = arg->mblock;
  const int nrow = arg->LDLt->nrowBlock(k, k);
  const int kk = arg->LDLt->IndexBlock(k);
  const int nrow1 = arg->LDLt->ncolBlock(k, m);
  vector* singLocNodes0 = arg->singLocNodes0;
  const int n0 = singLocNodes0->size();
  int nn0 = 0;
  if (n0 > 0) {
    for (vector::const_iterator it = singLocNodes0->begin();
         it != singLocNodes0->end(); ++it) {
      if (((*it) >= kk) && ((*it) < (kk + nrow))) {
        nn0++;
      }
    }  // loop : it
  }
  // x -= A_21 P_1^T L_11^-1 D_11^-1 zz
  const T none(-1.0);
  const T one(1.0);
  blas_gemm(CblasTrans, CblasNoTrans,
            nrow1, ncol,
            (nrow - nn0),   // skip computation nullified entries
            none,           // alpha,
            arg->LDLt->addrCoefBlock(k, m),   // transposed upper
            arg->LDLt->nrowBlock(k, m),
            arg->lower->addrCoefBlock(k, arg->lblock),
            arg->lower->nrowBlock(k),
            one,            // beta,
            arg->lower->addrCoefBlock(m, arg->lblock),
            arg->lower->nrowBlock(m));
}

template void C_DTRSMScale_offdiag_lower(void *arg_);
template void C_DTRSMScale_offdiag_lower(void *arg_);
template void C_DTRSMScale_offdiag_lower >(void *arg_);
template void C_DTRSMScale_offdiag_lower >(void *arg_);
template void C_DTRSMScale_offdiag_lower(void *arg_);
template void C_DTRSMScale_offdiag_lower >(void *arg_);
//

// Sequential driver that runs the four DTRSMScale kernels above over all
// blocks.
//
// arg_ is a type-erased pointer to a DTRSMScale_arg; the fields lblock, ncol,
// kblock and mblock of that structure are overwritten while iterating.
//
// Steps visible in the code:
//  1. if arg->localPermute is NOT set, the permutation of arg->LDLt is applied
//     here, once, to every column of arg->upper.  In the symmetric case the
//     blocks of arg->lower are allocated and arg->lower receives a copy of the
//     permuted arg->upper; otherwise arg->lower is permuted the same way;
//  2. for every column block l of arg->upper and every diagonal block k:
//     diag_upper, then offdiag_upper for all m > k and, only when the matrix
//     is not symmetric, diag_lower followed by offdiag_lower for all m > k.
// NOTE(review): allocateBlock() below is called with arg->lblock as it was on
// entry (the loop that assigns arg->lblock comes later) -- confirm the caller
// initializes it as intended.
template
void C_DTRSMScale_solve(void *arg_)
{
  DTRSMScale_arg *arg = (DTRSMScale_arg *)arg_;
  // should be a local array belonging to the same CPU
  bool verbose = arg->verbose;
  FILE *fp = *(arg->fp);
  const int num_block = arg->LDLt->num_blocks();
  if (!arg->localPermute) {
    VectorArray xtmp(arg->nrow);
    vector &permute = arg->LDLt->getPermute();
    diss_printf(verbose, fp,
                "%s %d : non blocked forward substituion\n",
                __FILE__, __LINE__);
    //    const int num_block = arg->LDLt->num_blocks();
    if (arg->isSym) {
      for (int k = 0; k < num_block; k++) {
        arg->lower->allocateBlock(k, arg->lblock);
      }
    }
    for (int j = 0; j < arg->ncol; j++) {
      for (int i = 0; i < arg->nrow; i++) {
        xtmp[i] = (*arg->upper)(permute[i], j);
      }
      //  blas_copy(arg->nrow, xtmp, 1, arg->upper, 1);  // block copy
      //  arg->nrow need to be composed into blocks : 22 Feb.2016
      for (int i = 0; i < arg->nrow; i++) {
        (*arg->upper)(i, j) = xtmp[i];
      }
    }
    if (arg->isSym) {
      for (int j = 0; j < arg->ncol; j++) {
        for (int i = 0; i < arg->nrow; i++) {
          (*arg->lower)(i, j) = (*arg->upper)(i, j);
        }
      }
    }
    else {
      for (int j = 0; j < arg->ncol; j++) {
        for (int i = 0; i < arg->nrow; i++) {
          xtmp[i] = (*arg->lower)(permute[i], j);
        }
        for (int i = 0; i < arg->nrow; i++) {
          (*arg->lower)(i, j) = xtmp[i];
        }
      }
    }
    xtmp.free();
  }  // if (!arg->localPermute)
  //  const int num_block = arg->LDLt->num_blocks();
  for (int l = 0; l < (*arg->upper).num_blocks_c(); l++) {
    arg->lblock = l;
    arg->ncol = (*arg->upper).ncolBlock(l);
    for (int k = 0; k < num_block; k++) {
      arg->kblock = k;
      C_DTRSMScale_diag_upper(arg_);
      for (int m = (k + 1); m < num_block; m++) {
        arg->mblock = m;
        C_DTRSMScale_offdiag_upper(arg_);
      }
      if (!arg->isSym) {
        C_DTRSMScale_diag_lower(arg_);
        for (int m = (k + 1); m < num_block; m++) {
          arg->mblock = m;
          C_DTRSMScale_offdiag_lower(arg_);
        }
      }  // if (!arg->isSym)
    }  // loop : k
  }
}

template void C_DTRSMScale_solve(void *arg_);
template void C_DTRSMScale_solve(void *arg_);
template void C_DTRSMScale_solve >(void *arg_);
template void C_DTRSMScale_solve >(void *arg_);
template void C_DTRSMScale_solve(void *arg_);
template void C_DTRSMScale_solve >(void *arg_);
//

// Task body: releases arg->lower through its free() member, but only when
// arg->isSym is set.  arg_ is a type-erased C_deallocLower_arg pointer.
template
void C_deallocLower(void *arg_)
{
  C_deallocLower_arg *arg = (C_deallocLower_arg *)arg_;
  if (arg->isSym) {
    arg->lower->free();
  }
}

template void C_deallocLower(void *arg_);
template void C_deallocLower(void *arg_);
template void C_deallocLower >(void *arg_);
template void C_deallocLower >(void *arg_);
template void C_deallocLower(void *arg_);
template void C_deallocLower >(void *arg_);
//

// Task body: releases block (arg->i_block, arg->j_block) of arg->localSchur.
// arg_ is a type-erased C_deallocLocalSchur_arg pointer.
template
void C_deallocLocalSchur(void *arg_)
{
  C_deallocLocalSchur_arg *arg = (C_deallocLocalSchur_arg *)arg_;
  arg->localSchur->free(arg->i_block, arg->j_block);
}

template void C_deallocLocalSchur(void *arg_);
template void C_deallocLocalSchur(void *arg_);
template void C_deallocLocalSchur >(void *arg_);
template void C_deallocLocalSchur  // this declaration continues on the following source line
>(void *arg_);  // completes the declaration begun on the previous source line
template void C_deallocLocalSchur(void *arg_);
template void C_deallocLocalSchur >(void *arg_);
//

// NOTE(review): the template parameter list of the function and the template
// arguments (C_SparseFw_arg, TridiagBlockMatrix, ColumnMatrix, the explicit
// instantiations) are absent in this copy -- presumably everything between
// '<' and '>' was lost in extraction.  Restore from upstream before compiling.

// Forward-substitution task for one sparse sub-domain.
//
// arg_ is a type-erased pointer to a C_SparseFw_arg.
//
// Steps visible in the code:
//  1. gather: for each of the nrhs right-hand sides, copy the n_diag entries
//     x[loc2glob_diag[i]] of the global vector into the local vector yi;
//  2. solve: apply every tridiag[i] (i < colors) to yi, with SolveSingle when
//     nrhs == 1 and SolveMulti on a ColumnMatrix view of yi otherwise;
//  3. clear zi (n_offdiag * nrhs entries);
//  4. accumulate zi[j] += coef[indVals[k]] * yi[i] over the row-pointer /
//     column-index arrays ptRows / indCols, for every right-hand side.
// indVals is arg->indVals_unsym when the transposed system is requested and
// arg->indVals otherwise.
template
void C_SparseFw(void *arg_)
{
  const T zero(0.0);
  C_SparseFw_arg *arg = (C_SparseFw_arg *)arg_;
  int dim = arg->dim;   // stride between right-hand sides in x
  const bool isTrans = **(arg->isTrans);
  const int nrhs = **(arg->nrhs);
  //  const void* diag_sparse = arg->diag_sparse;
  int colors = arg->colors;
  TridiagBlockMatrix **tridiag = arg->tridiag;
  T *x = *(arg->x);
  T *yi = *(arg->yi);
  T *zi = *(arg->zi);
  T *coef = arg->coef;   // arg->ptDA->getCoef();
  const int n_diag = arg->n_diag;
  const int n_offdiag = arg->n_offdiag;
  const int *ptRows = arg->ptRows;
  const int *indCols = arg->indCols;
  // the index table is selected at run time from the value of arg->isTrans
  const int *indVals = **(arg->isTrans) ? arg->indVals_unsym : arg->indVals;
  const int *loc2glob_diag = arg->loc2glob_diag;
  // get from the global array
  for (int m = 0; m < nrhs; m++) {
    const int mn_diag = m * n_diag;
    const int mdim = m * dim;
    for (int i = 0; i < n_diag; i++) {
      const int ii = loc2glob_diag[i];
      yi[i + mn_diag] = x[ii + mdim];
    }
  }  // loop : m
  if (nrhs == 1) {
    for (int i = 0; i < colors; i++) {
      tridiag[i]->SolveSingle(true, isTrans, yi);
    }
  }
  else {
    ColumnMatrix rhs(n_diag, nrhs, yi, false);
    for (int i = 0; i < colors; i++) {
      tridiag[i]->SolveMulti(true, isTrans, nrhs, rhs);
    }
  }
  // zero clean
  for (int i = 0; i < n_offdiag * nrhs; i++) {
    zi[i] = zero;
  }
  // transposed Block SpMV : unsymmetric indVals is given as indVals_unsym
  for (int i = 0; i < n_diag; i++) {
    for (int k = ptRows[i]; k < ptRows[i + 1]; k++) {
      const int j = indCols[k];
      T tmp = coef[indVals[k]];
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        const int mn_offdiag = m * n_offdiag;
        zi[j + mn_offdiag] += tmp * yi[i + mn_diag];
      }
    }
  }
}

template void C_SparseFw(void *arg_);
template void C_SparseFw(void *arg_);
template void C_SparseFw, double>(void *arg_);
template void C_SparseFw, quadruple>(void *arg_);
template void C_SparseFw(void *arg_);
template void C_SparseFw, float>(void  // this declaration continues on the following source line
*arg_);  // completes the declaration begun on the previous source line
//

// NOTE(review): throughout this span the template parameter lists and the
// template arguments (e.g. of the *_arg structures, TridiagBlockMatrix,
// ColumnMatrix, list and of the explicit instantiations) are absent --
// presumably everything between '<' and '>' was lost in extraction.
// Restore from upstream before compiling.

// Backward-substitution task for one sparse sub-domain d = arg->nb.
//
// arg_ is a type-erased pointer to a C_SparseBw_arg.
//
// Steps visible in the code:
//  1. clear xi (n_diag * nrhs entries);
//  2. fill zi: for every ancestor level ll < level_last, copy the strips
//     returned by btree->getFathersStrips(d)[ll] out of the ancestor's vector
//     (*yy[father_id0]) into zi, advancing offset_src by the number of indices
//     of each level;
//  3. xi[i] += coef[indVals[k]] * zi[indCols[k]] over ptRows / indCols;
//  4. apply every tridiag[i] (i < colors) to xi (SolveSingle / SolveMulti);
//  5. yi -= xi, then scatter yi into the global vector x through
//     btree->getDiagLoc2Glob(d).
template
void C_SparseBw(void *arg_)
{
  const T zero(0.0);
  C_SparseBw_arg *arg = (C_SparseBw_arg *)arg_;
  int d = arg->nb;
  int dim = arg->dim;   // stride between right-hand sides in x
  const bool isTrans = **(arg->isTrans);
  const int nrhs = **(arg->nrhs);
  Dissection::Tree *btree = arg->btree;
  const int level_last = arg->level_last;
  //  const void* diag_sparse = arg->diag_sparse;
  int colors = arg->colors;
  TridiagBlockMatrix **tridiag = arg->tridiag;
  T *x = *(arg->x);
  T *xi = *(arg->xi);
  T ***yy = arg->yy;
  T *yi = *(arg->yi);
  T *zi = *(arg->zi);
  T *coef = arg->coef;   // arg->ptDA->getCoef();
  const int *ptRows = arg->ptRows;
  const int *indCols = arg->indCols;
  const int *indVals = isTrans ? arg->indVals_unsym : arg->indVals;

  const int n_diag = btree->sizeOfDomain(d);
  const int n_offdiag = btree->sizeOfFathersStrips(d);
  for (int i = 0; i < (n_diag * nrhs); i++) {
    xi[i] = zero;
  }
  // reading data does not cause conflict between processors
  int offset_src = 0;
  for (int ll = (level_last - 1); ll >= 0; ll--) {
    const int father_id = btree->nthfatherIndex(d, (level_last - ll));
    const int father_id0 = btree->selfIndex(father_id);
    const Dissection::SetOfStrips &diag = btree->getFathersStrips(d)[ll];
    for (Dissection::SetOfStrips::const_iterator it = diag.begin();
         it != diag.end(); ++it) {
      for (int m = 0; m < nrhs; m++) {
        const int mn_offdiag = m * n_offdiag;
        const int mn_diag = m * btree->sizeOfDomain(father_id);
        int i, i0, i1;
        for (i = 0, i0 = (*it).begin_src + offset_src, i1 = (*it).begin_dst;
             i < (*it).width; i++, i0++, i1++) {
          zi[i0 + mn_offdiag] = (*yy[father_id0])[i1 + mn_diag];
        }  // loop : i
      }  // loop : m
    }  // loop : it
    offset_src += diag.numberOfIndices();
  }  // loop : ll
  // SpMV operations : y_d = A_ds y_s
  for (int i = 0; i < n_diag; i++) {
    for (int k = ptRows[i]; k < ptRows[i + 1]; k++) {
      const int jj = indCols[k];
      const T tmp = coef[indVals[k]];
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        const int mn_offdiag = m * n_offdiag;
        xi[i + mn_diag] += tmp * zi[jj + mn_offdiag];
      }
    }
  }
  //  const int idom = _btree->selfIndex(d);
  if (nrhs == 1) {
    for (int i = 0; i < colors; i++) {
      tridiag[i]->SolveSingle(true, isTrans, xi);
    }
  }
  else {
    ColumnMatrix rhs(n_diag, nrhs, xi, false);
    for (int i = 0; i < colors; i++) {
      tridiag[i]->SolveMulti(true, isTrans, nrhs, rhs);
    }
  }
  for (int i = 0; i < (n_diag * nrhs); i++) {
    yi[i] -= xi[i];   // without permutation
  }
  // write back to the global array
  const int *loc2glob_diag = btree->getDiagLoc2Glob(d);
  for (int m = 0; m < nrhs; m++) {
    const int mn_diag = m * n_diag;
    const int mdim = m * dim;
    for (int i = 0; i < n_diag; i++) {
      const int ii = loc2glob_diag[i];
      x[ii + mdim] = yi[i + mn_diag];
    }
  }  // loop : m
}

template void C_SparseBw(void *arg_);
template void C_SparseBw(void *arg_);
template void C_SparseBw, double>(void *arg_);
template void C_SparseBw, quadruple>(void *arg_);
template void C_SparseBw(void *arg_);
template void C_SparseBw, float>(void *arg_);
//

// Subtracts the contributions of child sub-domains from the local vector yi
// of a dense node.
//
// arg_ is a type-erased pointer to a C_Dsub_FwBw_arg.
//
// Steps visible in the code:
//  1. when arg->access_global is set, yi is first loaded from the global
//     vector x through arg->loc2glob_diag (n_diag entries per right-hand side);
//  2. for every entry of arg->diag_contribs whose child lies on layer
//     arg->level and has a non-empty domain, each strip of its diag_strip list
//     is applied as  yi[dst] -= (*zi[child_id0])[src],  with per-right-hand-side
//     strides father_row (destination) and child_column (source).
template
void C_Dsub_FwBw(void *arg_)
{
  C_Dsub_FwBw_arg *arg = (C_Dsub_FwBw_arg *)arg_;
  const int dim = arg->dim;
  const int nrhs = *(arg->nrhs)[0];
  const int level = arg->level;
  Dissection::Tree *btree = arg->btree;
  list* diag_contribs = arg->diag_contribs;
  T *yi = *(arg->yi);
  T ***zi = arg->zi;
  bool access_global = arg->access_global;

  if (access_global) {
    T *x = *(arg->x);
    // get from the global array
    const int n_diag = arg->n_diag;
    const int *loc2glob_diag = arg->loc2glob_diag;
    for (int m = 0; m < nrhs; m++) {
      const int mn_diag = m * n_diag;
      const int mdim = m * dim;
      for (int i = 0; i < n_diag; i++) {
        const int ii = loc2glob_diag[i];
        yi[i + mn_diag] = x[ii + mdim];
      }
    }  // loop : m
  }
  for (list::const_iterator it = diag_contribs->begin();
       it != diag_contribs->end(); ++it) {
    const int child_id = (*it).child_id;
    const int child_id0 = btree->selfIndex(child_id);
    if ((btree->nodeLayer(child_id) == level) &&
        (btree->sizeOfDomain(child_id) > 0)) {   // 08 Nov.2016 Atsushi
      for (list::const_iterator mt = (*it).diag_strip.begin();
           mt != (*it).diag_strip.end(); ++mt) {
        for (int m = 0; m < nrhs; m++) {
          const int mn_offdiag = m * (*it).child_column;
          const int mn_diag = m * (*it).father_row;
          int i, i0, i1;
          for (i = 0, i0 = (*mt).begin_src, i1 = (*mt).begin_dst;
               i < (*mt).width; i++, i0++, i1++) {
            yi[i1 + mn_diag] -= (*zi[child_id0])[i0 + mn_offdiag];
          }  // loop : i
        }  // loop : m
      }  // loop : mt
    }  // if (_btree->nodeLayer((*it).child_id) == level_last) {
  }  // loop : it
}

template void C_Dsub_FwBw(void *arg_);
template void C_Dsub_FwBw(void *arg_);
template void C_Dsub_FwBw >(void *arg_);
template void C_Dsub_FwBw >(void *arg_);
template void C_Dsub_FwBw(void *arg_);
template void C_Dsub_FwBw >(void *arg_);
//

// Fills the off-diagonal work vector zi of node d = arg->d from the vectors of
// its ancestors.
//
// arg_ is a type-erased pointer to a C_Dfill_FwBw_arg.
//
// For every ancestor level ll < arg->level the strips returned by
// btree->getFathersStrips(d)[ll] are copied from the ancestor's vector
// (*yi[father_id0]) into zi; offset_src advances by the number of indices of
// each processed level.  Per-right-hand-side strides are n_offdiag for zi and
// the ancestor's domain size for its vector.
template
void C_Dfill_FwBw(void *arg_)
{
  C_Dfill_FwBw_arg *arg = (C_Dfill_FwBw_arg *)arg_;
  const int nrhs = *(arg->nrhs)[0];
  const int d = arg->d;
  const int level = arg->level;
  Dissection::Tree *btree = arg->btree;
  const int n_offdiag = arg->n_offdiag;
  T ***yi = arg->yi;
  T *zi = *(arg->zi);

  int offset_src = 0;
  for (int ll = (level - 1); ll >= 0; ll--) {
    const int father_id = btree->nthfatherIndex(d, (level - ll));
    const int father_id0 = btree->selfIndex(father_id);
    const Dissection::SetOfStrips &diag = btree->getFathersStrips(d)[ll];
    for (Dissection::SetOfStrips::const_iterator it = diag.begin();
         it != diag.end(); ++it) {
      for (int m = 0; m < nrhs; m++) {
        const int mn_offdiag = m * n_offdiag;
        const int mn_diag = m * btree->sizeOfDomain(father_id);
        int i, i0, i1;
        for (i = 0, i0 = (*it).begin_src + offset_src, i1 = (*it).begin_dst;
             i < (*it).width; i++, i0++, i1++) {
          zi[i0 + mn_offdiag] = (*yi[father_id0])[i1 + mn_diag];
        }  // loop : i
      }  // loop : m
    }  // loop : it
    offset_src += diag.numberOfIndices();
  }  // loop : ll
}

template void C_Dfill_FwBw(void *arg_);
template void C_Dfill_FwBw(void *arg_);
template void C_Dfill_FwBw >(void *arg_);
template void C_Dfill_FwBw >(void *arg_);
template void C_Dfill_FwBw(void *arg_);
template void C_Dfill_FwBw >(void *arg_);
//

// Forward / backward substitution with one diagonal block of a dense LDLt
// factorization; the body continues on the following source lines.
template
void C_DenseFwBw_diag(void *arg_)
{
  const T one(1.0);
  const T  // this declaration continues on the following source line
zero(0.0);  // completes the declaration "const T zero" begun on the previous source line
  // ---- body of C_DenseFwBw_diag -------------------------------------------
  // NOTE(review): throughout this span template parameter lists and template
  // arguments (C_DenseFwBw_arg, SquareBlockMatrix, vector, RectBlockMatrix,
  // the explicit instantiations) are absent -- presumably everything between
  // '<' and '>' was lost in extraction.  Restore from upstream before
  // compiling.
  //
  // Steps visible in the code for diagonal block k_block (offset kk, nrow rows):
  //  1. forward only: load xi from yi through the permutation of the factor
  //     (per block when the matrix is blocked, whole vector once on the first
  //     block otherwise);
  //  2. entries listed in getSingIdx0() that lie in [kk, kk + nrow) are set to
  //     zero and counted in nn0; the triangular solve acts on nn1 = nrow - nn0;
  //  3. unit-diagonal triangular solve (blas_trsv for one right-hand side,
  //     blas_trsm otherwise); uplo / trans depend on isSym, isTrans and
  //     isBackward as spelled out below;
  //  4. forward only: keep a copy in wi and scale by the diagonal of the block
  //     (LDLt[i * (LDLt_nrow + 1)]), in the order chosen per case below;
  //  5. backward only: un-permute into yi and, on the first block, scatter the
  //     result into the global vector x through arg->loc2glob.
  C_DenseFwBw_arg *arg = (C_DenseFwBw_arg *)arg_;
  const int dim = arg->dim;   // global dimension, used when nrhs > 1
  const bool isTrans = **(arg->isTrans);
  const int nrhs = **(arg->nrhs);
  const int n_diag = arg->n_diag;
  const int nrow = arg->nrow;
  const int k_block = arg->k_block;
  T *xi = *(arg->xi);
  const bool isSym = arg->isSym;
  const bool isBackward = arg->isBackward;
  const bool isFirstBlock = arg->isFirstBlock;
  SquareBlockMatrix &Diag = *(arg->LDLt);
  const bool isBlocked = Diag.isBlocked();
  T *LDLt = Diag.addrCoefBlock(k_block, k_block);
  const int LDLt_nrow = Diag.nrowBlock(k_block, k_block);
  const int kk = Diag.IndexBlock(k_block);
  // NOTE(review): declared by value, so this may copy the container on every
  // call depending on what getSingIdx0() returns -- confirm.
  vector singIdx0 = Diag.getSingIdx0();
  // NOTE(review): verbose and fp are not used in the visible body.
  const bool verbose = arg->verbose;
  FILE *fp = *(arg->fp);

  if (isBlocked) {
    if (!isBackward) {
      T *yi = *(arg->yi);
      vector &permute = Diag.getPermute();
      // permutation : original index -> factorized index
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        for (int i = kk; i < kk + nrow; i++) {
          xi[i + mn_diag] = yi[permute[i] + mn_diag];
        }
      }
    }
  }
  else {  // if (isBlocked)
    // permute the whole RHS by the k == 0 block
    if (isFirstBlock && (isBackward == false)) {
      T *yi = *(arg->yi);
      vector &permute = Diag.getPermute();
      // permutation : original index -> factorized index
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        for (int i = 0; i < n_diag; i++) {
          xi[i + mn_diag] = yi[permute[i] + mn_diag];
        }
      }
    }
  }  // if (isBlocked)
  if (nrhs == 1) {
    int nn0 = 0;
    if (singIdx0.size() > 0) {
      for (vector::const_iterator it = singIdx0.begin();
           it != singIdx0.end(); ++it) {
        // inside of the block
        if (((*it) >= kk) && ((*it) < (kk + nrow))) {
          xi[(*it)] = zero;
          nn0++;
        }
      }  // if (d0 == 0) degmv for the regular part of singIdx0
    }  // if (singIdx.size() > 0)
    const int nn1 = nrow - nn0;   // invertible part
    if (isSym) {
      blas_trsv(CblasLower,
                (isBackward ? CblasTrans: CblasNoTrans),
                CblasUnit,
                nn1, LDLt, LDLt_nrow,   // n_diag,
                xi + kk, 1);
    }
    else {
      if (isTrans) {
        if (isBackward) {
          // Lower blocks of matrix are not scaled then scale here
          int itmp = 0;   // walks along the diagonal of the block
          for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
            xi[i] *= LDLt[itmp];
          }
        }
        blas_trsv((isBackward ? CblasLower : CblasUpper),
                  CblasTrans,
                  CblasUnit,
                  nn1, LDLt, LDLt_nrow,   // n_diag,
                  xi + kk, 1);
      }
      else {
        blas_trsv((isBackward ? CblasUpper : CblasLower),
                  CblasNoTrans,
                  CblasUnit,
                  nn1, LDLt, LDLt_nrow,   // n_diag,
                  xi + kk, 1);
      }
    }
    // if (d0 == 0) degmv for the regular part of singIdx0
    // if (singIdx.size() > 0)
    if (!isBackward) {
      T *wi = *(arg->wi);
      if (isSym) {
        int itmp = 0;
        for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
          wi[i] = xi[i];          // unscaled copy first ...
          xi[i] *= LDLt[itmp];    // ... then scaling by the diagonal entry
        }
      }
      else {
        if(isTrans) {
          for (int i = kk; i < (kk + nrow); i++) {
            wi[i] = xi[i];   // Upper blocks of matrix are scaled
          }
        }
        else {
          int itmp = 0;
          for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
            xi[i] *= LDLt[itmp];   // Lower blocks of matrix are not scaled
            wi[i] = xi[i];
          }
        }
      }
    }
  }
  else {  // if (nrhs != 1)
    int nn0 = 0;
    if (singIdx0.size() > 0) {
      for (vector::const_iterator it = singIdx0.begin();
           it != singIdx0.end(); ++it) {
        // inside of the block
        if (((*it) >= kk) && ((*it) < (kk + nrow))) {
          nn0++;
          int itmp = (*it);
          for (int m = 0; m < nrhs; m++, itmp += n_diag) {
            xi[itmp] = zero;
          }
        }
      }
    }  // if (singIdx.size() > 0)
    const int nn1 = nrow - nn0;   // invertible part
    if (isSym) {
      blas_trsm(CblasLeft,
                CblasLower,
                (isBackward ? CblasTrans : CblasNoTrans),
                CblasUnit,
                nn1, nrhs, one,   // alpha,
                LDLt, LDLt_nrow,  // n_diag,
                xi + kk, n_diag);
    }
    else {
      if (isTrans) {
        if (isBackward) {
          int itmp = 0;
          for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
            for (int m = 0; m < nrhs; m++) {
              const int mn_diag = m * n_diag;
              xi[i + mn_diag] *= LDLt[itmp];
            }
          }
        }
        blas_trsm(CblasLeft,
                  (isBackward ? CblasLower : CblasUpper),
                  CblasTrans,
                  CblasUnit,
                  nn1, nrhs, one,   // alpha,
                  LDLt, LDLt_nrow,  // n_diag,
                  xi + kk, n_diag);
      }
      else {
        blas_trsm(CblasLeft,
                  (isBackward ? CblasUpper : CblasLower),
                  CblasNoTrans,
                  CblasUnit,
                  nn1, nrhs, one,   // alpha,
                  LDLt, LDLt_nrow,  // n_diag,
                  xi + kk, n_diag);
      }
    }
    if (!isBackward) {
      T *wi = *(arg->wi);
      if(isSym) {
        int itmp = 0;
        for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
          for (int m = 0; m < nrhs; m++) {
            const int mn_diag = m * n_diag;
            wi[i + mn_diag] = xi[i + mn_diag];   // for yl[] - A_li xi[] : l>i
            xi[i + mn_diag] *= LDLt[itmp];
          }
        }
      }
      else {
        if (isTrans) {
          for (int i = kk; i < (kk + nrow); i++) {
            for (int m = 0; m < nrhs; m++) {
              const int mn_diag = m * n_diag;
              wi[i + mn_diag] = xi[i + mn_diag];   // for yl[] - A_li xi[] : l>i
            }
          }
        }
        else {
          int itmp = 0;
          for (int i = kk; i < (kk + nrow); i++, itmp += (LDLt_nrow + 1)) {
            for (int m = 0; m < nrhs; m++) {
              const int mn_diag = m * n_diag;
              xi[i + mn_diag] *= LDLt[itmp];
              wi[i + mn_diag] = xi[i + mn_diag];   // for yl[] - A_li xi[] : l>i
            }
          }
        }
      }
    }  // if (!isBackward)
  }  // if (nrhs == 1)
  if (isBlocked) {
    if (isBackward) {
      T *yi = *(arg->yi);
      vector &permute = Diag.getPermute();
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        // scatter through the permutation, then read the same index range
        // of yi back into xi
        for (int i = kk; i < kk + nrow; i++) {
          yi[permute[i] + mn_diag] = xi[i + mn_diag];
        }
        for (int i = kk; i < kk + nrow; i++) {
          xi[i + mn_diag] = yi[i + mn_diag];
        }
      }
      if(isFirstBlock) {
        int *loc2glob_diag = arg->loc2glob;
        T *x = *(arg->x);
        for (int m = 0; m < nrhs; m++) {
          const int mn_diag = m * n_diag;
          const int mdim = m * dim;
          for (int i = 0; i < n_diag; i++) {
            const int ii = loc2glob_diag[i];
            x[ii + mdim] = xi[i + mn_diag];
          }
        }  // loop : m
      }  // if(isFirstBlock)
    }  // if (isBackward)
    // forward substitution with block permutation : internally no permutation
  }
  else {
    if (isFirstBlock && isBackward) {
      T *yi = *(arg->yi);
      vector &permute = Diag.getPermute();
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        for (int i = 0; i < n_diag; i++) {
          yi[permute[i] + mn_diag] = xi[i + mn_diag];
        }
      }
      int *loc2glob_diag = arg->loc2glob;
      T *x = *(arg->x);
      for (int m = 0; m < nrhs; m++) {
        const int mn_diag = m * n_diag;
        const int mdim = m * dim;
        for (int i = 0; i < n_diag; i++) {
          const int ii = loc2glob_diag[i];
          x[ii + mdim] = yi[i + mn_diag];
        }
      }  // loop : m
    }  // if (isFirstBlock && isBackward) {
  }  // if (isBlocked)
}

template void C_DenseFwBw_diag(void *arg_);
template void C_DenseFwBw_diag(void *arg_);
template void C_DenseFwBw_diag >(void *arg_);
template void C_DenseFwBw_diag >(void *arg_);
template void C_DenseFwBw_diag(void *arg_);
template void C_DenseFwBw_diag >(void *arg_);
//

// Off-diagonal block update inside a dense LDLt factor during forward /
// backward substitution.
//
// arg_ is a type-erased pointer to a C_DenseFwBwOffdiag_arg.
//
// The destination vector is *(arg->yi) when the factor is blocked and
// *(arg->zi) otherwise.  The block of arg->LDLt that is used has its indices
// (arg->i_block, arg->j_block) swapped when arg->trans is set or when the
// transposed system is solved.  The update is one blas_gemv (nrhs == 1) or
// one blas_gemm call with the caller-supplied alpha and beta; the block is
// applied transposed when arg->isLower is set.
template
void C_DenseFwBw_offdiag(void *arg_)
{
  C_DenseFwBwOffdiag_arg *arg = (C_DenseFwBwOffdiag_arg *)arg_;
  const bool isTrans = **(arg->isTrans);
  const int nrhs = **(arg->nrhs);
  const int ldb = arg->ldb;
  const int ldc = arg->ldc;
  const int nrow = arg->nrow;
  const int ncol = arg->ncol;
  T *xi = *(arg->xi);
  T *yi;
  const int ii = arg->ii;   // offset into xi
  const int jj = arg->jj;   // offset into yi
  T *a;
  const T alpha = arg->alpha;
  const T beta = arg->beta;
  const bool trans = arg->trans;
  const bool isLower = arg->isLower;
  int i_block, j_block;
  int lda;
  // NOTE(review): read here as a commented-out switch; confirm against the
  // upstream file that DEBUG_INDEX is not meant to be defined.
  // #define DEBUG_INDEX
#ifdef DEBUG_INDEX
  fprintf(stderr,
          "%s %d : nrow = %d ncol = %d %s [%d %d] [%d %d] %d : %s %s ",
          __FILE__, __LINE__,
          nrow, ncol, isLower ? "Lower" : "Upper",
          arg->i_block, arg->j_block, ii, jj, nrhs,
          trans ? "T" : "N", isTrans ? "trans" : "normal");
#endif
  {
    if (arg->LDLt->isBlocked()) {
      yi = *(arg->yi);
    }
    else {
      yi = *(arg->zi);
    }
    if (trans) {   // only for _isSym == true
      i_block = arg->j_block;
      j_block = arg->i_block;
    }
    else {
      if (isTrans) {
        i_block = arg->j_block;
        j_block = arg->i_block;
      }
      else {
        i_block = arg->i_block;
        j_block = arg->j_block;
      }
    }
    a = arg->LDLt->addrCoefBlock(i_block, j_block);
    lda = arg->LDLt->nrowBlock(i_block, j_block);
#ifdef DEBUG_INDEX
    fprintf(stderr, "%d x %d\n",
            arg->LDLt->nrowBlock(arg->j_block, arg->i_block),
            arg->LDLt->ncolBlock(arg->j_block, arg->i_block));
#endif
    //
    if (isLower) {
      if (nrhs == 1) {
        // nrow and ncol are based on geometrical size of the block
        // data storage is transposed in the case of the lower block
        blas_gemv(CblasTrans, ncol, nrow,   // based on a
                  alpha, a, lda,
                  xi + ii, 1, beta,
                  yi + jj, 1);
      }
      else {
        blas_gemm(CblasTrans, CblasNoTrans,
                  nrow, nrhs, ncol,   // based on yi[]
                  alpha, a, lda,
                  xi + ii, ldb, beta,
                  yi + jj, ldc);
      }
    }
    else {
      if (nrhs == 1) {
        blas_gemv(CblasNoTrans, nrow, ncol,   // based on a
                  alpha, a, lda,
                  xi + ii, 1, beta,
                  yi + jj, 1);
      }
      else {
        blas_gemm(CblasNoTrans, CblasNoTrans,
                  nrow, nrhs, ncol,   // based on yi[]
                  alpha, a, lda,
                  xi + ii, ldb, beta,
                  yi + jj, ldc);
      }
    }
  }  // if (isLocalDiag)
}

template void C_DenseFwBw_offdiag(void *arg_);
template void C_DenseFwBw_offdiag(void *arg_);
template void C_DenseFwBw_offdiag >(void *arg_);
template void C_DenseFwBw_offdiag >(void *arg_);
template void C_DenseFwBw_offdiag(void *arg_);
template void C_DenseFwBw_offdiag >(void *arg_);
//

// Off-diagonal update with the rectangular strip matrices; the body continues
// on the following source lines.
template
void C_StripsFwBw_offdiag(void *arg_)
{
  C_StripsFwBwOffdiag_arg *arg = (C_StripsFwBwOffdiag_arg *)arg_;
  const bool isTrans = **(arg->isTrans);
  const int nrhs = **(arg->nrhs);
  const int ldb = arg->ldb;
  const int ldc = arg->ldc;
  const int ii = arg->ii;
  const int jj = arg->jj;
  const T alpha = arg->alpha;
  const T beta = arg->beta;
  //  const bool trans = arg->trans;
  const bool isLower = arg->isLower;
  T *xi = *(arg->xi);
  T *yi = *(arg->yi);
  RectBlockMatrix *aa =  // the initializer continues on the following source line
isTrans ? arg->lower : arg->upper;  // completes the initializer begun on the previous source line
  // ---- body of C_StripsFwBw_offdiag ---------------------------------------
  // NOTE(review): throughout this span template parameter lists and template
  // arguments (list, the *_arg structures, C_task, the explicit
  // instantiations) are absent -- presumably everything between '<' and '>'
  // was lost in extraction.  Restore from upstream before compiling.
  //
  // The strip matrix is arg->lower for the transposed system and arg->upper
  // otherwise.  With isLower set, one transposed gemv / gemm with block
  // (i_block, j_block) is applied to xi + ii.  Otherwise the code loops over
  // all column blocks j of the strip matrix and applies block (i_block, j) to
  // xi + IndexBlock_c(j); every call writes to yi + jj with the same beta.
  const int i_block = isLower ? arg->i_block : arg->j_block;
  // NOTE(review): j_block is only read in the isLower branch.
  const int j_block = isLower ? arg->j_block : arg->i_block;
  if (isLower) {
    if (nrhs == 1) {
      // nrow and ncol are based on geometrical size of the block
      // data storage is transposed in the case of the lower block
      blas_gemv(CblasTrans,
                aa->nrowBlock(i_block),   // based on matrix A
                aa->ncolBlock(j_block),   //
                alpha,
                aa->addrCoefBlock(i_block, j_block),
                aa->nrowBlock(i_block),   // lda,
                xi + ii, 1, beta,
                yi + jj, 1);
    }
    else {
      blas_gemm(CblasTrans, CblasNoTrans,
                aa->ncolBlock(j_block),   // based on yi[]
                nrhs,
                aa->nrowBlock(i_block),   //
                alpha,
                aa->addrCoefBlock(i_block, j_block),
                aa->nrowBlock(i_block),   // lda,
                xi + ii, ldb, beta,
                yi + jj, ldc);
    }
  }
  else {
    const int nblocks_col = aa->num_blocks_c();
    if (nrhs == 1) {
      for (int j = 0; j < nblocks_col; j++) {   // sequential operation.
        int jjj = aa->IndexBlock_c(j);
        blas_gemv(CblasNoTrans,   // nrow, ncol, // based on a
                  aa->nrowBlock(i_block),   // nrow,
                  aa->ncolBlock(j),         // ncol,
                  alpha,
                  aa->addrCoefBlock(i_block, j),
                  aa->nrowBlock(i_block),   // lda
                  xi + jjj, 1, beta,
                  yi + jj, 1);
      }
    }
    else {
      for (int j = 0; j < nblocks_col; j++) {   // sequential operation.
        int jjj = aa->IndexBlock_c(j);
        blas_gemm(CblasNoTrans, CblasNoTrans,
                  aa->nrowBlock(i_block),   // nrow,
                  nrhs,
                  aa->ncolBlock(j),         // ncol, // based on yi[]
                  alpha,
                  aa->addrCoefBlock(i_block, j),
                  aa->nrowBlock(i_block),   // lda,
                  xi + jjj, ldb, beta,
                  yi + jj, ldc);
      }
    }  // if (nrhs == 1)
  }  // if (isLower)
}

template void C_StripsFwBw_offdiag(void *arg_);
template void C_StripsFwBw_offdiag(void *arg_);
template void C_StripsFwBw_offdiag >(void *arg_);
template void C_StripsFwBw_offdiag >(void *arg_);
template void C_StripsFwBw_offdiag(void *arg_);
template void C_StripsFwBw_offdiag >(void *arg_);
//

// Destroys a task together with its argument object and resets the caller's
// pointer to NULL (the pointer is taken by reference).
//
// task->func_arg is stored type-erased; task->task_id selects the concrete
// argument type it is cast back to before `delete`.  For C_DSUB the argument
// is a list of C_Dsub_task pointers: every element is deleted, then the list
// itself, and in that case task->ops_complexity is deleted as well.
// NOTE(review): the switch has no default label, so for a task_id that is not
// listed func_arg is left alone while the task itself is still deleted.
template
void erase_task(C_task *& task)
{
  bool flag = false;   // set when the task owns ops_complexity (C_DSUB only)
  switch(task->task_id) {
  case C_DSUB:
    {
      list* > *tt = (list* > *)task->func_arg;
      for (typename list* >::iterator kt = tt->begin();
           kt != tt->end(); ++kt) {
        delete (*kt);   // C_Dsub_task *tmp = new C_Dsub_task( ... )
        (*kt) = NULL;
      }
      flag = true;
      //  fprintf(stderr, "%s %d : %s\n", __FILE__, __LINE__, task->task_name);
      delete tt;
      tt = NULL;
    }
    break;
  case C_SPARSESYMBFACT:
    {
      C_SparseSymbFact_arg *tt = (C_SparseSymbFact_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_SPARSENUMFACT:
    {
      C_SparseNumFact_arg *tt = (C_SparseNumFact_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_SPARSESCHUR:
    {
      C_SparseNumFact_arg *tt = (C_SparseNumFact_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_FILLMATRIX:
    {
      C_FillMatrix_arg *tt = (C_FillMatrix_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DFULL_SYM_GAUSS:
    {
      C_dfull_gauss_arg *tt = (C_dfull_gauss_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DINV_DL_TIMESU:
    {
      C_dinvDL_timesU_arg *tt = (C_dinvDL_timesU_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DHALF_SCHUR_B:
    {
      C_dupdateb_Schur_arg *tt = (C_dupdateb_Schur_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DHALF_SCHUR_BT:
    {
      C_dupdateb_Schur_arg *tt = (C_dupdateb_Schur_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DTRSMSCALE:
    {
      DTRSMScale_arg *tt = (DTRSMScale_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DGEMM_LOCAL_MULT:
    {
      DSchurGEMM_arg *tt = (DSchurGEMM_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DGEMM_LOCAL_TWO:
    {
      DSchurGEMM_arg *tt = (DSchurGEMM_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DGEMM_DIRECT_TWO:
    {
      DSchurGEMM_two_arg *tt = (DSchurGEMM_two_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DEALLOCLOWER:
    {
      C_deallocLower_arg *tt = (C_deallocLower_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DEALLOCLOCALSCHUR:
    {
      C_deallocLocalSchur_arg *tt = (C_deallocLocalSchur_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_SPARSESYMFW:
    {
      C_SparseFw_arg *tt = (C_SparseFw_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DSUB_FWBW:
    {
      C_Dsub_FwBw_arg *tt = (C_Dsub_FwBw_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DENSE_SYMFW_DIAG:
    {
      C_DenseFwBw_arg *tt = (C_DenseFwBw_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DENSE_SYMFW_OFFDIAG:
    {
      C_DenseFwBwOffdiag_arg *tt = (C_DenseFwBwOffdiag_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_STRIPS_SYMFW_OFFDIAG:
    {
      C_StripsFwBwOffdiag_arg *tt = (C_StripsFwBwOffdiag_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DENSE_SYMFILL:
    {
      C_Dfill_FwBw_arg *tt = (C_Dfill_FwBw_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_SPARSESYMBW:
    {
      C_SparseBw_arg *tt = (C_SparseBw_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  case C_DUMMY:
    {
      C_dummy_arg *tt = (C_dummy_arg *)task->func_arg;
      delete tt;
      tt = NULL;
    }
    break;
  }
  if (flag) {
    delete task->ops_complexity;   // allocated in C_Dsub_quee()
  }
  //  cout << task->task_name << endl;
  delete task;
  task = NULL;
}

template void erase_task(C_task *& task);
template void erase_task(C_task *& task);
template void erase_task, double>(C_task *& task);
template void erase_task, quadruple>(C_task *& task);
template void erase_task(C_task *& task);
template void erase_task, float>(C_task *&  // this declaration continues on the following source line
task); // template void full_gauss3(int *n0, T *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp) { bool flag; int nn0, nn1; double fop; nn1 = *n0; diss_printf(verbose, fp, "%s %d : full_sym_gauss3 is not yet implemented %d ", __FILE__, __LINE__, nn1); if (isSym) { diss_printf(verbose, fp,"ldlt_permute\n"); flag = full_ldlt_permute(&nn0, nn1, n, a, n, pivot, permute, eps, &fop); } else { diss_printf(verbose, fp,"ldu_permute\n"); flag = full_ldu_permute(&nn0, nn1, n, a, n, pivot, permute, eps, &fop); } *n0 = nn0; } template void full_gauss3(int *n0, double *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); template void full_gauss3(int *n0, quadruple *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); template void full_gauss3, double >(int *n0, complex *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); template void full_gauss3, quadruple >(int *n0, complex *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); template void full_gauss3(int *n0, float *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); template void full_gauss3, float >(int *n0, complex *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); // template void dump_vectors(int nrow, int nn0, T *v, string fname) { fprintf(stderr, "%s %d : speci1alized template is not yet defined.\n", __FILE__, __LINE__); } template<> void dump_vectors(int nrow, int nn0, double *v, string fname) { FILE *fp; if ((fp = fopen(fname.c_str(), "w")) != NULL) { for (int i = 0 ; i < nrow; i++) { fprintf(fp, "%d ", i); for (int j = 0; j < nn0; j++) { fprintf(fp, "%g ", v[i + j * nrow]); } fprintf(fp, "\n"); } 
fclose(fp); } else { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, fname.c_str()); } } template void dump_vectors >(int nrow, int nn0, complex *v, string fname); template void dump_vectors(int nrow, int nn0, quadruple *v, string fname); template void dump_vectors >(int nrow, int nn0, complex *v, string fname); template void dump_vectors(int nrow, int nn0, float *v, string fname); template void dump_vectors >(int nrow, int nn0, complex *v, string fname); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/C_threads_tasks.hpp000664 000000 000000 00000153271 14037356732 026133 0ustar00rootroot000000 000000 /*! \file C_threads_tasks.hpp \brief tasks executed asynchronously with threads \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Apr. 22th 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _C_THREADS_TASKS_ #define _C_THREADS_TASKS_ #include #include # include "Compiler/blas.hpp" # include "Compiler/OptionCompiler.hpp" #include #include #include #include #include #include "Compiler/elapsed_time.hpp" # include "Algebra/SquareMatrix.hpp" # include "Algebra/SquareBlockMatrix.hpp" # include "Algebra/RectBlockMatrix.hpp" # include "Algebra/ColumnMatrix.hpp" # include "Splitters/BisectionTree.hpp" # include "Algebra/CSR_matrix.hpp" # include "Driver/TridiagBlockMatrix.hpp" using namespace std; // set manually the same value defined in Moudels/blak_blocksize.f90 #define SIZE_B1 240 #define SIZE_DGEMM_SYMM_DTRSV 40 //#define SIZE_B1 600 //#define SIZE_DGEMM_SYMM_DTRSV 100 #define C_DHALF_SCHUR_BLOCK_GEMM 1.0 #define C_DHALF_SCHUR_BLOCK_GEMV 2.0 #define FW_SOLVESCLAE_DTRSM 3.0 #define BLAS3_GEMM_OPT 0.5 // #define TASK_NAME_SIZE 256 const string _null_name; struct source_dist_index { int source; int dist; int global_i; int global_j; ~source_dist_index() {} source_dist_index() {} source_dist_index(int source_, int dist_, int global_i_, int global_j_) : source(source_), dist(dist_), global_i(global_i_), global_j(global_j_) {} source_dist_index(const source_dist_index &im) { // if (&im != this) { : no need to be declared source = im.source; dist = im.dist; global_i = im.global_i; global_j = im.global_j; // } } }; template struct C_Dsub_task { int ir_bgn; int ir_end; int jc_bgn; int jc_end; int ir_bgn_src; int ir_bgn_src2; int jc_bgn_src; int jc_bgn_src2; int dst_row; SquareBlockMatrix *dst_mtrx; RectBlockMatrix *dst_pt; int ir_block; int jc_block; RectBlockMatrix *dst_pt2; int ir2_block; int jc2_block; SquareBlockMatrix *src_pt; SquareBlockMatrix *src_pt2; void (*func)(C_Dsub_task *); bool isSkip; int atomic_size; int atomic_id; int parallel_max; int parallel_id; long *ops_complexity; int father_id; int child0_id; int child1_id; int level; bool verbose; bool debug; int child_id; FILE *fp; ~C_Dsub_task() { delete ops_complexity; } C_Dsub_task() {} 
C_Dsub_task(int atomic_size_, int atomic_id_, int ir_bgn_, int ir_end_, int jc_bgn_, int jc_end_, int ir_bgn_src_, int ir_bgn_src2_, int jc_bgn_src_, int jc_bgn_src2_, int dst_row_, SquareBlockMatrix* dst_mtrx_, RectBlockMatrix *dst_pt_, int ir_block_, int jc_block_, RectBlockMatrix *dst_pt2_, int ir2_block_, int jc2_block_, SquareBlockMatrix *src_pt_, SquareBlockMatrix *src_pt2_, void (*func_)(C_Dsub_task *), bool isSkip_, long ops_complexity_, int father_id_, int level_, bool verbose_, FILE *fp_) : ir_bgn(ir_bgn_), ir_end(ir_end_), jc_bgn(jc_bgn_), jc_end(jc_end_), ir_bgn_src(ir_bgn_src_), ir_bgn_src2(ir_bgn_src2_), jc_bgn_src(jc_bgn_src_), jc_bgn_src2(jc_bgn_src2_), dst_row(dst_row_), dst_mtrx(dst_mtrx_), dst_pt(dst_pt_), ir_block(ir_block_), jc_block(jc_block_), dst_pt2(dst_pt2_), ir2_block(ir2_block_), jc2_block(jc2_block_), src_pt(src_pt_), src_pt2(src_pt2_), func(func_), isSkip(isSkip_), atomic_size(atomic_size_), atomic_id(atomic_id_), father_id(father_id_), level(level_), verbose(verbose_), fp(fp_) { debug = false; child_id = 0; ops_complexity = new long; *ops_complexity = ops_complexity_; } C_Dsub_task(const C_Dsub_task &im) { atomic_size = im.atomic_size; atomic_id = im.atomic_id; ir_bgn = im.ir_bgn; ir_end = im.ir_end; jc_bgn = im.jc_bgn; jc_end = im.jc_end; ir_bgn_src = im.ir_bgn_src; ir_bgn_src2 = im.ir_bgn_src2; jc_bgn_src = im.jc_bgn_src; jc_bgn_src2 = im.jc_bgn_src2; dst_row = im.dst_row; dst_mtrx = im.dst_mtrx; dst_pt = im.dst_pt; ir_block = im.ir_block; jc_block = im.jc_block; dst_pt2 = im.dst_pt2; ir2_block = im.ir2_block; jc2_block = im.jc2_block; src_pt = im.src_pt; src_pt2 = im.src_pt2; func = im.func; isSkip = im.isSkip; father_id = im.father_id; level = im.level; verbose = im.verbose; debug = im.debug; child_id = im.child_id; fp = im.fp; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); // copy value } }; typedef struct { double pivot; double w; double eps; double delta; } pivot_param; #define DIST_TASK_CRITICAL 20 #define 
TASK_WAITING 0 #define TASK_WORKING 1 #define TASK_DONE 2 #define TASK_SINGLE 8 #define TASK_PARALLEL 9 #define C_DUMMY 1 #define C_DIAG_START 2 #define C_DFULL 4 #define C_DFULL_SYM_GAUSS 5 #define C_DINV_DL_TIMESU 6 #define C_DHALF_SCHUR_B 70 #define C_DHALF_SCHUR_BT 71 #define C_DTRSM 8 #define C_DTRSM1 9 #define C_DTRSMSCALE 10 #define C_DGEMM 16 #define C_DGEMM1 17 #define C_DGEMM_LOCAL_MULT 18 #define C_DGEMM_LOCAL_TWO 19 #define C_DGEMM_DIRECT_TWO 20 #define C_DSUB 32 #define C_DSUB1 33 #define C_DEALLOCATE 34 #define C_DEALLOCATE1 35 #define C_DEALLOCLOCALSCHUR 36 #define C_DEALLOCLOWER 40 #define C_FILLMATRIX 64 #define C_FILLMATRIX1 65 #define C_SPARSESYMBFACT 128 #define C_SPARSENUMFACT 256 #define C_SPARSESCHUR 257 #define C_SPARSELOCALSCHUR 512 #define C_SPARSELOCALSCHUR1 513 #define C_SPARSESOLVER 384 #define C_FWBW 1024 #define C_SPARSESYMFW 1025 #define C_SPARSESYMBW 1026 #define C_DSUB_FWBW 1027 #define C_DENSE_SYMFW_DIAG 1028 #define C_DENSE_SYMFW_OFFDIAG 1029 #define C_STRIPS_SYMFW_OFFDIAG 1030 #define C_DENSE_SYMSCALE 1031 #define C_DENSE_SYMFILL 1032 template struct C_SparseSymbFact_arg { TridiagBlockMatrix **tridiag; int nrow; int colors; int *color_mask; const CSR_indirect* csr_diag; long *ops_complexity; long *nopd; long *nops; bool verbose; FILE **fp; ~C_SparseSymbFact_arg() { delete ops_complexity; delete nopd; delete nops; } C_SparseSymbFact_arg() {} C_SparseSymbFact_arg (TridiagBlockMatrix **tridiag_, int colors_, int *color_mask_, int nrow_, const CSR_indirect* csr_diag_, bool verbose_, FILE **fp_ ) : tridiag(tridiag_), nrow(nrow_), colors(colors_), color_mask(color_mask_), csr_diag(csr_diag_), verbose(verbose_), fp(fp_) { ops_complexity = new long; nopd = new long; nops = new long; } C_SparseSymbFact_arg(const C_SparseSymbFact_arg &im) { tridiag = im.tridiag; colors = im.colors; color_mask = im.color_mask; nrow = im.nrow; csr_diag = im.csr_diag; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); nopd = new long; *nopd = 
*(im.nopd); nops = new long; *nops = *(im.nops); verbose = im.verbose; fp = im.fp; } }; template struct C_SparseNumFact_arg { TridiagBlockMatrix **tridiag; bool isSym; int colors; int *color_mask; int nnz; T *coefs; int nrow; int ncol; const CSR_indirect* csr_diag; const CSR_indirect* csr_offdiag; SquareBlockMatrix* D; // to store pivot information double *eps_pivot; double *pivot; bool *kernel_detection; bool *higher_precision; int *dim_aug_kern; U *eps_machine; SquareBlockMatrix* localSchur; long *ops_complexity; long *nopd; long *nops; bool verbose; elapsed_t *tt; FILE **fp; int nb; ~C_SparseNumFact_arg() { delete ops_complexity; delete nopd; delete nops; delete [] tt; } C_SparseNumFact_arg() {} C_SparseNumFact_arg (TridiagBlockMatrix **tridiag_, bool isSym_, int colors_, int *color_mask_, int nnz_, T *coefs_, int nrow_, int ncol_, const CSR_indirect* csr_diag_, const CSR_indirect* csr_offdiag_, SquareBlockMatrix* D_, double *eps_pivot_, double *pivot_, bool *kernel_detection_, bool *higher_precision_, int *dim_aug_kern_, U *eps_machine_, SquareBlockMatrix* localSchur_, bool verbose_, FILE **fp_, int nb_) : tridiag(tridiag_), isSym(isSym_), colors(colors_), color_mask(color_mask_), nnz(nnz_), coefs(coefs_), nrow(nrow_), ncol(ncol_), csr_diag(csr_diag_), csr_offdiag(csr_offdiag_), D(D_), eps_pivot(eps_pivot_), pivot(pivot_), kernel_detection(kernel_detection_), higher_precision(higher_precision_), dim_aug_kern(dim_aug_kern_), eps_machine(eps_machine_), localSchur(localSchur_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; nopd = new long; nops = new long; tt = new elapsed_t[5]; } C_SparseNumFact_arg(const C_SparseNumFact_arg &im) { tridiag = im.tridiag; isSym = im.isSym; colors = im.colors; color_mask = im.color_mask; nnz = im.nnz; coefs = im.coefs; nrow = im.nrow; ncol = im.ncol; csr_diag = im.csr_diag; csr_offdiag = im.csr_offdiag; D = im.D; eps_pivot = im.eps_pivot; pivot = im.pivot; kernel_detection = im.kernel_detection; higher_precision = 
im.higher_precision; dim_aug_kern = im.dim_aug_kern; eps_machine = im.eps_machine; localSchur = im.localSchur; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); nopd = new long; *nopd = *(im.nopd); nops = new long; *nops = *(im.nops); verbose = im.verbose; tt = im.tt; fp = im.fp; nb = im.nb; } }; template struct C_FillMatrix_arg { bool isSym; SquareBlockMatrix* D; RectBlockMatrix* upper; RectBlockMatrix* lower; const CSR_indirect *csr_diag; const CSR_indirect *csr_offdiag; T *coefs; long *ops_complexity; bool verbose; FILE **fp; int nb; ~C_FillMatrix_arg() { delete ops_complexity; } C_FillMatrix_arg() {} C_FillMatrix_arg (bool isSym_, SquareBlockMatrix* D_, RectBlockMatrix* upper_, RectBlockMatrix* lower_, const CSR_indirect *csr_diag_, const CSR_indirect *csr_offdiag_, T *coefs_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), D(D_), upper(upper_), lower(lower_), csr_diag(csr_diag_), csr_offdiag(csr_offdiag_), coefs(coefs_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } C_FillMatrix_arg(const C_FillMatrix_arg &im) { isSym = im.isSym; D = im.D; upper = im.upper; lower = im.lower; csr_diag = im.csr_diag; csr_offdiag = im.csr_offdiag; coefs =im.coefs; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); verbose = im.verbose; fp = im.fp; nb = im.nb; } }; template struct C_dfull_gauss_arg { int task_position; int id_level; int num_block; int id_block; SquareBlockMatrix* D; SquareBlockMatrix* localSchur; ColumnMatrix *a; // ColumnMatrix *diag; // T **diag; int *permute_block; int n; int upper_ncol; int nrow; int i1_block; double *eps_piv; double *pivot; double *pivot0; double *pivot1; bool isSym; long *ops_complexity; bool *kernel_detection; int *aug_dim; U *eps_machine; bool *quit; int to_next_task; bool verbose; FILE **fp; int nb; ~C_dfull_gauss_arg() { delete ops_complexity; } C_dfull_gauss_arg() {} C_dfull_gauss_arg(bool isSym_, int task_position_, int id_level_, int num_block_, int id_block_, 
SquareBlockMatrix* D_, SquareBlockMatrix* localSchur_, ColumnMatrix *a_, //T **a_, // ColumnMatrix *diag_, //T **diag_, int *permute_block_, int n_, int upper_ncol_, int nrow_, int i1_block_, double *eps_piv_, double *pivot_, double *pivot0_, double *pivot1_, bool *kernel_detection_, int *aug_dim_, U *eps_machine_, bool verbose_, FILE **fp_, int nb_) : task_position(task_position_), id_level(id_level_), num_block(num_block_), id_block(id_block_), D(D_), // lower(lower_), localSchur(localSchur_), a(a_), // diag(diag_), permute_block(permute_block_), n(n_), upper_ncol(upper_ncol_), nrow(nrow_), i1_block(i1_block_), eps_piv(eps_piv_), pivot(pivot_), pivot0(pivot0_), pivot1(pivot1_), isSym(isSym_), kernel_detection(kernel_detection_), aug_dim(aug_dim_), eps_machine(eps_machine_), to_next_task(0), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } C_dfull_gauss_arg(const C_dfull_gauss_arg &im) { isSym = im.isSym; task_position = im.task_position; id_level = im.id_level; num_block = im.num_block; id_block = im.id_block; D = im.D; localSchur = im.localSchur; a = im.a; // diag = im.diag; permute_block = im.permute_block; n = im.n; upper_ncol = im.upper_ncol; nrow = im.nrow; i1_block = im.i1_block; eps_piv = im.eps_piv; pivot = im.pivot; pivot0 = im.pivot0; pivot1 = im.pivot1; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); kernel_detection = im.kernel_detection; aug_dim = im.aug_dim; eps_machine = im.eps_machine; fp = im.fp; nb = im.nb; quit = im.quit; to_next_task = im.to_next_task; } }; template struct C_dinvDL_timesU_arg{ bool isSym; int task_position; int id_level; int num_block; int id_block; SquareBlockMatrix* D; ColumnMatrix* a; // T **a; int n; int nrow; int ncol; int i1_block; int jj_block; long *ops_complexity; bool verbose; FILE **fp; int nb; ~C_dinvDL_timesU_arg() { delete ops_complexity; } C_dinvDL_timesU_arg() {} C_dinvDL_timesU_arg(bool isSym_, int task_position_, int id_level_, int num_block_, int id_block_, 
SquareBlockMatrix* D_, ColumnMatrix *a_, int n_, int nrow_, int ncol_, int i1_block_, int jj_block_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), task_position(task_position_), id_level(id_level_), num_block(num_block_), id_block(id_block_), D(D_), a(a_), n(n_), nrow(nrow_), ncol(ncol_), i1_block(i1_block_), jj_block(jj_block_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } C_dinvDL_timesU_arg(const C_dinvDL_timesU_arg &im) { isSym = im.isSym; task_position = im.task_position; id_level = im.id_level; num_block = im.num_block; id_block = im.id_block; D = im.D; a = im.a; n = im.n; nrow = im.nrow; ncol = im.ncol; i1_block = im.i1_block; jj_block = im.jj_block; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); verbose = im.verbose; fp = im.fp; nb = im.nb; } }; template struct C_dupdateb_Schur_arg{ bool isSym; int task_position; int id_level; int num_block; int id_block; SquareBlockMatrix* D; ColumnMatrix *a; // T **a; int n; int nrow; int ncol; int b_size; int i1_block; int ii_block; int jj_block; long *ops_complexity; bool verbose; FILE **fp; int nb; ~C_dupdateb_Schur_arg() { delete ops_complexity; } C_dupdateb_Schur_arg() {} C_dupdateb_Schur_arg(int isSym_, int task_position_, int id_level_, int num_block_, int id_block_, SquareBlockMatrix* D_, ColumnMatrix *a_, // T **a_, int n_, int nrow_, int ncol_, int b_size_, int i1_block_, int ii_block_, int jj_block_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), task_position(task_position_), id_level(id_level_), num_block(num_block_), id_block(id_block_), D(D_), a(a_), n(n_), nrow(nrow_), ncol(ncol_), b_size(b_size_), i1_block(i1_block_), ii_block(ii_block_), jj_block(jj_block_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } C_dupdateb_Schur_arg(const C_dupdateb_Schur_arg &im) { isSym = im.isSym; task_position = im.task_position; id_level = im.id_level; num_block = im.num_block; id_block = im.id_block; D = im.D; a = im.a; n = im.n; nrow = im.nrow; 
ncol = im.ncol; b_size = im.b_size; i1_block = im.i1_block; ii_block = im.ii_block; jj_block = im.jj_block; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); verbose = im.verbose; fp = im.fp; nb = im.nb; } }; template struct DTRSMScale_arg { SquareBlockMatrix* LDLt; RectBlockMatrix* upper; // double *x; RectBlockMatrix* lower; // double *z; int nrow; int ncol; int kblock; int lblock; int mblock; vector* singLocNodes0; bool localPermute; bool isSym; long *ops_complexity; bool verbose; FILE **fp; int nb; bool debug; ~DTRSMScale_arg() { delete ops_complexity; } DTRSMScale_arg() {} DTRSMScale_arg(bool isSym_, SquareBlockMatrix *LDLt_, RectBlockMatrix* upper_, // double *x_; RectBlockMatrix* lower_, // double *z_; int nrow_, int ncol_, int kblock_, int lblock_, int mblock_, vector* singLocNodes0_, bool localPermute_, bool verbose_, FILE **fp_, int nb_) : LDLt(LDLt_), upper(upper_), lower(lower_), nrow(nrow_), ncol(ncol_), kblock(kblock_), lblock(lblock_), mblock(mblock_), singLocNodes0(singLocNodes0_), localPermute(localPermute_), isSym(isSym_), verbose(verbose_), fp(fp_), nb(nb_) { debug = false; ops_complexity = new long; } DTRSMScale_arg(const DTRSMScale_arg &im) { LDLt = im.LDLt; upper = im.upper; lower = im.lower; // offset = im.offset; nrow = im.nrow; ncol = im.ncol; kblock = im.kblock; lblock = im.lblock; mblock = im.mblock; singLocNodes0 = im.singLocNodes0; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); localPermute = im.localPermute; isSym = im.isSym; verbose = im.verbose; fp = im.fp; nb = im.nb; debug = im.debug; } }; template struct DSchurGEMM_arg { bool isSym; bool isTrans; RectBlockMatrix* lower; RectBlockMatrix* upper; int nrow; int i_block; // row block index int j_block; // column block index SquareBlockMatrix *localSchur; long *ops_complexity; bool verbose; FILE **fp; int nb; ~DSchurGEMM_arg() { delete ops_complexity; } DSchurGEMM_arg() {} DSchurGEMM_arg(bool isSym_, bool isTrans_, RectBlockMatrix* lower_, 
RectBlockMatrix* upper_, int nrow_, int i_block_, int j_block_, SquareBlockMatrix *localSchur_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), isTrans(isTrans_), lower(lower_), upper(upper_), nrow(nrow_), i_block(i_block_), j_block(j_block_), localSchur(localSchur_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } DSchurGEMM_arg(const DSchurGEMM_arg &im) { isSym = im.isSym; isTrans = im.isTrans; lower = im.lower; upper = im.upper; nrow = im.nrow; i_block = im.i_block; j_block = im.j_block; localSchur = im.localSchur; *ops_complexity = *(im.ops_complexity); verbose = im.verbose; fp = im.fp; nb = im.nb; } }; template struct DSchurGEMM_two_arg { bool isSym; bool isTrans; RectBlockMatrix* lower0; RectBlockMatrix* upper0; int nrow0; RectBlockMatrix* lower1; RectBlockMatrix* upper1; int nrow1; int i_block; // row block index int j_block; // column block index SquareBlockMatrix *localSchur; long *ops_complexity; bool isSkip; bool verbose; FILE **fp; int nb; ~DSchurGEMM_two_arg() { delete ops_complexity; } DSchurGEMM_two_arg() {} DSchurGEMM_two_arg(bool isSym_, bool isTrans_, RectBlockMatrix* lower0_, RectBlockMatrix* upper0_, int nrow0_, RectBlockMatrix* lower1_, RectBlockMatrix* upper1_, int nrow1_, int i_block_, int j_block_, SquareBlockMatrix *localSchur_, bool isSkip_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), isTrans(isTrans_), lower0(lower0_), upper0(upper0_), nrow0(nrow0_), lower1(lower1_), upper1(upper1_), nrow1(nrow1_), i_block(i_block_), j_block(j_block_), localSchur(localSchur_), isSkip(isSkip_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } DSchurGEMM_two_arg(const DSchurGEMM_two_arg &im) { isSym = im.isSym; isTrans = im.isTrans; lower0 = im.lower0; upper0 = im.upper0; nrow0 = im.nrow0; lower1 = im.lower1; upper1 = im.upper1; nrow1 = im.nrow1; i_block = im.i_block; j_block = im.j_block; localSchur = im.localSchur; *ops_complexity = *(im.ops_complexity); isSkip = im.isSkip; verbose = im.verbose; fp = 
im.fp; nb = im.nb; } }; template struct C_deallocLower_arg { bool isSym; RectBlockMatrix* lower; long *ops_complexity; ~C_deallocLower_arg() { delete ops_complexity; } C_deallocLower_arg() {} C_deallocLower_arg(bool isSym_, RectBlockMatrix* lower_, long ops_complexity_) : isSym(isSym_), lower(lower_) { ops_complexity = new long; *ops_complexity = ops_complexity_; } C_deallocLower_arg(const C_deallocLower_arg &im) { isSym = im.isSym; lower = im.lower; ops_complexity = new long; *ops_complexity = *(im.ops_complexity); } }; template struct C_deallocLocalSchur_arg { bool isSym; SquareBlockMatrix* localSchur; int i_block; int j_block; long *ops_complexity; bool verbose; FILE **fp; int nb; ~C_deallocLocalSchur_arg() { delete ops_complexity; } C_deallocLocalSchur_arg() {} C_deallocLocalSchur_arg(bool isSym_, SquareBlockMatrix *localSchur_, int i_block_, int j_block_, bool verbose_, FILE **fp_, int nb_) : isSym(isSym_), localSchur(localSchur_), i_block(i_block_), j_block(j_block_), verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; } C_deallocLocalSchur_arg(const C_deallocLocalSchur_arg &im) { isSym = im.isSym; i_block = im.i_block; j_block = im.j_block; localSchur = im.localSchur; ops_complexity = im.ops_complexity; verbose = im.verbose; fp = im.fp; nb = im.nb; } }; struct C_dummy_arg { long *ops_complexity; bool verbose; FILE **fp; int nb; ~C_dummy_arg() { delete ops_complexity; } C_dummy_arg() {} C_dummy_arg(bool verbose_, FILE **fp_, int nb_) : verbose(verbose_), fp(fp_), nb(nb_) { ops_complexity = new long; *ops_complexity = (-1L); } C_dummy_arg(const C_dummy_arg & im) { ops_complexity = im.ops_complexity; fp = im.fp; nb = im.nb; } }; template struct C_SparseFw_arg { int colors; int nb; bool isSym; int dim; bool **isTrans; int **nrhs; // void* diag_sparse; TridiagBlockMatrix **tridiag; T **x; T **yi; T **zi; T *coef; // SparseMatrix* ptDA; // int n_diag; int n_offdiag; int *ptRows; int *indCols; int *indVals; int *indVals_unsym; int *loc2glob_diag; long 
*ops_complexity; ~C_SparseFw_arg() { delete ops_complexity; } C_SparseFw_arg() {} C_SparseFw_arg(int colors_, int nb_, bool isSym_, int dim_, bool **isTrans_, int **nrhs_, TridiagBlockMatrix **tridiag_, T **x_, T **yi_, T **zi_, T *coef_, // SparseMatrix* ptDA_, // int n_diag_, int n_offdiag_, int *ptRows_, int *indCols_, int *indVals_, int *indVals_unsym_, int *loc2glob_diag_) : colors(colors_), nb(nb_), isSym(isSym_), dim(dim_), isTrans(isTrans_), nrhs(nrhs_), tridiag(tridiag_), x(x_), yi(yi_), zi(zi_), coef(coef_), // ptDA(ptDA_), // n_diag(n_diag_), n_offdiag(n_offdiag_), ptRows(ptRows_), indCols(indCols_), indVals(indVals_), indVals_unsym(indVals_unsym_), loc2glob_diag(loc2glob_diag_) { ops_complexity = new long; } C_SparseFw_arg(const C_SparseFw_arg &im) { colors = im.colors; nb = im.nb; isSym = im.isSym; dim = im.dim; isTrans = im.isTrans; nrhs = im.nrhs; tridiag = im.tridiag; x = im.x; yi = im.yi; zi = im.zi; coef = im.coef; // ptDA = im.ptDA; // n_diag = im.n_diag; n_offdiag = im.n_offdiag; ptRows = im.ptRows; indCols = im.indCols; indVals = im.indVals; indVals_unsym = im.indVals_unsym; loc2glob_diag = im.loc2glob_diag; } }; template struct C_SparseBw_arg { int colors; int nb; bool isSym; // not used, for debugging int dim; bool **isTrans; int **nrhs; Dissection::Tree *btree; int level_last; TridiagBlockMatrix **tridiag; T **x; T ***yy; T **xi; T **yi; T **zi; T *coef; // SparseMatrix* ptDA; // int *ptRows; int *indCols; int *indVals; int *indVals_unsym; long *ops_complexity; ~C_SparseBw_arg() { delete ops_complexity; } C_SparseBw_arg() {} C_SparseBw_arg(int colors_, int nb_, bool isSym_, int dim_, bool **isTrans_, int **nrhs_, Dissection::Tree *btree_, int level_last_, TridiagBlockMatrix **tridiag_, T **x_, T ***yy_, T **xi_, T **yi_, T **zi_, T *coef_, // SparseMatrix* ptDA_, // int *ptRows_, int *indCols_, int *indVals_, int *indVals_unsym_) : colors(colors_), nb(nb_), isSym(isSym_), dim(dim_), isTrans(isTrans_), nrhs(nrhs_), btree(btree_), 
level_last(level_last_), // diag_sparse(diag_sparse_), tridiag(tridiag_), x(x_), yy(yy_), xi(xi_), yi(yi_), zi(zi_), coef(coef_), // ptDA(ptDA_), // ptRows(ptRows_), indCols(indCols_), indVals(indVals_), indVals_unsym(indVals_unsym_) { ops_complexity = new long; } C_SparseBw_arg(const C_SparseBw_arg &im) { colors = im.colors; nb = im.nb; isSym = im.isSym; dim = im.dim; isTrans = im.isTrans; nrhs = im.nrhs; btree = im.btree; level_last = im.level_last; tridiag = im.tridiag; x = im.x; yy = im.yy; xi = im.xi; yi = im.yi; zi = im.zi; coef = im.coef; // ptDA = im.ptDA; // ptRows = im.ptRows; indCols = im.indCols; indVals = im.indVals; indVals_unsym = im.indVals_unsym; } }; struct C_task { int task_id; int referred; char *task_name; // is allocated by char [] when structure is constructed bool task_name_allocated; void *func_arg; void (*func)(void *); int atomic_size; int atomic_id; unsigned char status; list* parents; list* parents_work; int parallel_max; int parallel_id; long *ops_complexity; // value is assigned in func_arg->ops_complexity long flop; elapsed_t t0, t1; int thread_id; int broadcast_deadlock; bool quit_queue; int to_next_task; bool verbose; FILE ***fp; ~C_task() { if (task_name_allocated) { delete [] task_name; } delete parents; delete parents_work; } C_task() {} C_task(int task_id_, string task_name_, void *func_arg_, void (*func_)(void *), int atomic_size_, int atomic_id_, long *ops_complexity_ ) : task_id(task_id_), referred(0), func_arg(func_arg_), func(func_), atomic_size(atomic_size_), atomic_id(atomic_id_), status(TASK_WAITING), parallel_max(1), parallel_id(0), ops_complexity(ops_complexity_), broadcast_deadlock(0), quit_queue(false), to_next_task(0) { if (!task_name_.empty()) { task_name = new char[task_name_.length() + 1]; strcpy(task_name, task_name_.c_str()); task_name_allocated = true; } else { task_name_allocated = false; } parents = new list; parents_work = new list; get_realtime(&t0); // reset time as created one COPYTIME(t1, t0); 
thread_id = (-1); // not executed yet } C_task(const C_task &im) { task_id = im.task_id; referred = im.referred; if (im.task_name_allocated) { task_name = new char[strlen(im.task_name) + 1]; strcpy(task_name, im.task_name); task_name_allocated = true; } else { task_name_allocated = false; } func_arg = im.func_arg; func = im.func; atomic_size = im.atomic_size; atomic_id = im.atomic_id; status = im.status; parents = im.parents; parents_work = im.parents_work; parallel_max = im.parallel_max; parallel_id = im.parallel_id; ops_complexity = im.ops_complexity; // copy pointer // nops = im.nops; flop = im.flop; t0 = im.t0; t1 = im.t1; thread_id = im.thread_id; quit_queue = im.quit_queue; to_next_task = im.to_next_task; verbose = im.verbose; fp = im.fp; } }; struct C_task_seq { int task_id; int referred; char *task_name; // is allocated by char [] when structure is constructed bool task_name_allocated; int mutex_id; int parallel_single; // TASK_PARALLEL/TASK_SINGLE int num_threads; // how to describe some (DFullLDLt)s which are shared // assignment of processor group for DFullLDLt is done by statically // 5 processors is assigned to 2 + 1 + 1 + 1 / 3 + 2 / 5 // with concerning task size int level; int phase; vector *queue; int begin; int end; unsigned char status; int pos; long ops_complexity; ~C_task_seq() { if (task_name_allocated) { delete [] task_name; } } C_task_seq() {} C_task_seq(int task_id_, string task_name_, int mutex_id_, int parallel_single_, int num_threads_, int level_, int phase_, vector *queue_, int begin_, int end_, long ops_complexity_) : task_id(task_id_), referred(0), // task_name(task_name_), mutex_id(mutex_id_), parallel_single(parallel_single_), num_threads(num_threads_), level(level_), phase(phase_), queue(queue_), begin(begin_), end(end_), status(TASK_WAITING), pos(begin_), ops_complexity(ops_complexity_) { if (!task_name_.empty()) { task_name = new char[task_name_.length() + 1]; strcpy(task_name, task_name_.c_str()); task_name_allocated = true; } 
else { task_name_allocated = false; } } ; C_task_seq(const C_task_seq &im) { task_id = im.task_id; referred = im.referred; if (im.task_name_allocated) { task_name = new char[strlen(im.task_name) + 1]; strcpy(task_name, im.task_name); task_name_allocated = true; } else { task_name_allocated = false; } mutex_id = im.mutex_id; parallel_single = im.parallel_single; num_threads = im.num_threads; level = im.level; phase = im.phase; queue = im.queue; // copy pointer begin = im.begin; end = im.end; status = im.status; pos = im.pos; ops_complexity = im.ops_complexity; } }; bool C_task_seq_complexity_smaller(C_task_seq *a, C_task_seq *b); bool C_task_seq_complexity_greater(C_task_seq *a, C_task_seq *b); bool C_task_seq_beginidx_smaller(C_task_seq *a, C_task_seq *b); struct index_strip { int begin_dst; int begin_src; int width; ~index_strip() {} index_strip() {} index_strip(int begin_dst_, int begin_src_, int width_) : begin_dst(begin_dst_), begin_src(begin_src_), width(width_) {} index_strip(const index_strip &im) { begin_dst = im.begin_dst; begin_src = im.begin_src; width = im.width; } }; struct index_strip2 { int begin_dst; int begin_src0; int begin_src1; int width; ~index_strip2() {} index_strip2() {} index_strip2(int begin_dst_, int begin_src0_, int begin_src1_, int width_) : begin_dst(begin_dst_), begin_src0(begin_src0_), begin_src1(begin_src1_), width(width_) {} index_strip2(const index_strip2 &im) { begin_dst = im.begin_dst; begin_src0 = im.begin_src0; begin_src1 = im.begin_src1; width = im.width; } }; template struct child_contribution { int child_id; int diag_size; int offdiag_size; list diag_strip; list offdiag_strip; int father_row; SquareBlockMatrix *father_diag_pt; RectBlockMatrix *father_offdiag_pt; RectBlockMatrix *father_offdiag_unsym_pt; int child_row; SquareBlockMatrix* child_pt; // double **child_pt; ~child_contribution() {} child_contribution() {} child_contribution(int child_id_, int diag_size_, int offdiag_size_, list diag_strip_, list offdiag_strip_, 
SquareBlockMatrix *father_diag_pt_, RectBlockMatrix *father_offdiag_pt_, RectBlockMatrix *father_offdiag_unsym_pt_, int child_row_, SquareBlockMatrix* child_pt_ ) : child_id(child_id_), diag_size(diag_size_), offdiag_size(offdiag_size_), diag_strip(diag_strip_), offdiag_strip(offdiag_strip_), father_row(diag_size_), // father_row(father_row_), father_diag_pt(father_diag_pt_), father_offdiag_pt(father_offdiag_pt_), father_offdiag_unsym_pt(father_offdiag_unsym_pt_), child_row(child_row_), child_pt(child_pt_) {} child_contribution(const child_contribution &im) { child_id = im.child_id; diag_size = im.diag_size; offdiag_size = im.offdiag_size; diag_strip = im.diag_strip; offdiag_strip = im.offdiag_strip; father_row = im.father_row; father_diag_pt = im.father_diag_pt; father_offdiag_pt = im.father_offdiag_pt; father_offdiag_unsym_pt = im.father_offdiag_unsym_pt; child_row = im.child_row; child_pt = im.child_pt; } }; struct diag_contribution { int child_id; int child_row; int child_column; int father_row; list diag_strip; ~diag_contribution() {} diag_contribution() {} diag_contribution(int child_id_, int child_row_, int child_column_, int father_row_, list diag_strip_) : child_id(child_id_), child_row(child_row_), child_column(child_column_), father_row(father_row_), diag_strip(diag_strip_) {} diag_contribution(const diag_contribution &im) { child_id = im.child_id; child_row = im.child_row; child_column = im.child_column; father_row = im.father_row; diag_strip = im.diag_strip; } }; template struct C_Dsub_FwBw_arg { int dim; int **nrhs; int n_diag; bool access_global; int level; Dissection::Tree *btree; list* diag_contribs; T **x; T **yi; T ***zi; int *loc2glob_diag; long *ops_complexity; ~C_Dsub_FwBw_arg() { delete ops_complexity; } C_Dsub_FwBw_arg() {} C_Dsub_FwBw_arg(int dim_, int **nrhs_, int n_diag_, bool access_global_, int level_, Dissection::Tree *btree_, list* diag_contribs_, T **x_, T **yi_, T ***zi_, int *loc2glob_diag_) : dim(dim_), nrhs(nrhs_), 
n_diag(n_diag_), access_global(access_global_), level(level_), btree(btree_), diag_contribs(diag_contribs_), x(x_), yi(yi_), zi(zi_), loc2glob_diag(loc2glob_diag_) { ops_complexity = new long; } C_Dsub_FwBw_arg(const C_Dsub_FwBw_arg &im) { dim = im.dim; nrhs = im.nrhs; n_diag = im.n_diag; level = im.level; access_global = im.access_global; btree = im.btree; diag_contribs = im.diag_contribs; x = im.x; yi = im.yi; zi = im.zi; loc2glob_diag = im.loc2glob_diag; ops_complexity = im.ops_complexity; } }; template struct C_Dfill_FwBw_arg { int **nrhs; int d; int level; Dissection::Tree *btree; int n_offdiag; T ***yi; T **zi; long *ops_complexity; ~C_Dfill_FwBw_arg() { delete ops_complexity; } C_Dfill_FwBw_arg() {} C_Dfill_FwBw_arg(int **nrhs_, int d_, int level_, Dissection::Tree *btree_, int n_offdiag_, T ***yi_, T **zi_) : nrhs(nrhs_), d(d_), level(level_), btree(btree_), n_offdiag(n_offdiag_), yi(yi_), zi(zi_) { ops_complexity = new long; } C_Dfill_FwBw_arg(const C_Dfill_FwBw_arg &im) { nrhs = im.nrhs; d = im.d; level = im.level; btree = im.btree; n_offdiag = im.n_offdiag; yi = im.yi; zi = im.zi; ops_complexity = im.ops_complexity; } }; template struct C_DenseFwBw_arg { bool isSym; bool isBackward; int dim; bool **isTrans; int **nrhs; int n_diag; int nrow; int k_block; int kk; T **xi; T **wi; T **yi; T **x; SquareBlockMatrix *LDLt; bool isFirstBlock; bool isLastBlock; int *loc2glob; long *ops_complexity; bool verbose; FILE **fp; ~C_DenseFwBw_arg() { delete ops_complexity; } C_DenseFwBw_arg() {} C_DenseFwBw_arg(bool isSym_, bool isBackward_, int dim_, bool **isTrans_, int **nrhs_, int n_diag_, int nrow_, int k_block_, int kk_, T **xi_, T **wi_, T **yi_, T **x_, SquareBlockMatrix *LDLt_, bool isFirstBlock_, bool isLastBlock_, int *loc2glob_, bool verbose_, FILE **fp_) : isSym(isSym_), isBackward(isBackward_), dim(dim_), isTrans(isTrans_), nrhs(nrhs_), n_diag(n_diag_), nrow(nrow_), k_block(k_block_), kk(kk_), xi(xi_), wi(wi_), yi(yi_), x(x_), LDLt(LDLt_), 
isFirstBlock(isFirstBlock_), isLastBlock(isLastBlock_), loc2glob(loc2glob_), verbose(verbose_), fp(fp_) { ops_complexity = new long(0L); /* owned counter, released by the destructor */ } /* Copy constructor: ops_complexity used to be left unset here, so the destructor of the copy deleted an indeterminate pointer; the copy now gets its own counter. Every other pointer is shared with im. */ C_DenseFwBw_arg(C_DenseFwBw_arg &im) { isSym = im.isSym; isBackward = im.isBackward; dim = im.dim; isTrans = im.isTrans; nrhs = im.nrhs; n_diag = im.n_diag; nrow = im.nrow; k_block = im.k_block; kk = im.kk; xi = im.xi; wi = im.wi; yi = im.yi; x = im.x; LDLt = im.LDLt; isFirstBlock = im.isFirstBlock; isLastBlock = im.isLastBlock; loc2glob = im.loc2glob; verbose = im.verbose; fp = im.fp; ops_complexity = (im.ops_complexity != NULL) ? new long(*im.ops_complexity) : NULL; } }; /* C_DenseFwBwOffdiag_arg: bundle of pointers that this struct does not free, plus one owned heap counter, ops_complexity. */ template struct C_DenseFwBwOffdiag_arg { bool trans; bool isLower; int dim; bool **isTrans; int **nrhs; int lda; int ldb; int ldc; int nrow; int ncol; T **xi; int ii; T **yi; T **zi; int jj; SquareBlockMatrix *LDLt; int i_block; int j_block; T alpha; T beta; long *ops_complexity; ~C_DenseFwBwOffdiag_arg() { delete ops_complexity; } /* Default constructor: only ops_complexity is initialised (to NULL) so that the destructor's delete is well defined. */ C_DenseFwBwOffdiag_arg() : ops_complexity(NULL) {} C_DenseFwBwOffdiag_arg(bool trans_, bool isLower_, int dim_, bool **isTrans_, int **nrhs_, int lda_, int ldb_, int ldc_, int nrow_, int ncol_, T **xi_, int ii_, T **yi_, T **zi_, int jj_, SquareBlockMatrix *LDLt_, int i_block_, int j_block_, T alpha_, T beta_) : trans(trans_), isLower(isLower_), dim(dim_), isTrans(isTrans_), nrhs(nrhs_), lda(lda_), ldb(ldb_), ldc(ldc_), nrow(nrow_), ncol(ncol_), xi(xi_), ii(ii_), yi(yi_), zi(zi_), jj(jj_), LDLt(LDLt_), i_block(i_block_), j_block(j_block_), alpha(alpha_), beta(beta_) { ops_complexity = new long(0L); } /* Copy constructor: deep-copies the owned counter (a shared pointer would be deleted by both destructors), shares every other pointer with im. */ C_DenseFwBwOffdiag_arg(C_DenseFwBwOffdiag_arg &im) { trans = im.trans; isLower = im.isLower; dim = im.dim; isTrans = im.isTrans; nrhs = im.nrhs; lda = im.lda; ldb = im.ldb; ldc = im.ldc; nrow = im.nrow; ncol = im.ncol; xi = im.xi; ii = im.ii; yi = im.yi; zi = im.zi; jj = im.jj; LDLt = im.LDLt; i_block = im.i_block; j_block = im.j_block; alpha = im.alpha; beta = im.beta; ops_complexity = (im.ops_complexity != NULL) ? new long(*im.ops_complexity) : NULL; } }; template struct C_StripsFwBwOffdiag_arg { bool isLower; int dim; bool **isTrans; int **nrhs; int lda; int ldb; int ldc; int nrow; int ncol; T
**xi; int ii; T **yi; int jj; RectBlockMatrix *upper; RectBlockMatrix *lower; int i_block; int j_block; T alpha; T beta; long *ops_complexity; ~C_StripsFwBwOffdiag_arg() { delete ops_complexity; } /* Default constructor: only ops_complexity is initialised (to NULL) so that the destructor's delete is well defined. */ C_StripsFwBwOffdiag_arg() : ops_complexity(NULL) {} C_StripsFwBwOffdiag_arg(bool isLower_, int dim_, bool **isTrans_, int **nrhs_, int lda_, int ldb_, int ldc_, int nrow_, int ncol_, T **xi_, int ii_, T **yi_, int jj_, RectBlockMatrix *upper_, RectBlockMatrix *lower_, int i_block_, int j_block_, T alpha_, T beta_) : isLower(isLower_), dim(dim_), isTrans(isTrans_), nrhs(nrhs_), lda(lda_), ldb(ldb_), ldc(ldc_), nrow(nrow_), ncol(ncol_), xi(xi_), ii(ii_), yi(yi_), jj(jj_), upper(upper_), lower(lower_), i_block(i_block_), j_block(j_block_), alpha(alpha_), beta(beta_) { ops_complexity = new long(0L); /* owned counter, released by the destructor */ } /* Copy constructor: deep-copies the owned counter (a shared pointer would be deleted by both destructors), shares every other pointer with im. */ C_StripsFwBwOffdiag_arg(C_StripsFwBwOffdiag_arg &im) { isLower = im.isLower; dim = im.dim; isTrans = im.isTrans; nrhs = im.nrhs; lda = im.lda; ldb = im.ldb; ldc = im.ldc; nrow = im.nrow; ncol = im.ncol; xi = im.xi; ii = im.ii; yi = im.yi; jj = im.jj; upper = im.upper; lower = im.lower; i_block = im.i_block; j_block = im.j_block; alpha = im.alpha; beta = im.beta; ops_complexity = (im.ops_complexity != NULL) ? new long(*im.ops_complexity) : NULL; } }; #define RESTRICT void print_strips(const char *name, list &strips, FILE *fp); void print_strips(const char *name, list &strips, FILE *fp); void assign_tasks_statically(list *queue_static, list &queue_dynamic, long *nops_sum, list &task_seq_tmp, const int task_id, const char *task_name_, const int level, const int phase, const long nops_block_total, int num_threads); template int count_diag_negative(SquareBlockMatrix& Diag); template<> int count_diag_negative(SquareBlockMatrix& Diag); template<> int count_diag_negative >(SquareBlockMatrix >& Diag); template<> int count_diag_negative(SquareBlockMatrix& Diag); template<> int count_diag_negative >(SquareBlockMatrix >& Diag); template<> int count_diag_negative(SquareBlockMatrix& Diag); template<> int count_diag_negative >(SquareBlockMatrix >& Diag); template int
count_diag_negative(SubSquareMatrix& Diag); template<> int count_diag_negative(SubSquareMatrix& Diag); template<> int count_diag_negative >(SubSquareMatrix >& Diag); template<> int count_diag_negative(SubSquareMatrix& Diag); template<> int count_diag_negative >(SubSquareMatrix >& Diag); template<> int count_diag_negative(SubSquareMatrix& Diag); template<> int count_diag_negative >(SubSquareMatrix >& Diag); template void full_gauss3(int *n0, T *a, const int n, double *pivot, int *permute, const bool isSym, const double eps, const bool verbose, FILE *fp); void CopyUpper2LowerSymm(double RESTRICT *s, const double RESTRICT *s_t, const int size_b2, int i, int block_nrow, int ncol); void CopyUpper2LowerSquare(double RESTRICT *s, const double RESTRICT *s_t, const int size_b2, int i, int j, int block_nrow, int block_ncol, int ncol); template void C_SparseSymbFact(void *arg_); template void C_SparseNumFact(void *arg_); template void C_SparseLocalSchur(void *arg_); template void dump_matrix(FILE *fp, const int nrow, T *a); template<> void dump_matrix(FILE *fp, const int nrow, double *a); template void dump_matrix(FILE *fp, const int nrow, const int ncol, T *a); template<> void dump_matrix(FILE *fp, const int nrow, const int ncol, double *a); template void dump_matrix(FILE *fp, const int kk, const int nrow, const int ncol, const int nn, T *a); template void dump_matrix(FILE *fp, RectBlockMatrix &a); template void dump_matrix(FILE *fp, SquareBlockMatrix &a); template void dump_matrix(FILE *fp, const int nrow, const int nnz, int *prow, int *indcols, int *indvals, T *a); template void C_dfull_gauss_b(void *arg_); template void C_dinvDL_timesU(void *arg_); template void C_dupdateb_Schur_diag(void *arg_); template void C_dupdateb_Schur_offdiag(void *arg_); template void C_gauss_whole_pivot(void *arg_); template void C_dupdateb_Schur_offdiag_t(void *arg_); void C_CopyLowerMatrix(int n, const double *A, double *LDlt); void C_DTRSMScale_solve_seq(void *arg_); template void 
C_FillSymMatrix(void *arg_); template void C_FillMatrix_diag(void *arg_); template void C_FillMatrix_offdiag(void *arg_); template void DSchurGEMM_diag(void *arg_); template void DSchurGEMM_diag_two(void *arg_); template void DSchurGEMM_offdiag(void *arg_); template void DSchurGEMM_offdiag_two(void *arg_); template void C_DTRSMScale_diag_upper(void *arg_); template void C_DTRSMScale_offdiag_upper(void *arg_); template void C_DTRSMScale_diag_lower(void *arg_); template void C_DTRSMScale_offdiag_lower(void *arg_); template void C_DTRSMScale_solve(void *arg_); template void C_deallocLower(void *arg_); template void C_deallocLocalSchur(void *arg_); template void C_SparseFw(void *arg_); template void C_SparseBw(void *arg_); template void C_Dsub_FwBw(void *arg_); template void C_DenseFwBw_diag(void *arg_); template void C_DenseFwBw_offdiag(void *arg_); template void C_StripsFwBw_offdiag(void *arg_); template void C_Dfill_FwBw(void *arg_); template void erase_task(C_task *& task); void C_dummy(void *arg_); #define imin(a, b) ((a) < (b) ? (a) : (b)) #define imax(a, b) ((a) > (b) ? 
(a) : (b)) int compare_DTRSMScale_task(const void *_a, const void *_b); int compare_DSchurGEMM_task(const void* _a, const void* _b); int compare_source_dist_index(const void *_a, const void *_b); int convert_array2strip(list &strips, vector& array); int combine_two_strips(list &stripsa, list &stripsb, list &stripsc, list &strips0, list &strips1, const int size); void copy_two_strips(list &strips2, list &strips0, list &strips1); void split_two_strips(list &strips0, list &strips1, list &strips2, index_strip strip0, index_strip strip1); template int dimKernDense(vector &singIdx, const int n, const int aug_dim, const U eps_machine, const double eps_piv, SquareBlockMatrix &D, T *a, const bool refactorize, const bool isBlocked, const bool isSym, const bool verbose, FILE *fp); template void calc_relative_norm(double *norm_l2, double *norm_infty, const T *v, const T *u, const int dim); template<> void calc_relative_norm(double *norm_l2, double *norm_infty, const double *v, const double *u, const int dim); template<> void calc_relative_norm(double *norm_l2, double *norm_infty, const quadruple *v, const quadruple *u, const int dim); template<> void calc_relative_norm >(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const int dim); template<> void calc_relative_norm >(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const int dim); template void calc_relative_normscaled(double *norm_l2, double *norm_infty, const T *v, const T *u, const U *w, const int dim); template<> void calc_relative_normscaled(double *norm_l2, double *norm_infty, const double *v, const double *u, const double *w, const int dim); template<> void calc_relative_normscaled(double *norm_l2, double *norm_infty, const quadruple *v, const quadruple *u, const quadruple *w, const int dim); template<> void calc_relative_normscaled(double *norm_l2, double *norm_infty, const quadruple *v, const quadruple *u, const double *w, const int dim); template<> void 
calc_relative_normscaled, double>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const double *w, const int dim); template<> void calc_relative_normscaled, quadruple>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const quadruple *w, const int dim); template<> void calc_relative_normscaled, double>(double *norm_l2, double *norm_infty, const complex *v, const complex *u, const double *w, const int dim); int CSR_sym2unsym(CSR_indirect *unsym, const int *ptSymRows, const int *indSymCols, const int *map_eqn, const int *remap_eqn, const int dim, const bool upper_flag = true, const bool verbose = false, FILE *fp = NULL); bool CSR_unsym2unsym(CSR_indirect *unsym, const int *ptUnSymRows, const int *indUnSymCols, const int *map_eqn, const int *remap_eqn, const int dim, const bool vebose, FILE *fp); void swap_queues_n(vector &queue, vector &queue_index, const int ii, const int jj, const int n, vector &tmp, vector &tmp_index); int EraseNullParents(vector &queue); int EraseNullParents(C_task *task); bool compare_task_name(C_task *first, C_task *second); extern "C" { void c_getrealtime_(uint64_t &tmprofiles, const int &m); void c_fileout_(uint64_t &fp_prt, char *strgs, const int &force_stderr); } void swap_2x2pivots(const int way, int *pivot_width, int *permute_q, const int dim_augkern, const int nn0, const int n_dim, double *a1, long double *aq, double *d1, long double *d1q, double *a_fact); template void dump_vectors(int nrow, int nn0, T *v, string fname); void ComputeSVD(double *b, const double *a_, const int n); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/CopyMatrix.cpp000664 000000 000000 00000050332 14037356732 025116 0ustar00rootroot000000 000000 /*! \file CopyMatrix.cpp \brief task mangemanet of dissection algorithm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Aug. 09th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Driver/CopyMatrix.hpp" template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a) { b->ptRows() = a->ptRows(); b->indCols() = a->indCols(); const int nnz = a->nnz(); b->coefs().resize(nnz); for (int i = 0; i < nnz; i++) { b->coefs()[i] = conv_prec(a->coefs()[i]); } } template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a); template void CopySparseMatrix, complex >(SparseMatrix > *b, SparseMatrix > *a); template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a); template void CopySparseMatrix, complex > (SparseMatrix > *b, SparseMatrix > *a); template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a); template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a); template void CopySparseMatrix, complex >(SparseMatrix > *b, SparseMatrix > *a); template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a) { // b.init(a.dimension(), a.block_size(), a.isSym(), 0); // initialize b b.allocate(); b.getPermute() = a.getPermute(); // copy vector _permute b.getNsingBlock() = a.getNsingBlock(); // copy vector _nsing_block b.getSingIdx() = a.getSingIdx(); // copy vector _singIdx b.getSingIdx0() = a.getSingIdx0(); // copy vector _singIdx0 b.set_KernelDetected(a.KernelDetected()); // copy bool _kernelDetected b.set_rank(a.rank()); // copy int _rank, int _nsing b.set_lastPivot(a.lastPivot()); // copy double _lastpiv if (a.isBlocked()) { // copy bool _isblocked b.setBlocked(); } else { b.unsetBlocked(); } if (a.isSym()) { const int num_blocks = 
a.num_blocks(); for (int i = 0 ; i < num_blocks; i++) { for (int j = i ; j < num_blocks; j++) { const int nrow = a.nrowBlock(i); const int ncol = a.nrowBlock(j); for (int k = 0; k < (nrow * ncol); k++) { b.addrCoefBlock(i, j)[k] = conv_prec(a.addrCoefBlock(i, j)[k]); } } // loop : j } // loop : i } else { const int num_blocks = a.num_blocks(); for (int i = 0 ; i < num_blocks; i++) { for (int j = 0 ; j < num_blocks; j++) { const int nrow = a.nrowBlock(i); const int ncol = a.nrowBlock(j); for (int k = 0; k < (nrow * ncol); k++) { b.addrCoefBlock(i, j)[k] = conv_prec(a.addrCoefBlock(i, j)[k]); } } // loop : j } // loop : i } } template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a); template void CopySquareBlockMatrix, complex > (SquareBlockMatrix > &b, SquareBlockMatrix > &a); template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a); template void CopySquareBlockMatrix, complex > (SquareBlockMatrix > &b, SquareBlockMatrix > &a); template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a); template void CopySquareBlockMatrix, complex > (SquareBlockMatrix > &b, SquareBlockMatrix > &a); template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a); template void CopySquareBlockMatrix, complex > (SquareBlockMatrix > &b, SquareBlockMatrix > &a); // template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a) { for (int i = 0 ; i < a.num_blocks_r(); i++) { for (int j = 0 ; j < a.num_blocks_c(); j++) { const int nrow = a.nrowBlock(i); const int ncol = a.ncolBlock(j); for (int k = 0; k < (nrow * ncol); k++) { b.addrCoefBlock(i, j)[k] = conv_prec(a.addrCoefBlock(i, j)[k]); } } // loop : j } // loop : i } template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a); template void CopyRectBlockMatrix, complex > (RectBlockMatrix > &b, RectBlockMatrix > &a); template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a); template void CopyRectBlockMatrix, 
complex > (RectBlockMatrix > &b, RectBlockMatrix > &a); template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a); template void CopyRectBlockMatrix, complex > (RectBlockMatrix > &b, RectBlockMatrix > &a); template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a); template void CopyRectBlockMatrix, complex > (RectBlockMatrix > &b, RectBlockMatrix > &a); // template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, W *coef) { b.init(a.dimension(), a.block_size(), a.isSym()); b.setNfront(a.Nfront()); b.setMaxdim(a.maxdim()); b.setNnz(a.nnz()); b.setNop(a.nop()); b.setDiag_block_alloc_status(a.diag_block_alloc_status()); b.setNscol(a.nscol()); b.setNsing(a.nsing()); b.setDetected(a.detected()); b.getPtRows() = a.getPtRows(); b.getIndCols() = a.getIndCols(); b.getIndVals() = a.getIndVals(); b.getNew2old() = a.getNew2old(); b.getP_front() = a.getP_front(); b.getPermute() = a.getPermute(); b.getPermute_ginv() = a.getPermute_ginv(); b.getP_diag() = a.getP_diag(); b.getP_upper() = a.getP_upper(); b.getList_schur() = a.getList_schur(); b.getList_elim() = a.getList_elim(); b.getNum_null() = a.getNum_null(); b.setCoef(coef); const int nfront = a.Nfront(); b.getaddrDiagMatrix() = new ColumnMatrix[nfront]; #ifndef SPARSE_OFFDIAG b.getaddrLowerMatrix() = new ColumnMatrix[nfront]; b.getaddrUpperMatrix() = new ColumnMatrix[nfront]; #endif for (int n = 0; n < nfront; n++) { { const int nbCols = a.getaddrDiagMatrix()[n].nbColumns(); const int nbRows = a.getaddrDiagMatrix()[n].nbRows(); const int size = a.getaddrDiagMatrix()[n].size(); b.getaddrDiagMatrix()[n].init(nbRows, nbCols); // allocation for (int i = 0; i < size; i++) { b.getaddrDiagMatrix()[n].coefs()[i] = conv_prec(a.getaddrDiagMatrix()[n].coefs()[i]); } } #ifndef SPARSE_OFFDIAG { const int nbCols = a.getaddrLowerMatrix()[n].nbColumns(); const int nbRows = a.getaddrLowerMatrix()[n].nbRows(); const int size = a.getaddrLowerMatrix()[n].size(); 
b.getaddrLowerMatrix()[n].init(nbRows, nbCols); // allocation b.getaddrUpperMatrix()[n].init(nbRows, nbCols); // allocation for (int i = 0; i < size; i++) { b.getaddrLowerMatrix()[n].coefs()[i] = conv_prec(a.getaddrLowerMatrix()[n].coefs()[i]); b.getaddrUpperMatrix()[n].coefs()[i] = conv_prec(a.getaddrUpperMatrix()[n].coefs()[i]); } } #endif } b.getA12().init(a.getA12().nbRows(), a.getA12().nbColumns()); // allocation for (int i = 0; i < a.getA12().size(); i++) { b.getA12().coefs()[i] = conv_prec(a.getA12().coefs()[i]); } b.getA21().init(a.getA21().nbRows(), a.getA21().nbColumns()); // allocation for (int i = 0; i < a.getA21().size(); i++) { b.getA21().coefs()[i] = conv_prec(a.getA21().coefs()[i]); } b.getS22().init(a.getS22().nbRows(), a.getS22().nbColumns()); // allocation for (int i = 0; i < a.getS22().size(); i++) { b.getS22().coefs()[i] = conv_prec(a.getS22().coefs()[i]); } } template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, double *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix, double> &b, TridiagBlockMatrix, quadruple> &a, complex *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, quadruple *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix, quadruple> &b, TridiagBlockMatrix, double> &a, complex *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, float *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix, float> &b, TridiagBlockMatrix, double> &a, complex *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, double *coef); template void CopyTridiagBlockMatrix(TridiagBlockMatrix, double> &b, TridiagBlockMatrix, float> &a, complex *coef); template void CopyDissectionMatrix(DissectionMatrix *b, DissectionMatrix *a, SquareBlockMatrix *diag, // pointers RectBlockMatrix *lower, RectBlockMatrix *upper) { b->setNb(a->nb()); b->setLevel(a->level()); b->setNrow(a->nrow()); 
b->setNcol_offdiag(a->ncol_offdiag()); b->setIsSym(a->isSym()); b->setIslast(a->islast()); b->setAlignedFather(a->alignedFather()); // copy pointer to matrices that have lower accuracy b->setColorTridiagBlockMatrix(a->ColorTridiagBlockMatrix()); b->paddrdiagBlock() = diag; // b->paddrtridiagBlock() = tridiag; /* lower was previously written to paddrupperBlock() and then overwritten by upper on the next statement, so it was never stored. NOTE(review): assumes DissectionMatrix exposes paddrlowerBlock() alongside paddrupperBlock() -- confirm in DissectionMatrix.hpp. */ b->paddrlowerBlock() = lower; b->paddrupperBlock() = upper; } template void CopyDissectionMatrix(DissectionMatrix *b, DissectionMatrix *a, SquareBlockMatrix *diag, RectBlockMatrix *lower, RectBlockMatrix *upper); template void CopyDissectionMatrix, quadruple, complex, double>(DissectionMatrix, double> *b, DissectionMatrix, quadruple> *a, SquareBlockMatrix > *diag, RectBlockMatrix > *lower, RectBlockMatrix > *upper); // template void CopyDissectionMatrix (DissectionMatrix *b, DissectionMatrix *a, SquareBlockMatrix *diag, RectBlockMatrix *lower, RectBlockMatrix *upper); template void CopyDissectionMatrix, double, complex, quadruple> (DissectionMatrix, quadruple> *b, DissectionMatrix, double> *a, SquareBlockMatrix > *diag, RectBlockMatrix > *lower, RectBlockMatrix > *upper); template void CopyDissectionMatrix(DissectionMatrix *b, DissectionMatrix *a, SquareBlockMatrix *diag, RectBlockMatrix *lower, RectBlockMatrix *upper); template void CopyDissectionMatrix, double, complex, float>(DissectionMatrix, float> *b, DissectionMatrix, double> *a, SquareBlockMatrix > *diag, RectBlockMatrix > *lower, RectBlockMatrix > *upper); // template void CopyDissectionMatrix (DissectionMatrix *b, DissectionMatrix *a, SquareBlockMatrix *diag, RectBlockMatrix *lower, RectBlockMatrix *upper); template void CopyDissectionMatrix, float, complex, double> (DissectionMatrix, double> *b, DissectionMatrix, float> *a, SquareBlockMatrix > *diag, RectBlockMatrix > *lower, RectBlockMatrix > *upper); // template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a) { b.getSlduList() = a.getSlduList(); { const int dim = a.getSldu().dimension(); b.getSldu().init(a.getSldu().loc2glob()); for (int i =
0; i < (dim * dim); i++) { b.getSldu().addrCoefs()[i] = conv_prec(a.getSldu().addrCoefs()[i]); } for (int i = 0; i < dim; i++) { b.getSldu().addr2x2()[i] = conv_prec(a.getSldu().addr2x2()[i]); } b.getSldu().getPivotWidth() = a.getSldu().getPivotWidth(); b.getSldu().getPivot2x2() = a.getSldu().getPivot2x2(); b.getSldu().getPermute() = a.getSldu().getPermute(); } { const bool isUpper = a.getArow()->isUpper(); const bool isSym = a.getArow()->isSymmetric(); const int dim = a.getArow()->dimension(); const int nnz = a.getArow()->nnz(); b.getArow() = new SparseMatrix(dim, nnz, &(a.getArow()->getRows()[0]), &(a.getArow()->getIndCols()[0]), isSym, isUpper); for (int i = 0; i < nnz; i++) { b.getArow()->Coef(i) = conv_prec(a.getArow()->Coef(i)); } } { const bool isUpper = a.getAcol()->isUpper(); const bool isSym = a.getAcol()->isSymmetric(); const int dim = a.getAcol()->dimension(); const int nnz = a.getAcol()->nnz(); b.getAcol() = new SparseMatrix(dim, nnz, &(a.getAcol()->getRows()[0]), &(a.getAcol()->getIndCols()[0]), isSym, isUpper); for (int i = 0; i < nnz; i++) { b.getAcol()->Coef(i) = conv_prec(a.getAcol()->Coef(i)); } } { const int size = a.getScol().size(); for (int i = 0; i < size; i++) { b.getScol().addrCoefs()[i] = conv_prec(a.getScol().addrCoefs()[i]); } } } template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a); template void CopySchurMatrix(SchurMatrix > &b, SchurMatrix > &a); template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a); template void CopySchurMatrix(SchurMatrix > &b, SchurMatrix > &a); template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a); template void CopySchurMatrix(SchurMatrix > &b, SchurMatrix > &a); template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a); template void CopySchurMatrix(SchurMatrix > &b, SchurMatrix > &a); // template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a) { b.set_dimension(a.dimension()); b.getSingIdx() = a.getSingIdx(); b.getKernListEq() = a.getKernListEq(); { 
b.getKernBasis().init(a.getKernBasis().nbRows(), a.getKernBasis().nbColumns()); const int size = a.getKernBasis().size(); for (int i = 0; i < size; i++) { b.getKernBasis().addrCoefs()[i] = conv_prec(a.getKernBasis().addrCoefs()[i]); } } { /* size the destination from a.getTKernBasis() itself: the loop below copies a.getTKernBasis().size() coefficients, so dimensions taken from getKernBasis() only fit when both matrices happen to have the same shape */ b.getTKernBasis().init(a.getTKernBasis().nbRows(), a.getTKernBasis().nbColumns()); const int size = a.getTKernBasis().size(); for (int i = 0; i < size; i++) { b.getTKernBasis().addrCoefs()[i] = conv_prec(a.getTKernBasis().addrCoefs()[i]); } } { const int dim = a.getKernProj().dimension(); b.getKernProj().init(dim); for (int i = 0; i < (dim * dim); i++) { b.getKernProj().addrCoefs()[i] = conv_prec(a.getKernProj().addrCoefs()[i]); } b.getKernProj().getPermute() = a.getKernProj().getPermute(); } { const int dim = a.getTKernProj().dimension(); b.getTKernProj().init(dim); for (int i = 0; i < (dim * dim); i++) { b.getTKernProj().addrCoefs()[i] = conv_prec(a.getTKernProj().addrCoefs()[i]); } b.getTKernProj().getPermute() = a.getTKernProj().getPermute(); } { const int dim = a.getNTKernProj().dimension(); b.getNTKernProj().init(dim); for (int i = 0; i < (dim * dim); i++) { b.getNTKernProj().addrCoefs()[i] = conv_prec(a.getNTKernProj().addrCoefs()[i]); } b.getNTKernProj().getPermute() = a.getNTKernProj().getPermute(); } } template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a); template void CopyKernelMatrix, complex >(KernelMatrix > &b, KernelMatrix > &a); template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a); template void CopyKernelMatrix, complex >(KernelMatrix > &b, KernelMatrix > &a); template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a); template void CopyKernelMatrix, complex >(KernelMatrix > &b, KernelMatrix > &a); template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a); template void CopyKernelMatrix, complex >(KernelMatrix > &b, KernelMatrix > &a); FreeFem-sources-4.9/3rdparty/dissection/src/Driver/CopyMatrix.hpp000664 000000 000000 00000007062 14037356732 025125 0ustar00rootroot000000 000000
/*! \file CopyMatrix.hpp \brief task management of dissection algorithm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Aug. 09th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see https://www.gnu.org/licenses/. // #ifndef _COPY_MATRIX_ #define _COPY_MATRIX_ #include #include #include "Driver/DissectionMatrix.hpp" #include using std::vector; template void CopySparseMatrix(SparseMatrix *b, SparseMatrix *a); template void CopySquareBlockMatrix(SquareBlockMatrix &b, SquareBlockMatrix &a); template void CopyRectBlockMatrix(RectBlockMatrix &b, RectBlockMatrix &a); template void CopyTridiagBlockMatrix(TridiagBlockMatrix &b, TridiagBlockMatrix &a, W *coef); /* Parameter names follow the definition in CopyMatrix.cpp: the first argument is b, whose fields are set from the second argument a. */ template void CopyDissectionMatrix(DissectionMatrix* b, DissectionMatrix* a, SquareBlockMatrix *diag, // pointers RectBlockMatrix *lower, RectBlockMatrix *upper); template void CopySchurMatrix(SchurMatrix &b, SchurMatrix &a); template void CopyKernelMatrix(KernelMatrix &b, KernelMatrix &a); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionDefault.hpp000664 000000 000000 00000006314 14037356732 026436 0ustar00rootroot000000 000000 /*! \file DissectionDefault.hpp \brief definition of default value for factorization \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 24th 2015 \date Sep. 29th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection.
Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see https://www.gnu.org/licenses/.
// #ifndef _DISSECTION_DEFAULT_ #define _DISSECTION_DEFAULT_ /* Compile-time defaults for the factorization: the first three macros number the decomposers, the next three number the scaling modes, and the remainder are size and pivot thresholds (see the trailing comment on each). */ #define SCOTCH_DECOMPOSER 0 #define METIS_DECOMPOSER 1 #define TRIDIAG_DECOMPOSER 2 #define NO_SCALING 0 // needs to be compatible to the definition in #define DIAGONAL_SCALING 1 // normalize(), SparseMatrix.cpp #define KKT_SCALING 2 // #define MINNODES 256 // minimum size of the first layer of dissection #define SIZE_TRIDIAG 1000 // more than this value, dissection is used #define DIM_AUG_KERN 4 // appropriate for indefinite matrix #define EPS_PIVOT 1.0e-2 #define TOL_PIVOT 1.0e-5 // for recursion of sparse factorization #define MIN_TRIDIAG_SIZE 50 // the size to avoid Cuthill-McKee ordering #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionMatrix.cpp000664 000000 000000 00000310575 14037356732 026320 0ustar00rootroot000000 000000 /*! \file DissectionMatrix.cpp \brief management of threads for factorization and Fw/Bw substitution \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface.
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #include "Compiler/OptionLibrary.hpp" #include #include "Driver/DissectionMatrix.hpp" #include "Algebra/SparseRenumbering.hpp" // to_string is available in C++11 but NEC SXC++ does not support it #include "Compiler/DissectionIO.hpp" /* Constructor: sets up the block storage of tree node `nb`. Reads the node level, row count (_nrow) and off-diagonal column count (_ncol_offdiag) from `btree`, keeps pointers to the node's diagonal/off-diagonal CSR, and allocates _upper, _lower and _localSchur. On the last level it builds the color mask and one TridiagBlockMatrix per color; on other levels it allocates a SquareBlockMatrix for _diag and, for _nb > 1, sets _alignedFather when this node and its brother each have exactly one father strip at level _level - 1 with equal begin_src and width. _lower is allocate()-d only when !_isSym. */ template DissectionMatrix::DissectionMatrix(Dissection::Tree *btree, const int nb, bool isSym, const bool verbose, FILE *fp) : _nb(nb), _diag(NULL), _lower(NULL), _upper(NULL), _isSym(isSym) { const int level_last = btree->NumberOfLevels() - 1; _level = btree->nodeLayer(_nb); _nrow = btree->sizeOfDomain(_nb); _ncol_offdiag = btree->sizeOfFathersStrips(_nb); if (_nrow == 0) { _ncol_offdiag = 0; // for safety } // no need to be allocated through the whole process // // _loc2glob_diag = btree->getDiagLoc2Glob(_nb); // _loc2glob_offdiag = btree->getOffdiagLoc2Glob(_nb); _csr_diag = &(btree->getDiagCSR(_nb)); _csr_offdiag = &(btree->getOffdiagCSR(_nb)); _upper = new RectBlockMatrix; _lower = new RectBlockMatrix; // for unsymmetric matrix _lower block is stored in transposed way to use // the same strips as _upper block // bool later_allocation _localSchur = new SquareBlockMatrix; if (_level == level_last) { _islast = true; _color_mask = new int[_nrow]; _colors = getColorMaskCSR(_color_mask, _csr_diag, verbose, fp); _tridiag = new TridiagBlockMatrix*[_colors]; for (int i = 0; i < _colors; i++) { _tridiag[i] = new TridiagBlockMatrix(_nrow, SIZE_B1, _isSym, nb, verbose, fp); } // the value of the last pivot and singular nodes to be stored in _diag _diag = new SquareBlockMatrix; _upper->init(_nrow, _ncol_offdiag, SIZE_B1, 0); _upper->allocate(); _lower->init(_nrow, _ncol_offdiag, SIZE_B1, 0); if (!_isSym) { _lower->allocate(); } _localSchur->init(_ncol_offdiag, SIZE_B1, _isSym, 0); // 02 Feb.2014 _alignedFather = false; } else { _islast = false; if (_nrow > 0) { _diag = new SquareBlockMatrix(_nrow, SIZE_B1, _isSym); } else { _diag = new SquareBlockMatrix(); // dummy } _tridiag = (TridiagBlockMatrix **)NULL; /* NOTE(review): when _nb <= 1 the branch below is skipped, so on this path _alignedFather is never assigned and _upper/_lower/_localSchur are not init()-ed by this constructor -- confirm that is intended. */ if (_nb > 1) { int ntmp = 0; if
(_nrow > 0) { const int b_id = btree->brotherIndex(_nb); const int ll = _level - 1; const Dissection::SetOfStrips& f0 = btree->getFathersStrips(_nb)[ll]; const Dissection::SetOfStrips& f1 = btree->getFathersStrips(b_id)[ll]; _alignedFather = false; if ((btree->sizeOfDomain(b_id) > 0) && (f0.numberOfStrips() == 1) && (f1.numberOfStrips() == 1)) { Dissection::SetOfStrips::const_iterator it0 = f0.begin(); Dissection::SetOfStrips::const_iterator it1 = f1.begin(); if (((*it0).begin_src == (*it1).begin_src) && ((*it0).width == (*it1).width)) { _alignedFather = true; ntmp = f0.numberOfIndices(); } } } else { _alignedFather = false; // the other brother is treated as nonaligned } if (!_alignedFather) { diss_printf(verbose, fp, "%s %d : %d : %s\n", __FILE__, __LINE__, _nb, (_alignedFather ? "aligned" : "nonaligned")); } _upper->init(_nrow, _ncol_offdiag, SIZE_B1, ntmp); _upper->allocate(); _lower->init(_nrow, _ncol_offdiag, SIZE_B1, ntmp); if (!_isSym) { _lower->allocate(); } _localSchur->init(_ncol_offdiag, SIZE_B1, _isSym, ntmp); // 02 Feb.2014 } } // pointer to working array which is allocated and deallocated in // C_dfull_sym_gauss_b() _factorize_LDLt = new ColumnMatrix; //new T*; // _factorize_LDLt_diag = new ColumnMatrix; // new T*; diss_printf(verbose, fp, "%s %d : %d : nrow : %d : %d = %d + %d num_blocks : %d = %d + %d\n", __FILE__, __LINE__, _nb, _nrow, _ncol_offdiag, _localSchur->dimension0(), _localSchur->dimension1(), _localSchur->num_blocks(), _localSchur->num_blocks0(), _localSchur->num_blocks1()); } template DissectionMatrix:: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); template DissectionMatrix, double>:: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); template DissectionMatrix:: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); template DissectionMatrix, quadruple>::
DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); template DissectionMatrix:: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); template DissectionMatrix, float>:: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); // /* C_SparseSymbFact_queue: stores in queue[0] a single C_SPARSESYMBFACT task that runs C_SparseSymbFact on this node's diagonal CSR (_csr_diag) with the tridiagonal blocks and color mask. The task's complexity is seeded with _csr_diag->n through arg->ops_complexity. `queue` must already have at least one slot; it is not resized here. */ template void DissectionMatrix::C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp) { string task_name = "sparse_s_fact : " + to_string(_nb); long opsl = _csr_diag->n; // since factorization is based on skyline // complexity is a linear function of size C_SparseSymbFact_arg *arg = new C_SparseSymbFact_arg(_tridiag, _colors, _color_mask, btree->sizeOfDomain(_nb), _csr_diag, verbose, fp); // *(arg->nopd) = opsl; // very rough estimate, will be updated after run *(arg->ops_complexity) = opsl; queue[0] = new C_task(C_SPARSESYMBFACT, task_name, (void *)arg, C_SparseSymbFact, 1, // atomic_size 0, // atomic_id arg->ops_complexity // ops_complexity ); } template void DissectionMatrix:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); template void DissectionMatrix, double>:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); template void DissectionMatrix, quadruple>:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); template void DissectionMatrix, float>:: C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); // template void DissectionMatrix::C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, T *coefs, double
*eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, U *eps_machine, vector& task_q, const bool verbose, FILE **fp) { /* C_SparseNumFact_queue: stores in queue[0] a single C_SPARSENUMFACT task that runs C_SparseNumFact with the pivot/kernel-detection pointers supplied by the caller. arg->nopd is seeded with ncol * ncol * nrow, but the complexity pointer handed to the task is the nopd field of task_q[0]'s argument viewed as a C_SparseSymbFact_arg, not arg's own. No parent dependency is registered here. */ string task_name = "n : " + to_string(_nb); // char *task_name_cstr = new char[task_name.str().size() + 1]; // strcpy(task_name_cstr, task_name.str().c_str()); // complexity is a linear function of size const int nrow = btree->sizeOfDomain(_nb); const int ncol = btree->sizeOfFathersStrips(_nb); C_SparseNumFact_arg *arg = new C_SparseNumFact_arg(_tridiag, //_num_Sys, _isSym, _colors, _color_mask, nnz, //ptDA->nz(), // nnz coefs, //ptDA->getCoef(), // *coefs nrow, ncol, _csr_diag, _csr_offdiag, _diag, eps_pivot, pivot, kernel_detection, higher_precision, aug_dim, eps_machine, _localSchur, verbose, fp, _nb); const long ncoll = ncol; const long nrowl = nrow; *(arg->nopd) = ncoll * ncoll * nrowl; // very rough estimate , // will be updated after run queue[0] = new C_task(C_SPARSENUMFACT, task_name, (void *)arg, C_SparseNumFact, 1, // atomic_size 0, // atomic_id ((C_SparseSymbFact_arg *)(task_q[0]->func_arg))->nopd); } template void DissectionMatrix:: C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, double *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, double *eps_machine, vector& task_q, const bool verbose, FILE **fp); template void DissectionMatrix, double>:: C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, double *eps_machine, vector& task_q, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, quadruple *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, quadruple *eps_machine, vector& task_q, const bool verbose, FILE **fp); template void DissectionMatrix, quadruple>::
C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, quadruple *eps_machine, vector& task_q, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, float *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, float *eps_machine, vector& task_q, const bool verbose, FILE **fp); template void DissectionMatrix, float>:: C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, float *eps_machine, vector& task_q, const bool verbose, FILE **fp); // /* C_SparseLocalSchur_queue: stores in queue[0] a single C_SPARSESCHUR task that runs C_SparseLocalSchur. It reuses the C_SparseNumFact_arg layout with all pivot, kernel-detection and precision pointers set to NULL. The task takes its complexity pointer from the nopd field of task_p[0]'s argument, publishes &arg->fp through queue[0]->fp, and registers task_p[0] as its only parent. */ template void DissectionMatrix::C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, T *coefs, vector& task_p, const bool verbose, FILE **fp) { string task_name = "o : " + to_string(_nb); // char *task_name_cstr = new char[task_name.str().size() + 1]; // strcpy(task_name_cstr, task_name.str().c_str()); // complexity is a linear function of size C_SparseNumFact_arg *arg = new C_SparseNumFact_arg(_tridiag, _isSym, _colors, _color_mask, nnz, // ptDA->nz(), // nnz coefs, // ptDA->getCoef(), // *coefs btree->sizeOfDomain(_nb), btree->sizeOfFathersStrips(_nb), _csr_diag, _csr_offdiag, _diag, (double *)NULL, // eps_pivot (double *)NULL, // pivot (bool *)NULL, // kernel detection dummy (bool *)NULL, // higher precision dummy (int *)NULL, // dummy (U *)NULL, // dummy _localSchur, verbose, fp, _nb); queue[0] = new C_task(C_SPARSESCHUR, task_name, (void *)arg, C_SparseLocalSchur, 1, // atomic_size 0, // atomic_id ((C_SparseNumFact_arg *)(task_p[0]->func_arg))->nopd ); queue[0]->fp = &(arg->fp); queue[0]->parents->push_back(task_p[0]); } template void DissectionMatrix:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int
nnz, double *coefs, vector& task_p, const bool verbose, FILE **fp); template void DissectionMatrix, double>:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, vector& task_p, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, quadruple *coefs, vector& task_p, const bool verbose, FILE **fp); template void DissectionMatrix, quadruple>:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, vector& task_p, const bool verbose, FILE **fp); template void DissectionMatrix:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, float *coefs, vector& task_p, const bool verbose, FILE **fp); template void DissectionMatrix, float>:: C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, complex *coefs, vector& task_p, const bool verbose, FILE **fp); // /* C_FillMatrix_queue: creates two C_FILLMATRIX tasks. queue[1] ("l") runs C_FillMatrix_offdiag on _upper (and on _lower when !_isSym) from _csr_offdiag; its complexity is 0 at level 0, otherwise the off-diagonal nnz with 1 substituted for 0. queue[0] ("k") runs C_FillMatrix_diag on _diag from _csr_diag with complexity (nnz + n) / 2. The `nnz` parameter is not referenced in this body, and `queue` must already hold two slots. */ template void DissectionMatrix::C_FillMatrix_queue(vector& queue, int nnz, T *coefs, const bool verbose, FILE **fp) { { string task_name = "l : " + to_string(_nb); // 16 Sep.2014 : Atsushi // -1L is used for initialization of the array <=> dependency long ops; if (_level > 0) { // replace -1L by 1L : 29 Nov.2016 Atsushi // ops = _csr_offdiag->nnz == 0 ? (-1L) : (long)_csr_offdiag->nnz; ops = _csr_offdiag->nnz == 0 ? 1L : (long)_csr_offdiag->nnz; } else { ops = 0L; } // if (ops > 0) need to be skipped : 29 Aug.2014 Atsushi C_FillMatrix_arg *arg = new C_FillMatrix_arg(_isSym, (SquareBlockMatrix*)NULL, _upper, (_isSym ? 
(RectBlockMatrix*)NULL : _lower), (CSR_indirect*)NULL, _csr_offdiag, coefs, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[1] = new C_task(C_FILLMATRIX, task_name, (void *)arg, C_FillMatrix_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity // ops_complexity ); } { // begin scope : task_name string task_name = "k : " + to_string(_nb); long ops = (long)(_csr_diag->nnz + _csr_diag->n) / 2L; C_FillMatrix_arg *arg = new C_FillMatrix_arg(_isSym, _diag, (RectBlockMatrix*)NULL, // upper (RectBlockMatrix*)NULL, // lower _csr_diag, (CSR_indirect*)NULL, coefs, verbose, fp, _nb); // ptDA->getCoef()); *(arg->ops_complexity) = ops; queue[0] = new C_task(C_FILLMATRIX, task_name, (void *)arg, C_FillMatrix_diag, 1, // atomic_size 0, // atomic_id arg->ops_complexity // ops_complexity ); } } template void DissectionMatrix:: C_FillMatrix_queue(vector& queue, int nnz, double *coefs, const bool verbose, FILE **fp); template void DissectionMatrix, double>:: C_FillMatrix_queue(vector& queue, int nnz, complex *coefs, const bool verbose, FILE **fp); template void DissectionMatrix:: C_FillMatrix_queue(vector& queue, int nnz, quadruple *coefs, const bool verbose, FILE **fp); template void DissectionMatrix, quadruple>:: C_FillMatrix_queue(vector& queue, int nnz, complex *coefs, const bool verbose, FILE **fp); template void DissectionMatrix:: C_FillMatrix_queue(vector& queue, int nnz, float *coefs, const bool verbose, FILE **fp); template void DissectionMatrix, float>:: C_FillMatrix_queue(vector& queue, int nnz, complex *coefs, const bool verbose, FILE **fp); // template int DissectionMatrix::C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, U *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp) { const int n = _diag->dimension(); // _nrow const int upper_ncol = getUpperNCol(); // _ncol_offdiag const int num_block =
_diag->num_blocks(); /* C_DFullLDLt_queue overview: fills `queue` with the task graph for factorizing the dense diagonal block _diag, split into num_block x num_block blocks. For each block step k the "blocked" pivot pass creates a^k (C_dfull_gauss_b), b^k_j (C_dinvDL_timesU) and c^k_{i,j} (C_dupdateb_Schur_offdiag / C_dupdateb_Schur_diag). task_ptr[k] is the queue index where step k starts and task_indcol records the queue indices in creation order. Only when _level == 0, a second group A^k (C_gauss_whole_pivot) and C^k (C_dupdateb_Schur_offdiag_t) for refactorization with full pivoting is appended from task_ptr[num_block]. Returns the final ipos, or 1 when n == 0, in which case queue is resized to one C_DUMMY task. For n != 0 `queue` and `task_indcol` are indexed without being resized here, so the caller must size them. */ int num_tasks1; int ipos, jpos; int parent_id; if (_level == 0) { num_tasks1 = ((num_block - 1) * num_block * (num_block + 1) / 6 + num_block); } else { num_tasks1 = 0; } if (n == 0) { queue.resize(1); string task_name = ("a dummy : " + to_string(_level) + " : " +to_string(_nb)); C_dummy_arg *arg = new C_dummy_arg(verbose, fp, _nb); // *(arg->ops_complexity) = (-1L); queue[0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); queue[0]->parents->clear(); task_ptr.clear(); return 1; } int atomic_size, atomic_id; // int *ptr; if (_level == 0) { task_ptr.resize(2 * num_block + 1); // ptr = new int[2 * num_block + 1]; } else { task_ptr.resize(num_block + 1); } #ifdef DEBUG_MUTEX_C_DFULLLDLT cout << "**** C_DFullLDLt " << " starts with " << n << " sized-matrix by " << SIZE_B1 << " ****" << endl; #endif // keeping starting index of triangular factorizations at k-th level task_ptr[0] = 0; for (int k = 0; k < num_block; k++) { task_ptr[k + 1] = task_ptr[k] + ((num_block - k) * (num_block - k + 1)) / 2; } if (_level == 0) { int kk = num_block; for (int k = 0; k < num_block; k++, kk++) { task_ptr[kk + 1] = (task_ptr[kk] + ((num_block - k - 1) * (num_block - k)) / 2 + 1); } } #ifdef DEBUG_C_DFULLLDLT cout << "group_id = " << group_id << " : "; for (int k = 0; k < num_block + 1; k++) { cout << task_ptr[k] << " "; } cout << endl; #endif // queue with "blocked" pivot strategy // generating tasks // a^0, {b^0_0, c^0_00, a^1}, b^0_1,....,b^0_m, c^0_01,c^0_11,...,c^0_mm, // {b^1_0,c^1_00 a^2}, b^1_1,....,b^1_{m-1}, c^0_01,...,c^0_{m-1}{m-1}, ipos = (-1); // jpos = 0; for (int k = 0; k < num_block; k++) { const int nrow = _diag->nrowBlock(k); int task_position; task_position = 0; if (k == 0) { task_position = 1; } if (k == (num_block - 1)) { task_position += 2; } // task a^k { // scope for char *task_name_cstr if (k == 0) { ipos = 0; atomic_size = 1; atomic_id = 0; } else { ipos = task_ptr[k - 1] + 3;
atomic_size = 3; atomic_id = 2; } string task_name = ("a " + to_string(k) + " : " + to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow; const long ops = nrowl * (nrowl + 1L) * (nrowl + 2L) + nrowl; C_dfull_gauss_arg *arg = new C_dfull_gauss_arg(_isSym, task_position, // int task_position _level, num_block, k, _diag, _localSchur, _factorize_LDLt, // pointer for T[n * n] // _factorize_LDLt_diag, // diag (int *)NULL, // int *permute_block n, // int n upper_ncol, // lower matrix size nrow, // int nrow k, // int i1 eps_piv, pivot, pivot0, pivot1, kernel_detection, aug_dim, eps_machine, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DFULL_SYM_GAUSS, task_name, (void *)arg, C_dfull_gauss_b, atomic_size, atomic_id, arg->ops_complexity // ops_complexity ); arg->quit = &queue[ipos]->quit_queue; queue[ipos]->fp = &(arg->fp); task_indcol[jpos++] = ipos; if (k == 0) { // a^0 is the root queue[ipos]->parallel_max = 1; queue[ipos]->parallel_id = 0; // dependency on dgemm/dsub of previous factorization level queue[ipos]->parents->push_back(task_p[0]); } else { queue[ipos]->parallel_max = (-1); queue[ipos]->parallel_id = 0; // no need to write dependency : sequential operation inside of atom // resolves dependency // parent_id = task_ptr[k - 1] + 2; // c^{k-1}_{0,0} // queue[ipos]->parents->push_back(queue[parent_id]); } if (task_position / 2 == 1) { queue[ipos]->to_next_task = num_tasks1; } } // scope for char *task_name_cstr for (int j = 0; j < (num_block - 1 - k); j++) { const int jj = k + j + 1; // task b^k_j { // scope for char *task_name_cstr // lower part stored in transposed const int ncol = _diag->ncolBlock(jj, k); if (j == 0) { // a^{k}, b^{k}_0 ipos = task_ptr[k] + 1; atomic_size = 3; atomic_id = 0; } else { // a^{k}, b^{k}_0 c^{k}_00 a^{k+1}, b^{k}_1, ... 
// three tasks a^{k} c^{k}_00 a^{k+1} prior {b^{k}} ipos = task_ptr[k] + j + 3; atomic_size = 1; atomic_id = 0; } string task_name = ("b " + to_string(k) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow; const long ncoll = (long)ncol; const long ops = (nrowl - 1L) * nrowl * ncoll + nrowl * ncoll; // dtrsm for (nrow x ncol matrix) + D^-1 (nrow x ncol matrix) C_dinvDL_timesU_arg *arg = new C_dinvDL_timesU_arg(_isSym, task_position, _level, num_block, k, _diag, //D, _factorize_LDLt, n, // int n nrow, // int nrow ncol, // int ncol k, // int i1 jj, // k + j + 1, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DINV_DL_TIMESU, task_name, (void *)arg, C_dinvDL_timesU, atomic_size, atomic_id, arg->ops_complexity // ops_complexity ); task_indcol[jpos++] = ipos; queue[ipos]->parallel_max = (num_block - k - 1); queue[ipos]->parallel_id = j; if (k == 0) { // dependency on dgemm/dsub of previous factorization level if (_isSym || !isChldrnAlgnd) { const int jtmp = ((j + 1) * (j + 2)) / 2; queue[ipos]->parents->push_back(task_p[jtmp]); } else { const int jtmp = (j + 1) * (j + 1); queue[ipos]->parents->push_back(task_p[jtmp]); queue[ipos]->parents->push_back(task_p[jtmp + 1]); } parent_id = task_ptr[k]; // a^0 queue[ipos]->parents->push_back(queue[parent_id]); } else { parent_id = task_ptr[k - 1] + 3; // a^k queue[ipos]->parents->push_back(queue[parent_id]); // c^{k-1}_{0,j+1} parent_id = (task_ptr[k - 1] + (num_block - k + 2) + ((j + 1) * (j + 2)) / 2); queue[ipos]->parents->push_back(queue[parent_id]); } // if (k == 0) } // scope for char *task_name_cstr : task b^k_j } for (int j = 0; j < (num_block - 1 - k); j++) { const int jj = k + j + 1; const int ncol = _diag->ncolBlock(jj, k); for (int i = 0; i < j; i++) { // task c^k_{i,j} { ipos = (task_ptr[k] + 2 + // a^0 a^1 (num_block - k - 1) + // b^0_0 ,... 
b^0_{num_block-k-2} (j * (j + 1)) / 2 + // size of upper triangle i); // string task_name = ("c " + to_string(k) + " " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow; const long ncoll = (long)ncol; const long ops = nrowl * nrowl * ncoll * 2L; // optimized BLAS3 gemm : opt. factor = 1/0.5 C_dupdateb_Schur_arg *arg = new C_dupdateb_Schur_arg(_isSym, task_position, _level, num_block, k, _diag, //D, _factorize_LDLt, // _factorize_LDLt_work, n, nrow, ncol, SIZE_B1, k, // i1 k + i + 1, jj, // k + j + 1, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DHALF_SCHUR_B, task_name, // task_name_cstr, (void *)arg, C_dupdateb_Schur_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity // ops_complexity ); task_indcol[jpos++] = ipos; // // c^k_00 is computed with b^k_0 and a^{k+1} queue[ipos]->parallel_max = ((num_block - k - 1) * (num_block - k)) / 2 - 1; queue[ipos]->parallel_id = (j * (j + 1)) / 2 + i - 1 ; if (k == 0) { // dependency on dgemm/dsub of previous factorization level // (i + 1)-th row, (j + 1)-th column if (_isSym || !isChldrnAlgnd) { const int jtmp = ((j + 1) * (j + 2)) / 2 + i + 1; queue[ipos]->parents->push_back(task_p[jtmp]); } else { const int jtmp = ((j + 1) * (j + 1)) + 2 * i + 2; queue[ipos]->parents->push_back(task_p[jtmp]); queue[ipos]->parents->push_back(task_p[jtmp + 1]); } } else { // if (k > 0) { // c^{k-1}_{i+1,j+1} parent_id = (task_ptr[k - 1] + (num_block - k + 2) + ((j + 1) * (j + 2)) / 2 + (i + 1)); queue[ipos]->parents->push_back(queue[parent_id]); } if (i == 0) { parent_id = task_ptr[k] + 1; // b^k_i queue[ipos]->parents->push_back(queue[parent_id]); } else { parent_id = task_ptr[k] + 3 + i; queue[ipos]->parents->push_back(queue[parent_id]); } // if (i == 0) // by definition of the loop, i < j parent_id = task_ptr[k] + 3 + j; // b^k_j queue[ipos]->parents->push_back(queue[parent_id]); } // scope for char *task_name_cstr :: task 
c^k_{i,j} } // loop : i // task c^k_{j,j} { if (j == 0) { ipos = task_ptr[k] + 2; atomic_size = 3; atomic_id = 1; } else { ipos = (task_ptr[k] + 2 + // a^0 a^1 (num_block - k -1) + // b^0_0 ,... b^0_{num_block-k-2} (j * (j + 1)) / 2 + // size of upper triangle j); atomic_size = 1; atomic_id = 0; } string task_name = ("c " + to_string(k) + " " + to_string(j) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); // char *task_name_cstr = new char[task_name.str().size() + 1]; // strcpy(task_name_cstr, task_name.str().c_str()); const long nrowl = (long)nrow; const long ncoll = (long)ncol; const long ops = nrowl * ncoll * (ncoll + 1L); // non-optimized gemm' : opt. factor = 1 C_dupdateb_Schur_arg *arg = new C_dupdateb_Schur_arg(_isSym, task_position, _level, num_block, k, _diag, // D, _factorize_LDLt, // _factorize_LDLt_work, n, nrow, // int nrow ncol, // int ncol SIZE_B1, k, k + j + 1, (-1), verbose, fp, _nb); // jj *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DHALF_SCHUR_B, task_name, (void *)arg, C_dupdateb_Schur_diag, atomic_size, atomic_id, arg->ops_complexity // ops_complexity ); task_indcol[jpos++] = ipos; if (j > 0) { // c^k_00 is computed with b^k_0 and a^{k+1} queue[ipos]->parallel_max = ((num_block - k - 1) * (num_block - k)) / 2 - 1; queue[ipos]->parallel_id = (j * (j + 1)) / 2 + j - 1 ; } else { queue[ipos]->parallel_max = (-1); queue[ipos]->parallel_id = 0; } if (k == 0) { // dependency on dgemm/dsub of previous factorization level // (j + 1)-th row, (j + 1)-th column if (_isSym || !isChldrnAlgnd) { const int jtmp = ((j + 1) * (j + 2)) / 2 + j + 1; queue[ipos]->parents->push_back(task_p[jtmp]); } else { const int jtmp = (j + 1) * (j + 1) + 2 * j + 2; queue[ipos]->parents->push_back(task_p[jtmp]); } } else { // if (k > 0) { // c^{k-1}_{j+1,j+1} parent_id = (task_ptr[k - 1] + (num_block - k + 2) + ((j + 1) * (j + 2)) / 2 + (j + 1)); queue[ipos]->parents->push_back(queue[parent_id]); } // else { dependency to dgemm of previous 
factorization level } if (j > 0) { parent_id = task_ptr[k] + 3 + j; // b^k_j queue[ipos]->parents->push_back(queue[parent_id]); } // no need to write dependency : sequential operation inside of atom // resolves dependency // else { // parent_id = task_ptr[k] + 1; // b^k_0 // queue[ipos]->parents->push_back(queue[parent_id]); //} } // scope for char *task_name_cstr : c^k_{j,j} } // loop : j } // loop : k if (_level == 0) { // 22 Jan.2013 : Atsushi : without refact. for level > 0 // queue for refactorization with full pivot strategy ipos = task_ptr[num_block]; for (int k = 0; k < num_block; k++) { const int nrow = _diag->nrowBlock(k); int task_position; task_position = 0; if (k == 0) { task_position = 1; } if (k == (num_block - 1)) { task_position += 2; } // task a^k { // scope for char *task_name_cstr // pv_param.pivot is updating during factorization string task_name = ("A " + to_string(k) + " : " +to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow; const long ops = nrowl * (nrowl + 1L) * (nrowl + 2L) + nrowl; C_dfull_gauss_arg *arg = new C_dfull_gauss_arg(_isSym, task_position, // int task_position _level, num_block, k, _diag, // D, // _lower, _localSchur, _factorize_LDLt, //_factorize_LDLt_diag, // diag (int *)NULL, // int *permute_block n, // int n upper_ncol, // lower matrix size nrow, // int nrow k, //kk, // int i1 eps_piv, pivot, pivot0, pivot1, kernel_detection, aug_dim, eps_machine, verbose, fp, _nb); *(arg->ops_complexity) = ops; /* NOTE(review): atomic_size and atomic_id are not reassigned anywhere in this level-0 loop, so the A^k tasks receive whatever the blocked-pivot loop above last stored in them -- confirm this is intended. */ queue[ipos] = new C_task(C_DFULL_SYM_GAUSS, task_name, // task_name_cstr, (void *)arg, C_gauss_whole_pivot, atomic_size, atomic_id, arg->ops_complexity // ops_complexity ); arg->quit = &queue[ipos]->quit_queue; queue[ipos]->fp = &(arg->fp); task_indcol[jpos++] = ipos; queue[ipos]->parallel_max = 1; queue[ipos]->parallel_id = 0; queue[ipos]->to_next_task = (((num_block - 1 - k) * (num_block - k) * (num_block + 1 - k)) / 6 + num_block - k - 1); if (k == 0) { const int jj = task_ptr[num_block - 1]; 
queue[ipos]->parents->push_back(queue[jj]); // the last } else { // depending on all C_dupdateb_Schur_offdiagt()s in the previous level const int jbegin = task_ptr[(k - 1) + num_block] + 1; const int jend = task_ptr[k + num_block]; for (int j = jbegin; j < jend; j++) { queue[ipos]->parents->push_back(queue[j]); } } ipos++; } // scope for char *task_name_cstr int itmp = 0; for (int j = 0; j < (num_block - 1 - k); j++) { const int jj = k + j + 1; // const int nrow1 = (j == (num_block - 2 - k)) ? size_res : SIZE_B1; const int nrow1 = _diag->nrowBlock(jj); for (int i = j; i < (num_block - 1 - k); i++) { const int ii = k + i + 1; // const int ncol1 = (i == (num_block - 2 - k)) ? size_res : SIZE_B1; const int ncol1 = _diag->nrowBlock(ii); // task c^k_{i,j} { string task_name = ("C " + to_string(k) + " " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow1; const long ncoll = (long)ncol1; const long ops = nrowl * nrowl * ncoll * 2L; // optimized BLAS3 gemm : opt. factor = 1/0.5 C_dupdateb_Schur_arg *arg = new C_dupdateb_Schur_arg(_isSym, task_position, _level, num_block, k, _diag, // D, _factorize_LDLt, n, nrow1, ncol1, SIZE_B1, k, //kk, ii, // k + i + 1 jj, // k + j + 1, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DHALF_SCHUR_BT, task_name, (void *)arg, C_dupdateb_Schur_offdiag_t, 1, // atomic_size 0, // atomic_id arg->ops_complexity // ops_complexity ); task_indcol[jpos++] = ipos; // // c^k_00 is computed with b^k_0 and a^{k+1} queue[ipos]->parallel_max = ((num_block - k - 1) * (num_block - k)) / 2; queue[ipos]->parallel_id = itmp++; // itmp = i + j * n_block - ... 
const int jj = task_ptr[k + num_block]; queue[ipos]->parents->push_back(queue[jj]); ipos++; } } // loop : i } // loop : j } // loop : k // nop += (double)n * (double)n * (double)n / 3.0; } // if (_level == 0) // return ipos; } template int DissectionMatrix:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, double *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); template int DissectionMatrix, double>:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, double *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, quadruple *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); template int DissectionMatrix, quadruple >:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, quadruple *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, float *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); template int DissectionMatrix, float>:: C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, float *eps_machine, double *pivot, double *pivot0, double *pivot1, 
vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); // template int DissectionMatrix:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp) { int itmp, jtmp; const int nrow = getUpperNRow(); // _nrow const int num_block = _diag->num_blocks(); const int task_p_offset = (num_block * (num_block + 1)) / 2; const int num_block_col = _upper->num_blocks_c(); // vector &singidx = singIdx(); if (nrow == 0) { queue.resize(1); string task_name = ("du dummy : " + to_string(_level) + " : " +to_string(_nb)); C_dummy_arg *arg = new C_dummy_arg(verbose, fp, _nb); // *(arg->ops_complexity) = (-1L); queue[0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); queue[0]->parents->clear(); return 1; } itmp = ((num_block * (num_block + 1)) / 2) * num_block_col; if (!_isSym) { itmp *= 2; } queue.resize(itmp); qparents_index.resize(itmp); queue_index.resize(itmp); // initialize for (int i = 0; i < itmp; i++) { queue_index[i] = i; } // vector ptr, indcol; ptr.resize(num_block + 1); jtmp = (num_block + 1) * num_block; itmp = _isSym ? (jtmp / 2) : jtmp; indcol.resize(itmp); // upper and lower are stored sequentially ptr[0] = 0; for (int k = 0; k < num_block; k++) { ptr[k + 1] = ptr[k] + (num_block - k); // based on the symmetric case } int ipos, jpos, atomic_size, atomic_id; jpos = 0; for (int k = 0; k < num_block; k++) { if (k == 0) { ipos = 0; atomic_size = 1; atomic_id = 0; } else { ipos = _isSym ? 
(ptr[k - 1] + 2) : (ptr[k - 1] + 2) * 2; atomic_size = 2; atomic_id = 1; } int nrow_block = _diag->nrowBlock(k, k); const long nrow_blockl = (long)nrow_block; int iipos = ipos * num_block_col; for (int l = 0; l < num_block_col; l++) { int nrhs_block = _upper->ncolBlock(l); const long nrhs_blockl = (long)nrhs_block; const long ops = nrow_blockl * nrow_blockl *nrhs_blockl; DTRSMScale_arg *arg = new DTRSMScale_arg(_isSym, _diag, // ldlt, _upper, // _lower, // not yet allocated // pt_offset, nrow, nrhs_block, k, // l, (-1), // mblock &singidx, true, // localPermute verbose, fp, _nb); *(arg->ops_complexity) = ops; string task_name = ("du " + to_string(k) + " " + to_string(l) + " : " + to_string(_level) + " : " + to_string(_nb)); queue[iipos] = new C_task(C_DTRSMSCALE, task_name, (void *)arg, C_DTRSMScale_diag_upper, atomic_size, atomic_id, arg->ops_complexity); queue[iipos]->fp = &(arg->fp); // dependency on LDLt : alpha^(k) jtmp = task_o_indcol[task_o_ptr[k]]; queue[iipos]->parents->push_back(task_o[jtmp]); if (k == 0) { queue[iipos]->parents->push_back(task_p[task_p_offset + l]); } else { if (_isSym) { jtmp = indcol[ptr[k - 1]] * num_block_col + l; } else { jtmp = indcol[2 * ptr[k - 1]] * num_block_col + l; } // queue[iipos]->parents->push_back(queue[jtmp]); qparents_index[iipos].push_back(jtmp); } indcol[jpos] = ipos; iipos++; } //loop : l if (!_isSym) { for (int l = 0; l < num_block_col; l++) { int nrhs_block = _lower->ncolBlock(l); const long nrhs_blockl = (long)nrhs_block; // lower DTRSM without scaling // to prevent elimination from the task queue to keep task-dependency const long ops = nrow_blockl == 1L ? 
1L : (nrow_blockl * (nrow_blockl - 1L) * nrhs_blockl); DTRSMScale_arg *arg = new DTRSMScale_arg(_isSym, _diag, //ldlt, (RectBlockMatrix *)NULL, _lower, // not yet allocated // pt_offset, nrow, nrhs_block, k, // l, (-1), // mblock &singidx, true, // localPermute verbose, fp, _nb); *(arg->ops_complexity) = ops; string task_name = ("dl " + to_string(k) + " " + to_string(l) + " : " + to_string(_level) + " : " + to_string(_nb)); queue[iipos] = new C_task(C_DTRSMSCALE, task_name, (void *)arg, C_DTRSMScale_diag_lower, atomic_size, atomic_id, arg->ops_complexity); queue[iipos]->fp = &(arg->fp); // dependency on LDLt : alpha^(k) jtmp = task_o_indcol[task_o_ptr[k]]; queue[iipos]->parents->push_back(task_o[jtmp]); if (k == 0) { queue[iipos]->parents->push_back(task_p[task_p_offset + l]); } else { jtmp = indcol[2 * ptr[k - 1] + 1] * num_block_col + l; // queue[iipos]->parents->push_back(queue[jtmp]); qparents_index[iipos].push_back(jtmp); } iipos++; indcol[jpos + 1] = ipos + 1; } } // loop : l jpos += _isSym ? 1 : 2; for (int m = (k + 1); m < num_block; m++) { if (m == (k + 1)) { ipos = _isSym ? (ptr[k] + 1) : ((ptr[k] + 1) * 2); atomic_size = 2; atomic_id = 0; } else { ipos = _isSym ? 
(ptr[k] + (m - k + 1)) : ((ptr[k] + (m - k + 1)) * 2); atomic_size = 1; atomic_id = 0; } int ncol_block = _diag->ncolBlock(k, m); const long ncol_blockl = (long)ncol_block; int iipos = ipos * num_block_col; for (int l = 0; l < num_block_col; l++) { int nrhs_block = _upper->ncolBlock(l); const long nrhs_blockl = (long)nrhs_block; const long ops = (2L * nrow_blockl * ncol_blockl * nrhs_blockl); DTRSMScale_arg *arg = new DTRSMScale_arg(_isSym, _diag, //ldlt, _upper, // _lower, // not yet allocated // pt_offset, nrow, nrhs_block, k, // l, m, // mblock &singidx, true, // localPermute verbose, fp, _nb); string task_name = ("dU " + to_string(k) + " " + to_string(m) + " " + to_string(l) + " : " + to_string(_level) + " : " + to_string(_nb)); *(arg->ops_complexity) = ops; queue[iipos] = new C_task(C_DTRSMSCALE, task_name, (void *)arg, C_DTRSMScale_offdiag_upper, atomic_size, atomic_id, arg->ops_complexity); queue[iipos]->fp = &(arg->fp); // Dependency on LDLt : beta^(k)_(m) jtmp = task_o_indcol[task_o_ptr[k] + m - k]; queue[iipos]->parents->push_back(task_o[jtmp]); jtmp = (indcol[_isSym ? ptr[k] : ptr[k] * 2] * num_block_col + l); qparents_index[iipos].push_back(jtmp); // if (k > 0) { const int ktmp = (_isSym ? 
(ptr[k - 1] + m - k + 1) : ((ptr[k - 1] + m - k + 1) * 2)); jtmp = (indcol[ktmp] * num_block_col + l); qparents_index[iipos].push_back(jtmp); // } indcol[jpos] = ipos; iipos++; } if (!_isSym) { for (int l = 0; l < num_block_col; l++) { int nrhs_block = _lower->ncolBlock(l); const long nrhs_blockl = (long)nrhs_block; const long ops = (2L * nrow_blockl * ncol_blockl * nrhs_blockl); DTRSMScale_arg *arg = new DTRSMScale_arg(_isSym, _diag, //ldlt, (RectBlockMatrix *)NULL, _lower, // not yet allocated // pt_offset, nrow, nrhs_block, k, // l, m, // mblock &singidx, true, // localPermute verbose, fp, _nb); *(arg->ops_complexity) = ops; string task_name = ("dL " + to_string(k) + " " + to_string(m) + " " + to_string(l) + " : " + to_string(_level) + " : " + to_string(_nb)); queue[iipos] = new C_task(C_DTRSMSCALE, task_name, (void *)arg, C_DTRSMScale_offdiag_lower, atomic_size, atomic_id, arg->ops_complexity); queue[iipos]->fp = &(arg->fp); // dependency on LDLt : beta^(k)_(m) jtmp = task_o_indcol[task_o_ptr[k] + m - k]; queue[iipos]->parents->push_back(task_o[jtmp]); jtmp = indcol[2 * ptr[k] + 1] * num_block_col + l; qparents_index[iipos].push_back(jtmp); // k-th beginning block if (k > 0) { const int ktmp = (ptr[k - 1] + m - k + 1) * 2 + 1; jtmp = (indcol[ktmp] * num_block_col + l); qparents_index[iipos].push_back(jtmp); // } iipos++; indcol[jpos + 1] = ipos + 1; } // if (!isSym) } // loop : l jpos += _isSym ? 
1 : 2; } // loop : m } // loop k return queue.size(); } template int DissectionMatrix:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); template int DissectionMatrix, double>:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); template int DissectionMatrix, quadruple>:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); template int DissectionMatrix, float>:: C_DTRSMScale_queue(vector &queue, vector > &qparents_index, vector &queue_index, vector &indcol, vector &ptr, Dissection::Tree *btree, vector &task_o, vector &task_o_indcol, vector &task_o_ptr, vector &task_p, const bool verbose, FILE **fp); // template void DissectionMatrix:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp) { int itmp, jtmp; const int num_block = _diag->num_blocks(); const int num_block_col = 
_upper->num_blocks_c(); vector tmp; vector tmp_index; if (num_block == 1) { if (!_isSym) { tmp.resize(2 * num_block_col); tmp_index.resize(2 * num_block_col); itmp = 0; for (int l = 0; l < _localSchur->num_blocks0(); l++, itmp += 2) { tmp[itmp] = queue[l]; // upper tmp_index[itmp] = queue_index[l]; tmp[itmp + 1] = queue[l + num_block_col]; // lower tmp_index[itmp + 1] = queue_index[l + num_block_col]; } for (int l = _localSchur->num_blocks0(); l < num_block_col; l++, itmp += 2) { tmp[itmp] = queue[l]; // upper tmp_index[itmp] = queue_index[l]; tmp[itmp + 1] = queue[l + num_block_col]; // lower tmp_index[itmp + 1] = queue_index[l + num_block_col]; } for (int i = 0; i < (2 * num_block_col); i++) { queue[i] = tmp[i]; queue_index[i] = tmp_index[i]; } } } else if (num_block == 2) { tmp.resize(3 * num_block_col); tmp_index.resize(3 * num_block_col); if (!_isSym) { swap_queues_n(queue, queue_index, 1, 2, num_block_col, tmp, tmp_index); swap_queues_n(queue, queue_index, 3, 4, num_block_col, tmp, tmp_index); swap_queues_n(queue, queue_index, 2, 3, num_block_col, tmp, tmp_index); } for (int l = 0; l < num_block_col; l++) { tmp[3 * l] = queue[l]; tmp_index[3 * l] = queue_index[l]; tmp[3 * l + 1] = queue[num_block_col + l]; tmp_index[3 * l + 1] = queue_index[num_block_col + l]; tmp[3 * l + 2] = queue[2 * num_block_col + l]; tmp_index[3 * l + 2] = queue_index[2 * num_block_col + l]; for (int m = 0; m < 3; m++) { tmp[3 * l + m]->atomic_size = 3; tmp[3 * l + m]->atomic_id = m; } } for (int l = 0; l < (3 * num_block_col); l++) { queue[l] = tmp[l]; queue_index[l] = tmp_index[l]; } // loop : l if (!_isSym) { for (int l = 0; l < num_block_col; l++) { tmp[3 * l] = queue[3 * num_block_col + l]; tmp_index[3 * l] = queue_index[3 * num_block_col + l]; tmp[3 * l + 1] = queue[4 * num_block_col + l]; tmp_index[3 * l + 1] = queue_index[4 * num_block_col + l]; tmp[3 * l + 2] = queue[5 * num_block_col + l]; tmp_index[3 * l + 2] = queue_index[5 * num_block_col + l]; for (int m = 0; m < 3; m++) { 
tmp[3 * l + m]->atomic_size = 3; tmp[3 * l + m]->atomic_id = m; } } for (int l = 0; l < (3 * num_block_col); l++) { queue[3 * num_block_col + l] = tmp[l]; queue_index[3 * num_block_col + l] = tmp_index[l]; } // loop : l } if (!_isSym) { tmp.resize(6 * num_block_col); tmp_index.resize(6 * num_block_col); itmp = 0; for (int l = 0; l < _localSchur->num_blocks0(); l++, itmp += 6) { for (int k = 0; k < 3; k++) { tmp[itmp + k] = queue[3 * l + k]; // upper tmp_index[itmp + k] = queue_index[3 * l + k]; tmp[itmp + 3 + k] = queue[3 * (l + num_block_col) + k]; // lower tmp_index[itmp + 3 + k] = queue_index[3 * (l + num_block_col) + k]; } } for (int l = _localSchur->num_blocks0(); l < num_block_col; l++, itmp += 6) { for (int k = 0; k < 3; k++) { tmp[itmp + k] = queue[3 * l + k]; // upper tmp_index[itmp + k] = queue_index[3 * l + k]; tmp[itmp + 3 + k] = queue[3 * (l + num_block_col) + k]; // lower tmp_index[itmp + 3 + k] = queue_index[3 * (l + num_block_col) + k]; } } for (int i = 0; i < (6 * num_block_col); i++) { queue[i] = tmp[i]; queue_index[i] = tmp_index[i]; } } } // if (num_block == 1) else if (num_block == 2) else { tmp.resize(2 * num_block_col); tmp_index.resize(2 * num_block_col); if (!_isSym) { for (int i = 0; i < indcol.size(); i++) { const int ii = i * num_block_col; if ((i % 2 == 1) && (queue[ii]->atomic_size == 2) && (queue[ii]->atomic_id == 0)) { swap_queues_n(queue, queue_index, i, (i + 1), num_block_col, tmp, tmp_index); i++; } } // loop : i } // if (!isSym) for (int i = 0; i < indcol.size(); i++) { const int ii = i * num_block_col; if ((queue[ii]->atomic_size == 2) && (queue[ii]->atomic_id == 0)) { for (int l = 0; l < num_block_col; l++) { tmp[2 * l] = queue[ii + l]; tmp_index[2 * l] = queue_index[ii + l]; tmp[2 * l + 1] = queue[(i + 1) * num_block_col + l]; tmp_index[2 * l + 1] = queue_index[(i + 1) * num_block_col + l]; } for (int l = 0; l < (2 * num_block_col); l++) { queue[ii + l] = tmp[l]; queue_index[ii + l] = tmp_index[l]; } i++; } } // loop : it } // 
if (num_block == 2) if (num_block < 3) { if (_isSym) { int parallel_max = _localSchur->num_blocks0(); int nsize = ((num_block == 1) ? 1 : 3); for (int l = 0; l < parallel_max; l++) { itmp = l * nsize; for (int k = 0; k < nsize; k++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = l; } } parallel_max = num_block_col - _localSchur->num_blocks0(); for (int l = 0; l < parallel_max; l++) { itmp = (l + _localSchur->num_blocks0()) * nsize; for (int k = 0; k < nsize; k++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = l; } } } else { int parallel_max = _localSchur->num_blocks0() * 2; int nsize = ((num_block == 1) ? 1 : 3); for (int l = 0; l < parallel_max; l++) { itmp = l * nsize; for (int k = 0; k < nsize; k++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = l; } } parallel_max = (num_block_col - _localSchur->num_blocks0()) * 2; for (int l = 0; l < parallel_max; l++) { itmp = (l + 2 * _localSchur->num_blocks0()) * nsize; for (int k = 0; k < nsize; k++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = l; } } } } else { // if (num_block < 3) int parallel_max = (_isSym ? 
1 : 2) * num_block_col; for (int l = 0; l < num_block_col; l++) { queue[l]->parallel_max = parallel_max; queue[l]->parallel_id = l; } if (!_isSym) { for (int l = 0; l < num_block_col; l++) { queue[num_block_col + l]->parallel_max = parallel_max; queue[num_block_col + l]->parallel_id = num_block_col + l; } } for (int k = 0; k < (num_block - 1); k++) { if (_isSym) { parallel_max = (ptr[k + 1] - ptr[k] - 1) * num_block_col; jtmp = 0; for (int i = (ptr[k] + 1); i < (ptr[k + 1] + 1); i++) { itmp = i * num_block_col; for (int l = 0; l < num_block_col; l++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = jtmp; if (((queue[itmp]->atomic_size == 2) && (queue[itmp]->atomic_id == 1)) || (queue[itmp]->atomic_size == 1)) { jtmp++; } } // loop : l } // loop : i } // _isSym else { parallel_max = (2 * (ptr[k + 1] - ptr[k]) - 2) * num_block_col; jtmp = 0; for (int i = (ptr[k] * 2 + 2); i < (ptr[k + 1] * 2 + 2); i++) { itmp = i * num_block_col; for (int l = 0; l < num_block_col; l++, itmp++) { queue[itmp]->parallel_max = parallel_max; queue[itmp]->parallel_id = jtmp; if (((queue[itmp]->atomic_size == 2) && (queue[itmp]->atomic_id == 1)) || (queue[itmp]->atomic_size == 1)) { jtmp++; } } // loop : l } // loop : i } } // loop : k } // if (num_block < 2) tmp_index.resize(queue_index.size()); vector tmp_old(queue_index); for (int i = 0; i < queue_index.size(); i++) { tmp_index[queue_index[i]] = i; } for (int i = 0; i < queue_index.size(); i++) { queue_index[i] = tmp_index[i]; } { // begin : scope j int j = 0; for (vector::const_iterator it = queue.begin(); it != queue.end(); ++it, j++) { const int jj = tmp_old[j]; for (list::const_iterator kt = qparents_index[jj].begin(); kt != qparents_index[jj].end(); ++kt) { (*it)->parents->push_back(queue[queue_index[(*kt)]]); } // loop : kt } // loop : it } // end : scope j // merging all dependency inside of atomic operation : not optimal but // minimal modification of cheking of task dependency during excution for 
(vector::const_iterator it = queue.begin(); it != queue.end(); ++it) { switch((*it)->atomic_size) { case 2: { vector::const_iterator jt = it; C_task *task_tmp = (*jt); ++it; (*jt)->parents->merge(*((*it)->parents)); (*jt)->parents->unique(); // remove dependency inside of atomic operation for (list::iterator mt = (*jt)->parents->begin(); mt != (*jt)->parents->end(); ++mt) { if ((*mt) == (*jt)) { mt = (*jt)->parents->erase(mt); } } *((*it)->parents) = *((*jt)->parents); (*it)->parents->push_back(task_tmp); } break; case 3: { vector::const_iterator jt = it; C_task *task_tmp0 = (*jt); ++it; (*jt)->parents->merge(*((*it)->parents)); (*jt)->parents->unique(); // remove dependency inside of atomic operation for (list::iterator mt = (*jt)->parents->begin(); mt != (*jt)->parents->end(); ++mt) { if ((*mt) == (*jt)) { task_tmp0 = (*mt); mt = (*jt)->parents->erase(mt); } } vector::const_iterator kt = it; C_task *task_tmp1 = (*kt); ++it; (*jt)->parents->merge(*((*it)->parents)); (*jt)->parents->unique(); // remove dependency inside of atomic operation for (list::iterator mt = (*jt)->parents->begin(); mt != (*jt)->parents->end(); ++mt) { if (((*mt) == (*jt)) || ((*mt) == (*kt))) { mt = (*jt)->parents->erase(mt); } } *((*kt)->parents) = *((*jt)->parents); *((*it)->parents) = *((*jt)->parents); (*kt)->parents->push_back(task_tmp0); (*it)->parents->push_back(task_tmp0); (*it)->parents->push_back(task_tmp1); } break; default: break; } } for (vector::const_iterator it = queue.begin(); it != queue.end(); ++it) { (*it)->parents->sort(compare_task_name); (*it)->parents->unique(); } } template void DissectionMatrix:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); template void DissectionMatrix, double>:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); template void DissectionMatrix:: 
C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); template void DissectionMatrix, quadruple>:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); template void DissectionMatrix:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); template void DissectionMatrix, float>:: C_DTRSMScale_rearrange(vector &queue, vector > &qparents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); // template int DissectionMatrix::C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix *upper1, RectBlockMatrix *lower1, bool isSkip, bool isDirect, SquareBlockMatrix* fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp) { const int nrow = _upper->nbRows(); // _nrow const int nrow1 = isSkip ? (-1) : upper1->nbRows(); // dummy for isSkip const int num_block = _localSchur->num_blocks(); const int num_block0 = _localSchur->num_blocks0(); const int num_block1 = num_block - num_block0; int queue_size; if (nrow == 0 && _level != 0) { queue.resize(1); string task_name = ("f dummy : " + to_string(_level) + " : " +to_string(_nb)); diss_printf(verbose, *fp, "%s %d : %s\n", __FILE__, __LINE__, task_name.c_str()); C_dummy_arg *arg = new C_dummy_arg(verbose, fp, _nb); // *(arg->ops_complexity) = (-1L); queue[0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[0]->parents->clear(); return 1; } queue_size = (_isSym ? 
((num_block * (num_block + 1)) / 2) : (num_block * num_block)); queue.resize(queue_size); indcol.resize(queue_size); int pncol_block = _upper->num_blocks_c(); // int qncol_block = isSkip ? (-1) : upper1->num_blocks_c(); // int ipos = 0; int parallel_max0, parallel_max1, parallel_max2; parallel_max0 = (_isSym ? (num_block0 * (num_block0 + 1)) / 2 : (num_block0 * num_block0)); parallel_max2 = (_isSym ? (num_block1 * (num_block1 + 1)) / 2 : (num_block1 * num_block1)); parallel_max1 = (_isSym ? (num_block * (num_block + 1)) / 2 : (num_block * num_block)) - parallel_max0 - parallel_max2; for (int j = 0; j < num_block0; j++) { for (int i = 0; i < j; i++) { const long block_nrowl = (long)fdiag->nrowBlock(i, j); const long block_ncoll = (long)fdiag->ncolBlock(i, j); const long nrowl = (long)nrow; const long nrowwl = isSkip ? nrowl : nrowl + (long)nrow1; { if (isDirect) { const long ops = (isSkip ? 0L : // flag to skip the dependency (block_nrowl * block_ncoll * nrowwl * 2L)); string task_name = ("F " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_two_arg *arg = new DSchurGEMM_two_arg(_isSym, false, // isTrans _lower, _upper, nrow, lower1, upper1, nrow1, i, j, fdiag, isSkip, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_DIRECT_TWO, task_name, (void *)arg, DSchurGEMM_offdiag_two, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); if(!isSkip) { const int mmp = task_p_ptr.size() - 1; const int mmq = task_q_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + i; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } for (int m = 0; m < task_q_ptr[mmq]; m++) { int nn; nn = task_q_indcol[m] * qncol_block + i; queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); nn = 
task_q_indcol[m] * qncol_block + j; queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + i; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } for (int m = 0; m < task_q_ptr[mmq]; m++) { int nn; nn = task_q_indcol[2 * m + 1] * qncol_block + i; // lower queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); nn = task_q_indcol[2 * m] * qncol_block + j; // upper queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); } } if (task_s != NULL) { const int ktmp = (j * (j + 1)) / 2 + i; // diagonal block DSUB queue[ipos]->parents->push_back((*task_s)[ktmp]); } } // if (!isSkip) } // if (isDirect) else { const long ops = (isSkip ? 0L : // flag to skip the dependency (block_nrowl * block_ncoll * nrowl * 2L)); string task_name = ("f " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, false, // isTrans _lower, _upper, nrow, i, j, _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_TWO, task_name, (void *)arg, DSchurGEMM_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + i; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + i; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper 
queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } } // else if (isDirect) queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max0; queue[ipos]->parallel_id = ipos; const int itmp = _isSym ? ((j * (j + 1)) / 2 + i) : (j * j + 2 * i); indcol[itmp] = ipos; ipos++; // lower block } if (!_isSym) { if (isDirect) { const long ops = (isSkip ? 0L : // flag to skip the dependency (block_nrowl * block_ncoll * nrowwl * 2L)); string task_name = ("F " + to_string(j) + " " + to_string(i) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_two_arg *arg = new DSchurGEMM_two_arg(_isSym, true, // isTrans _upper, _lower, nrow, upper1, lower1, nrow1, j, // transposed i, fdiag, isSkip, // verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_DIRECT_TWO, task_name, (void *)arg, DSchurGEMM_offdiag_two, 1, 0, arg->ops_complexity); queue[ipos]->fp = &(arg->fp); if(!isSkip) { for (int m = 0; m < (task_p_ptr.size() - 1); m++) { int mm, nn; mm = 2 * task_p_ptr[m]; // upper nn = task_p_indcol[mm] * pncol_block + i; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); mm = 2 * task_p_ptr[m] + 1; // lower nn = task_p_indcol[mm] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } for (int m = 0; m < (task_q_ptr.size() - 1); m++) { int mm, nn; mm = 2 * task_q_ptr[m]; // upper nn = task_q_indcol[mm] * qncol_block + i; queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); mm = 2 * task_q_ptr[m] + 1; // lower nn = task_q_indcol[mm] * qncol_block + j; queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); } if (task_s != NULL) { const int ktmp = (j * (j + 1)) / 2 + i; // diagonal block DSUB queue[ipos]->parents->push_back((*task_s)[ktmp]); } } } // if (isDirect) else { const long ops = (isSkip ? 
0L : // flag to skip the dependency (block_nrowl * block_ncoll * nrowl * 2L)); string task_name = ("f " + to_string(j) + " " + to_string(i) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, true, // isTrans _upper, _lower, nrow, j, // trasnposed i, _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_TWO, task_name, (void *)arg, DSchurGEMM_offdiag, 1, 0, arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + i; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } // else if (isDirect) queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max0; queue[ipos]->parallel_id = ipos; const int itmp = j * j + 2 * i + 1; // lower part of unsymmetric matrix indcol[itmp] = ipos; ipos++; } // (!_isSym) } // loop : i // diagonal { const long block_ncoll = (long)fdiag->ncolBlock(j, j); const long nrowl = (long)nrow; const long nrowwl = isSkip ? nrowl : nrowl + (long)nrow1; if (isDirect) { const long ops = (isSkip ? 0L : (_isSym ? 
(block_ncoll * (block_ncoll + 1L) * nrowwl) : (block_ncoll * block_ncoll * nrowwl * 2L))); string task_name = ("F " + to_string(j) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_two_arg *arg = new DSchurGEMM_two_arg(_isSym, false, // _lower, // _upper, // nrow, lower1, // upper1, // nrow1, j, (-1), fdiag, isSkip, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_DIRECT_TWO, task_name, // task_name_cstr, (void *)arg, DSchurGEMM_diag_two, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); if(!isSkip) { const int mmp = task_p_ptr.size() - 1; const int mmq = task_q_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } for (int m = 0; m < task_q_ptr[mmq]; m++) { int nn; nn = task_q_indcol[m] * qncol_block + j; queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } for (int m = 0; m < task_q_ptr[mmq]; m++) { int nn; nn = task_q_indcol[2 * m + 1] * qncol_block + j; // lower queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); nn = task_q_indcol[2 * m] * qncol_block + j; // upper queue[ipos]->parents->push_back(task_q[task_q_index[nn]]); } } if (task_s != NULL) { const int ktmp = (j * (j + 1)) / 2 + j; // diagonal block DSUB queue[ipos]->parents->push_back((*task_s)[ktmp]); } } } // if (isDirect) else { const long ops = (isSkip ? 0L : (_isSym ? 
(block_ncoll * (block_ncoll + 1L) * nrowl) : (block_ncoll * block_ncoll * nrowl * 2L))); string task_name = ("f " + to_string(j) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, false, // isTrans _lower, // _upper, // nrow, j, (-1), _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_TWO, task_name, // task_name_cstr, (void *)arg, DSchurGEMM_diag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } } // else if (isDirect) queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max0; queue[ipos]->parallel_id = ipos; const int itmp = _isSym ? 
((j * (j + 1)) / 2 + j) : (j * j + 2 * j); indcol[itmp] = ipos; ipos++; } } // loop : j for (int j = num_block0; j < num_block; j++) { for (int i = 0; i < num_block0; i++) { const long block_nrowl = (long)_localSchur->nrowBlock(i, j); const long block_ncoll = (long)_localSchur->ncolBlock(i, j); const long nrowl = (long)nrow; { const long ops = block_nrowl * block_ncoll * nrowl * 2L; string task_name = ("f " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, false, // isTrans _lower, _upper, nrow, i, //(i * SIZE_B1), j, //(j * SIZE_B1), _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_TWO, task_name, (void *)arg, DSchurGEMM_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + i; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + i; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } queue[ipos]->parents->sort(compare_task_name); queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max1; queue[ipos]->parallel_id = ipos - parallel_max0; const int itmp = _isSym ? 
((j * (j + 1)) / 2 + i) : (j * j + 2 * i); indcol[itmp] = ipos; ipos++; // lower block } if (!_isSym) { const long ops = block_nrowl * block_ncoll * nrowl * 2L; string task_name = ("f " + to_string(j) + " " + to_string(i) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, true, // isTrans _upper, _lower, nrow, j, // transposed i, _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_TWO, task_name, (void *)arg, DSchurGEMM_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + i; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } queue[ipos]->parents->sort(compare_task_name); queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max1; queue[ipos]->parallel_id = ipos - parallel_max0; const int itmp = j * j + 2 * i + 1; indcol[itmp] = ipos; ipos++; } // (!_isSym) } // loop : i } for (int j = num_block0; j < num_block; j++) { for (int i = num_block0; i < j; i++) { const long block_nrowl = (long)_localSchur->nrowBlock(i, j); const long block_ncoll = (long)_localSchur->ncolBlock(i, j); const long nrowl = (long)nrow; { const long ops = block_nrowl * block_ncoll * nrowl * 2L; string task_name = ("f " + to_string(i) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, false, // isTrans _lower, _upper, nrow, i, //(i * SIZE_B1), j, //(j * SIZE_B1), _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_MULT, task_name, (void *)arg, DSchurGEMM_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const 
int mmp = task_p_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + i; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + i; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } queue[ipos]->parents->sort(compare_task_name); queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max2; queue[ipos]->parallel_id = ipos - parallel_max0 - parallel_max1; const int itmp = _isSym ? ((j * (j + 1)) / 2 + i) : (j * j + 2 * i); indcol[itmp] = ipos; ipos++; // lower block } if (!_isSym) { const long ops = block_nrowl * block_ncoll * nrowl * 2L; string task_name = ("f " + to_string(j) + " " + to_string(i) + " : " + to_string(_level) + " : " + to_string(_nb)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, true, // isTrans _upper, _lower, nrow, j, // transposed i, _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_MULT, task_name, (void *)arg, DSchurGEMM_offdiag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + i; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } queue[ipos]->parents->sort(compare_task_name); queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max2; queue[ipos]->parallel_id = ipos - parallel_max0 - parallel_max1; const int itmp = j * j + 2 * i + 1; indcol[itmp] = ipos; 
ipos++; } // (!_isSym) } // loop : i // diagonal { string task_name = ("f " + to_string(j) + " " + to_string(j) + " : " + to_string(_level) + " : " + to_string(_nb)); const long nrowl = (long)nrow; const long block_ncoll = (long)_localSchur->ncolBlock(j, j); const long ops = (_isSym ? (block_ncoll * (block_ncoll + 1L) * nrowl) : (block_ncoll * block_ncoll * nrowl * 2L)); DSchurGEMM_arg *arg = new DSchurGEMM_arg(_isSym, false, // isTrans _lower, // _upper, // nrow, j, //(j * SIZE_B1), (-1), _localSchur, verbose, fp, _nb); *(arg->ops_complexity) = ops; queue[ipos] = new C_task(C_DGEMM_LOCAL_MULT, task_name, // task_name_cstr, (void *)arg, DSchurGEMM_diag, 1, // atomic_size 0, // atomic_id arg->ops_complexity); queue[ipos]->fp = &(arg->fp); const int mmp = task_p_ptr.size() - 1; if (_isSym) { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[m] * pncol_block + j; queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } else { for (int m = 0; m < task_p_ptr[mmp]; m++) { int nn; nn = task_p_indcol[2 * m + 1] * pncol_block + j; // lower queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); nn = task_p_indcol[2 * m] * pncol_block + j; // upper queue[ipos]->parents->push_back(task_p[task_p_index[nn]]); } } queue[ipos]->parents->sort(compare_task_name); queue[ipos]->parents->unique(); queue[ipos]->parallel_max = parallel_max2; queue[ipos]->parallel_id = ipos - parallel_max0 - parallel_max1; const int itmp = _isSym ? 
((j * (j + 1)) / 2 + j) : (j * j + 2 * j); indcol[itmp] = ipos; ipos++; } } // loop : j return queue.size(); } template int DissectionMatrix:: C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix *upper1, RectBlockMatrix *lower1, bool isSkip, bool isDirect, SquareBlockMatrix* fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp); template int DissectionMatrix, double>:: C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix > *upper1, RectBlockMatrix > *lower1, bool isSkip, bool isDirect, SquareBlockMatrix > *fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix *upper1, RectBlockMatrix *lower1, bool isSkip, bool isDirect, SquareBlockMatrix* fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp); template int DissectionMatrix, quadruple>:: C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix > *upper1, RectBlockMatrix > *lower1, bool isSkip, bool isDirect, SquareBlockMatrix > *fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp); template int DissectionMatrix:: C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix *upper1, RectBlockMatrix *lower1, bool isSkip, bool isDirect, SquareBlockMatrix* fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, 
// NOTE(review): in this copy of the file the template parameter and argument
// lists (the text between '<' and '>') appear to be missing, e.g. after
// "template", "DissectionMatrix", "vector" and "C_deallocLocalSchur_arg";
// the code tokens below are left exactly as found.
//
// Builds the queue of C_DEALLOCLOCALSCHUR tasks for the blocks of _localSchur.
//
// One task is created for every block (i, j) with num_block0 <= j < num_block
// and 0 <= i <= j; in the non-symmetric case one more task is created for
// each transposed off-diagonal position (j, i).
//
//   queue   : [out] resized to the number of tasks and filled in creation
//             order
//   indcol  : [out] resized to num_block * (num_block + 1) / 2 entries
//             (symmetric) or num_block * num_block entries (non-symmetric);
//             the entry of a block receives the position of its task in
//             queue, entries of blocks with j < num_block0 are not written
//             here
//   verbose : forwarded to every task argument
//   fp      : forwarded to every task argument
template
void DissectionMatrix::C_deallocLocalSchur_queue(vector &queue,
                                                 vector &indcol,
                                                 const bool verbose,
                                                 FILE **fp)
{
  const int num_block = _localSchur->num_blocks();
  const int num_block0 = _localSchur->num_blocks0();
  const int num_block1 = num_block - num_block0;
  int sizeq;
  // number of tasks: num_block0 x num_block1 rectangular part (counted twice
  // when non-symmetric) plus the trailing num_block1 x num_block1 part, of
  // which only one triangle with the diagonal is visited when symmetric
  sizeq = (_isSym ?
           (num_block0 * num_block1 + (num_block1 * (num_block1 + 1)) / 2) :
           (num_block0 * num_block1 * 2 + num_block1 * num_block1));
  queue.resize(sizeq);
  // index table over all num_block x num_block block positions
  sizeq = (_isSym ? (num_block * (num_block + 1) / 2) :
           (num_block * num_block));
  indcol.resize(sizeq);
  int ipos = 0;   // next free position in queue
  // first pass : columns j >= num_block0 against rows i < num_block0
  for (int j = num_block0; j < num_block; j++) {
    for (int i = 0; i < num_block0; i++) {
      // block (i, j)
      {
        string task_name = ("m " + to_string(i) + " " + to_string(j)
                            + " : " + to_string(_level) + " : " +
                            to_string(_nb));
        const long nrow = (long)_localSchur->nrowBlock(i, j);
        const long ncol = (long)_localSchur->ncolBlock(i, j);
        // number of entries of the block is used as the cost estimate
        const long ops = nrow * ncol;
        C_deallocLocalSchur_arg *arg =
          new C_deallocLocalSchur_arg(_isSym,
                                      _localSchur,
                                      i,
                                      j,
                                      verbose,
                                      fp,
                                      _nb);
        *(arg->ops_complexity) = ops;
        queue[ipos] = new C_task(C_DEALLOCLOCALSCHUR,
                                 task_name,
                                 (void *)arg,
                                 C_deallocLocalSchur,
                                 1, // atomic_size
                                 0, // atomic_id
                                 arg->ops_complexity);
        queue[ipos]->fp = &(arg->fp);
        // position of block (i, j) in indcol : packed by column when
        // symmetric, two slots per (i, j) pair of column j otherwise
        const int itmp = _isSym ? ((j * (j + 1)) / 2 + i) : (j * j + 2 * i);
        indcol[itmp] = ipos;
        ipos++;
      }
      // lower block : transposed position (j, i), non-symmetric case only
      if (!_isSym) {
        string task_name = ("m " + to_string(j) + " " + to_string(i)
                            + " : " + to_string(_level) + " : " +
                            to_string(_nb));
        const long ops = 1L; // dummy
        C_deallocLocalSchur_arg *arg =
          new C_deallocLocalSchur_arg(_isSym,
                                      _localSchur,
                                      j,
                                      i,
                                      verbose,
                                      fp,
                                      _nb);
        *(arg->ops_complexity) = ops;
        queue[ipos] = new C_task(C_DEALLOCLOCALSCHUR,
                                 task_name,
                                 (void *)arg,
                                 C_deallocLocalSchur,
                                 1, // atomic_size
                                 0, // atomic_id
                                 arg->ops_complexity);
        queue[ipos]->fp = &(arg->fp);
        // odd slot next to the one of block (i, j)
        const int itmp = j * j + 2 * i + 1;
        indcol[itmp] = ipos;
        ipos++;
      } // (!_isSym)
    } // loop : i
  }
  // second pass : trailing part, rows and columns >= num_block0
  for (int j = num_block0; j < num_block; j++) {
    for (int i = num_block0; i < j; i++) {
      // block (i, j) with i < j
      {
        string task_name = ("m " + to_string(i) + " " + to_string(j)
                            + " : " + to_string(_level) + " : " +
                            to_string(_nb));
        const long nrow = (long)_localSchur->nrowBlock(i, j);
        const long ncol = (long)_localSchur->ncolBlock(i, j);
        const long ops = nrow * ncol;
        C_deallocLocalSchur_arg *arg =
          new C_deallocLocalSchur_arg(_isSym,
                                      _localSchur,
                                      i,
                                      j,
                                      verbose,
                                      fp,
                                      _nb);
        *(arg->ops_complexity) = ops;
        queue[ipos] = new C_task(C_DEALLOCLOCALSCHUR,
                                 task_name,
                                 (void *)arg,
                                 C_deallocLocalSchur,
                                 1, // atomic_size
                                 0, // atomic_id
                                 arg->ops_complexity);
        queue[ipos]->fp = &(arg->fp);
        const int itmp = _isSym ? ((j * (j + 1)) / 2 + i) : (j * j + 2 * i);
        indcol[itmp] = ipos;
        ipos++;
      }
      // lower block : transposed position (j, i), non-symmetric case only;
      // unlike the first pass the cost is taken from the size of block (i, j)
      if (!_isSym) {
        string task_name = ("m " + to_string(j) + " " + to_string(i)
                            + " : " + to_string(_level) + " : " +
                            to_string(_nb));
        const long nrow = (long)_localSchur->nrowBlock(i, j);
        const long ncol = (long)_localSchur->ncolBlock(i, j);
        const long ops = nrow * ncol; // dummy
        C_deallocLocalSchur_arg *arg =
          new C_deallocLocalSchur_arg(_isSym,
                                      _localSchur,
                                      j,
                                      i,
                                      verbose,
                                      fp,
                                      _nb);
        *(arg->ops_complexity) = ops;
        queue[ipos] = new C_task(C_DEALLOCLOCALSCHUR,
                                 task_name,
                                 (void *)arg,
                                 C_deallocLocalSchur,
                                 1, // atomic_size
                                 0, // atomic_id
                                 arg->ops_complexity);
        queue[ipos]->fp = &(arg->fp);
        const int itmp = j * j + 2 * i + 1;
        indcol[itmp] = ipos;
        ipos++;
      } // (!_isSym)
    } // loop : i
    // diagonal block (j, j)
    {
      string task_name = ("m " + to_string(j) + " " + to_string(j)
                          + " : " + to_string(_level) + " : " +
                          to_string(_nb));
      const long nrow = (long)_localSchur->nrowBlock(j, j);
      const long ops = nrow * nrow; // dummy
      C_deallocLocalSchur_arg *arg =
        new C_deallocLocalSchur_arg(_isSym,
                                    _localSchur,
                                    j,
                                    j,
                                    verbose,
                                    fp,
                                    _nb);
      *(arg->ops_complexity) = ops;
      queue[ipos] = new C_task(C_DEALLOCLOCALSCHUR,
                               task_name,
                               (void *)arg,
                               C_deallocLocalSchur,
                               1, // atomic_size
                               0, // atomic_id
                               arg->ops_complexity);
      queue[ipos]->fp = &(arg->fp);
      // last slot of column j in either numbering
      const int itmp = _isSym ? ((j * (j + 1)) / 2 + j) : (j * j + 2 * j);
      indcol[itmp] = ipos;
      ipos++;
    }
  } // loop : j
}
&offdiag_xjc = btree->getFathersStrips(_nb)[m]; const Dissection::SetOfStrips &offdiag_xjf = btree->getFathersStrips(father_id)[m]; // int offset_offdiagf_src = 0; for (int k = ll - 1; k > m; k--) { offset_offdiagf_src += btree->getFathersStrips(father_id)[k].numberOfIndices(); } // strip is in increasing order with both begin_src (within condensed // adderss of strips) and begin_dst (with column address in the block) // Dissection::SetOfStrips::const_iterator itf = offdiag_xjf.begin(); Dissection::SetOfStrips::const_iterator itf; for (Dissection::SetOfStrips::const_iterator itc = offdiag_xjc.begin(); itc != offdiag_xjc.end(); ++itc) { // find a strip in offdiag strips with father_id // bool found = false; int offset = 0; for (itf = offdiag_xjf.begin(); itf != offdiag_xjf.end(); ++itf) { // for (; itf != offdiag_xjf.end(); ++itf) { if (((*itf).begin_dst <= (*itc).begin_dst) && ((*itf).begin_dst + (*itf).width) >= ((*itc).begin_dst + (*itc).width)) { offset = (*itc).begin_dst - (*itf).begin_dst; // found = true; break; } } offdiag.push_back(index_strip(((*itf).begin_src + offset_offdiagf_src + offset), (*itc).begin_src + offset_offdiagc_src, (*itc).width)); } offset_offdiagc_src += offdiag_xjc.numberOfIndices(); } //loop : ll // cout << "** father_id = " << father_id << endl; list > &tmp = child_contribs[father_id0]; if (fatherM.nrow() == 0 || _nrow == 0) { diss_printf(verbose, *fp, "%s %d : %d/%d %d/%d %d %d:%d\n", __FILE__, __LINE__, _nrow, _nb, fatherM.nrow(), father_id, ll, (int)diag.size(), (int)offdiag.size()); diag.clear(); if (_nrow == 0) { offdiag.clear(); } } int child_id = btree->selfIndex(_nb); tmp.push_back(child_contribution(child_id, fatherM.nrow(), fatherM.getUpperNCol(), diag, offdiag, fatherM.addrdiagBlock(), fatherM.addrupperBlock(), (!_isSym) ? 
fatherM.addrlowerBlock() : (RectBlockMatrix *)NULL, _ncol_offdiag, _localSchur)); } // loop : ll } template void DissectionMatrix:: ChildContrib(list > *child_contribs, Dissection::Tree *btree, vector* >& dM, const bool verbose, FILE **fp); template void DissectionMatrix, double>:: ChildContrib(list > > *child_contribs, Dissection::Tree *btree, vector, double>* >& dM, const bool verbose, FILE **fp); template void DissectionMatrix:: ChildContrib(list > *child_contribs, Dissection::Tree *btree, vector* >& dM, const bool verbose, FILE **fp); template void DissectionMatrix, quadruple>:: ChildContrib(list > > *child_contribs, Dissection::Tree *btree, vector, quadruple>* >& dM, const bool verbose, FILE **fp); template void DissectionMatrix:: ChildContrib(list > *child_contribs, Dissection::Tree *btree, vector* >& dM, const bool verbose, FILE **fp); template void DissectionMatrix, float>:: ChildContrib(list > > *child_contribs, Dissection::Tree *btree, vector, float>* >& dM, const bool verbose, FILE **fp); // template void DissectionMatrix::deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q) { string task_name = "M : " + to_string(_nb); // char *task_name_cstr = new char[task_name.str().size() + 1]; // strcpy(task_name_cstr, task_name.str().c_str()); const int ops_complexity = _ncol_offdiag * _nrow; C_deallocLower_arg *arg = new C_deallocLower_arg(isSym, _lower, ops_complexity); queue = new C_task(C_DEALLOCLOWER, task_name, (void *)arg, C_deallocLower, 1, // atomic_size 0, // atomic_id arg->ops_complexity); for (vector::const_iterator it = task_p.begin(); it != task_p.end(); ++it) { queue->parents->push_back(*it); } if (isDirect) { for (int i = 0; i < task_q[0]->parallel_max; i++) { queue->parents->push_back(task_q[i]); } queue->parents->sort(compare_task_name); queue->parents->unique(); } } template void DissectionMatrix:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); template void 
DissectionMatrix, double>:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); template void DissectionMatrix:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); template void DissectionMatrix, quadruple>:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); template void DissectionMatrix:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); template void DissectionMatrix, float>:: deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionMatrix.hpp000664 000000 000000 00000031761 14037356732 026322 0ustar00rootroot000000 000000 /*! \file DissectionMatrix.hpp \brief management of threads for factorization and Fw/Bw substitution \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. 
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <https://www.gnu.org/licenses/>.
// # ifndef _DRIVER_DISSECTIONMATRIX_ # define _DRIVER_DISSECTIONMATRIX_ #include #include #include "Splitters/BisectionTree.hpp" #include "Algebra/PlainMatrix.hpp" #include "Algebra/SquareMatrix.hpp" #include "Algebra/SquareBlockMatrix.hpp" #include "Algebra/RectBlockMatrix.hpp" #include "Algebra/SparseMatrix.hpp" #include "Algebra/ColumnMatrix.hpp" #include "Driver/TridiagBlockMatrix.hpp" #include "Driver/C_threads_tasks.hpp" using std::vector; template class DissectionMatrix { public: DissectionMatrix(Dissection::Tree *btree, const int nb, const bool isSym, const bool verbose, FILE *fp); DissectionMatrix(const DissectionMatrix & im) { (*this) = im; } ~DissectionMatrix() { if (_islast) { delete [] _color_mask; for (int i = 0; i < _colors; i++) { _tridiag[i]->free(); delete _tridiag[i]; } delete [] _tridiag; } _diag->free(); // free() is compatible to zero-sized matrix _lower->free(); _upper->free(); _localSchur->free(); // this may be redundant : 26 Jun.2014 delete _factorize_LDLt; delete _diag; delete _lower; delete _upper; delete _localSchur; } DissectionMatrix& operator = ( const DissectionMatrix &im) { if ( &im != this) { _nb = im._nb; _level = im._level; _nrow = im._nrow; _ncol_offdiag = im._ncol_offdiag; _nop = im._nop; _diag = im._diag->clone(); if (!_isSym) { _lower->copy(*im._lower); } _upper->copy(*im._upper); _isSym = im._isSym; _localSchur = &im.localSchurBlock(); _factorize_LDLt = im._factorize_LDLt; // _factorize_LDLt_diag = im._factorize_LDLt_diag; _csr_diag = im._csr_diag; _csr_offdiag = im._csr_offdiag; _alignedFather = im._alignedFather; } return *this; } int ColorTridiagBlockMatrix() { return _colors ; } void setColorTridiagBlockMatrix(int colors) { _colors = colors; } int nrow() { return _nrow; } SquareBlockMatrix* addrdiagBlock() { return _diag; } SquareBlockMatrix*& paddrdiagBlock() { return _diag; } SquareBlockMatrix& diagBlock() { return *_diag; } const SquareBlockMatrix& diagBlock() const { return *_diag;} TridiagBlockMatrix 
**addrtridiagBlock() { return _tridiag; } TridiagBlockMatrix **&paddrtridiagBlock() { return _tridiag; } RectBlockMatrix* addrupperBlock() { return _upper; } RectBlockMatrix*& paddrupperBlock() { return _upper; } RectBlockMatrix& upperBlock() { return *_upper; } const RectBlockMatrix& upperBlock() const { return *_upper; } RectBlockMatrix* addrlowerBlock() { return _lower; } RectBlockMatrix*& paddrlowerBlock() { return _lower; } RectBlockMatrix& lowerBlock() { return *_lower; } const RectBlockMatrix& lowerBlock() const { return *_lower; } bool isSym() const { return _isSym; } void setAlignedFather() { _alignedFather = true; } bool isAlignedFather() const { return _alignedFather; } bool isFactorized() const { return _diag->isFactorized(); } PlainMatrix& loclSchurBlock(); const vector& singIdxPermute() const { return _diag->getSingIdx(); } vector& singIdxPermute() { return _diag->getSingIdx(); } const vector& singIdx() const { return _diag->getSingIdx0(); } vector& singIdx() { return _diag->getSingIdx0(); } int KernelDetected() const { return _diag->KernelDetected(); } int KernelDim() const { return _diag->dim_kern(); } void SetKernelDim(int nsing) { _diag->set_dim_kernel(nsing); } SquareMatrix& localSchurBlock() { return *_localSchur; } const SquareMatrix& localSchurBlock() const { return *_localSchur; } T getLastPivot() const { return _diag->lastPivot(); } void setLastPivot(T pivot_val) { _diag->set_lastPivot(pivot_val); } void C_SparseSymbFact_queue(vector& queue, Dissection::Tree *btree, const bool verbose, FILE **fp); void C_SparseNumFact_queue(vector& queue, Dissection::Tree *btree, int nnz, T *coefs, double *eps_pivot, double *pivot, bool *kernel_detection, bool *higher_precision, int *aug_dim, U *eps_machine, vector& task_q, const bool verbose, FILE **fp); void C_SparseLocalSchur_queue(vector& queue, Dissection::Tree *btree, int nnz, T *cofes, vector& task_p, const bool verbose, FILE **fp); void C_FillMatrix_queue(vector& queue, int nnz, T *cofes, const bool 
verbose, FILE **fp); int C_DFullLDLt_queue(vector& queue, vector& task_indcol, vector& task_ptr, double *eps_piv, bool *kernel_detection, int *aug_dim, U *eps_machine, double *pivot, double *pivot0, double *pivot1, vector& task_p, const bool isChldrnAlgnd, const bool verbose, FILE **fp); int C_DTRSMScale_queue(vector &queue, vector > &queue_parents_index, vector &queue_index, vector& indcol, vector& ptr, Dissection::Tree *btree, vector& task_o, vector& task_indcol, vector& task_ptr, vector& task_p, const bool verbose, FILE **fp); void C_DTRSMScale_rearrange(vector &queue, vector > &queue_parents_index, vector &queue_index, vector& indcol, vector& ptr, const bool verbose, FILE *fp); int C_DGEMM_local_queue(vector &queue, vector &indcol, RectBlockMatrix *upper1, RectBlockMatrix *lower1, bool isSkip, bool isDirect, SquareBlockMatrix* fdiag, vector &task_p, vector &task_p_index, vector &task_p_indcol, vector &task_p_ptr, vector &task_q, vector &task_q_index, vector &task_q_indcol, vector &task_q_ptr, vector *task_s, const bool verbose, FILE **fp); void C_deallocLocalSchur_queue(vector &queue, vector &indcol, const bool verbose, FILE **fp); void ChildContrib(list > *child_contribs, Dissection::Tree *btree, vector* >& dissectionMatrix, const bool verbose, FILE **fp); void deallocLower_queue(C_task*& queue, bool isSym, vector &task_p, bool isDirect, vector &task_q); int getUpperNRow() const { return upperBlock().nbRows(); } int getUpperNCol() const { return upperBlock().nbColumns(); } unsigned nb() const { return _nb; } unsigned level() const { return _level; } int ncol_offdiag() const {return _ncol_offdiag; } int nop() const { return _nop; } bool islast() const { return _islast; } bool alignedFather() const { return _alignedFather; } void setNb(unsigned nb) { _nb = nb; } void setLevel(unsigned level) { _level = level; } void setNrow(int nrow) { _nrow = nrow; } void setNcol_offdiag(int ncol_offdiag) { _ncol_offdiag = ncol_offdiag; } void setNop(int nop) { _nop = nop; } 
void setIsSym(bool isSym) { _isSym = isSym; } void setIslast(bool islast) { _islast = islast; } void setAlignedFather(bool alignedFather) { _alignedFather = alignedFather; } private: unsigned _nb; unsigned _level; int _nrow; int _ncol_offdiag; int _nop; // complexity of factorization SquareBlockMatrix* _diag; // pointer to the diagonal block TridiagBlockMatrix** _tridiag; // pointer to the tridiagonal blocks RectBlockMatrix* _lower; // pointer to lower off-diagonal block RectBlockMatrix* _upper; // pointer to upper off-diagonal block bool _isSym; bool _islast; bool _alignedFather; int _colors; int *_color_mask; SquareBlockMatrix *_localSchur; ColumnMatrix *_factorize_LDLt; // ColumnMatrix *_factorize_LDLt_diag; const CSR_indirect *_csr_diag; const CSR_indirect *_csr_offdiag; }; template class SchurMatrix { public: SchurMatrix() { _full_pivoting = false;} ~SchurMatrix() { } void free() { _sldu.free(); _sldu_list.clear(); // _sldu_list_left.clear(); delete _arow; delete _acol; _scol.free(); _schur.free(); } SubSquareMatrix& getSldu() { return _sldu; } vector& getSlduList() { return _sldu_list; } vector& getSlduListLeft() { return _full_pivoting ? 
// NOTE(review): in this copy of the file the template parameter and argument
// lists (the text between '<' and '>') appear to be missing, e.g. after
// "template", "vector", "ColumnMatrix" and "SquareMatrix"; the code tokens
// below are left exactly as found.
//
// Container for kernel related data: a dimension, index lists, two basis
// matrices and three projection matrices, exposed through reference getters.
template
class KernelMatrix
{
public:
  // dimension defaults to 0; full pivoting is switched off
  KernelMatrix(int dimension = 0) : _dimension(dimension),
                                    _full_pivoting(false) { }
  ~KernelMatrix() { }

  // Clears the index lists and frees the matrices.
  // NOTE(review): _kern_list_eq_left is not cleared here and _dimension is
  // not reset -- confirm whether that is intended.
  void free()
  {
    _singIdx.clear();
    _kern_list_eq.clear();
    _kern_basis.free();
    _tkern_basis.free();
    _kern_proj.free();
    _tkern_proj.free();
    _ntkern_proj.free();
  }

  int dimension() const { return _dimension; }
  void set_dimension(int dimension){ _dimension = dimension; }

  vector& getSingIdx() { return _singIdx; }
  vector& getKernListEq() { return _kern_list_eq; }
  // the separate left list is used only with full pivoting, otherwise the
  // same list as getKernListEq() is returned
  vector& getKernListEqLeft() {
    return _full_pivoting ? _kern_list_eq_left : _kern_list_eq;
  }
  ColumnMatrix& getKernBasis() {return _kern_basis; }
  ColumnMatrix& getTKernBasis() { return _tkern_basis; }
  SquareMatrix& getKernProj() { return _kern_proj; }
  SquareMatrix& getTKernProj() { return _tkern_proj; }
  SquareMatrix& getNTKernProj() { return _ntkern_proj; }

  void setFullPivoting(const bool full_pivoting)
  {
    _full_pivoting = full_pivoting;
  }

private:
  int _dimension;
  vector _singIdx;
  vector _kern_list_eq;
  vector _kern_list_eq_left;   // returned by getKernListEqLeft() when
                               // _full_pivoting is set
  ColumnMatrix _kern_basis;
  ColumnMatrix _tkern_basis;
  SquareMatrix _kern_proj;
  SquareMatrix _tkern_proj;
  SquareMatrix _ntkern_proj;
  bool _full_pivoting;
};
\file DissectionQueue.cpp \brief management of threads for factorization and Fw/Bw substitution \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Apr. 22nd 2013 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. // #include #include "Driver/C_threads_tasks.hpp" #include "Driver/C_Dsub.hpp" #include "Driver/DissectionQueue.hpp" #include "Driver/QueueRuntime.hpp" #include "Compiler/DissectionIO.hpp" /* threshold ratio used by generate_queue(): DTRSM tasks whose running column counter is below (number of column blocks) * RATIO_DTRSM_MERGED are moved into the LDLt task list, the others stay in the DTRSM task list */ #define RATIO_DTRSM_MERGED 0.4 /* the values 1, -1 and 0 converted to the scalar type T */ template const T DissectionQueue::_one = T(1.0); template const T DissectionQueue::_none = T(-1.0); template const T DissectionQueue::_zero = T(0.0); /*! \brief Constructor: creates one DissectionMatrix per subdomain and allocates the task containers. \param btree dissection tree; the pointer is stored in _btree and queried for the number of levels and subdomains. \param dM receives a newly allocated DissectionMatrix for every subdomain d = 1..nb_doms at position selfIndex(d); it is indexed, not resized, here, so it is presumably sized by the caller (TODO confirm). \param num_threads stored in _num_threads and forwarded to QueueRuntime. \param isSym stored in _isSym and forwarded to DissectionMatrix and QueueRuntime. \param verbose stored in _verbose and forwarded likewise. \param fp stored in _fp and forwarded likewise. The three _queue_*_allocated flags start as false. */ template DissectionQueue::DissectionQueue(Dissection::Tree *btree, vector* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp) : _btree(btree), _num_threads(num_threads), _verbose(verbose), _fp(fp), _isSym(isSym), _queue_symb_allocated(false), _queue_numrc_allocated(false), _queue_fwbw_allocated(false) { const int nb_level = _btree->NumberOfLevels(); _nb_level = nb_level; const int level_last = nb_level - 1; const int nb_doms = _btree->NumberOfSubdomains(); //(1U<<(level_last+1))-1 _nb_doms = nb_doms; /* subdomains above the last level are counted as dense, those on the last level as sparse */ const int nb_doms_dense = (1U << level_last) - 1; const int nb_doms_sparse = (1U << level_last); // _queue_dynamic->reserve(10); for (int d = 1; d <= nb_doms; d++) { const int d1 = _btree->selfIndex(d); dM[d1] = new DissectionMatrix(_btree, d, isSym, verbose, fp); } // set up working arrays for C_Dsub _children = new vector[nb_doms]; _tasks_SparseSymb = new vector[nb_doms_sparse]; _tasks_SparseNum = new vector[nb_doms]; // nb_doms_sparse is _tasks_SparseLocalSchur = new vector[nb_doms]; // enough but _tasks_DFillSym = new vector[nb_doms]; // for convenience of // dependency _tasks_DFullLDLt = new
vector[nb_doms_dense]; _tasks_DTRSMScale = new vector[nb_doms_dense]; _tasks_DSymmGEMM = new vector[nb_doms_dense]; _tasks_deallocLocalSchur = new vector[nb_doms_dense + nb_doms_sparse]; /* _tasks_Dsub[0] owns one contiguous array of nb_level * nb_doms_dense entries; _tasks_Dsub[i] points at row i inside it */ _tasks_Dsub = new vector*[nb_level]; _tasks_Dsub[0] = new vector[nb_level * nb_doms_dense]; for (int i = 1; i < nb_level; i++) { _tasks_Dsub[i] = _tasks_Dsub[0] + i * nb_doms_dense; } _tasks_deallocLower = new vector[nb_level]; _child_contribs = new list >[nb_doms]; _pivots = new double[nb_doms]; // pass pivot value from a DissectionMatrix to // the other is not easy in thread execution // initialize state to control tasks _dissectionRuntime = new QueueRuntime(nb_doms, num_threads, isSym, verbose, fp); } /* explicit instantiations of the constructor */ template DissectionQueue:: DissectionQueue(Dissection::Tree *btree, vector* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); template DissectionQueue:: DissectionQueue(Dissection::Tree *btree, vector* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); template DissectionQueue, double>:: DissectionQueue(Dissection::Tree *btree, vector, double>* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); template DissectionQueue, quadruple>:: DissectionQueue(Dissection::Tree *btree, vector, quadruple>* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); template DissectionQueue:: DissectionQueue(Dissection::Tree *btree, vector* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); template DissectionQueue, float>:: DissectionQueue(Dissection::Tree *btree, vector, float>* >& dM, const int num_threads, const bool isSym, const bool verbose, FILE *fp); // /*! \brief Destructor: calls erase_queue() and erase_queue_fwbw(), then deletes the arrays allocated by the constructor and the QueueRuntime object. The DissectionMatrix objects stored in dM by the constructor are not deleted here. */ template DissectionQueue::~DissectionQueue() { // const int nb_level = _btree->NumberOfLevels(); // const int num_threads = _num_threads; erase_queue(); erase_queue_fwbw(); // release working arrays for C_Dsub delete [] _children; delete [] _tasks_SparseSymb; delete [] _tasks_SparseNum; delete []
_tasks_SparseLocalSchur; delete [] _tasks_DFillSym; // _tasks_DFillSym = new vector[nb_doms]; delete [] _tasks_DFullLDLt; delete [] _tasks_DTRSMScale; delete [] _tasks_DSymmGEMM; /* the contiguous storage behind _tasks_Dsub[0] must go before the row-pointer array itself */ delete [] _tasks_Dsub[0]; delete [] _tasks_Dsub; delete [] _tasks_deallocLocalSchur; delete [] _tasks_deallocLower; // delete [] _tasks_fillSym; delete [] _child_contribs; delete [] _pivots; // nullify the pointer _btree = NULL; delete _dissectionRuntime; } /* explicit instantiations of the destructor */ template DissectionQueue::~DissectionQueue(); template DissectionQueue::~DissectionQueue(); template DissectionQueue, double>::~DissectionQueue(); template DissectionQueue, quadruple>::~DissectionQueue(); template DissectionQueue::~DissectionQueue(); template DissectionQueue, float>::~DissectionQueue(); // /*! \brief Creates every task of the symbolic and numeric factorization and passes them to QueueRuntime::generate_queue(). \param dM per-subdomain matrices, indexed by Tree::selfIndex(). \param nnz forwarded unchanged to the sparse numeric-factorization, local-Schur and fill-matrix task builders. \param coefs forwarded together with nnz. Steps in order: (1) store the self-indices of the two children of every node above the last level in _children; (2) for the last level, build the sparse symbolic / numeric / local-Schur / deallocation tasks and the C_Dsub tasks of all upper subdomains; (3) for each level from level_last - 1 down to 0, build the dense LDLt, DTRSM-scale, DGEMM, C_Dsub and deallocation tasks; (4) for levels level_last - 1 down to 1, move DTRSM tasks into the LDLt task lists; (5) call the runtime, erase the tasks it returns in queue_null and delete the temporary arrays. On exit _queue_symb_allocated and _queue_numrc_allocated are set to true. */ template void DissectionQueue:: generate_queue(vector* >& dM, int nnz, T *coefs) { const int num_threads = _num_threads; const int nb_level = _btree->NumberOfLevels(); const int level_last = nb_level - 1; const int nb_doms = _btree->NumberOfSubdomains(); const int nb_doms_dense = (1U << level_last) - 1; const int nb_doms_sparse = (1U << level_last); // set up working arrays for C_Dsub vector null_task; /* step (1): children of every node above the last level */ for (int level = (level_last - 1); level >= 0; level--){ const int begdom = 1U << level; const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int j = _btree->selfIndex(d); const int mm = _btree->childIndex(d); const int nn = _btree->brotherIndex(mm); _children[j].push_back(_btree->selfIndex(mm)); _children[j].push_back(_btree->selfIndex(nn)); } } vector > all_fathersIndex(nb_level); /* nops_queue[0] owns (num_threads + 1) * nb_level counters; nops_queue[p] points at row p. NOTE(review): only rows 1..num_threads are zeroed below, row 0 is left as allocated -- confirm that QueueRuntime::generate_queue() never reads it before writing. */ long **nops_queue = new long*[num_threads + 1]; nops_queue[0] = new long[(num_threads + 1) * nb_level]; for (int p = 1; p <= num_threads; p++) { nops_queue[p] = nops_queue[0] + p * nb_level; for (int j = 0; j <= level_last; j++) { nops_queue[p][j] = 0L; } } vector* tasks_deallocLocalSchur_indcol = new vector[nb_doms_dense + nb_doms_sparse]; // sparse subdomains : begin : level == level_last { all_fathersIndex[level_last].clear(); for
(int i = 0; i < nb_doms; i++) { _child_contribs[i].clear(); } const int begdom = 1U << level_last; const int enddom = begdom * 2; int k = 0; /* _tasks_SparseSymb is indexed by the position k inside the last level, the other containers by the self-index j */ for (int d = begdom; d < enddom; d++, k++) { const int j = _btree->selfIndex(d); _tasks_SparseSymb[k].resize(1); _tasks_SparseNum[j].resize(1); _tasks_SparseLocalSchur[j].resize(1); // _tasks_DFillSym[j].resize(2); dM[j]->C_SparseSymbFact_queue(_tasks_SparseSymb[k], _btree, _verbose, &_fp); dM[j]->C_SparseNumFact_queue(_tasks_SparseNum[j], _btree, nnz, coefs, &_eps_piv, &_pivots[j], &_kernel_detection, &_higher_precision, &_aug_dim, &_eps_machine, _tasks_SparseSymb[k], _verbose, &_fp); dM[j]->C_SparseLocalSchur_queue(_tasks_SparseLocalSchur[j], _btree, nnz, coefs, _tasks_SparseNum[j], _verbose, &_fp); dM[j]->C_deallocLocalSchur_queue(_tasks_deallocLocalSchur[j], tasks_deallocLocalSchur_indcol[j], _verbose, &_fp); dM[j]->ChildContrib(_child_contribs, _btree, dM, _verbose, &_fp); } // loop : d /* every subdomain above the last level gets a two-entry fill-matrix task list */ for (int d = 1; d < begdom; d++) { const int j = _btree->selfIndex(d); _tasks_DFillSym[j].resize(2); dM[j]->C_FillMatrix_queue(_tasks_DFillSym[j], nnz, coefs, _verbose, &_fp); } all_fathersIndex[level_last].resize((1U << level_last) - 1); for (int i = 1; i < (1U << level_last); i++) { const int j = _btree->selfIndex(i); all_fathersIndex[level_last][j] = j; } for (vector::const_iterator it = all_fathersIndex[level_last].begin(); it != all_fathersIndex[level_last].end(); ++it) { C_Dsub_queue(_isSym, (*it), false, // all sparse matrix parts need to be done _tasks_Dsub[level_last][(*it)], _child_contribs[(*it)], (vector *)NULL, // _tasks_DSymmGEMM (vector *)NULL, false, _tasks_DFillSym[(*it)], _tasks_SparseLocalSchur, null_task, // (vector *)NULL, _tasks_deallocLocalSchur, tasks_deallocLocalSchur_indcol, level_last, true, _verbose, _fp); } // tasks to deallocate local Schur complement _tasks_deallocLower[level_last].resize(0); // no C- working array of DTRSM } // end : level == level_last vector nrow_DFullLDLt;
nrow_DFullLDLt.resize(nb_doms_dense); vector isMergedDTRSM(nb_doms_dense, false); vector isDividedDTRSM(nb_doms_dense, false); /* temporary per-subdomain index arrays; all of them are deleted at the end of this function */ vector* tasks_DTRSMScale_indcol = new vector[nb_doms_dense]; vector* tasks_DTRSMScale_index = new vector[nb_doms_dense]; vector >* tasks_DTRSMScale_parents_index = new vector >[nb_doms_dense]; vector* tasks_DTRSMScale_ptr = new vector[nb_doms_dense]; vector* tasks_DSymmGEMM_indcol = new vector[nb_doms_dense]; /* step (3): levels above the last one, from level_last - 1 down to level 0 */ for (int level = (level_last - 1); level >= 0; level--) { int itmp; all_fathersIndex[level].clear(); for (int i = 0; i < nb_doms; i++) { _child_contribs[i].clear(); } const int begdom = 1U << level; const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int j = _btree->selfIndex(d); const int num_row_b = dM[j]->diagBlock().num_blocks(); int num_tasks_ldlt; num_tasks_ldlt = (num_row_b * (num_row_b + 1) * (num_row_b + 2)) / 6; if (level == 0) { num_tasks_ldlt += (((num_row_b - 1) * num_row_b * (num_row_b + 1)) / 6 + num_row_b); } nrow_DFullLDLt[j] = num_row_b; // factorization by block + whole pivoting _tasks_DFullLDLt[j].resize(num_tasks_ldlt); vector task_indcol; vector task_ptr; task_indcol.resize(num_tasks_ldlt); // _tasks_DTRSMScale[j].resize(num_col_b2); // _tasks_DSymmGEMM[j].resize(num_dgemm); /* evnchld_id is only assigned when level < level_last - 1; it is only read when isChldrnAlgnd is true, which can only happen in that same case */ bool isChldrnAlgnd = false; int evnchld_id; if (level < (level_last - 1)) { evnchld_id = _btree->childIndex(d); // "selfIndex(j) + 1 == j" is used isChldrnAlgnd = dM[evnchld_id]->isAlignedFather(); } // vector &task_p = (isChldrnAlgnd ? _tasks_DSymmGEMM[evnchld_id] : // _tasks_Dsub[level + 1][j]); itmp = dM[j]->C_DFullLDLt_queue(_tasks_DFullLDLt[j], task_indcol, task_ptr, // value not yet defined &_eps_piv, &_kernel_detection, &_aug_dim, &_eps_machine, &_pivots[j], &_pivots[_children[j][0]], &_pivots[_children[j][1]], // task_p, (isChldrnAlgnd ?
_tasks_DSymmGEMM[evnchld_id] : _tasks_Dsub[level + 1][j]), isChldrnAlgnd, _verbose, &_fp); if (itmp > 1) { itmp = EraseNullParents(_tasks_DFullLDLt[j]); } #ifdef DEBUG_PREPARE_THREAD cout << j << " : " << " generated queue : C_DFullLDLt = " << j << " num_block = " << num_row_b << " num_tasks = " << num_tasks_ldlt << " / " << itmp << endl; #endif if (level > 0) { // vector &task_pp = _tasks_Dsub[level + 1][j]; itmp = dM[j]->C_DTRSMScale_queue(_tasks_DTRSMScale[j], tasks_DTRSMScale_parents_index[j], tasks_DTRSMScale_index[j], tasks_DTRSMScale_indcol[j], tasks_DTRSMScale_ptr[j], _btree, _tasks_DFullLDLt[j], task_indcol, task_ptr, _tasks_Dsub[level + 1][j], // task_pp, _verbose, &_fp); if (itmp > 1) { itmp = EraseNullParents(_tasks_DTRSMScale[j]); dM[j]->C_DTRSMScale_rearrange(_tasks_DTRSMScale[j], tasks_DTRSMScale_parents_index[j], tasks_DTRSMScale_index[j], tasks_DTRSMScale_indcol[j], tasks_DTRSMScale_ptr[j], _verbose, _fp); } } /* NOTE(review): num_col_b2 is not declared anywhere in the visible part of this function (it only appears in a commented-out line), so this debug block presumably fails to compile when DEBUG_PREPARE_THREAD is defined -- confirm */ #ifdef DEBUG_PREPARE_THREAD cout << j << " : " << " generated queue : C_DTRSMScale_queue " << j << " num_tasks = " << num_col_b2 << " " << _tasks_DTRSMScale[j].size() << endl; #endif SquareBlockMatrix* f_diag; int jf = (-1); if (level > 0) { // there is no father for level == 0 jf = _btree->selfIndex(_btree->fatherIndex(d)); f_diag = dM[jf]->addrdiagBlock(); } else { f_diag = (SquareBlockMatrix*)NULL; } bool isDirect = dM[j]->isAlignedFather(); /* a subdomain with even self-index (below level 0) also receives the blocks and DTRSM tasks of its neighbour j - 1 */ bool isEven = (level > 0) ? (j % 2 == 0) : false; RectBlockMatrix *upper1, *lower1; upper1 = (isEven ? dM[j - 1]->addrupperBlock() : (RectBlockMatrix *) NULL); lower1 = (isEven ? dM[j - 1]->addrlowerBlock() : (RectBlockMatrix *) NULL); // vector &tasks_q = (isEven ? _tasks_DTRSMScale[j - 1] : // null_task); vector null_idx; // vector &tasks_q_index = (isEven ? tasks_DTRSMScale_index[j - 1] : // null_idx); // vector &tasks_q_indcol = (isEven ? tasks_DTRSMScale_indcol[j - 1] : // null_idx); // vector &tasks_q_ptr = (isEven ?
tasks_DTRSMScale_ptr[j - 1] : // null_idx); itmp = dM[j]->C_DGEMM_local_queue(_tasks_DSymmGEMM[j], tasks_DSymmGEMM_indcol[j], upper1, lower1, (!isEven), isDirect, f_diag, _tasks_DTRSMScale[j], tasks_DTRSMScale_index[j], tasks_DTRSMScale_indcol[j], tasks_DTRSMScale_ptr[j], (isEven ? _tasks_DTRSMScale[j - 1] : null_task), (isEven ? tasks_DTRSMScale_index[j - 1] : null_idx), (isEven ? tasks_DTRSMScale_indcol[j - 1] : null_idx), (isEven ? tasks_DTRSMScale_ptr[j - 1] : null_idx), // tasks_q, // tasks_q_index, // tasks_q_indcol, // tasks_q_ptr, (jf >= 0) ? &_tasks_Dsub[level + 1][jf] : (vector*)NULL, _verbose, &_fp); if (itmp > 1 ) { itmp = EraseNullParents(_tasks_DSymmGEMM[j]); } dM[j]->C_deallocLocalSchur_queue(_tasks_deallocLocalSchur[j], tasks_deallocLocalSchur_indcol[j], _verbose, &_fp); /* NOTE(review): num_dgemm and _tasks_DGEMM are not declared in the visible code (the member used above is _tasks_DSymmGEMM), so this debug block presumably fails to compile when DEBUG_PREPARE_THREAD is defined -- confirm */ #ifdef DEBUG_PREPARE_THREAD cout << j << " : " << " generated queue : C_DGEMM_local_queue " << j << " num_tasks = " << num_dgemm << " " << _tasks_DGEMM[j].size() << endl; #endif dM[j]->ChildContrib(_child_contribs, _btree, dM, _verbose, &_fp); } // loop : d(j) all_fathersIndex[level].resize((1U << level) - 1); for (int i = 1; i < (1U << level); i++) { const int j = _btree->selfIndex(i); all_fathersIndex[level][j] = j; } for (vector::const_iterator it = all_fathersIndex[level].begin(); it != all_fathersIndex[level].end(); ++it) { /* skip_flag becomes true only for a direct father (one level up) with exactly two child contributions whose children both report isAlignedFather() */ bool skip_flag = false; const int father_id = _btree->Index2Node((*it)); if (_btree->nodeLayer(father_id) == (level - 1)) { if (_child_contribs[(*it)].size() == 2) { int child_id = _btree->childIndex(father_id); int child_id0 = _btree->selfIndex(child_id); int child_id1 = _btree->selfIndex(child_id + 1); skip_flag = ((dM[child_id0]->isAlignedFather() && dM[child_id1]->isAlignedFather()) ? true : false); diss_printf(_verbose, _fp, "%s %d : father_id = %d children_id = %d %d skip = %s\n", __FILE__, __LINE__, ((*it) + 1), (child_id0 + 1), (child_id1 + 1), skip_flag ?
"true" : "false"); } } C_Dsub_queue(_isSym, (*it), skip_flag, _tasks_Dsub[level][(*it)], _child_contribs[(*it)], _tasks_DSymmGEMM, tasks_DSymmGEMM_indcol, true, // (vector *)NULL, // _tasks_DFillSym, null_task, (vector *)NULL, _tasks_Dsub[level + 1][(*it)], _tasks_deallocLocalSchur, tasks_deallocLocalSchur_indcol, level, true, _verbose, _fp); } // loop : it // tasks to deallocate lower column matrix and local Schur complement if (level > 0) { _tasks_deallocLower[level].resize(enddom - begdom); { int k = 0; for (int d = begdom; d < enddom; d++, k++) { const int j = _btree->selfIndex(d); bool isDirect = dM[j]->isAlignedFather(); const int jj = (j % 2 == 1) ? (j + 1) : j; dM[j]->deallocLower_queue(_tasks_deallocLower[level][k], _isSym, _tasks_DSymmGEMM[j], isDirect, _tasks_DSymmGEMM[jj]); } // loop : d, k } // scope for k itmp = EraseNullParents(_tasks_deallocLower[level]); } // if (level > 0) } // loop : level /* step (4): second pass over levels level_last - 1 .. 1; for subdomains with more than two row blocks the DTRSM tasks are split between the LDLt list (tmp1) and the DTRSM list (tmp2) */ for (int level = (level_last - 1); level > 0; level--) { const int begdom = 1U << level; const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int j = _btree->selfIndex(d); if(nrow_DFullLDLt[j] > 2) { list tmp1; list tmp2; const int ncol = dM[j]->upperBlock().num_blocks_c(); const int ncol1 = (int)((double)ncol * RATIO_DTRSM_MERGED); int i0, i1, k1, kk1, kk2; // int i0, i1, k1, k2, kk1, kk2; tmp1.push_back(_tasks_DFullLDLt[j][0]); k1 = 1; i0 = 1; i1 = 0; bool first_loop_done = false; kk2 = 0; /* NOTE(review): _tasks_DTRSMScale[j][i1] is read at the top of every iteration without checking i1 against the size of that list -- confirm the two lists always have matching group counts */ while (i0 < _tasks_DFullLDLt[j].size()) { const int parallel_max0 =_tasks_DFullLDLt[j][i0]->parallel_max; const int parallel_max1 =_tasks_DTRSMScale[j][i1]->parallel_max; kk1 = 0; for (int m = 0; m < parallel_max0; m++) { const int atomic_size = _tasks_DFullLDLt[j][i0]->atomic_size; for (int n = 0; n < atomic_size; n++) { _tasks_DFullLDLt[j][i0 + n]->parallel_id = kk1; _tasks_DFullLDLt[j][i0 + n]->parallel_max = 0; // for counting tmp1.push_back(_tasks_DFullLDLt[j][i0 + n]); } kk1++; i0 += atomic_size; } // loop : m int ll = 0; for (int m = 0; m <
parallel_max1; m++) { const int atomic_size = _tasks_DTRSMScale[j][i1]->atomic_size; for (int n = 0; n < atomic_size; n++) { if (first_loop_done) { if (ll < ncol1) { _tasks_DTRSMScale[j][i1 + n]->parallel_id = kk1; _tasks_DTRSMScale[j][i1 + n]->parallel_max = 0; tmp1.push_back(_tasks_DTRSMScale[j][i1 + n]); } else { _tasks_DTRSMScale[j][i1 + n]->parallel_id = kk2; _tasks_DTRSMScale[j][i1 + n]->parallel_max = 0; tmp2.push_back(_tasks_DTRSMScale[j][i1 + n]); } } else { _tasks_DTRSMScale[j][i1 + n]->parallel_id = kk1; _tasks_DTRSMScale[j][i1 + n]->parallel_max = 0; tmp1.push_back(_tasks_DTRSMScale[j][i1 + n]); } } // loop : n if (first_loop_done) { if (ll < ncol1) { kk1++; } else { kk2++; } } else { kk1++; } ll++; if (ll == ncol) { ll = 0; } i1 += atomic_size; } // loop : m /* parallel_max == 0 marks the tasks just appended; they now receive the final group size kk1 */ for (list::const_iterator it = tmp1.begin(); it != tmp1.end(); ++it) { if ((*it)->parallel_max == 0) { (*it)->parallel_max = kk1; } } if (i0 >= _tasks_DFullLDLt[j].size()) { break; } // number of C_dupdateb_Schur_{diag,offdiag} may be large enough const int parallel_max2 =_tasks_DFullLDLt[j][i0]->parallel_max; kk1 = 0; for (int m = 0; m < parallel_max2; m++) { const int atomic_size = _tasks_DFullLDLt[j][i0]->atomic_size; for (int n = 0; n < atomic_size; n++) { _tasks_DFullLDLt[j][i0 + n]->parallel_id = kk1; _tasks_DFullLDLt[j][i0 + n]->parallel_max = 0; tmp1.push_back(_tasks_DFullLDLt[j][i0 + n]); } i0 += atomic_size; kk1++; } // loop : m for (list::const_iterator it = tmp1.begin(); it != tmp1.end(); ++it) { if ((*it)->parallel_max == 0) { (*it)->parallel_max = kk1; } } first_loop_done = true; } // while (i0 < _tasks_DFullLDLt[j].size()) /* remaining DTRSM tasks after the LDLt list is exhausted; kk2 keeps counting from the loop above */ int ll = 0; while (i1 < _tasks_DTRSMScale[j].size()) { kk1 = 0; const int parallel_max =_tasks_DTRSMScale[j][i1]->parallel_max; for (int m = 0; m < parallel_max; m++) { const int atomic_size = _tasks_DTRSMScale[j][i1]->atomic_size; for (int n = 0; n < atomic_size; n++) { if (ll < ncol1) { _tasks_DTRSMScale[j][i1 + n]->parallel_id = kk1; _tasks_DTRSMScale[j][i1 +
n]->parallel_max = 0; tmp1.push_back(_tasks_DTRSMScale[j][i1 + n]); } else { _tasks_DTRSMScale[j][i1 + n]->parallel_id = kk2; _tasks_DTRSMScale[j][i1 + n]->parallel_max = 0; tmp2.push_back(_tasks_DTRSMScale[j][i1 + n]); } } if (ll < ncol1) { kk1++; } else { kk2++; } i1 += atomic_size; ll++; if (ll == ncol) { ll = 0; } } // loop : m for (list::const_iterator it = tmp1.begin(); it != tmp1.end(); ++it) { if ((*it)->parallel_max == 0) { (*it)->parallel_max = kk1; } } for (list::const_iterator it = tmp2.begin(); it != tmp2.end(); ++it) { if ((*it)->parallel_max == 0) { (*it)->parallel_max = kk2; } } } // while (i1 < _tasks_DTRSMScale[j].size()) /* replace the two task lists by the regrouped contents of tmp1 and tmp2 */ _tasks_DFullLDLt[j].resize(tmp1.size()); vector::iterator jt; jt = _tasks_DFullLDLt[j].begin(); for (list::const_iterator it = tmp1.begin(); it != tmp1.end(); ++it, ++jt) { (*jt) = (*it); } tmp1.clear(); _tasks_DTRSMScale[j].clear(); _tasks_DTRSMScale[j].resize(tmp2.size()); jt = _tasks_DTRSMScale[j].begin(); for (list::const_iterator it = tmp2.begin(); it != tmp2.end(); ++it, ++jt) { (*jt) = (*it); } isMergedDTRSM[j] = true; isDividedDTRSM[j] = true; } // if(nrow_DFullLDLt[j] > 2) else { if (nrow_DFullLDLt[j + ((j % 2 == 0 ) ?
(-1) : 1)] > 2) { // brother has merged LDLt and DTRSM /* append all DTRSM tasks after the LDLt tasks, shifting parallel_max / parallel_id by the sizes of the two lists; the DTRSM list is left empty */ int size0 = _tasks_DFullLDLt[j].size(); int size1 = _tasks_DTRSMScale[j].size(); vector tmp; tmp.resize(size0 + size1); int ii = 0; for (int k = 0; k < _tasks_DFullLDLt[j].size(); k++, ii++) { tmp[ii] = _tasks_DFullLDLt[j][k]; tmp[ii]->parallel_max += size1; } for (int k = 0; k < _tasks_DTRSMScale[j].size(); k++, ii++) { tmp[ii] = _tasks_DTRSMScale[j][k]; tmp[ii]->parallel_max += size0; tmp[ii]->parallel_id += size0; } _tasks_DFullLDLt[j].resize(tmp.size()); vector::iterator jt = _tasks_DFullLDLt[j].begin(); for (vector::const_iterator it = tmp.begin(); it != tmp.end(); ++it, ++jt) { (*jt) = (*it); } _tasks_DTRSMScale[j].clear(); isMergedDTRSM[j] = true; } // if }// else if(nrow_DFullLDLt[j] > 2) } // loop : d } /* step (5): hand all task lists to the runtime; tasks it reports back in queue_null are erased below */ list queue_null; _dissectionRuntime->generate_queue(_queue_symb, _queue_static, _queue_dynamic, queue_null, // _queue_dummy, _btree, _children, _tasks_SparseSymb, _tasks_SparseNum, _tasks_SparseLocalSchur, _tasks_DFillSym, _tasks_DFullLDLt, _tasks_DTRSMScale, _tasks_DSymmGEMM, _tasks_Dsub, _tasks_deallocLower, _tasks_deallocLocalSchur, nops_queue, all_fathersIndex, nrow_DFullLDLt, isMergedDTRSM, isDividedDTRSM, level_last); /* NOTE(review): unique() only removes adjacent equal pointers, so this de-duplication relies on compare_task_name placing duplicates of the same task next to each other -- confirm task names are distinct per task, otherwise a task could be erased twice */ queue_null.sort(compare_task_name); queue_null.unique(); // #define DEBUG_QUEUE_NULL #ifdef DEBUG_QUEUE_NULL fprintf(_fp, "%s %d : NULL queues : ", __FILE__, __LINE__); #endif for (list::iterator it = queue_null.begin(); it != queue_null.end(); ++it) { #ifdef DEBUG_QUEUE_NULL fprintf(_fp, "%s %d : %s : %ld\n", __FILE__, __LINE__, (*it)->task_name, (*(*it)->ops_complexity)); #endif erase_task(*it); } queue_null.clear(); /* release the temporaries allocated in this function */ delete [] tasks_deallocLocalSchur_indcol; delete [] tasks_DTRSMScale_indcol; delete [] tasks_DTRSMScale_index; delete [] tasks_DTRSMScale_parents_index; delete [] tasks_DTRSMScale_ptr; delete [] tasks_DSymmGEMM_indcol; delete [] nops_queue[0]; delete [] nops_queue; _queue_symb_allocated = true; _queue_numrc_allocated = true; } template void DissectionQueue::
generate_queue(vector*>& dM, int nnz, double *coefs); template void DissectionQueue:: generate_queue(vector*>& dM, int nnz, quadruple *coefs); template void DissectionQueue, double>:: generate_queue(vector, double>*>& dM, int nnz, complex *coefs); template void DissectionQueue, quadruple>:: generate_queue(vector, quadruple>*>& dM, int nnz, complex *coefs); template void DissectionQueue:: generate_queue(vector*>& dM, int nnz, float *coefs); template void DissectionQueue, float>:: generate_queue(vector, float>*>& dM, int nnz, complex *coefs); // template void DissectionQueue:: generate_queue_fwbw(vector*>& dM, int dim, int nnz, T *coefs) { // const T zero(0.0); // const T none(-1.0); // const T one(1.0); const int nb_level = _btree->NumberOfLevels(); const int level_last = nb_level - 1; const int nb_doms = _btree->NumberOfSubdomains(); // allocation of working array for sereial execution // find maximum size of submatrix among dense domains // _dim = dim; _nnz = nnz; _nrhs = new int*; _isTrans = new bool*; _x = new T*; _xi = new T**[nb_doms]; _yi = new T**[nb_doms]; _zi = new T**[nb_doms]; _wi = new T**[nb_doms]; for (int d = 1; d <= nb_doms; d++) { const int d0 = _btree->selfIndex(d); _xi[d0] = new T*; _yi[d0] = new T*; _wi[d0] = new T*; _zi[d0] = new T*; } _diag_contribs = new list[nb_doms]; for (int level = level_last; level >= 0; level--) { const unsigned begdom = 1U << level; const unsigned enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { int offset_src = 0; for (int ll = (level - 1); ll >=0; ll--) { const int father_id = _btree->nthfatherIndex(d, (level - ll)); const int father_id0 = _btree->selfIndex(father_id); // copy data from Xavier's SetOfStrips to list list diag; const Dissection::SetOfStrips &diag_x = _btree->getFathersStrips(d)[ll]; for (Dissection::SetOfStrips::const_iterator it = diag_x.begin(); it != diag_x.end(); ++it) { diag.push_back(index_strip((*it).begin_dst, (*it).begin_src + offset_src, (*it).width)); } list &tmp = 
_diag_contribs[father_id0]; tmp.push_back(diag_contribution(d, _btree->sizeOfDomain(d), _btree->sizeOfFathersStrips(d), _btree->sizeOfDomain(father_id), diag)); offset_src += diag_x.numberOfIndices(); } } } vector tasks_a(1U << level_last); // sparse solver forward substitution phase { // begin : sparse const unsigned begdom = 1U << level_last; const unsigned enddom = begdom * 2; int ipos = 0; for (int d = begdom; d < enddom; d++, ipos++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const CSR_indirect &offdiag = _btree->getOffdiagCSR(d); int *indVals = offdiag.indVals; // get from the global array // const int *loc2glob_diag = _btree->getDiagLoc2Glob(d); const int colors = dM[d0]->ColorTridiagBlockMatrix(); C_SparseFw_arg *arg = new C_SparseFw_arg(colors, d, _isSym, dim, _isTrans, // _nrhs, dM[d0]->addrtridiagBlock(), _x, _yi[d0], _zi[d0], coefs, (int)_btree->sizeOfDomain(d), (int)_btree->sizeOfFathersStrips(d), offdiag.ptRows, offdiag.indCols, (_isSym ? 
indVals : offdiag.indVals_unsym), indVals, _btree->getDiagLoc2Glob(d)); string task_name = "a : " + to_string(d); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_SPARSESYMFW, task_name, (void *)arg, C_SparseFw, 1, 0, arg->ops_complexity); task->parallel_max = begdom; task->parallel_id = dd0; task->parents->clear(); // no dependency tasks_a[dd0] = task; } // loop : d } // end : sparse // updating RHS vector from level_last to all levels less than level_last vector tasks_i((1U << level_last) - 1); { const unsigned begdom = 1U; const unsigned enddom = 1U << level_last; for (int d = begdom; d < enddom; d++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const int n_diag = _btree->sizeOfDomain(d); if (n_diag == 0) { string task_name = ("i dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_i[dd0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { C_Dsub_FwBw_arg *arg = new C_Dsub_FwBw_arg(dim, _nrhs, n_diag, true, level_last, _btree, &_diag_contribs[d0], _x, _yi[d0], _zi, _btree->getDiagLoc2Glob(d)); string task_name = "i : " + to_string(d); *(arg->ops_complexity) = (long)n_diag; C_task *task = new C_task(C_DSUB_FWBW, task_name, (void *)arg, C_Dsub_FwBw, 1, 0, arg->ops_complexity); //task->func(task->func_arg); task->parallel_max = enddom; task->parallel_id = dd0; const int begdom0 = 1U << level_last; for (list::const_iterator it = _diag_contribs[d0].begin(); it != _diag_contribs[d0].end(); ++it) { const int child_id = (*it).child_id; if (_btree->nodeLayer(child_id) == level_last) { task->parents->push_back(tasks_a[child_id - begdom0]); } } tasks_i[dd0] = task; } } // loop : d } // forward substitution of dense part vector** tasks_de = new vector*[level_last]; tasks_de[0] = new vector[(1U << (level_last + 1)) - 1]; vector** tasks_k = new vector*[level_last]; tasks_k[0] = new vector[(1U << (level_last + 1)) - 
1]; vector* tasks_j = new vector[level_last]; for (int level = (level_last - 1); level >= 0; level--) { const unsigned begdom = 1U << level; const unsigned enddom = begdom * 2; if (level > 0) { tasks_de[level] = tasks_de[0] + ((1U << level) - 1); tasks_k[level] = tasks_k[0] + ((1U << level) - 1); } for (int d = begdom; d < enddom; d++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const int n_diag = _btree->sizeOfDomain(d); // fprintf(stderr, "%s %d : %d %d\n", __FILE__, __LINE__, d, n_diag); const int n_offdiag = _btree->sizeOfFathersStrips(d); SquareBlockMatrix &Diag = dM[d0]->diagBlock(); // const int *permute = Diag.getPermute().getAddr(); { // closure of tasks_de const int num_block = Diag.num_blocks(); if (n_diag == 0) { tasks_de[level][dd0].resize(1); string task_name = ("d dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_de[level][dd0][0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { tasks_de[level][dd0].resize((num_block * (num_block + 1)) / 2); for (int k = 0; k < num_block; k++) { const int kk = Diag.IndexBlock(k); const int ncol = Diag.nrowBlock(k); C_DenseFwBw_arg *arg = new C_DenseFwBw_arg(_isSym, false, // isBackward (int)dim, _isTrans, _nrhs, (int)n_diag, (int)ncol, // (int)k, (int)kk, _xi[d0], _wi[d0], _yi[d0], (T **)NULL, dM[d0]->addrdiagBlock(), (k == 0), // isFirstBlock (k == (num_block - 1)), // isLastBlock (int *)NULL, _verbose, &_fp); string task_name = "d : " + to_string(d) + " : " + to_string(k); *(arg->ops_complexity) = (long)n_diag; C_task *task = new C_task(C_DENSE_SYMFW_DIAG, task_name, (void *)arg, C_DenseFwBw_diag, 1, 0, arg->ops_complexity); if (k == 0) { task->parallel_max = 1; task->parallel_id = 0; tasks_de[level][dd0][0] = task; if (level == (level_last - 1)) { task->parents->push_back(tasks_i[d0]); // } else { task->parents->push_back(tasks_j[level + 1][d0]); } } else { 
task->parallel_max = num_block - k; task->parallel_id = 0; task->atomic_size = 2; task->atomic_id = 1; int ipos; // depending on e_{k k-1} ipos = 2 + num_block * (k - 1) - ((k - 1) * k) / 2 + k - 1; task->parents->push_back(tasks_de[level][dd0][ipos - 1]); // tasks_de[level][dd0][ipos] = task; for (int m = 0; m < (k - 1); m++) { ipos = num_block * m + k + 1 - (m * (m + 1)) / 2; task->parents->push_back(tasks_de[level][dd0][ipos]); // } } // updating of lower blocks xi[d0] + ii are independent among i for (int i = (k + 1); i < num_block; i++) { const int ii = Diag.IndexBlock(i); const int nrow = Diag.nrowBlock(i); C_DenseFwBwOffdiag_arg *arg = new C_DenseFwBwOffdiag_arg(_isSym ? true : false, //trans true, //isLower (int)dim, _isTrans, _nrhs, (int)n_diag, (int)n_diag, (int)n_diag, //ldc (int)nrow, (int)ncol, _wi[d0], kk, // ii _yi[d0], // _xi[d0], // ii, // jj dM[d0]->addrdiagBlock(), i, // i_block k, // j_block _none, // alpha, _one); // beta string task_name = ("e : " + to_string(d) + " : " + to_string(k) + " " + to_string(i)); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_DENSE_SYMFW_OFFDIAG, task_name, (void *)arg, C_DenseFwBw_offdiag, 1, 0, arg->ops_complexity); task->parallel_max = num_block - (k + 1); task->parallel_id = i - (k + 1); int ipos; // depending on d_{k-1} if (k > 0) { ipos = num_block * (k - 1) + i + 1 - ((k - 1) * k) / 2; task->parents->push_back(tasks_de[level][dd0][ipos]); } if (k > 0) { ipos = 2 + num_block * (k - 1) - ((k - 1) * k) / 2 + k - 1; } else { ipos = 0; } task->parents->push_back(tasks_de[level][dd0][ipos]); if (i == (k + 1)) { task->atomic_size = 2; // task->atomic_id = 0; // ipos = 2 + num_block * k - (k * (k + 1)) / 2 + k - 1; tasks_de[level][dd0][ipos] = task; } else { // i > (k + 1) ipos = num_block * k + i + 1 - (k * (k + 1)) / 2; tasks_de[level][dd0][ipos] = task; } // if (i == (k + 1)) } // loop : i } // loop : k } // if (n_diag == 0) } // closure of tasks_de if (level > 0) { // const int num_block = 
Diag.num_blocks(); // const int num_block_r = Diag.num_blocks(); RectBlockMatrix *upperblock = (_isSym ? dM[d0]->addrupperBlock() : dM[d0]->addrlowerBlock()); RectBlockMatrix *lowerblock = dM[d0]->addrupperBlock(); const int num_block_c = upperblock->num_blocks_c(); if (n_diag == 0) { tasks_k[level][dd0].resize(1); string task_name = ("k dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_k[level][dd0][0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { tasks_k[level][dd0].resize(num_block_r * num_block_c); // const double alpha = 1.0; for (int i = 0; i < num_block_r; i++) { const int ii = Diag.IndexBlock(i); const int nrow = Diag.nrowBlock(i); const T beta = (i == 0) ? _zero : _one; for (int j = 0; j < num_block_c; j++) { const int jj = upperblock->IndexBlock_c(j); // j * SIZE_B1; const int ncol = upperblock->ncolBlock(j); C_StripsFwBwOffdiag_arg *arg = new C_StripsFwBwOffdiag_arg(true, // isLower (int)dim, _isTrans, _nrhs, (int)n_diag, (int)n_diag, (int)n_offdiag, (int)ncol, // refering to upper (int)nrow, _wi[d0], ii, // ii, _zi[d0], jj, // jj, upperblock, lowerblock, i, j, _one, // alpha, beta); string task_name = ("k : " + to_string(d) + " : " + to_string(i) + " " + to_string(j)); *(arg->ops_complexity) = (long)n_diag; C_task *task = new C_task(C_STRIPS_SYMFW_OFFDIAG, task_name, (void *)arg, C_StripsFwBw_offdiag, 1, 0, arg->ops_complexity); task->parallel_max = num_block_c; // sequential among i task->parallel_id = j; // parallel among j int ipos; if (i == 0) { task->parents->push_back(tasks_de[level][dd0][0]); } else { ipos = 2 + num_block_r * (i - 1) - ((i - 1) * i) / 2 + i - 1; task->parents->push_back(tasks_de[level][dd0][ipos]); ipos = (i - 1) * num_block_c + j; task->parents->push_back(tasks_k[level][dd0][ipos]); } ipos = i * num_block_c + j; tasks_k[level][dd0][ipos] = task; } // loop : j } // loop : i } } // if (level > 0) // solving D y = x } 
// loop : d // updating RHS vector from level to all levels less than level { const unsigned begdom = 1U; const unsigned enddom = 1U << level; tasks_j[level].resize(enddom - 1); for (int d = begdom; d < enddom; d++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const int n_diag = _btree->sizeOfDomain(d); if (n_diag == 0) { string task_name = ("j dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_j[level][dd0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { C_Dsub_FwBw_arg *arg = new C_Dsub_FwBw_arg(dim, _nrhs, -1, // n_diag false, // access_global level, _btree, &_diag_contribs[d0], (T **)NULL, // x _yi[d0], _zi, _btree->getDiagLoc2Glob(d)); string task_name = "j : " + to_string(level) + " : " + to_string(d); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_DSUB_FWBW, task_name, (void *)arg, C_Dsub_FwBw, 1, 0, arg->ops_complexity); // task->func(task->func_arg); task->parallel_max = enddom; task->parallel_id = d0; task->parents->push_back(tasks_i[dd0]); for (list::const_iterator it = _diag_contribs[d0].begin(); it != _diag_contribs[d0].end(); ++it) { if ((*it).diag_strip.size() > 0) { // to avoid null contribution const int child_id = (*it).child_id; if (_btree->nodeLayer(child_id) == level) { const int child_id0 = _btree->selfIndex(child_id); RectBlockMatrix *upperblock = dM[child_id0]->addrupperBlock(); const int num_block_r = upperblock->num_blocks_r(); const int num_block_c = upperblock->num_blocks_c(); for (int j = 0; j < num_block_c; j++) { int ipos = (num_block_r - 1) * num_block_c + j; task->parents->push_back(tasks_k[0][child_id0][ipos]); } } // if (_btree->nodeLayer(child_id) == level) } // (if (*it).diag_strip.size() > 0) } // loop : it tasks_j[level][dd0] = task; } } // loop : d } } // loop : level vector* tasks_h = new vector[level_last]; vector** tasks_l = new vector*[level_last]; 
vector** tasks_fg = new vector*[level_last]; tasks_l[0] = new vector[(1U << (level_last + 1)) - 1]; tasks_fg[0] = new vector[(1U << (level_last + 1)) - 1]; for (int level = 0; level < level_last; level++) { const unsigned begdom = 1U << level; const unsigned enddom = begdom * 2; tasks_h[level].resize(enddom - 1); if (level > 0) { tasks_fg[level] = tasks_fg[0] + ((1U << level) - 1); tasks_l[level] = tasks_l[0] + ((1U << level) - 1); } for (int d = begdom; d < enddom; d++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const int n_diag = _btree->sizeOfDomain(d); const int n_offdiag = _btree->sizeOfFathersStrips(d); SquareBlockMatrix &Diag = dM[d0]->diagBlock(); if (level > 0) { if (n_diag == 0) { string task_name = ("h dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_h[level][dd0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { C_Dfill_FwBw_arg *arg = new C_Dfill_FwBw_arg(_nrhs, d, level, _btree, (int)n_offdiag, _yi, _zi[d0]); string task_name = "h : " + to_string(d); *(arg->ops_complexity) = (long)n_offdiag; C_task *task = new C_task(C_DENSE_SYMFILL, task_name, (void *)arg, C_Dfill_FwBw, 1, 0, arg->ops_complexity); task->parallel_max = enddom - 1; task->parallel_id = dd0; task->parents->clear(); for (int ll = (level - 1); ll >= 0; ll--) { const int father_id = _btree->nthfatherIndex(d, (level - ll)); const int father_id0 = _btree->selfIndex(father_id); const Dissection::SetOfStrips &diag = _btree->getFathersStrips(d)[ll]; if (diag.numberOfIndices() > 0) { const int ipos = tasks_fg[0][father_id0].size() - 1; // the last C_task *task_parent = tasks_fg[0][father_id0][ipos]; bool flag = false; for (list::const_iterator it = task->parents->begin(); it != task->parents->end(); ++it) { if ((*it) == task_parent) { flag = true; break; } } // loop : it if (flag == false) { task->parents->push_back(task_parent); } } } 
tasks_h[level][dd0] = task; } const int num_block_r = Diag.num_blocks(); if (n_diag == 0) { tasks_l[level][dd0].resize(1); string task_name = ("l dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_l[level][dd0][0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { tasks_l[level][dd0].resize(num_block_r); for (int i = 0; i < num_block_r; i++) { const int ii = Diag.IndexBlock(i); const int nrow = Diag.nrowBlock(i); C_StripsFwBwOffdiag_arg *arg = new C_StripsFwBwOffdiag_arg(false, // isLower (int)dim, _isTrans, _nrhs, (int)n_diag, (int)n_offdiag, (int)n_diag, (int)nrow, (int)n_offdiag, _zi[d0], 0, // ii, _xi[d0], ii, // jj, dM[d0]->addrupperBlock(), dM[d0]->addrlowerBlock(), 0, i, _none, //alpha, _one); // beta); string task_name = "l : " + to_string(d) + " : " + to_string(i); *(arg->ops_complexity) = (long)n_diag; C_task *task = new C_task(C_STRIPS_SYMFW_OFFDIAG, task_name, (void *)arg, C_StripsFwBw_offdiag, 1, 0, arg->ops_complexity); int ipos; task->parents->push_back(tasks_h[level][dd0]); const int father_id = _btree->fatherIndex(d); const int father_id0 = _btree->selfIndex(father_id); ipos = tasks_fg[0][father_id0].size() - 1; // the last of tasks_fg if (ipos >= 0) { task->parents->push_back(tasks_fg[0][father_id0][ipos]); } tasks_l[level][dd0][i] = task; } // loop : i } // if (n_diag == 0) } // if (level > 0) { SquareBlockMatrix &Diag = dM[d0]->diagBlock(); const int num_block = Diag.num_blocks(); if (n_diag == 0) { tasks_fg[level][dd0].resize(1); string task_name = ("d dummy : " + to_string(d)); C_dummy_arg *arg = new C_dummy_arg(_verbose, &_fp, d); tasks_fg[level][dd0][0] = new C_task(C_DUMMY, task_name, (void *)arg, C_dummy, 1, // atomic_size, 0, // atomic_id, arg->ops_complexity); } else { tasks_fg[level][dd0].resize((num_block * (num_block + 1)) / 2); int k0 = 0; for (int k = (num_block - 1); k >= 0; k--, k0++) { const int kk = Diag.IndexBlock(k); 
const int ncol = Diag.nrowBlock(k); C_DenseFwBw_arg *arg = new C_DenseFwBw_arg(_isSym, true, // isBackward (int)dim, _isTrans, _nrhs, (int)n_diag, (int)ncol, // (int)k, (int)kk, _xi[d0], (T **)NULL, _yi[d0], // (k == 0) ? _x : (T **)NULL, dM[d0]->addrdiagBlock(), (k == 0), // isFirstBlock (k == (num_block - 1)), // isLastBlock (k == 0) ? _btree->getDiagLoc2Glob(d) : (int *)NULL, _verbose, &_fp); string task_name = "f : " + to_string(d) + " : " + to_string(k); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_DENSE_SYMFW_DIAG, task_name, (void *)arg, C_DenseFwBw_diag, 1, 0, arg->ops_complexity); if (k0 == 0) { task->parallel_max = 1; task->parallel_id = 0; tasks_fg[level][dd0][0] = task; if (level == 0){ const int ipos = tasks_de[0][0].size() - 1; //the last of the fw if (ipos >= 0) { task->parents->push_back(tasks_de[0][0][ipos]); } } else { task->parents->push_back(tasks_l[0][d0][k]); } } else { task->parallel_max = num_block - k0; task->parallel_id = 0; task->atomic_size = 2; task->atomic_id = 1; int ipos; ipos = 2 + num_block * (k0 - 1) - ((k0 - 1) * k0) / 2 + k0 - 1; task->parents->push_back(tasks_fg[level][dd0][ipos - 1]); // tasks_fg[level][dd0][ipos] = task; for (int m = 0; m < (k0 - 1); m++) { ipos = num_block * m + k0 + 1 - (m * (m + 1)) / 2; task->parents->push_back(tasks_fg[level][dd0][ipos]); // } } int i0 = k0 + 1; // a trick to use the same formula to count ipos for (int i = (k - 1); i >= 0; i--, i0++) { const int ii = Diag.IndexBlock(i); const int nrow = Diag.nrowBlock(i); // SIZE_B1; // ? 
C_DenseFwBwOffdiag_arg *arg = new C_DenseFwBwOffdiag_arg(false, // trans false, // isLower (int)dim, _isTrans, _nrhs, (int)n_diag, (int)n_diag, (int)n_diag, (int)nrow, (int)ncol, _xi[d0], kk, // ii _xi[d0], _xi[d0], // (double **)NULL, ii, // jj dM[d0]->addrdiagBlock(), i, // i_block k, // j_block _none, // alpha, _one); //beta); string task_name = ("g : " + to_string(d) + " : " + to_string(k) + " " + to_string(i)); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_DENSE_SYMFW_OFFDIAG, task_name, (void *)arg, C_DenseFwBw_offdiag, 1, 0, arg->ops_complexity); if (level > 0) { task->parents->push_back(tasks_l[0][d0][i]); } task->parallel_max = num_block - (k0 + 1); int ipos; if (k0 > 0) { ipos = num_block * (k0 - 1) + i0 + 1 - ((k0 - 1) * k0) / 2; task->parents->push_back(tasks_fg[level][dd0][ipos]); } if (k0 > 0) { ipos = 2 + num_block * (k0 - 1) - ((k0 - 1) * k0) / 2 + k0 - 1; } else { ipos = 0; } task->parents->push_back(tasks_fg[level][dd0][ipos]); if (i0 == (k0 + 1)) { task->parallel_id = 0; task->atomic_size = 2; // task->atomic_id = 0; // ipos = 2 + num_block * k0 - (k0 * (k0 + 1)) / 2 + k0 - 1; } else { // i0 > (k0 + 1) task->parallel_id = i - (k0 + 1); ipos = num_block * k0 + i0 + 1 - (k0 * (k0 + 1)) / 2; } tasks_fg[level][dd0][ipos] = task; } // loop : i } // loop : k } // if (n_diag == 0) } } // loop : d } // loop : level vector tasks_b(1U << level_last); { // begin : sparse const unsigned begdom = 1U << level_last; const unsigned enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { // to be parallelized const int dd0 = d - begdom; const int d0 = _btree->selfIndex(d); const CSR_indirect &offdiag = _btree->getOffdiagCSR(d); const int colors = dM[d0]->ColorTridiagBlockMatrix(); C_SparseBw_arg *arg = new C_SparseBw_arg(colors, d, _isSym, (int)dim, _isTrans, _nrhs, _btree, (int)level_last, dM[d0]->addrtridiagBlock(), _x, _yi, _xi[d0], _yi[d0], _zi[d0], coefs, offdiag.ptRows, offdiag.indCols, offdiag.indVals, offdiag.indVals_unsym); string 
task_name = "b : " + to_string(d); *(arg->ops_complexity) = (long)dim; C_task *task = new C_task(C_SPARSESYMBW, task_name, (void *)arg, C_SparseBw, 1, 0, arg->ops_complexity); task->parallel_max = begdom; task->parallel_id = dd0; for (int ll = (level_last - 1); ll >= 0; ll--) { const int father_id = _btree->nthfatherIndex(d, (level_last - ll)); const int father_id0 = _btree->selfIndex(father_id); const Dissection::SetOfStrips &diag = _btree->getFathersStrips(d)[ll]; const int ipos = tasks_fg[0][father_id0].size() - 1; if ((diag.numberOfIndices() > 0) && (ipos >= 0)) { task->parents->push_back(tasks_fg[0][father_id0][ipos]); } } tasks_b[dd0] = task; } // loop : d } // end : sparse int count_tasks = 0; for (int i = 0; i < (1U << level_last); i++) { count_tasks++; } for (int i = 0; i < ((1U << level_last) - 1); i++) { count_tasks++; } for (int level = (level_last - 1); level >= 0; level--) { const int begdom = (int)(1U << level); const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int dd0 = d - begdom; for (int i = 0; i < tasks_de[level][dd0].size(); i++) { count_tasks++; } if (level > 0) { for (int i = 0; i < tasks_k[level][dd0].size(); i++) { count_tasks++; } } } // loop : d for (int d = 1; d < begdom; d++) { // to be parallelized count_tasks++; } } for (int level = 0; level < level_last; level++) { const int begdom = (int)1U << level; const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int dd0 = d - begdom; if (level > 0) { count_tasks++; for (int i = 0; i < tasks_l[level][dd0].size(); i++) { count_tasks++; } } for (int i = 0; i < tasks_fg[level][dd0].size(); i++) { count_tasks++; } } // loop : d } for (int i = 0; i < (1U << level_last); i++) { count_tasks++; } vector *tasks_tmp = new vector; tasks_tmp->resize(count_tasks); count_tasks = 0; for (int i = 0; i < (1U << level_last); i++) { (*tasks_tmp)[count_tasks++] = tasks_a[i]; } for (int i = 0; i < ((1U << level_last) - 1); i++) { (*tasks_tmp)[count_tasks++] = 
tasks_i[i]; } for (int level = (level_last - 1); level >= 0; level--) { const int begdom = (int)(1U << level); const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int dd0 = d - begdom; for (int i = 0; i < tasks_de[level][dd0].size(); i++) { (*tasks_tmp)[count_tasks++] = tasks_de[level][dd0][i]; } if (level > 0) { for (int i = 0; i < tasks_k[level][dd0].size(); i++) { (*tasks_tmp)[count_tasks++] = tasks_k[level][dd0][i]; } } } // loop : d for (int d = 1; d < begdom; d++) { // to be parallelized const int dd0 = d - 1; (*tasks_tmp)[count_tasks++] = tasks_j[level][dd0]; } } for (int level = 0; level < level_last; level++) { const int begdom = (int)(1U << level); const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int dd0 = d - begdom; if (level > 0) { (*tasks_tmp)[count_tasks++] = tasks_h[level][dd0]; for (int i = 0; i < tasks_l[level][dd0].size(); i++) { (*tasks_tmp)[count_tasks++] = tasks_l[level][dd0][i]; } } for (int i = 0; i < tasks_fg[level][dd0].size(); i++) { (*tasks_tmp)[count_tasks++] = tasks_fg[level][dd0][i]; } } // loop : d } for (int i = 0; i < (1U << level_last); i++) { (*tasks_tmp)[count_tasks++] = tasks_b[i]; } string task_name = "fwbw whole"; _queue_fwbw = new C_task_seq(C_FWBW, task_name, // dummy (-1), // mutex_id TASK_SINGLE, //TASK_SINGLE, _num_threads, -1, // level -1, // phase tasks_tmp, 0, //itmp, count_tasks, 0); // dummy _dissectionRuntime->set_queue_fwbw(_queue_fwbw); diss_printf(_verbose, _fp, "%s %d : void DissectionQueue::generate_queue_fwbw\n", __FILE__, __LINE__); delete [] tasks_de[0]; // delete [] tasks_de; // vector** delete [] tasks_k[0]; // delete [] tasks_k; // vector** delete [] tasks_j; // vector* delete [] tasks_h; // vector* delete [] tasks_l[0]; // delete [] tasks_l; // vector** delete [] tasks_fg[0]; delete [] tasks_fg; // vector** // vector tasks_a, tasks_b, and tasks_i are deallocated automatically _queue_fwbw_allocated = true; } template void DissectionQueue:: 
generate_queue_fwbw(vector*>& dissectionMatrix, int dim, int nnz, double *coefs); template void DissectionQueue:: generate_queue_fwbw(vector*>& dissectionMatrix, int dim, int nnz, quadruple *coefs); template void DissectionQueue, double>:: generate_queue_fwbw(vector, double>*>& dissectionMatrix, int dim, int nnz, complex *coefs); template void DissectionQueue, quadruple>:: generate_queue_fwbw(vector, quadruple>*>& dissectionMatrix, int dim, int nnz, complex *coefs); template void DissectionQueue:: generate_queue_fwbw(vector*>& dissectionMatrix, int dim, int nnz, float *coefs); template void DissectionQueue, float>:: generate_queue_fwbw(vector, float>*>& dissectionMatrix, int dim, int nnz, complex *coefs); // template void DissectionQueue::exec_symb_fact(void) { _dissectionRuntime->exec_symb_fact(); } template void DissectionQueue::exec_symb_fact(); template void DissectionQueue::exec_symb_fact(); template void DissectionQueue, double>::exec_symb_fact(); template void DissectionQueue, quadruple>::exec_symb_fact(); template void DissectionQueue::exec_symb_fact(); template void DissectionQueue, float>::exec_symb_fact(); // template void DissectionQueue::exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const U eps_machine, const bool higher_precision) { _kernel_detection = kernel_detection; _aug_dim = aug_dim; #if 0 if (higher_precision) { _eps_piv = eps_machine; } else { _eps_piv = eps_piv; } #else _eps_piv = eps_piv; #endif _eps_machine = eps_machine; _higher_precision = higher_precision; _dissectionRuntime->exec_num_fact(called); } template void DissectionQueue:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const double eps_machine, const bool higher_precision); template void DissectionQueue:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const quadruple eps_machine, const bool higher_precision); 
template void DissectionQueue, double>:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const double eps_machine, const bool higher_precision); template void DissectionQueue, quadruple>:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const quadruple eps_machine, const bool higher_precision); template void DissectionQueue:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const float eps_machine, const bool higher_precision); template void DissectionQueue, float>:: exec_num_fact(const int called, const double eps_piv, const bool kernel_detection, const int aug_dim, const float eps_machine, const bool higher_precision); // template void DissectionQueue::exec_fwbw_seq(T *x, const int nrhs, bool isTrans) { const int nb_doms = _btree->NumberOfSubdomains(); int ntmp; *_x = x; *_nrhs = (int *)&nrhs; *_isTrans = &isTrans; ntmp = 0; for (int d = 1; d <= nb_doms; d++) { ntmp += _btree->sizeOfDomain(d); } ColumnMatrix xxi(ntmp, nrhs); ColumnMatrix yyi(ntmp, nrhs); ColumnMatrix wwi(ntmp, nrhs); ntmp = 0; for (int d = 1; d <= nb_doms; d++) { const int d0 = _btree->selfIndex(d); *_xi[d0] = xxi.addrCoefs() + ntmp * nrhs; *_yi[d0] = yyi.addrCoefs() + ntmp * nrhs; *_wi[d0] = wwi.addrCoefs() + ntmp * nrhs; ntmp += _btree->sizeOfDomain(d); } ntmp = 0; for (int d = 1; d <= nb_doms; d++) { ntmp += _btree->sizeOfFathersStrips(d); } ColumnMatrix zzi(ntmp, nrhs); ntmp = 0; for (int d = 1; d <= nb_doms; d++) { const int d0 = _btree->selfIndex(d); *_zi[d0] = zzi.addrCoefs() + ntmp * nrhs; ntmp += _btree->sizeOfFathersStrips(d); } diss_printf(_verbose, _fp, "exec_fwbw_seq for nrhs = %d \n", nrhs); _dissectionRuntime->exec_fwbw_seq(); xxi.free(); yyi.free(); wwi.free(); zzi.free(); } template void DissectionQueue::exec_fwbw_seq(double *x, const int nrhs, bool isTrans); template void DissectionQueue::exec_fwbw_seq(quadruple *x, 
const int nrhs, bool isTrans); template void DissectionQueue, double>:: exec_fwbw_seq(complex *x, const int nrhs, bool isTrans); template void DissectionQueue, quadruple>:: exec_fwbw_seq(complex *x, const int nrhs, bool isTrans); template void DissectionQueue::exec_fwbw_seq(float *x, const int nrhs, bool isTrans); template void DissectionQueue, float>:: exec_fwbw_seq(complex *x, const int nrhs, bool isTrans); // template void DissectionQueue::exec_fwbw(T *x, const int nrhs, bool isTrans) { const int num_threads = _num_threads; //_num_threads_symb; // struct timespec ts0, ts1; const int nb_doms = _btree->NumberOfSubdomains(); int ntmp; diss_printf(_verbose, _fp, "fwbw for nrhs = %d with %d threads\n", nrhs, num_threads); *_x = x; *_nrhs = (int *)&nrhs; *_isTrans = &isTrans; ntmp = 0; for (int d = 1; d <= nb_doms; d++) { ntmp += _btree->sizeOfDomain(d); } ColumnMatrix xxi(ntmp, nrhs); ColumnMatrix yyi(ntmp, nrhs); ColumnMatrix wwi(ntmp, nrhs); ntmp = 0; for (int d = 1; d <= nb_doms; d++) { const int d0 = _btree->selfIndex(d); *_xi[d0] = xxi.addrCoefs() + ntmp * nrhs; *_yi[d0] = yyi.addrCoefs() + ntmp * nrhs; *_wi[d0] = wwi.addrCoefs() + ntmp * nrhs; ntmp += _btree->sizeOfDomain(d); } ntmp = 0; for (int d = 1; d <= nb_doms; d++) { ntmp += _btree->sizeOfFathersStrips(d); } ColumnMatrix zzi(ntmp, nrhs); ntmp = 0; for (int d = 1; d <= nb_doms; d++) { const int d0 = _btree->selfIndex(d); // *_zi[d0] = &zzi[ntmp * nrhs]; *_zi[d0] = zzi.addrCoefs()+ ntmp * nrhs; ntmp += _btree->sizeOfFathersStrips(d); } _dissectionRuntime->exec_fwbw(); xxi.free(); yyi.free(); wwi.free(); zzi.free(); } template void DissectionQueue::exec_fwbw(double *x, const int nrhs, bool isTrans); template void DissectionQueue::exec_fwbw(quadruple *x, const int nrhs, bool isTrans); template void DissectionQueue, double>::exec_fwbw(complex *x, const int nrhs, bool isTrans); template void DissectionQueue::exec_fwbw(float *x, const int nrhs, bool isTrans); template void DissectionQueue, 
float>::exec_fwbw(complex *x, const int nrhs, bool isTrans); template void DissectionQueue, quadruple>:: exec_fwbw(complex *x, const int nrhs, bool isTrans); // // #define DEBUG_ERASE template void DissectionQueue::erase_queue(void) { const int num_threads = _num_threads; diss_printf(_verbose, _fp, "%s %d : void QueueRuntime::erase_queue(void)", __FILE__, __LINE__); if (_queue_symb_allocated) { #ifdef DEBUG_ERASE cerr << "symbolic "; #endif // vector &queue = *(_queue_symb->queue); for (int j = _queue_symb->begin; j < _queue_symb->end; j++) { // C_task *task = queue[j]; C_task *task = (*(_queue_symb->queue))[j]; #ifdef DEBUG_ERASE cerr << task->task_name << " "; #endif // delete [] task->task_name; // char *task_name_cstr = new char [] erase_task(task); } // loop : j #ifdef DEBUG_ERASE cerr << _queue_symb->task_name << " " << endl; #endif // delete [] _queue_symb->task_name; delete _queue_symb->queue; delete _queue_symb; } #ifdef DEBUG_ERASE cerr << "zero reset" << endl; #endif if (_queue_numrc_allocated) { // zero reset could be done redundantly for (int p = 0; p < num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { (*it)->referred = 0; vector &queue = *((*it)->queue); for (int j = (*it)->begin; j < (*it)->end; j++) { queue[j]->referred = 0; } // loop : j } // loop : it } // loop : p for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { (*it)->referred = 0; for (int j = (*it)->begin; j < (*it)->end; j++) { (*(*it)->queue)[j]->referred = 0; } } #ifdef DEBUG_ERASE cerr << "increment referred counter" << endl; #endif // increment referred counter for (int p = 0; p < num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { (*it)->referred++; vector &queue = *((*it)->queue); for (int j = (*it)->begin; j < (*it)->end; j++) { queue[j]->referred++; } // loop : j } // loop : it } // loop : p for 
(vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { (*it)->referred++; for (int j = (*it)->begin; j < (*it)->end; j++) { (*(*it)->queue)[j]->referred++; } } // delete task #ifdef DEBUG_ERASE cerr << "delete task" << endl; #endif for (int p = 0; p < num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { vector &queue = *((*it)->queue); for (int j = (*it)->begin; j < (*it)->end; j++) { C_task *task = queue[j]; task->referred--; if (task->referred == 0) { erase_task(task); // } } // loop : j } // loop : it } // loop : p for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { for (int j = (*it)->begin; j < (*it)->end; j++) { C_task *task = (*(*it)->queue)[j]; task->referred--; if (task->referred == 0) { erase_task(task); // } } } // delete task_seq #ifdef DEBUG_ERASE cerr << "static : C_task_seq" << endl; #endif // _queue_static[] share tasks in _queue_dynamic for (int p = 0; p < num_threads; p++) { for (list::iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { (*it)->referred--; if ((*it)->referred == 0) { #ifdef DEBUG_ERASE fprintf(stderr, "%x %s\n", (*it), (*it)->task_name); #endif delete (*it)->queue; (*it)->queue = NULL; delete (*it); (*it) = NULL; } } // loop : it } // loop : p #ifdef DEBUG_ERASE cerr << "dynamic : C_task_seq" << endl; #endif for (vector::iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { (*it)->referred--; if ((*it)->referred == 0) { #ifdef DEBUG_ERASE fprintf(stderr, "%x %s\n", (*it), (*it)->task_name); #endif delete (*it)->queue; (*it)->queue = NULL; delete (*it); (*it) = NULL; } } // loop : it delete _queue_dynamic; // Here all queues are deallocated. 
for (int p = 0; p < num_threads; p++) { _queue_static[p].clear(); } delete [] _queue_static; #ifdef DEBUG_ERASE cerr << "queues are deallocated" << endl; #endif } _queue_symb_allocated = false; _queue_numrc_allocated = false; } template void DissectionQueue::erase_queue(void); template void DissectionQueue::erase_queue(void); template void DissectionQueue, double>::erase_queue(void); template void DissectionQueue, quadruple>::erase_queue(void); template void DissectionQueue::erase_queue(void); template void DissectionQueue, float>::erase_queue(void); // template void DissectionQueue::erase_queue_fwbw(void) { diss_printf(_verbose, _fp, "%s %d : void QueueRuntime::erase_queue_fwbw(void)", __FILE__, __LINE__); if (_queue_fwbw_allocated) { delete [] _diag_contribs; delete _x; for (int d = 1; d <= _nb_doms; d++) { const int d0 = _btree->selfIndex(d); delete _xi[d0]; delete _yi[d0]; delete _wi[d0]; delete _zi[d0]; } delete [] _xi; delete [] _yi; delete [] _zi; delete [] _wi; for (vector::iterator it = _queue_fwbw->queue->begin(); it != _queue_fwbw->queue->end(); ++it) { erase_task(*it); } _queue_fwbw->queue->clear(); delete _queue_fwbw->queue; delete _queue_fwbw; delete _nrhs; // added 01 Oct.2013 Atsushi delete _isTrans; } _queue_fwbw_allocated = false; } template void DissectionQueue::erase_queue_fwbw(void); template void DissectionQueue::erase_queue_fwbw(void); template void DissectionQueue, double>::erase_queue_fwbw(void); template void DissectionQueue, quadruple>::erase_queue_fwbw(void); template void DissectionQueue::erase_queue_fwbw(void); template void DissectionQueue, float>::erase_queue_fwbw(void); FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionQueue.hpp000664 000000 000000 00000013212 14037356732 026131 0ustar00rootroot000000 000000 /*! \file DissectionQueue.hpp \brief management of threads for factorization and Fw/Bw substitution \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 
30th 2016
*/

// This file is part of Dissection
//
// Dissection is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Linking Dissection statically or dynamically with other modules is making
// a combined work based on Dissection. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection.  If not, see <http://www.gnu.org/licenses/>.
//

#ifndef _DISSECTION_QUEUE_
#define _DISSECTION_QUEUE_
#include "Compiler/OptionLibrary.hpp"
#include
#include "Splitters/BisectionTree.hpp"
#include "Driver/DissectionMatrix.hpp"
#include "Driver/QueueRuntime.hpp"
#include "Compiler/elapsed_time.hpp"
#include
#include

// Owner of the task queues used for the symbolic factorization, the numeric
// factorization and the forward/backward substitution; execution itself is
// delegated to the QueueRuntime object held in _dissectionRuntime.
//
// Typical life cycle as far as the member functions show it:
//   generate_queue()      / exec_symb_fact(), exec_num_fact() / erase_queue()
//   generate_queue_fwbw() / exec_fwbw() or exec_fwbw_seq()    / erase_queue_fwbw()
template
class DissectionQueue
{
public:
  DissectionQueue(Dissection::Tree *btree,
                  vector*>& dissectionMatrix,
                  const int num_threads,
                  const bool isSym,
                  const bool verbose,
                  FILE *fp);

  ~DissectionQueue();

  // queue construction
  void generate_queue(vector*>& dM,
                      int nnz, T *coefs);
  void generate_queue_fwbw(vector*>& dissectionMatrix,
                           int dim, int nnz, T *coefs);

  // execution : each call forwards to the function of the same name in
  // _dissectionRuntime after storing its arguments in the members below
  void exec_symb_fact();
  void exec_num_fact(const int called,
                     const double eps_piv,
                     const bool kernel_detection,
                     const int aug_dim,
                     const U eps_machine,
                     const bool higher_precision);
  void exec_fwbw(T *x, const int nrhs, bool isTrans);
  void exec_fwbw_seq(T *x, const int nrhs, bool isTrans);

  // release of the queues; guarded by the _queue_*_allocated flags
  void erase_queue(void);
  void erase_queue_fwbw(void);

  // NOTE(review): this copy constructor copies only four pointers; every
  // other member (including the _queue_*_allocated flags) is left without a
  // value in the copy, and the copied pointers alias the source's storage.
  // Confirm that no caller copies a DissectionQueue and then uses or
  // destroys the copy.
  DissectionQueue(const DissectionQueue &s)
  {
    _queue_symb = s._queue_symb;
    _queue_static = s._queue_static;
    _queue_dynamic = s._queue_dynamic;
    _pivots = s._pivots;
  }

  list >& ChildContribs(int nb)
  {
    return _child_contribs[nb];
  }

  int dimension() const { return _dim; }
  int nnz() const { return _nnz; }

private:
  Dissection::Tree* _btree;
  C_task_seq* _queue_symb; // single set of tasks among sparse subdomains
  C_task_seq* _queue_fwbw; // whole forward/backward substitution queue
  list *_queue_static;     // array indexed by thread number in erase_queue()
  vector *_queue_dynamic;
  list _queue_dummy;
  vector* _children;
  vector* _tasks_SparseSymb;
  vector* _tasks_SparseNum;
  vector* _tasks_SparseLocalSchur;
  vector* _tasks_DFillSym;
  vector* _tasks_DFullLDLt;
  vector* _tasks_DTRSMScale;
  vector* _tasks_DSymmGEMM;
  vector* _tasks_deallocLocalSchur;
  vector** _tasks_Dsub;
  vector* _tasks_deallocLower;
  list > *_child_contribs; // array indexed by nb in ChildContribs()
  int _num_threads;
  int _num_threads_symb;
  int _dim;
  int _nnz;
  int _nb_doms;
  int _nb_level;
  double _eps_piv; // used in selecting pivot
  bool _kernel_detection;
  bool _higher_precision;
  int _aug_dim;
  U _eps_machine;  // magnitude of numerical perturbation
  double *_pivots;
  int _max_size_work_y;
  list* _diag_contribs;
  // per-subdomain cells, indexed by selfIndex(d); exec_fwbw() and
  // exec_fwbw_seq() write the address of the subdomain's slice of a work
  // buffer into each cell before the runtime is started
  T ***_xi;
  T ***_yi;
  T ***_zi;
  T ***_wi;
  T **_x;         // a pointer to keep rhs and solution vectors
  int **_nrhs;    // these values contain only address information during
  bool **_isTrans; // queue generation of fw/bw substitutions
  bool _verbose;
  FILE *_fp;
  FILE **_fps;
  bool _isSym;
  QueueRuntime* _dissectionRuntime;
  // set when the corresponding queues exist; tested and cleared by
  // erase_queue() and erase_queue_fwbw()
  bool _queue_symb_allocated;
  bool _queue_numrc_allocated;
  bool _queue_fwbw_allocated;
  static const T _one;  // (1.0);
  static const T _zero; // (0.0);
  static const T _none; // (-1.0);
}; // End class DissectionQueue

#endif
FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionSolver.cpp000664 000000 000000 00000453745 14037356732 026335 0ustar00rootroot000000 000000
/*! \file   DissectionSolver.cpp
    \brief  task management of dissection algorithm
    \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions
    \date   Mar. 30th 2012
    \date   Jul. 12th 2015
    \date   Nov. 30th 2016
*/

// This file is part of Dissection
//
// Dissection is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Linking Dissection statically or dynamically with other modules is making
// a combined work based on Dissection. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Dissection give you
// permission to combine Dissection program with free software programs or
// libraries that are released under the GNU LGPL and with independent modules
// that communicate with Dissection solely through the Dissection-fortran
// interface. You may copy and distribute such a system following the terms of
// the GNU GPL for Dissection and the licenses of the other code concerned,
// provided that you include the source code of that other code when and as
// the GNU GPL requires distribution of source code and provided that you do
// not modify the Dissection-fortran interface.
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection.  If not, see <http://www.gnu.org/licenses/>.
//
// NOTE(review): in this copy of the file the targets of the three bare
// `#include` directives below and the template parameter/argument lists
// (e.g. `template void DissectionSolver::`, `vector* >`) appear to have been
// stripped.  The code tokens are reproduced exactly as found; restore them
// from a pristine copy of the source before compiling.
#include
#include
#include
#include "Driver/DissectionSolver.hpp"
#include "Driver/C_BlasRoutines.hpp"
#include "Driver/C_KernDetect.hpp"
#include "Driver/CopyMatrix.hpp"
#include "Driver/TridiagBlockMatrix.hpp"
#include "Splitters/MetisSplitter.hpp"
#include "Algebra/SparseRenumbering.hpp"
#include "Algebra/VectorArray.hpp"
#include "Compiler/DissectionIO.hpp"

#define NORMALIZE_KERNEL_BASIS

// Definitions of the static class constants 1, -1 and 0 in the type T.
template
const T DissectionSolver::_one = T(1.0);
template
const T DissectionSolver::_none = T(-1.0);
template
const T DissectionSolver::_zero = T(0.0);

// Releases everything the solver allocated.
//
// For each of the _graph_colors components the dissection queue and its
// per-subdomain matrices, or the tridiagonal block matrix, are deleted,
// depending on _tridiagQueue[m]->tridiagSolver().  Then the sparse matrix
// _ptDA, _precDiag, the numerical data (through NumericFree()) and the
// per-component arrays are released.
template
void DissectionSolver::
Destroy(void)
{
  for (int m = 0; m < _graph_colors;m++) {
    if (!_tridiagQueue[m]->tridiagSolver()) {
      delete _dissectionQueue[m];
      for (typename vector* >::const_iterator it =
             _dissectionMatrix[m].begin();
           it != _dissectionMatrix[m].end(); ++it) {
        delete (*it);
      }
    }
    else {
      delete _tridiagMatrix[m];
    }
  }
  delete [] _dissectionQueue;
  delete [] _dissectionMatrix;
  delete [] _tridiagMatrix;
  delete _ptDA;
  delete[] _precDiag;
  // NumericFree() indexes _Schur, _kernel and _singIdx, so it has to run
  // before these arrays are deleted
  NumericFree();
  delete [] _Schur;
  delete [] _kernel;
  delete [] _singIdx;  // added 01 Oct.2013 Atsushi
  // _tridiagQueue[m] is deleted last because tridiagSolver() is still
  // queried here to decide whether _btree[m] belongs to this object
  for (int m = 0; m < _graph_colors;m++) {
    if (_with_btree && !_tridiagQueue[m]->tridiagSolver()) {
      delete _btree[m];  // added 01 Oct.2013 Atsushi
    }
    delete _tridiagQueue[m];
  }
  delete [] _tridiagQueue;
  delete [] _btree;
  _index_isolated.clear();
}

// explicit instantiations of Destroy()
template void DissectionSolver::Destroy(void);
template void DissectionSolver::Destroy(void);
template void DissectionSolver::Destroy(void);
template void DissectionSolver::Destroy(void);
template void DissectionSolver, double>::Destroy(void);
template void DissectionSolver, quadruple>::Destroy(void);
template void DissectionSolver, quadruple, complex, double>::Destroy(void);
template void DissectionSolver, double, complex, quadruple>::Destroy(void);
template void DissectionSolver::Destroy(void);
template void DissectionSolver, float>::Destroy(void);
//

// Frees the data produced by the numerical factorization.
//
// When _status_factorized is set, _Schur[m] and _kernel[m] are freed and
// _singIdx[m] is cleared for every component; the flag is then reset, so a
// second call does nothing but write the trace through diss_printf().
template
void DissectionSolver::
NumericFree(void)
{
  diss_printf(_verbose, _fp,
              "%s %d : void DissectionSolver::NumericFree()",
              __FILE__, __LINE__ );
  if (_status_factorized) {
    diss_printf(_verbose, _fp, "start ");
    for (int m = 0; m < _graph_colors; m++) {
      _Schur[m].free();
      _kernel[m].free();
      _singIdx[m].clear(); // added 01 Oct.2013 Atsushi
    }
    diss_printf(_verbose, _fp, "end");
  }
  _status_factorized = false;
  diss_printf(_verbose, _fp, ".\n");
}

// explicit instantiations of NumericFree()
template void DissectionSolver::NumericFree(void);
template void DissectionSolver::NumericFree(void);
template void DissectionSolver::NumericFree(void);
template void DissectionSolver::NumericFree(void);
template void DissectionSolver, double>::NumericFree(void);
template void DissectionSolver, quadruple>::NumericFree(void);
//
template void DissectionSolver, quadruple, complex, double>::NumericFree(void);
template void DissectionSolver, double, complex, quadruple>::NumericFree(void);
template void DissectionSolver::NumericFree(void);
template void DissectionSolver, float>::NumericFree(void);
//

// Generic version: nothing is saved, it only reports on stderr that no
// specialization exists.  `called` and `coefs_` are unused.
template
void DissectionSolver::
SaveCSRMatrix(const int called,
              const T *coefs_)
{
  diss_printf(true, stderr,
              "%s %d : specialized template is not yet defined.\n",
              __FILE__, __LINE__);
}

// explicit instantiations of SaveCSRMatrix()
template
void DissectionSolver::SaveCSRMatrix(const int called,
                                     const double *coefs);
template
void DissectionSolver, double>::
SaveCSRMatrix(const int called,
              const complex *coefs);
template
void DissectionSolver::
SaveCSRMatrix(const int called,
              const quadruple *coefs);
template
void DissectionSolver, quadruple>::
SaveCSRMatrix(const int called,
              const complex *coefs);
template
void DissectionSolver::SaveCSRMatrix(const int called,
                                     const float *coefs);
template
void DissectionSolver, float>::
SaveCSRMatrix(const int called,
              const complex *coefs);
//

// Generic version: nothing is saved, it only reports on stderr that no
// specialization exists.  The specializations below do the actual output.
template
void DissectionSolver::
SaveMMMatrix(const int called,
             const T *coefs_)
{
  diss_printf(true, stderr,
              "%s %d : specialized template is not yet defined.\n",
              __FILE__, __LINE__);
}

// Specialization taking `const double *` coefficients: forwards the matrix
// structure held in _ptDA together with coefs_ to SaveMMMatrix_().
template<>
void DissectionSolver::SaveMMMatrix(const int called,
                                    const double *coefs_)
{
  SaveMMMatrix_(_dim, _ptDA->nnz(), _ptDA->isSymmetric(), _ptDA->isUpper(),
                _ptDA->getRows(), _ptDA->getIndCols(), called, coefs_);
}

// Specialization taking complex coefficients: same forwarding, except that a
// matrix stored as a whole (_ptDA->isWhole()) is always passed as not
// symmetric.
template<>
void DissectionSolver, double>::
SaveMMMatrix(const int called,
             const complex *coefs_)
{
  SaveMMMatrix_(_dim, _ptDA->nnz(),
                _ptDA->isWhole() ? false : _ptDA->isSymmetric(),
                _ptDA->isUpper(),
                _ptDA->getRows(), _ptDA->getIndCols(), called, coefs_);
}

// explicit instantiations of the generic SaveMMMatrix()
template
void DissectionSolver::
SaveMMMatrix(const int called,
             const quadruple *coefs_);
template
void DissectionSolver, quadruple>::
SaveMMMatrix(const int called,
             const complex *coefs_);
//

// Writes a real CSR matrix to the file "matrix.<called>.<pid>.data" in
// MatrixMarket coordinate format; <pid> comes from get_process_id().
//   dim, nnz        : dimension and entry count printed in the size line
//   isSymmetric     : selects "symmetric" or "general" in the header
//   isUpper         : when set, row and column index of every entry are swapped
//   ptrows, indcols : zero-based CSR row pointers and column indices
//   coefs_          : values, indexed like indcols
// Calls exit(-1) when the file cannot be opened.
void SaveMMMatrix_(const int dim,
                   const int nnz,
                   const bool isSymmetric,
                   const bool isUpper,
                   const int *ptrows,
                   const int *indcols,
                   const int called,
                   const double *coefs_)
{
  // MatrixMarket format : one-based index, lower part stored for symmetric
  char fname[256];
  int pid = get_process_id();
  FILE *fp;
  sprintf(fname, "matrix.%04d.%06d.data", called, pid);
  if ((fp = fopen(fname, "w")) != NULL) {
    // "%%%%" is printed as the two '%' characters that start the banner
    fprintf(fp, "%%%%MatrixMarket matrix coordinate real %s\n%d %d %d\n",
            isSymmetric?
            "symmetric" : "general",
            dim, dim, nnz);
    if (isUpper) {
      // entries are written as (column, row): the stored upper part is
      // emitted as the lower part
      for (int i = 0; i < dim; i++) {
        for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
          const int jj = indcols[k] + 1;
          fprintf(fp, "%d %d %.16e\n", jj, (i + 1), coefs_[k]);
        }
      }
    }
    else {
      for (int i = 0; i < dim; i++) {
        for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
          const int jj = indcols[k] + 1;
          fprintf(fp, "%d %d %.16e\n", (i + 1), jj, coefs_[k]);
        }
      }
    }
    fclose(fp);
  }
  else {
    fprintf(stderr,
            "%s %d : fail to open %s\n",
            __FILE__, __LINE__, fname);
    exit(-1);
  }
}

// Complex counterpart of the routine above: writes the matrix to
// "matrix.<called>.<pid>.data" in MatrixMarket coordinate format with a
// "complex" banner; every entry line carries the real and the imaginary part.
//   dim, nnz        : dimension and entry count printed in the size line
//   isSymmetric     : selects "symmetric" or "general" in the header
//   isUpper         : when set, row and column index of every entry are swapped
//   ptrows, indcols : zero-based CSR row pointers and column indices
//   coefs_          : values, indexed like indcols
// Calls exit(-1) when the file cannot be opened.
void SaveMMMatrix_(const int dim,
                   const int nnz,
                   const bool isSymmetric,
                   const bool isUpper,
                   const int *ptrows,
                   const int *indcols,
                   const int called,
                   const complex *coefs_)
{
  // MatrixMarket format : one-based index, lower part stored for symmetric
  char fname[256];
  int pid = get_process_id();
  FILE *fp;
  sprintf(fname, "matrix.%04d.%06d.data", called, pid);
  if ((fp = fopen(fname, "w")) != NULL) {
    fprintf(fp, "%%%%MatrixMarket matrix coordinate complex %s\n%d %d %d\n",
            isSymmetric? "symmetric" : "general",
            dim, dim, nnz);
    if (isUpper) {
      for (int i = 0; i < dim; i++) {
        for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
          const int jj = indcols[k] + 1;
          fprintf(fp, "%d %d %.16e %.16e\n", jj, (i + 1),
                  coefs_[k].real(), coefs_[k].imag());
        }
      }
    }
    else {
      for (int i = 0; i < dim; i++) {
        for (int k = ptrows[i]; k < ptrows[i + 1]; k++) {
          const int jj = indcols[k] + 1;
          fprintf(fp, "%d %d %.16e %.16e\n", (i + 1), jj,
                  coefs_[k].real(), coefs_[k].imag());
        }
      }
    }
    fclose(fp);
  }
  else {
    fprintf(stderr,
            "%s %d : fail to open %s\n",
            __FILE__, __LINE__, fname);
    exit(-1);
  }
}

// copy from higher precision than T and U : quadruple <- T = double
//
// Sets this solver up as a copy of the solver qdslv:
//  - scalar settings (_dim, _graph_colors, _scaling, _verbose, _num_threads,
//    _nsing, _fp) are taken over,
//  - _precDiag is converted entry by entry with conv_prec(),
//  - the sparse matrix, the tridiagonal / dissection block matrices, the
//    kernel data and the singular index lists are copied per component,
//  - for dissection components only generate_queue_fwbw() is called.
// On return _status_factorized is true and _with_btree is false.
// NOTE(review): the Dissection::Tree objects of qdslv are passed on as they
// are, so qdslv presumably has to outlive this object -- TODO confirm.
template
void DissectionSolver::
CopyQueueFwBw(DissectionSolver &qdslv)
{
  bool isSym = qdslv.ptDA()->isSymmetric();
  bool isUpper = qdslv.ptDA()->isUpper();
  bool isWhole = qdslv.ptDA()->isWhole();
  _dim = qdslv.dimension();
  _graph_colors = qdslv.graph_colors();
  _scaling = qdslv.scaling();
  _verbose = qdslv.verbose();
  _num_threads = qdslv.num_threads();
  _nsing = qdslv.nsing();
  _fp = qdslv.get_filedescriptor();
  bool verbose = qdslv.verbose();
  //  T *coefs;
  _dissectionQueue = new DissectionQueue*[_graph_colors];
  _tridiagQueue = new TridiagQueue*[_graph_colors];
  _dissectionMatrix = new vector* >[_graph_colors];
  _tridiagMatrix = new TridiagBlockMatrix*[_graph_colors];
  _Schur = new SchurMatrix[_graph_colors];
  _kernel = new KernelMatrix[_graph_colors];
  _singIdx = new vector[_graph_colors];
  for (int m = 0; m < _graph_colors; m++) {
    _Schur[m].getAcol() = new SparseMatrix(); // dummy allocation
    _Schur[m].getArow() = new SparseMatrix(); // dummy allocation
  }
  DissectionQueue** dQ = qdslv.getDissectionQueue();
  TridiagQueue** tQ = qdslv.getTridiagQueue();
  _precDiag = new U[_dim];
  for (int i = 0; i < _dim; i++) {
    _precDiag[i] = conv_prec(qdslv.addrPrecDiag()[i]);
  }
  _ptDA = new SparseMatrix(isSym, isUpper, isWhole);
  CopySparseMatrix(_ptDA, qdslv.ptDA());
  //  coefs = _ptDA->getCoef();
  for (int m = 0; m < _graph_colors; m++) {
    if (tQ[m]->tridiagSolver()) {
      // component handled by the tridiagonal solver: copy its block matrix
      // and build the queue from the index data held by the source queue
      const int dim = tQ[m]->dimension();
      _tridiagMatrix[m] = new TridiagBlockMatrix(dim, SIZE_B1, isSym,
                                                 0, // no other Tridiag
                                                 verbose, _fp);
      CopyTridiagBlockMatrix(*_tridiagMatrix[m],
                             *qdslv.getTridiagBlockMatrix()[m],
                             _ptDA->getCoef());
      _tridiagQueue[m] = new TridiagQueue(true, verbose, _fp);
      _tridiagQueue[m]->generate_queue(_tridiagMatrix[m],
                                       tQ[m]->dimension(),
                                       tQ[m]->nnz(),
                                       tQ[m]->isMapped(),
                                       tQ[m]->remap_eqn(),
                                       tQ[m]->ptRows(),
                                       tQ[m]->indCols(),
                                       tQ[m]->indVals(),
                                       _ptDA->getCoef());
    }
    else {  // if (tQ[m]->tridiagSolver())
      // component handled by dissection: the tree of the source solver is used
      Dissection::Tree* btree = qdslv.btree()[m];
      _tridiagQueue[m] = new TridiagQueue(false, verbose, _fp);
      const int nb_doms = btree->NumberOfSubdomains();
      _dissectionMatrix[m].resize(nb_doms);
      _dissectionQueue[m] = new DissectionQueue(btree,
                                                _dissectionMatrix[m],
                                                _num_threads,
                                                isSym,
                                                verbose,
                                                _fp);
      // it walks the source subdomain matrices, jt the matching new ones
      typename vector* >::const_iterator it =
        qdslv.getDissectionMatrix()[m].begin();
      typename vector* >::const_iterator jt =
        _dissectionMatrix[m].begin();
      for ( ; it != qdslv.getDissectionMatrix()[m].end(); ++it, ++jt) {
        if ((*it)->islast()) {
          int color = (*it)->ColorTridiagBlockMatrix();
          for (int i = 0; i < color; i++) {
            CopyTridiagBlockMatrix(*(*jt)->addrtridiagBlock()[i],
                                   *(*it)->addrtridiagBlock()[i],
                                   _ptDA->getCoef());
          } // loop : i
        }   // if ((*it)->islast())
        else {
          CopySquareBlockMatrix((*jt)->diagBlock(),
                                (*it)->diagBlock());
        }
        SquareBlockMatrix* diag = (*jt)->addrdiagBlock();
        RectBlockMatrix* lower = (*jt)->addrlowerBlock();
        // the lower block is copied only for unsymmetric matrices
        if (!isSym) {
          CopyRectBlockMatrix(*lower, (*it)->lowerBlock());
        }
        RectBlockMatrix* upper = (*jt)->addrupperBlock();
        CopyRectBlockMatrix(*upper, (*it)->upperBlock());
        CopyDissectionMatrix((*jt), (*it), diag, lower, upper);
      } // loop : it, jt
      _dissectionQueue[m]->generate_queue_fwbw(_dissectionMatrix[m],
                                               dQ[m]->dimension(),
                                               dQ[m]->nnz(),
                                               _ptDA->getCoef());
    } // if (tQ[m]->tridiagSolver())
    CopyKernelMatrix(_kernel[m], qdslv.getKernelMatrix()[m]);
    _singIdx[m] = qdslv.getSingVal()[m];
  } // loop : m
  //  qdslv.GetMatrixScaling(_precDiag);
  _index_isolated = qdslv.getIndexIsolated();
  _status_factorized = true;
  // the array is allocated only so that Destroy() can delete [] it;
  // with _with_btree == false its entries are never deleted
  _btree = new Dissection::Tree*[_graph_colors]; // dummy allocation
  _with_btree = false;
}

// explicit instantiations of CopyQueueFwBw()
template
void DissectionSolver::
CopyQueueFwBw(DissectionSolver &qdslv);
template
void DissectionSolver, double, complex, quadruple>::
CopyQueueFwBw(DissectionSolver, quadruple, complex, double> &qdslv);
template
void DissectionSolver::
CopyQueueFwBw(DissectionSolver &qdslv);
template
void DissectionSolver, quadruple, complex, double>::
CopyQueueFwBw(DissectionSolver, double, complex, quadruple> &qdslv);
template
void DissectionSolver::
CopyQueueFwBw(DissectionSolver &qdslv);
template
void DissectionSolver, float, complex, double>::
CopyQueueFwBw(DissectionSolver, double, complex, float> &qdslv);

// Symbolic factorization from CSR arrays with `int` indices: forwards to
// SymbolicFact_() with flagint64 == false, nz taken from ptRows[dim_] and
// NULL for the 64-bit index arrays.
template
void DissectionSolver::
SymbolicFact(const int dim_,
             const int *ptRows,
             const int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer_,
             const int nbLevels_,
             const int
             minNodes)
{
  SymbolicFact_(dim_, ptRows[dim_], false,
                ptRows, indCols,
                (long long int *)NULL, // const long long int *ptRows,
                (long long int *)NULL, // const long long int *indCols,
                isSym, isUpper, isWhole, decomposer_, nbLevels_, minNodes);
}

// explicit instantiations of SymbolicFact() for `int` index arrays
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const int *ptRows,
                                    const int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const int *ptRows,
                                    const int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver, double>::
SymbolicFact(const int dim_,
             const int *ptRows,
             const int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver, quadruple>::
SymbolicFact(const int dim_,
             const int *ptRows,
             const int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const int *ptRows,
                                    const int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const int *ptRows,
                                    const int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver, double, complex, quadruple>::
SymbolicFact(const int dim_,
             const int *ptRows,
             const int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver, quadruple, complex, double>::
SymbolicFact(const int dim_,
             const int *ptRows,
             const int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const int *ptRows,
                                    const int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver, float>::SymbolicFact(const int dim_,
                                            const int *ptRows,
                                            const int *indCols,
                                            const bool isSym,
                                            const bool isUpper,
                                            const bool isWhole,
                                            const int decomposer,
                                            const int nbLevels_,
                                            const int minNodes);
//

// Symbolic factorization from CSR arrays with `long long int` indices:
// forwards to SymbolicFact_() with flagint64 == true and NULL for the `int`
// index arrays.  The entry count ptRows[dim_] is narrowed to `int` by the
// cast below.
template
void DissectionSolver::
SymbolicFact(const int dim_,
             const long long int *ptRows,
             const long long int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer_,
             const int nbLevels_,
             const int minNodes)
{
  SymbolicFact_(dim_, (int)ptRows[dim_], true,
                (int *)NULL, // const int *ptRows,
                (int *)NULL, // const int *indCols,
                ptRows, indCols,
                isSym, isUpper, isWhole, decomposer_, nbLevels_, minNodes);
}

// explicit instantiations of SymbolicFact() for `long long int` index arrays
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const long long int *ptRows,
                                    const long long int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_,
                                    const long long int *ptRows,
                                    const long long int *indCols,
                                    const bool isSym,
                                    const bool isUpper,
                                    const bool isWhole,
                                    const int decomposer,
                                    const int nbLevels_,
                                    const int minNodes);
template
void DissectionSolver, double>::
SymbolicFact(const int dim_,
             const long long int *ptRows,
             const long long int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver, quadruple>::
SymbolicFact(const int dim_,
             const long long int *ptRows,
             const long long int *indCols,
             const bool isSym,
             const bool isUpper,
             const bool isWhole,
             const int decomposer,
             const int nbLevels_,
             const int minNodes);
template
void DissectionSolver::SymbolicFact(const int dim_, const long long int *ptRows, const long long int *indCols, const bool isSym, const bool isUpper, const bool isWhole, const int decomposer, const int nbLevels_, const int minNodes); template void DissectionSolver, float>::SymbolicFact(const int dim_, const long long int *ptRows, const long long int *indCols, const bool isSym, const bool isUpper, const bool isWhole, const int decomposer, const int nbLevels_, const int minNodes); // template void DissectionSolver:: SymbolicFact_(const int dim_, const int nz_, const bool flagint64, const int *ptRows, const int *indCols, const long long int *ptRows64, const long long int *indCols64, const bool isSym, const bool isUpper, const bool isWhole, const int decomposer_, const int nbLevels_, const int minNodes) { // decomposer = 0: SCOTCH, 1 : METIS, 2 : TRIDIAG(Cuthill-McKee) int dim = dim_; clock_t t0_cpu, t1_cpu, t2_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed; _dim = dim_; const int nz = nz_; int nbLevels, decomposer; bool flag, berr; int *map_eqn, *remap_eqn; int num_threads; t0_cpu = clock(); get_realtime(&t0_elapsed); if (flagint64) { _ptDA = new SparseMatrix(dim, nz, ptRows64, indCols64, isSym, isUpper, isWhole); } else { _ptDA = new SparseMatrix(dim, nz, ptRows, indCols, isSym, isUpper, isWhole); } diss_printf(_verbose, _fp, "%s %d : isSym = %s isUpper = %s isWhole = %s decomposer = %d\n", __FILE__, __LINE__, isSym ? "ture" : "false", isUpper ? "ture" : "false", isWhole ? 
"ture" : "false", decomposer_); switch(nbLevels_) { case 1 : diss_printf(_verbose, _fp, "%s %d : Dissection :: not activated, switch to tridiag\n", __FILE__, __LINE__); decomposer = TRIDIAG_DECOMPOSER; break; case (-1): nbLevels = (int)log2((double)dim_ / (double)minNodes); if (nbLevels < 2) { decomposer = TRIDIAG_DECOMPOSER; } else { decomposer = decomposer_; } break; default: nbLevels = nbLevels_; decomposer = decomposer_; break; } map_eqn = new int[dim]; // delete [] in DissectionSolver::Destroy(void) remap_eqn = new int[dim]; // delete [] in DissectionSolver::Destroy(void) for (int i = 0; i < dim; i++) { map_eqn[i] = i; remap_eqn[i] = i; } CSR_indirect *unsym_csr = new CSR_indirect; bool unsym_csr_alloc = false; // int *ptUnsymRows, *indUnsymCols, *indVals, *indSymVals; if (isSym) { if (isWhole) { unsym_csr->n = dim; unsym_csr->nnz = nz; unsym_csr->ptRows = _ptDA->getRows(); // (int *)ptRows; unsym_csr->indCols = _ptDA->getIndCols(); //(int *)indCols; unsym_csr->indVals = new int[nz]; unsym_csr->indVals_unsym = new int[nz]; // for safe use of delete [] } else { const int nnz0 = nz * 2 - dim; unsym_csr->n = dim; unsym_csr->nnz = nnz0; unsym_csr->ptRows = new int[dim + 1]; unsym_csr->indCols = new int[nnz0]; unsym_csr_alloc = true; unsym_csr->indVals = new int[nnz0]; unsym_csr->indVals_unsym = (int *)NULL; // for safe use of delete [] // only used as extend symmetric symbolic structure to unsymmetric int nnz1; nnz1 = CSR_sym2unsym(unsym_csr, _ptDA->getRows(), _ptDA->getIndCols(), map_eqn, remap_eqn, // dim, isUpper); if (nnz1 != nnz0) { fprintf(stderr, "%s %d : symmetric matrix has no diagonal entry %d != %d\n", __FILE__, __LINE__, nnz0, nnz1); exit(-1); } } } else { unsym_csr->n = dim; unsym_csr->nnz = nz; unsym_csr->indVals = new int[nz]; unsym_csr->indVals_unsym = new int[nz]; unsym_csr->ptRows = _ptDA->getRows(); //(int *)ptRows; unsym_csr->indCols = _ptDA->getIndCols(); //(int *)indCols; } int* graph_mask = new int[dim]; { _graph_colors = 
getColorMaskCSR(graph_mask, unsym_csr, _verbose, _fp); int count = 0; for (int i = 0; i < dim; i++) { if (graph_mask[i] == 0) { count++; } } _index_isolated.resize(count); count = 0; for (int i = 0; i < dim; i++) { if (graph_mask[i] == 0) { _index_isolated[count] = i; count++; } } } _btree = new Dissection::Tree*[_graph_colors]; _with_btree = true; _dissectionMatrix = new vector* >[_graph_colors]; _dissectionQueue = new DissectionQueue*[_graph_colors]; _tridiagMatrix = new TridiagBlockMatrix*[_graph_colors]; _tridiagQueue = new TridiagQueue*[_graph_colors]; // loop over each connected graph for (int m = 0; m < _graph_colors; m++) { bool flag_dissection = false; int i0, i1, i2; const int color = m + 1; int count = 0; int dim1, dim2; dim1 = 0; dim2 = 0; for (int i = 0; i < dim; i++) { if (graph_mask[i] == color) { dim1++; } else if (graph_mask[i] == (-color)) { dim2++; } } count = dim1 + dim2; i0 = 0; i1 = dim1; i2 = count; for (int i = 0; i < dim; i++) { if (graph_mask[i] == color) { remap_eqn[i0++] = i; // remap : new(mapped) to old(original) index } else if (graph_mask[i] == (-color)) { remap_eqn[i1++] = i; // remap : new(mapped) to old(original) index } else { remap_eqn[i2++] = i; } } for (int i = 0; i < dim; i++) { map_eqn[remap_eqn[i]] = i; // map : old(original) to new (mapped) index } if (isSym) { if (isWhole) { if (m == 0) { //reconnect to newly allocated array unsym_csr->ptRows = new int[dim + 1]; unsym_csr->indCols = new int[nz]; unsym_csr_alloc = true; } CSR_unsym2unsym(unsym_csr, _ptDA->getRows(), _ptDA->getIndCols(), map_eqn, remap_eqn, // dim, _verbose, _fp); } else { CSR_sym2unsym(unsym_csr, _ptDA->getRows(), _ptDA->getIndCols(), map_eqn, remap_eqn, // dim, isUpper, _verbose, _fp); } } else { if (m == 0) { //reconnect to newly allocated array unsym_csr->ptRows = new int[dim + 1]; unsym_csr->indCols = new int[nz]; unsym_csr_alloc = true; } CSR_unsym2unsym(unsym_csr, _ptDA->getRows(), _ptDA->getIndCols(), map_eqn, remap_eqn, // dim, _verbose, _fp); } 
flag = false; if (_graph_colors > 1) { const int level_tmp = (int)log2((double)count / (double)minNodes); nbLevels = nbLevels < level_tmp ? nbLevels : level_tmp; nbLevels = nbLevels < 2 ? 2 : nbLevels; // } diss_printf(_verbose, _fp, "%s %d : count = %d decomposer = %d\n", __FILE__, __LINE__, count, decomposer); if ((count > SIZE_TRIDIAG) && ((decomposer == SCOTCH_DECOMPOSER) || (decomposer == METIS_DECOMPOSER))) { flag_dissection = true; diss_printf(_verbose, _fp, "%s %d : %s applied to color %d neq = %d with %d levels\n", __FILE__, __LINE__, (decomposer == METIS_DECOMPOSER) ? "METIS" : "SCOTCH", color, count, nbLevels); while (flag == false) { _btree[m] = new Dissection::Tree(_fp, berr, (unsigned)count, unsym_csr, isSym, remap_eqn, nbLevels, minNodes, #ifdef NO_METIS NULL, #else ((decomposer == METIS_DECOMPOSER) ? MetisSplitter : NULL), #endif true, _verbose); diss_printf(_verbose, _fp, "%s %d : Dissection::Tree %s decomposition\n", __FILE__, __LINE__, (decomposer == METIS_DECOMPOSER) ? 
"METIS" : "SCOTCH"); flag = true; for (int d = 1; d <= _btree[m]->NumberOfSubdomains(); d++) { // if (_btree[m]->sizeOfDomain(d) == 0) { const int isd = _btree[m]->sizeOfDomain(d); if (isd <= DIM_AUG_KERN) { if (d > 1) { diss_printf(_verbose, _fp, "%s %d : Dissection:: %d-th too small nrow = %d\n", __FILE__, __LINE__, d, isd); } else { fprintf(stderr, "%s %d : too small first bisector %d not yet accepted\n", __FILE__, __LINE__, isd); exit(-1); } } // if (isd == 0) } // loop : d if (flag) { break; } else { if (berr == true) { delete _btree[m]; // for retry of graph partitioning } } } // while // _nbLevels = _btree[m]->NumberOfLevels(); diss_printf(_verbose, _fp, "%s %d : Tree strategy = 0 : nblevels = %d -> %d\n", __FILE__, __LINE__, nbLevels, _btree[m]->NumberOfLevels()); if ((_btree[m]->NumberOfLevels() == 1) || ((nbLevels - _btree[m]->NumberOfLevels()) > 3) || berr == false) { #ifdef NO_METIS flag_dissection = false; #else flag_dissection = true; diss_printf(_verbose, _fp, "%s %d :: retry partitioning by METIS : %d\n", __FILE__, __LINE__, nbLevels); nbLevels = nbLevels > 4 ? (nbLevels - 3) : 2; flag = false; while (flag == false) { _btree[m] = new Dissection::Tree(_fp, berr, (unsigned)count, unsym_csr, isSym, remap_eqn, nbLevels, minNodes, MetisSplitter, true, _verbose); diss_printf(_verbose, _fp, "%s %d :: Tree METIS decomposition **\n", __FILE__, __LINE__ ); flag = true; for (int d = 1; d <= _btree[m]->NumberOfSubdomains(); d++) { if (_btree[m]->sizeOfDomain(d) == 0) { flag = false; } } if (flag) { break; } else { delete _btree[m]; nbLevels--; } } // while (flag == flase) #endif } // if } // if ((count > SIZE_TRIDIAG) && ((decomposer == SCOTCH_DECOMPOSER)... if (flag_dissection) { bool verify_root = (_btree[m]->sizeOfDomain(1) <= SIZE_B1); int nb_doms = _btree[m]->NumberOfSubdomains(); _dissectionMatrix[m].resize(nb_doms); diss_printf(_verbose, _fp, "%s %d : ", __FILE__, __LINE__); diss_printf(_verbose, _fp, "num_threads = %d dim = %d dim_diss. 
= %d nbLevels = %d\n", _num_threads, _dim, count, _btree[m]->NumberOfLevels()); for (int d = 1; d <= _btree[m]->NumberOfSubdomains(); d++) { diss_printf(_verbose, _fp, "( %d % d) ", d, _btree[m]->sizeOfDomain(d)); } diss_printf(_verbose, _fp, "\n"); const int level_last = _btree[m]->NumberOfLevels(); const int nb_doms_dense0 = (1U << (level_last - 1)); if (_num_threads > nb_doms_dense0 || verify_root) { diss_printf(_verbose, _fp, "%s %d : ", __FILE__, __LINE__); diss_printf(_verbose, _fp, "thread number reduced : %d -> %d\n", _num_threads, 1); diss_printf(_verbose, _fp, "which is used as number of threads for computation\n"); // num_threads = nb_doms_dense0; num_threads = 1; } else { num_threads = _num_threads; } _dissectionQueue[m] = new DissectionQueue(_btree[m], _dissectionMatrix[m], num_threads, isSym, _verbose, _fp); // generation of queue for numerical factorization is treated as a part of // symbolic factorization : _queue_symb, {_queue_static, _queue_dynamic} _dissectionQueue[m]->generate_queue(_dissectionMatrix[m], _ptDA->nnz(), _ptDA->getCoef()); _dissectionQueue[m]->generate_queue_fwbw(_dissectionMatrix[m], _ptDA->dimension(), _ptDA->nnz(), _ptDA->getCoef()); _tridiagQueue[m] = new TridiagQueue(false, _verbose, _fp); } // if (flag_dissection) else { diss_printf(_verbose, _fp, "%s %d : TridiagSolver : m = %d count = %d\n", __FILE__, __LINE__, m, count); const int nnz = unsym_csr->ptRows[count]; bool isMapped = ((_graph_colors > 1) || (_index_isolated.size() > 0)); _tridiagMatrix[m] = new TridiagBlockMatrix(count, SIZE_B1, isSym, 0, // no other Tridiag _verbose, _fp); _tridiagQueue[m] = new TridiagQueue(true, _verbose, _fp); _tridiagQueue[m]->generate_queue(_tridiagMatrix[m], count, nnz, isMapped, remap_eqn, unsym_csr->ptRows, unsym_csr->indCols, unsym_csr->indVals, _ptDA->getCoef()); } } // loop : m if (unsym_csr_alloc) { delete [] unsym_csr->ptRows; delete [] unsym_csr->indCols; } delete [] unsym_csr->indVals; delete [] unsym_csr->indVals_unsym; // isSym 
== (int *)NULL delete unsym_csr; delete [] map_eqn; delete [] remap_eqn; delete [] graph_mask; // added 01 Oct.2013 Atsushi t1_cpu = clock(); get_realtime(&t1_elapsed); for (int m = 0; m < _graph_colors; m++) { if (_tridiagQueue[m]->tridiagSolver()) { _tridiagQueue[m]->exec_symb_fact(); } else { _dissectionQueue[m]->exec_symb_fact(); } } _precDiag = new U[_dim]; // preparation for numerical fact. _Schur = new SchurMatrix[_graph_colors]; _kernel = new KernelMatrix[_graph_colors]; _singIdx = new vector[_graph_colors]; _status_factorized = false; t2_cpu = clock(); get_realtime(&t2_elapsed); diss_printf(_verbose, _fp, "graph paritioner. : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); diss_printf(_verbose, _fp, "queue symb. fact. : cpu time = %.4e elapsed time = %.4e\n", (double)(t2_cpu - t1_cpu) / (double)CLOCKS_PER_SEC, convert_time(t2_elapsed, t1_elapsed)); } template void DissectionSolver:: NumericFact(const int called, T *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision) { clock_t t0_cpu, t1_cpu, t2_cpu, t3_cpu; elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed; const U eps_machine = machine_eps_ < 0.0 ? 
machine_epsilon() : U(machine_eps_); t0_cpu = clock(); _assume_invertible = assume_invertible; _dim_augkern = dim_augkern; get_realtime(&t0_elapsed); diss_printf(_verbose, _fp, "%s %d : eps_machine = %s scaling = %d\n", __FILE__, __LINE__, tostring(eps_machine).c_str(), scaling); _scaling = scaling; // _ptDA->normalize(_scaling, coefs, _precDiag); normalize(_scaling, coefs, _ptDA, _precDiag); t1_cpu = clock(); get_realtime(&t1_elapsed); for (int m = 0; m < _graph_colors; m++) { if (_tridiagQueue[m]->tridiagSolver()) { _tridiagQueue[m]->exec_num_fact(called, eps_pivot, true, // matrix will not be decomposed dim_augkern, eps_machine, higher_precision); } else { _dissectionQueue[m]->exec_num_fact(called, eps_pivot, kernel_detection_all, dim_augkern, eps_machine, higher_precision); } } t2_cpu = clock(); get_realtime(&t2_elapsed); // dense {0 [1 2] [3 4 5 6] .... } + sparse {[2^(_nbLevels-1)-1... ]} int sz_total = 0; VectorArray work(_dim); for (int m = 0; m < _graph_colors; m++) { if (_tridiagQueue[m]->tridiagSolver()) { const int nrow = _tridiagMatrix[m]->nrow(); const int nn0 = nrow - _tridiagMatrix[m]->rank(); if (nn0 > 0) { _tridiagMatrix[m]->SingularNode(_kernel[m].getKernListEq()); // _ptDA_kern_list_eq[m]); _kernel[m].set_dimension(nn0); _kernel[m].getKernProj().init(nn0); //_ptDA_kern_proj[m].init(nn0); _tridiagMatrix[m]->KernelBasis(false, _kernel[m].getKernBasis()); //_ptDA_kern_basis[m]); // isTrans if (_tridiagQueue[m]->isMapped()) { int *remap_eqn = _tridiagQueue[m]->remap_eqn(); ColumnMatrix kernel_tmp; kernel_tmp.init(_dim, nn0); for (int n = 0; n < nn0; n++) { for (int i = 0; i < _dim; i++) { kernel_tmp(i, n) = _zero; } for (int i = 0; i < nrow; i++) { kernel_tmp(remap_eqn[i], n) = _kernel[m].getKernBasis()(i, n); } } _kernel[m].getKernBasis().free(); _kernel[m].getKernBasis().init(_dim, nn0); for (int n = 0; n < nn0; n++) { for (int i = 0; i < _dim; i++) { _kernel[m].getKernBasis()(i, n) = kernel_tmp(i, n); } } kernel_tmp.free(); } // if 
(_tridiagQueue[m]->isMapped()) T *kern_basis = _kernel[m].getKernBasis().addrCoefs(); //_ptDA_kern_basis[m].addrCoefs(); diss_printf(_verbose, _fp, "%s %d : residual of kernel vertors : %d\n", __FILE__, __LINE__, nn0); for (int i = 0; i < nn0; i++) { _ptDA->prod((kern_basis + (i * _dim)), work.addrCoefs()); U stmp = blas_l2norm(_dim, work.addrCoefs(), 1); diss_printf(_verbose, _fp, "%d %s\n", i, tostring(stmp).c_str()); } if(_scaling) { for (int j = 0; j < nn0; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kern_basis[i + jtmp] *= _precDiag[i]; } } } _kernel[m].getTKernBasis().init(_dim, nn0); // normalize each kernel_basis for (int j = 0; j < nn0; j++) { U stmp(0.0); U one(1.0); stmp = blas_l2norm(_dim, kern_basis + (j * _dim), 1); stmp = one / stmp; blas_scal2(_dim, stmp, (kern_basis + (j * _dim)), 1); } for (int i = 0; i < nn0; i++) { for(int j = i; j < nn0; j++) { // _ptDA_kern_proj[m](i, j) = _kernel[m].getKernProj()(i,j) = blas_dot(_dim, (kern_basis + (i * _dim)), 1, (kern_basis + (j * _dim)), 1); } // symmetrize for (int j = (i + 1); j < nn0; j++) { _kernel[m].getKernProj()(j,i) = _kernel[m].getKernProj()(i,j); // _ptDA_kern_proj[m](j, i) = _ptDA_kern_proj[m](i, j); } } full_ldlh(nn0, _kernel[m].getKernProj().addrCoefs(), nn0); if (!_tridiagMatrix[m]->isSym()) { T *kernt_basis = _kernel[m].getTKernBasis().addrCoefs(); _tridiagMatrix[m]->KernelBasis(true, _kernel[m].getTKernBasis()); if (_tridiagQueue[m]->isMapped()) { int *remap_eqn = _tridiagQueue[m]->remap_eqn(); ColumnMatrix kernel_tmp; kernel_tmp.init(_dim, nn0); for (int n = 0; n < nn0; n++) { for (int i = 0; i < _dim; i++) { kernel_tmp(i, n) = _zero; } for (int i = 0; i < nrow; i++) { kernel_tmp(remap_eqn[i], n) = _kernel[m].getTKernBasis()(i, n); } } _kernel[m].getTKernBasis().free(); _kernel[m].getTKernBasis().init(_dim, nn0); for (int n = 0; n < nn0; n++) { for (int i = 0; i < _dim; i++) { _kernel[m].getTKernBasis()(i, n) = kernel_tmp(i, n); } } kernel_tmp.free(); } // if 
(_tridiagQueue[m]->isMapped()) diss_printf(_verbose, _fp, "%s %d : residual of transposed kernel vertors : %d\n", __FILE__, __LINE__, nn0); for (int i = 0; i < nn0; i++) { _ptDA->prodt((kernt_basis + (i * _dim)), work.addrCoefs()); U stmp = blas_l2norm(_dim, work.addrCoefs(), 1); diss_printf(_verbose, _fp, "%d %s\n", i, tostring(stmp).c_str()); } if(_scaling) { for (int j = 0; j < nn0; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kernt_basis[i + jtmp] *= _precDiag[i]; } } } } // if (!_tridiagMatrix[m]->isSym()) } // if (nn0 > 0) else { _kernel[m].set_dimension(0); _kernel[m].getKernProj().init(0); // _ptDA_kern_proj[m].init(0); // = new SquareMatrix(); _kernel[m].getKernBasis().init(0, 0); // = new ColumnMatrix(); _kernel[m].getKernListEq().clear(); _kernel[m].getKernListEqLeft().clear(); } _singIdx[m].resize(0); _Schur[m].getAcol() = new SparseMatrix(); // dummy allocation _Schur[m].getArow() = new SparseMatrix(); // dummy allocation // if (_status_factorized) { // for safty of second call _Schur[m].getSlduList().clear(); // _Schur[m].getSlduListLeft().clear(); // _Schur[m].getSldu().free(); } // if (_tridiagQueue[m]->tridiagSolver()) else { int sz = 0, sz1 = 0; bool kernel_detected = true; vector kernel_found; // int id_dom = 1; for (int j = 0; j < _dissectionMatrix[m].size(); j++ ) { vector& singIdx = _dissectionMatrix[m][j]->singIdxPermute(); if (singIdx.size() > 0) { diss_printf(_verbose, _fp, "%s %d : ", __FILE__, __LINE__); diss_printf(_verbose, _fp, "%d : %s : %d :: %d [ ", (j + 1), // selfIndex() == j _dissectionMatrix[m][j]->KernelDetected() ? 
"true" : "false", _dissectionMatrix[m][j]->nrow(), (int)singIdx.size()); vector::const_iterator it = singIdx.begin(); for ( ;it != singIdx.end(); ++it) { diss_printf(_verbose, _fp, "%d ", *it); } diss_printf(_verbose, _fp, "]\n"); } // if (singIdx.size() > 0) if (singIdx.size() > 0) { // id_dom = j + 1; kernel_found.push_back(j); sz += singIdx.size(); if (!_dissectionMatrix[m][j]->KernelDetected()) { //if (true) { // 29 Oct.2014 debug sz1 += singIdx.size(); diss_printf(_verbose, _fp, "%s %d : ", __FILE__, __LINE__); diss_printf(_verbose, _fp, "domain %d kernel is not detected\n", j); kernel_detected = false; } } } // loop : j sz_total += sz; if (sz > 0) { if (kernel_detected && (!_assume_invertible)) { _singIdx[m].resize(sz); int k = 0; for (int j = 0; j < _dissectionMatrix[m].size(); j++ ) { const int *loc2glob = _btree[m]->getDiagLoc2Glob(j + 1); for (vector::const_iterator it = _dissectionMatrix[m][j]->singIdxPermute().begin(); it != _dissectionMatrix[m][j]->singIdxPermute().end(); ++it, k++) { _singIdx[m][k] = loc2glob[*it]; } // loop : d } #if 0 if (_verbose) { std::sort(_singIdx[m].begin(), _singIdx[m].end()); char fname[256]; FILE *fp; int pid = get_process_id(); sprintf(fname, "singularindex.%06d.data", pid); if ((fp = fopen(fname, "w")) != NULL) { fprintf(fp, "# %d \n", (int)_singIdx[m].size()); for (int i = 0; i < _singIdx[m].size(); i++) { fprintf(fp, "%d\n", _singIdx[m][i]); } fclose(fp); } else { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, fname); exit(-1); } } // if (_verbose) BuildKernels(_singIdx[m], sz, _Schur[m], _kernel[m]); #else vector* > diags; // dummy vector >augkern_indexes; // dummy BuildKernelsDetection(sz, _singIdx[m], augkern_indexes, diags, eps_pivot, dim_augkern, _Schur[m], _kernel[m], false); #endif // need to copy kern basis projection matrix from global to m-th array } // if (kernel_detected) else { diss_printf(_verbose, _fp, "%s %d : candidates of null pivots = %d, to be fixed\n", __FILE__, __LINE__, sz); 
_singIdx[m].resize(sz + dim_augkern); //= vector int k = 0; for (int d = 1; d <= _btree[m]->NumberOfSubdomains(); d++) { const int j = _btree[m]->selfIndex(d); if (_dissectionMatrix[m][j]->KernelDetected() && (_dissectionMatrix[m][j]->diagBlock().dim_kern() > 0)) { _dissectionMatrix[m][j]->diagBlock().set_KernelDetected(false); } const int *loc2glob = _btree[m]->getDiagLoc2Glob(d); for (vector::const_iterator it = _dissectionMatrix[m][j]->singIdxPermute().begin(); it != _dissectionMatrix[m][j]->singIdxPermute().end(); ++it, k++) { _singIdx[m][k] = loc2glob[*it]; } } // loop : d // selecting dim_augkern indices for comparison in the kernel detection diss_printf(_verbose, _fp, "%s %d : size = %d : ", __FILE__, __LINE__, (int)_singIdx[m].size()); for (int i = 0; i < _singIdx[m].size(); i++) { diss_printf(_verbose, _fp, "%d ", _singIdx[m][i]); } diss_printf(_verbose, _fp, "\n"); vector* > diags; vector >augkern_indexes; int nn = 1; // level of dense part with 1 indexing k = 0; while (nn <= dim_augkern) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(nn)]->diagBlock(); const int *loc2glob = _btree[m]->getDiagLoc2Glob(nn); vector &permute = diag->getPermute(); vector augkern_index; // augkern_index.resize(dim_augkern); int n = diag->dimension() - 1; //diag.dimension() == permute.size() int k0 = 0; while ((k < dim_augkern) && (n >= 0)) { const int itmp = loc2glob[permute[n]]; bool flag = true; for (int i = 0; i < sz; i++) { if(_singIdx[m][i] == itmp) { flag = false; break; } } if (flag) { _singIdx[m][sz + k0] = itmp; augkern_index.push_back(n); k0++; k++; } n--; } // while diags.push_back(diag); augkern_indexes.push_back(augkern_index); if (k == dim_augkern) { break; } nn++; // } // while (nn <= dim_augkern) diss_printf(_verbose, _fp, "%s %d : adding local node using %d diag matrices ", __FILE__, __LINE__, (int)augkern_indexes.size()); for (vector >::const_iterator it = augkern_indexes.begin(); it != augkern_indexes.end(); ++it) { for 
(vector::const_iterator jt = (*it).begin(); jt != (*it).end(); ++jt) { diss_printf(_verbose, _fp, "%d ", (*jt)); } } diss_printf(_verbose, _fp, "to the last level.\n"); diss_printf(_verbose, _fp, "%s %d : singIdx = %d : ", __FILE__, __LINE__, (int)_singIdx[m].size()); for (int i = 0; i < _singIdx[m].size(); i++) { diss_printf(_verbose, _fp, "%d ", _singIdx[m][i]); } diss_printf(_verbose, _fp, "\n"); std::sort(_singIdx[m].begin(), _singIdx[m].end()); diss_printf(_verbose, _fp, "%s %d : singIdx sorted = %d : ", __FILE__, __LINE__, (int)_singIdx[m].size()); for (int i = 0; i < _singIdx[m].size(); i++) { diss_printf(_verbose,_fp, "%d ", _singIdx[m][i]); } diss_printf(_verbose, _fp, "\n"); int kern_dim = 0; BuildKernelsDetection(kern_dim, _singIdx[m], augkern_indexes, diags, eps_pivot, dim_augkern, _Schur[m], _kernel[m], true); if (kern_dim == (-1)) { int pid = get_process_id(); SaveCSRMatrix(_called, coefs); fprintf(stderr, "%s %d : DissectionSolver CSR matrix dumped : %d %d\n", __FILE__, __LINE__, _called, pid); exit(-1); } } // if (kernel_detected) } // if (sz > 0) else { _singIdx[m].resize(0); _Schur[m].getAcol() = new SparseMatrix(); // dummy allocation _Schur[m].getArow() = new SparseMatrix(); // dummy allocation // _ptDA_arow[m] = new SparseMatrix(true); _kernel[m].set_dimension(0); _kernel[m].getKernProj().init(0); // _ptDA_kern_proj[m].init(0); // _kernel[m].getKernBasis().init(0, 0); // _kernel[m].getKernListEq().clear(); _kernel[m].getKernListEqLeft().clear(); // if (_status_factorized) { // for safty of second call _Schur[m].getSlduList().clear(); // _ptDA_sldu_list[m].clear(); _Schur[m].getSlduListLeft().clear(); // _ptDA_sldu_list[m].clear(); _Schur[m].getSldu().free(); // _ptDA_sldu[m].free(); } } // if (_tridiagQueue[m]->tridiagSolver()) } // loop : _graph_colors work.free(); { int itmp = 0; for (int m = 0; m < _graph_colors; m++) { itmp += _singIdx[m].size(); } _nsing = itmp; // keep total dimension of kernel of singluar blocks } t3_cpu = clock(); 
get_realtime(&t3_elapsed); //clock_gettime(CLOCK_REALTIME, &ts3); diss_printf(_verbose, _fp, "queue num. fact. : cpu time = %.4e elapsed time = %.4e\n", (double)(t2_cpu - t1_cpu) / (double)CLOCKS_PER_SEC, convert_time(t2_elapsed, t1_elapsed)); diss_printf(_verbose, _fp, "total num. fact. : cpu time = %.4e elapsed time = %.4e\n", (double)(t3_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t2_elapsed, t0_elapsed)); diss_printf(_verbose, _fp, "scaling matrix : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); if (sz_total > 0) { diss_printf(_verbose, _fp, "kernel vec. gen : cpu time = %.4e elapsed time = %.4e\n", (double)(t3_cpu - t2_cpu) / (double)CLOCKS_PER_SEC, convert_time(t3_elapsed, t2_elapsed)); } // (sz_total > 0) { // check error // this should be a function with specialized template if (_verbose) { int count = 0; for (int m = 0; m < _graph_colors; m++) { diss_printf(_verbose, _fp, "%s %d :negative diagnol entries : color = %d\n", __FILE__, __LINE__, m); if (_tridiagQueue[m]->tridiagSolver()) { int count0; count0 = _tridiagMatrix[m]->NumNegativeDiags(); if (count0 > 0) { diss_printf(_verbose, _fp, "%d / %d\n", count0, _tridiagMatrix[m]->nrow()); } count += count0; } else { int level; level = (_btree[m]->NumberOfLevels() - 1); const unsigned begdom = 1U << level; const unsigned enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int d0 = _btree[m]->selfIndex(d); // const void *diagsparse = _dissectionMatrix[m][d0]->diagSparse(); TridiagBlockMatrix **tridiag = _dissectionMatrix[m][d0]->addrtridiagBlock(); int colors = _dissectionMatrix[m][d0]->ColorTridiagBlockMatrix(); int count0 = 0; for (int i = 0; i < colors; i++) { count0 += tridiag[i]->NumNegativeDiags(); } if (count0 > 0) { diss_printf(_verbose, _fp, "%d : %d / %d\n", d0, count0, _dissectionMatrix[m][d0]->nrow()); } count += count0; } for (int level = (_btree[m]->NumberOfLevels() - 2); level >= 0; 
level--) { const unsigned begdom = 1U << level; const unsigned enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int d0 = _btree[m]->selfIndex(d); int count0; SquareBlockMatrix& diag = _dissectionMatrix[m][d0]->diagBlock(); count0 = count_diag_negative(diag); if (count0 > 0) { diss_printf(_verbose, _fp, "%d : %d / %d\n", d0, count0, _dissectionMatrix[m][d0]->nrow()); } count += count0; } } } // (_tridiagQueue[m]->tridiagSolver()) { { int count0; SubSquareMatrix& diag = _Schur[m].getSldu(); count0 = count_diag_negative(diag); if (count0 > 0) { diss_printf(_verbose, _fp, "-1 : %d / %d\n", count0, diag.dimension()); } count += count0; } } // loop : m if (count > 0) { diss_printf(_verbose, _fp, "%s %d :negative diagonal entries = %d / %d\n", __FILE__, __LINE__, count, _dim); } } // if (_verbose) // bool kernel_flag = false; for (int m = 0; m < _graph_colors; m++) { if (_kernel[m].dimension() != 0) { kernel_flag = true; break; } } #if 0 if (kernel_flag && (!_ptDA->isSymmetric())) { ComputeTransposedKernels(); } #endif if(_verbose) { int n1 = 0; for (int m = 0; m < _graph_colors; m++) { n1 += _Schur[m].getSlduList().size(); } VectorArray xx(_dim); VectorArray yy(_dim); VectorArray ww(_dim); for (int i = 0; i < _dim; i++) { ww[i] = T(2.0 * ((double)rand() / (double)RAND_MAX) - 1.0); // ww[i] = T((double)(i % 11)); } VectorArray bb(_dim); bb.ZeroClear(); diss_printf(_verbose, _fp, "%s %d : compute error in (Ker A)^\\perp\n", __FILE__, __LINE__); SpMV(ww.addrCoefs(), xx.addrCoefs(), true); // isScaling = true if (kernel_flag) { ProjectionKernelOrthSingle(xx.addrCoefs(), "creating solution", false); } SpMV(xx.addrCoefs(), bb.addrCoefs(), true); // isScaling = true if (kernel_flag) { ProjectionKernelOrthSingle(bb.addrCoefs(), "RHS", true); // ProjectionImageSingle(bb.addrCoefs(), "RHS", false); } blas_copy(_dim, bb.addrCoefs(), 1, yy.addrCoefs(), 1); SolveSingle(yy.addrCoefs(), false, false, true); // isScaling = true if (kernel_flag) { 
ProjectionKernelOrthSingle(yy.addrCoefs(), "computed solution", false); } SpMV(yy.addrCoefs(), ww.addrCoefs(), true); U norm0, norm1; norm0 = blas_l2norm(_dim, xx.addrCoefs(), 1); blas_axpy(_dim, _none, xx.addrCoefs(), 1, yy.addrCoefs(), 1); norm1 = blas_l2norm(_dim, yy.addrCoefs(), 1); diss_printf(_verbose, _fp, "%s %d : error = %s / %s = %s\n", __FILE__, __LINE__, tostring(norm1).c_str(), tostring(norm0).c_str(), tostring(norm1 / norm0).c_str()); if (todouble(norm1 / norm0) > 1.0e-5) { int pid = get_process_id(); int nnz = _ptDA->nnz(); diss_printf(_verbose, stderr, "%s %d : too large error = %s / %s = %s\n", __FILE__, __LINE__, tostring(norm1).c_str(), tostring(norm0).c_str(), tostring(norm1 / norm0).c_str()); // SaveMMMatrix(_called, coefs); // _status_factorized = false; // return; } norm0 = blas_l2norm(_dim, bb.addrCoefs(), 1); blas_axpy(_dim, _none, bb.addrCoefs(), 1, ww.addrCoefs(), 1); norm1 = blas_l2norm(_dim, ww.addrCoefs(), 1); diss_printf(_verbose, _fp, "%s %d : residual = %s / %s = %s\n", __FILE__, __LINE__, tostring(norm1).c_str(), tostring(norm0).c_str(), tostring(norm1 / norm0).c_str()); } // if (_verbose) _status_factorized = true; } template void DissectionSolver::NumericFact(const int called, double *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver, double>:: NumericFact(const int called, complex *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver::NumericFact(const int called, quadruple *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void 
DissectionSolver, quadruple>:: NumericFact(const int called, complex *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver:: NumericFact(const int called, double *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver, double, complex, quadruple>:: NumericFact(const int called, complex *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver:: NumericFact(const int called, quadruple *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver, quadruple, complex, double>:: NumericFact(const int called, complex *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver::NumericFact(const int called, float *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); template void DissectionSolver, float>:: NumericFact(const int called, complex *coefs, const int scaling, const double eps_pivot, const bool kernel_detection_all, const int dim_augkern, const double machine_eps_, const bool assume_invertible, const bool higher_precision); // template void DissectionSolver:: GetNullPivotIndices(int *pivots) { int i0 = 0; 
for (int m = 0; m < _graph_colors; m++) { const int n0 = _kernel[m].dimension(); //_ptDA_kern_proj[m].dimension(); // should be optimized : replaced by memcopy()? for (int i = 0; i < n0; i++, i0++) { pivots[i0] = _kernel[m].getKernListEq()[i]; //_ptDA_kern_list_eq[m][i]; } } // loop : _graph_colors : m } template void DissectionSolver::GetNullPivotIndices(int *pivots); template void DissectionSolver::GetNullPivotIndices(int *pivots); template void DissectionSolver, double>::GetNullPivotIndices(int *pivots); template void DissectionSolver, quadruple>::GetNullPivotIndices(int *pivots); template void DissectionSolver::GetNullPivotIndices(int *pivots); template void DissectionSolver::GetNullPivotIndices(int *pivots); template void DissectionSolver::GetNullPivotIndices(int *pivots); template void DissectionSolver, float>::GetNullPivotIndices(int *pivots); template void DissectionSolver::GetNullPivotIndices(int *pivots); // template int DissectionSolver:: GetMaxColors() { int nn = 0; for (int m = 0; m < _graph_colors; m++) { if (!_tridiagQueue[m]->tridiagSolver()) { nn++; } } return nn; } template int DissectionSolver::GetMaxColors(); template int DissectionSolver::GetMaxColors(); template int DissectionSolver, double>::GetMaxColors(); template int DissectionSolver, quadruple>::GetMaxColors(); template int DissectionSolver::GetMaxColors(); template int DissectionSolver::GetMaxColors(); template int DissectionSolver::GetMaxColors(); template int DissectionSolver, float>::GetMaxColors(); // template void DissectionSolver:: GetSmallestPivotIndices(const int n, int *pivots) { int nn = 0; for (int m = 0; m < _graph_colors; m++) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); const int *loc2glob = _btree[m]->getDiagLoc2Glob(1); vector &permute = diag->getPermute(); int ndiag = diag->dimension(); if (n > ndiag) { diss_printf(_verbose, _fp, "%s %d : GetSmallestPivotIndices %d > %d\n", __FILE__, 
__LINE__, n, ndiag); } for (int k = (ndiag - 1); k >= (ndiag - n); k--, nn++) { pivots[nn] = loc2glob[permute[k]]; } } } // loop : m } template void DissectionSolver::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver, double>::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver, quadruple>::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver::GetSmallestPivotIndices(const int n, int *pivots); template void DissectionSolver, float>::GetSmallestPivotIndices(const int n, int *pivots); // template void DissectionSolver:: GetKernelVectors(T *kern_basis) { int ii = 0; for (int m = 0; m < _graph_colors; m++) { const int n0 = _kernel[m].dimension(); //_ptDA_kern_proj[m].dimension(); // should be optimized : replaced by memcopy()? 
for (int j = 0; j < n0; j++) { for (int i = 0; i < _dim; i++, ii++) { kern_basis[ii] = _kernel[m].getKernBasis().addrCoefs()[ii]; } } } // loop : _graph_colors : m } template void DissectionSolver::GetKernelVectors(double *kern_basis); template void DissectionSolver::GetKernelVectors(quadruple *kern_basis); template void DissectionSolver, double>::GetKernelVectors(complex *kern_basis); template void DissectionSolver, quadruple>::GetKernelVectors(complex *kern_basis); template void DissectionSolver::GetKernelVectors(double *kern_basis); template void DissectionSolver::GetKernelVectors(quadruple *kern_basis); template void DissectionSolver::GetKernelVectors(float *kern_basis); template void DissectionSolver, float>::GetKernelVectors(complex *kern_basis); // template void DissectionSolver:: GetTransKernelVectors(T *kernt_basis) { int ii = 0; for (int m = 0; m < _graph_colors; m++) { const int n0 = _kernel[m].dimension(); //_ptDA_kern_proj[m].dimension(); // should be optimized : replaced by memcopy()? 
for (int j = 0; j < n0; j++) { for (int i = 0; i < _dim; i++, ii++) { kernt_basis[ii] = _kernel[m].getTKernBasis().addrCoefs()[ii]; } } } // loop : _graph_colors : m } template void DissectionSolver::GetTransKernelVectors(double *kernt_basis); template void DissectionSolver::GetTransKernelVectors(quadruple *kernt_basis); template void DissectionSolver, double>::GetTransKernelVectors(complex *kernt_basis); template void DissectionSolver, quadruple>::GetTransKernelVectors(complex *kernt_basis); template void DissectionSolver::GetTransKernelVectors(double *kernt_basis); template void DissectionSolver::GetTransKernelVectors(quadruple *kernt_basis); template void DissectionSolver::GetTransKernelVectors(float *kernt_basis); template void DissectionSolver, float>::GetTransKernelVectors(complex *kernt_basis); // template void DissectionSolver:: GetMatrixScaling(Z *weight) { for (int i = 0; i < _dim; i++) { weight[i] = _precDiag[i]; } } template void DissectionSolver::GetMatrixScaling(double *weight); template void DissectionSolver, double>:: GetMatrixScaling(double *weight); template void DissectionSolver::GetMatrixScaling(quadruple *weight); template void DissectionSolver, quadruple>:: GetMatrixScaling(quadruple *weight); template void DissectionSolver::GetMatrixScaling(float *weight); template void DissectionSolver, float>:: GetMatrixScaling(float *weight); // template void DissectionSolver:: ProjectionImageSingle(T *x, string name) { for (int m = 0; m < _graph_colors; m++) { T *kern_basis0 = _kernel[m].getKernBasis().addrCoefs(); T *kern_basis1 = (_ptDA->isSymmetric() ? _kernel[m].getKernBasis().addrCoefs() : _kernel[m].getTKernBasis().addrCoefs()); T *kern_proj = (_ptDA->isSymmetric() ? 
_kernel[m].getKernProj().addrCoefs() : _kernel[m].getNTKernProj().addrCoefs()); const int n0 = _kernel[m].dimension(); VectorArray xx(n0); for (int j = 0; j < n0; j++) { xx[j] = blas_dot(_dim, (kern_basis1 + j * _dim), 1, x, 1); } diss_printf(_verbose, _fp, "%s %d : %s : check orthogonality of the given vector \n", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); blas_trsv(CblasLower, CblasNoTrans, CblasUnit, n0, kern_proj, n0, xx.addrCoefs(), 1); { int itmp = 0; for (int j = 0; j < n0; j++, itmp += (n0 + 1)) { xx[j] *= kern_proj[itmp]; } } blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, n0, kern_proj, n0, xx.addrCoefs(), 1); diss_printf(_verbose, _fp, "%s %d : %s : solution of kernel adjustment : ", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); for (int i = 0; i < _dim; i++) { for (int j = 0; j < n0; j++) { x[i] -= xx[j] * kern_basis0[i + j * _dim]; } } for (int j = 0; j < n0; j++) { xx[j] = blas_dot(_dim, (kern_basis1 + j * _dim), 1, x, 1); } diss_printf(_verbose, _fp, "%s %d : %s : after projection, component of each kernel \n", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); xx.free(); } // loop : _graph_colors : m } template void DissectionSolver::ProjectionImageSingle(double *x, string name); template void DissectionSolver::ProjectionImageSingle(quadruple *x, string name); template void DissectionSolver, double>:: ProjectionImageSingle(complex *x, string name); template void DissectionSolver, quadruple>:: ProjectionImageSingle(complex *x, string name); template void DissectionSolver::ProjectionImageSingle(float *x, string name); template void DissectionSolver, float>:: ProjectionImageSingle(complex *x, string name); template void 
DissectionSolver::ProjectionImageSingle(double *x, string name); template void DissectionSolver::ProjectionImageSingle(quadruple *x, string name); // template void DissectionSolver:: ProjectionKernelOrthSingle(T *x, string name, bool isTrans) { for (int m = 0; m < _graph_colors; m++) { const bool flag_trans = isTrans && (!_ptDA->isSymmetric()); T *kern_basis = flag_trans ? _kernel[m].getTKernBasis().addrCoefs() : _kernel[m].getKernBasis().addrCoefs(); T *kern_proj = flag_trans ? _kernel[m].getTKernProj().addrCoefs() : _kernel[m].getKernProj().addrCoefs(); const int n0 = _kernel[m].dimension(); VectorArray xx(n0); for (int j = 0; j < n0; j++) { xx[j] = blas_dot(_dim, (kern_basis + j * _dim), 1, x, 1); } diss_printf(_verbose, _fp, "%s %d : %s : check orthogonality of the given vector\n", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); blas_trsv(CblasLower, CblasNoTrans, CblasUnit, n0, kern_proj, n0, xx.addrCoefs(), 1); { int itmp = 0; for (int j = 0; j < n0; j++, itmp += (n0 + 1)) { xx[j] *= kern_proj[itmp]; } } blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, n0, kern_proj, n0, xx.addrCoefs(), 1); diss_printf(_verbose, _fp, "%s %d : %s : solution of kernel adjustment\n", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); for (int i = 0; i < _dim; i++) { for (int j = 0; j < n0; j++) { x[i] -= xx[j] * kern_basis[i + j * _dim]; } } for (int j = 0; j < n0; j++) { xx[j] = blas_dot(_dim, (kern_basis + j * _dim), 1, x, 1); } diss_printf(_verbose, _fp, "%s %d : %s : after projection, component of each kernel\n", __FILE__, __LINE__, name.c_str()); for (int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.6e ", blas_abs(xx[j])); } diss_printf(_verbose, _fp, "\n"); xx.free(); } // loop : _graph_colors : m } template void 
DissectionSolver::ProjectionKernelOrthSingle(double *x, string name, bool isTrans); template void DissectionSolver::ProjectionKernelOrthSingle(quadruple *x, string name, bool isTrans); template void DissectionSolver, double>:: ProjectionKernelOrthSingle(complex *x, string name, bool isTrans); template void DissectionSolver, quadruple>:: ProjectionKernelOrthSingle(complex *x, string name, bool isTrans); template void DissectionSolver::ProjectionKernelOrthSingle(float *x, string name, bool isTrans); template void DissectionSolver, float>:: ProjectionKernelOrthSingle(complex *x, string name, bool isTrans); // // template void DissectionSolver:: SpMV(const T *x, T *y, bool scaling_flag) { if (_scaling && scaling_flag) { VectorArray w(_dim); for (int i = 0; i < _dim; i++) { // w[i] = x[i] / fromreal(_precDiag[i]); // conversion if T == complex w[i] = x[i] / _precDiag[i]; // without conversion when complex } _ptDA->prod(w.addrCoefs(), y); for (int i = 0; i < _dim; i++) { // y[i] /= fromreal(_precDiag[i]); y[i] /= _precDiag[i]; } w.free(); } else { _ptDA->prod(x, y); } } template void DissectionSolver::SpMV(const double *x, double *y, bool scaling_flag); template void DissectionSolver::SpMV(const quadruple *x, quadruple *y, bool scaling_flag); template void DissectionSolver, double>:: SpMV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver, quadruple>:: SpMV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver::SpMV(const double *x, double *y, bool scaling_flag); template void DissectionSolver::SpMV(const quadruple *x, quadruple *y, bool scaling_flag); template void DissectionSolver, double, complex, quadruple>:: SpMV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver, quadruple, complex, double>:: SpMV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver::SpMV(const float *x, float *y, bool scaling_flag); template void DissectionSolver, float>:: SpMV(const 
complex *x, complex *y, bool scaling_flag); // template void DissectionSolver:: SpMtV(const T *x, T *y, bool scaling_flag) { if (_scaling && scaling_flag) { VectorArray w(_dim); for (int i = 0; i < _dim; i++) { // w[i] = x[i] / fromreal(_precDiag[i]); w[i] = x[i] / _precDiag[i]; } _ptDA->prodt(w.addrCoefs(), y); for (int i = 0; i < _dim; i++) { // y[i] /= fromreal(_precDiag[i]); y[i] /= _precDiag[i]; } w.free(); } else { _ptDA->prodt(x, y); } } template void DissectionSolver::SpMtV(const double *x, double *y, bool scaling_flag); template void DissectionSolver::SpMtV(const quadruple *x, quadruple *y, bool scaling_flag); template void DissectionSolver, double>:: SpMtV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver, quadruple>:: SpMtV(const complex *x, complex *y, bool scaling_flag); template void DissectionSolver::SpMtV(const float *x, float *y, bool scaling_flag); template void DissectionSolver, float>:: SpMtV(const complex *x, complex *y, bool scaling_flag); // template void DissectionSolver:: SolveSingle(T *x, bool projection, bool isTrans, bool isScaling, const int nexcls) { int n0, n1; n0 = 0; for (int m = 0; m < _graph_colors; m++) { n0 += _kernel[m].dimension(); } n1 = 0; for (int m = 0; m < _graph_colors; m++) { n1 += _Schur[m].getSlduList().size(); } diss_printf(_verbose, _fp, "%s %d : colors = %d n1 = %d nexcls = %d\n", __FILE__, __LINE__, _graph_colors, n1, nexcls); VectorArray yy(n1); // allocation with size 0 is defined in PlainMatirx.hpp elapsed_t t0_elapsed, t1_elapsed; get_realtime(&t0_elapsed); if (n0 > 0 && projection) { diss_printf(_verbose, _fp, "%s %d : projection of the scaled RHS onto the image\n", __FILE__, __LINE__); ProjectionKernelOrthSingle(x, "scaled RHS", true); // orthogonal to ker A^T } if (isScaling) { for (int i = 0; i < _dim; i++) { x[i] *= _precDiag[i]; } } if (isTrans) { if (n1 > 0) { int i0 = 0; for (int m = 0; m < _graph_colors; m++) { int nn1 = _Schur[m].getSlduList().size(); for (int i = 0; i 
< nn1; i++, i0++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; yy[i0] = x[ii]; x[ii] = _zero; // the matrix is factorized only regular node, // elements for suspicious pivots are set to be 0 } // loop : i } // loop : m diss_printf(_verbose, _fp, "%s %d : _ptDA_sldu[] are solved with n1=%d\n", __FILE__, __LINE__, n1); int mm0 = 0; for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); if (nnn1 > nexcls) { const int nn1 = nnn1 - nexcls; // y2 = A32^T - (A_11^-1 A_12)^T A_31^T blas_gemv(CblasTrans, _dim, nn1, _none, _Schur[m].getScol().addrCoefs(), _dim, x, 1, _one, yy.addrCoefs() + mm0, 1); // S22 x2 = y2 blas_trsv(CblasUpper, CblasTrans, CblasUnit, nn1, _Schur[m].getSldu().addrCoefs(), nn1, yy.addrCoefs() + mm0, 1); for (int i = 0; i < nn1; i++) { yy[mm0 + i] *= _Schur[m].getSldu()(i, i); // should be optimized } blas_trsv(CblasLower, CblasTrans, CblasUnit, nn1, _Schur[m].getSldu().addrCoefs(), nn1, yy.addrCoefs() + mm0, 1); // A_11^T x_1 = A_31^T - A_21^T x_2 for (int i = 0; i < nn1; i++) { const int ii = _Schur[m].getSlduList()[i]; const SparseMatrix * const arow = _Schur[m].getArow(); for (int k = arow->ptRow(ii); k < arow->ptRow(ii + 1); k++) { const int j = arow->indCol(k); x[j] -= arow->Coef(k) * yy[mm0 + i]; } } // loop : i } else { // change singIdx0 } mm0 += nnn1; // move to the next block } // loop : _graph_colors : m } // if (n1 > 0) SolveScaled(x, 1, true); if (n1 > 0) { int mm0 = 0; for (int m = 0; m < _graph_colors; m++) { const int nn1 = _Schur[m].getSlduList().size(); for (int i = 0; i < nn1; i++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; x[ii] = yy[mm0 + i]; } mm0 += nn1; // move to the next block } // loop : m } // if (n1 > 0) } // if (isTrans) else { if (n1 > 0) { int i0 = 0; for (int m = 0; m < _graph_colors; m++) { int nn1 = _Schur[m].getSlduList().size(); for (int i = 0; i < nn1; i++, i0++) { const int ii = _Schur[m].getSldu().loc2glob_left()[i]; yy[i0] = x[ii]; x[ii] = _zero; // the matrix 
is factorized only regular node, // elements for suspicious pivots are set to be 0 } // loop : i for (int i = 0; i < _kernel[m].getKernListEqLeft().size(); i++) { const int ii = _kernel[m].getKernListEqLeft()[i]; x[ii] = _zero; } } // loop : m } for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); diss_printf(_verbose, _fp, "%s %d : color = %d %d\n", __FILE__, __LINE__, m, nnn1); if (nnn1 < nexcls) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); vector &singIdx0 = diag->getSingIdx0(); int pseudosing; if (singIdx0.size() > 0) { pseudosing = std::min((int)singIdx0.front(), (int)singIdx0.back()) - 1; } else { pseudosing = diag->dimension() - 1; } for (int i = nnn1; i < nexcls; i++, pseudosing--) { singIdx0.push_back(pseudosing); } } } } for (int m = 0; m < _graph_colors; m++) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); vector &singIdx0 = diag->getSingIdx0(); if (singIdx0.size() > 0) { diss_printf(_verbose, _fp, "%s %d : color = %d modify dim = %d : ", __FILE__, __LINE__, m, diag->dimension()); for (vector::const_iterator it = singIdx0.begin(); it != singIdx0.end(); ++it) { diss_printf(_verbose, _fp, " %d", *it); } diss_printf(_verbose, _fp, "\n"); } //if (_verbose && (singIdx0.size() > 0)) } } SolveScaled(x, 1, false); if (n1 > 0) { diss_printf(_verbose, _fp, "%s %d : _ptDA_sldu[] are solved with n1=%d\n", __FILE__, __LINE__, n1); int mm0 = 0; for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); if (nnn1 > nexcls) { const int nn1 = nnn1 - nexcls; diss_printf(_verbose, _fp, "%s %d : sldu %d\n", __FILE__, __LINE__, nn1); for (int i = 0; i < nn1; i++) { T stmp(0.0); const int ii = _Schur[m].getSlduListLeft()[i]; const SparseMatrix * const arow = _Schur[m].getArow(); for (int k = arow->ptRow(ii); k < arow->ptRow(ii + 1); k++) { 
const int j = arow->indCol(k); stmp += arow->Coef(k) * x[j]; } yy[mm0 + i] -= stmp; } blas_trsv(CblasLower, CblasNoTrans, CblasUnit, nn1, _Schur[m].getSldu().addrCoefs(), nn1, yy.addrCoefs() + mm0, 1); for (int i = 0; i < nn1; i++) { yy[mm0 + i] *= _Schur[m].getSldu()(i, i); // should be optimized } blas_trsv(CblasUpper, CblasNoTrans, CblasUnit, nn1, _Schur[m].getSldu().addrCoefs(), nn1, yy.addrCoefs() + mm0, 1); for (int i = 0; i < nn1; i++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; x[ii] = yy[mm0 + i]; } } } // loop : m { int mm0 = 0; for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); if (nnn1 > nexcls) { const int nn1 = nnn1 - nexcls; blas_gemv(CblasNoTrans, _dim, nn1, _none, _Schur[m].getScol().addrCoefs(), _dim, yy.addrCoefs() + mm0, 1, _one, x, 1); mm0 += nnn1; // move to the next block } for (int i = 0; i < _kernel[m].getKernListEq().size(); i++) { const int ii = _kernel[m].getKernListEq()[i]; x[ii] = _zero; } } // loop : _graph_colors : m } // scope of mm0 } // if (n1 > 0) else { for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); if (nnn1 < nexcls) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); vector &singIdx0 = diag->getSingIdx0(); if (singIdx0.size() > 0) { diss_printf(_verbose, _fp, "%s %d : restore ", __FILE__, __LINE__); for (vector::const_iterator it = singIdx0.begin(); it != singIdx0.end(); ++it) { diss_printf(_verbose, _fp, " %d", *it); } diss_printf(_verbose, _fp, "\n"); for (int i = nnn1; i < nexcls; i++) { singIdx0.pop_back(); } } } } // if (nn1 < nexcls) } // loop : m } // if (n1 > 0) } // if (isTrans) // adjust the kernel : x = x - N (N{^T} N)^-1 N^{T} x if (isScaling) { for (int i = 0; i < _dim; i++) { x[i] *= _precDiag[i]; } } // if (_index_isolated.size() > 0) { for (vector::const_iterator it = _index_isolated.begin(); it != _index_isolated.end(); ++it) { T atmp(1.0); 
for (int k = _ptDA->ptRow((*it)); k < _ptDA->ptRow((*it) + 1); k++) { if (_ptDA->indCol(k) == (*it)) { atmp = _one / _ptDA->Coef(k); break; } } x[(*it)] *= atmp; } // loop : it } // if (_index_isolated.size() > 0) if (n0 > 0 && projection) { diss_printf(_verbose, _fp, "%s %d : projection orthogonal to the kernel\n", __FILE__, __LINE__); ProjectionKernelOrthSingle(x, "scaled RHS", false); // orthogonal to ker A } get_realtime(&t1_elapsed); } template void DissectionSolver::SolveSingle(double *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver::SolveSingle(quadruple *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, double>:: SolveSingle(complex *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, quadruple>:: SolveSingle(complex *x, bool projection, bool isTrans, bool isScaling, const int nexcls); // template void DissectionSolver::SolveSingle(double *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver::SolveSingle(quadruple *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, double, complex, quadruple>:: SolveSingle(complex *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, quadruple, complex, double>:: SolveSingle(complex *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver::SolveSingle(float *x, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, float>:: SolveSingle(complex *x, bool projection, bool isTrans, bool isScaling, const int nexcls); // template void DissectionSolver:: SolveScaled(T *x, int nrhs, bool isTrans) { for (int m = 0; m < _graph_colors; m++) { if (_tridiagQueue[m]->tridiagSolver()) { _tridiagQueue[m]->exec_fwbw(x, nrhs, isTrans); } else { //if (false) { if 
(nrhs == 1) { _dissectionQueue[m]->exec_fwbw_seq(x, nrhs, isTrans); } else { _dissectionQueue[m]->exec_fwbw(x, nrhs, isTrans); } } } // loop : m } template void DissectionSolver::SolveScaled(double *x, int nrhs, bool isTrans); template void DissectionSolver, double>:: SolveScaled(complex *x, int nrhs, bool isTrans); template void DissectionSolver::SolveScaled(quadruple *x, int nrhs, bool isTrans); template void DissectionSolver, quadruple>:: SolveScaled(complex *x, int nrhs, bool isTrans); template void DissectionSolver::SolveScaled(float *x, int nrhs, bool isTrans); template void DissectionSolver, float>:: SolveScaled(complex *x, int nrhs, bool isTrans); // template void DissectionSolver:: SolveMulti(T *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls) { const int nsing = _nsing; //_singIdx.size(); VectorArray vtmp(nrhs); vector nn1(_graph_colors); // nn1.resize(_graph_colors); int i0, mm0, n0, n1; int itmp; ColumnMatrix xx; ColumnMatrix yy; elapsed_t t0_elapsed, t1_elapsed; n0 = 0; for (int m = 0; m < _graph_colors; m++) { n0 += _kernel[m].dimension(); } n1 = 0; if (nsing > 0) { for (int m = 0; m < _graph_colors; m++) { nn1[m] = _Schur[m].getSlduList().size(); n1 += nn1[m]; } } get_realtime(&t0_elapsed); itmp = 0; xx.init(_dim, nrhs, x, false); if (isScaling) { for (int i = 0; i < _dim; i++) { for (int n = 0; n < nrhs; n++) { xx(i, n) *= _precDiag[i]; } } // loop : n } if (n0 > 0 && projection) { diss_printf(_verbose, _fp, "%s %d : projection of the scaled RHS onto the image\n", __FILE__, __LINE__); for (int n = 0; n < nrhs; n++) { ProjectionKernelOrthSingle(xx.addrCoefs() + (n * _dim), "scaled RHS", true); // orthogonal to ker A^T } } if (n1 > 0) { yy.init(n1, nrhs); } // if (n1 > 0) if (isTrans) { if (n1 > 0) { // if (nsing > 0) { i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; yy(i0, n) = xx(ii, n); // 
yy[n][i0] = xx[n][ii]; xx(ii, n) = _zero; } // loop : n } // loop : i } // loop : m mm0 = 0; for (int m = 0; m < _graph_colors; m++) { if (nn1[m] > 0) { blas_gemm(CblasTrans, CblasNoTrans, nn1[m], nrhs, _dim, _none, _Schur[m].getScol().addrCoefs(), _dim, xx.addrCoefs(), _dim, _one, yy.addrCoefs() + mm0, n1); mm0 += nn1[m]; } } mm0 = 0; i0 = 0; for (int m = 0; m < _graph_colors; m++) { if (nn1[m] > 0) { blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, nn1[m], nrhs, _one, // alpha _Schur[m].getSldu().addrCoefs(), nn1[m], yy.addrCoefs() + mm0, n1); // yy[i] = &y[i * n1] for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { //yy[n][i0] *= _Schur[m].getSldu()(i, i); // should be optimized yy(i0, n) *= _Schur[m].getSldu()(i, i); // should be optimized } } blas_trsm(CblasLeft, CblasLower, CblasTrans, CblasUnit, nn1[m], nrhs, _one, // alpha _Schur[m].getSldu().addrCoefs(), nn1[m], yy.addrCoefs() + mm0, n1); mm0 += _Schur[m].getSlduList().size(); } } i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { const int ii = _Schur[m].getSlduList()[i]; const SparseMatrix * const arow = _Schur[m].getArow(); for (int k = arow->ptRow(ii); k < arow->ptRow(ii + 1); k++) { const int j = arow->indCol(k); const T xtmp = arow->Coef(k); for (int n = 0; n < nrhs; n++) { xx(j, n) -= xtmp * yy(i0, n); // xx[n][j] -= xtmp * yy[n][i0]; } } // loop : n } // loop : i } // loop : m } // if (nsing > 0) SolveScaled(xx.addrCoefs(), nrhs, true); if (nsing > 0) { i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; xx(ii, n) = yy(i0, n); // xx[n][ii] = yy[n][i0]; } } // loop : i } // loop : m } // if (nsing > 0) } else { if (n1 > 0) { // if (nsing > 0) { i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { const int ii = 
_Schur[m].getSldu().loc2glob_left()[i]; yy(i0, n) = xx(ii, n); // yy[n][i0] = xx[n][ii]; xx(ii, n) = _zero; } // loop : n } // loop : i } } for (int m = 0; m < _graph_colors; m++) { const int nnn1 = _Schur[m].getSlduList().size(); diss_printf(_verbose, _fp, "%s %d : color = %d %d\n", __FILE__, __LINE__, m, nnn1); if (nnn1 < nexcls) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); vector &singIdx0 = diag->getSingIdx0(); int pseudosing; if (singIdx0.size() > 0) { pseudosing = std::min((int)singIdx0.front(), (int)singIdx0.back()) - 1; } else { pseudosing = diag->dimension() - 1; } for (int i = nnn1; i < nexcls; i++, pseudosing--) { singIdx0.push_back(pseudosing); } } } } for (int m = 0; m < _graph_colors; m++) { if (!_tridiagQueue[m]->tridiagSolver()) { SquareBlockMatrix* diag = &_dissectionMatrix[m][_btree[m]->selfIndex(1)]->diagBlock(); vector &singIdx0 = diag->getSingIdx0(); if (singIdx0.size() > 0) { diss_printf(_verbose, _fp, "%s %d : color = %d modify dim = %d : ", __FILE__, __LINE__, m, diag->dimension()); for (vector::const_iterator it = singIdx0.begin(); it != singIdx0.end(); ++it) { diss_printf(_verbose, _fp, " %d", *it); } diss_printf(_verbose, _fp, "\n"); } //if (singIdx0.size() > 0) } } SolveScaled(xx.addrCoefs(), nrhs, false); if (nsing > 0) { i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { vtmp.ZeroClear(); const int ii = _Schur[m].getSlduListLeft()[i]; const SparseMatrix * const arow = _Schur[m].getArow(); for (int k = arow->ptRow(ii); k < arow->ptRow(ii + 1); k++) { const int j = arow->indCol(k); const T xtmp = arow->Coef(k); for (int n = 0; n < nrhs; n++) { vtmp[n] += xtmp * xx(j, n); //xx[n][j]; } } // loop : n for (int n = 0; n < nrhs; n++) { yy(i0, n) -= vtmp[n]; // yy[n][i0] -= vtmp[n]; } // loop : n } // loop : i } // loop : m mm0 = 0; i0 = 0; for (int m = 0; m < _graph_colors; m++) { if (nn1[m] > 0) { blas_trsm(CblasLeft, 
CblasLower, CblasNoTrans, CblasUnit, nn1[m], nrhs, _one, // alpha _Schur[m].getSldu().addrCoefs(), nn1[m], yy.addrCoefs() + mm0, n1); // yy[i] = &y[i * n1] for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { // yy[n][i0] *= _Schur[m].getSldu()(i, i); // should be optimized yy(i0, n) *= _Schur[m].getSldu()(i, i); // should be optimized } } blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, nn1[m], nrhs, _one, _Schur[m].getSldu().addrCoefs(), nn1[m], yy.addrCoefs() + mm0, n1); mm0 += _Schur[m].getSlduList().size(); } // if (nn1[m] > 0) } // loop : m i0 = 0; for (int m = 0; m < _graph_colors; m++) { for (int i = 0; i < nn1[m]; i++, i0++) { for (int n = 0; n < nrhs; n++) { const int ii = _Schur[m].getSldu().loc2glob()[i]; xx(ii, n)= yy(i0, n); // xx[n][ii] = yy[n][i0]; } } } // loop : m mm0 = 0; for (int m = 0; m < _graph_colors; m++) { if (nn1[m] > 0) { blas_gemm(CblasNoTrans, CblasNoTrans, _dim, nrhs, nn1[m], _none, _Schur[m].getScol().addrCoefs(), _dim, yy.addrCoefs() + mm0, n1, _one, xx.addrCoefs(), _dim); mm0 += nn1[m]; } } } } // if (isTrans) // adjust the kernel : x = x - N (N{^T} N)^-1 N^{T} x if (isScaling) { for (int n = 0; n < nrhs; n++) { for (int i = 0; i < _dim; i++) { xx(i, n) *= _precDiag[i]; // xx[n][i] *= _precDiag[i]; } } } else { // inversion of isolated diagonal entries for (vector::const_iterator it = _index_isolated.begin(); it != _index_isolated.end(); ++it) { T atmp(1.0); for (int k = _ptDA->ptRow((*it)); k < _ptDA->ptRow((*it) + 1); k++) { if (_ptDA->indCol(k) == (*it)) { atmp = _one / _ptDA->Coef(k); break; } } for (int n = 0; n < nrhs; n++) { xx((*it), n) *= atmp; // xx[n][(*it)] *= atmp; } } // loop : it } // if (nsing > 0) if (n0 > 0 && projection) { diss_printf(_verbose, _fp, "%s %d : projection of the scaled solution onto the image\n", __FILE__, __LINE__); for (int n = 0; n < nrhs; n++) { ProjectionKernelOrthSingle(xx.addrCoefs() + (n * _dim), "scaled solution", false); // orthogonal to ker A } } // if (n1 > 
0) { // delete [] y; //yy.free(); // } // delete [] yy; // delete [] xx; xx.free(); yy.free(); // delete [] vtmp; // delete [] nn1; get_realtime(&t1_elapsed); } template void DissectionSolver::SolveMulti(double *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver::SolveMulti(quadruple *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, double>:: SolveMulti(complex *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, quadruple>:: SolveMulti(complex *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver:: SolveMulti(double *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, double, complex, quadruple>:: SolveMulti(complex *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver::SolveMulti(float *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); template void DissectionSolver, float>:: SolveMulti(complex *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls); // template void DissectionSolver:: BuildSingCoefs(T *DSsingCoefs, SparseMatrix *DCsCoefs, T *DBsCoefs, vector& singIdx, const bool isTrans) { const bool flag_trans = isTrans && (!_ptDA->isSymmetric()); T *DSsingCoefs0; const int nsing = singIdx.size(); if (flag_trans) { DSsingCoefs0 = new T [nsing * nsing]; } else { DSsingCoefs0 = DSsingCoefs; } _ptDA->extractSquareMatrix(DSsingCoefs0, singIdx); if (flag_trans) { for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing;j++) { DSsingCoefs[i + j * nsing] = DSsingCoefs0[j + i * nsing]; } } } if ( _ptDA->isSymmetric() ) { // singIdx[] is sorted in increasing order // col_ind[] of symmetric matrix with CSR also in increasing order // Ssing = Asing - Cs*Am-1*(Bs) // ?? 
(*it == 0) where it.column() == singIdx[k] ?? for (int i = 0; i < nsing; i++) { for (int j = i; j < nsing;j++) { // [dense matrix As] - [sparse matrix Cs] * [dense matrix Am^-1 (Bs)] T stmp(0.0); for (int k = DCsCoefs->ptRow(i); k < DCsCoefs->ptRow(i + 1); k++) { const int icol = DCsCoefs->indCol(k); stmp += DCsCoefs->Coef(k) * DBsCoefs[icol + j * _dim]; } DSsingCoefs[i + j * nsing] -= stmp; } // symmetrize for (int j = i + 1; j < nsing;j++) { DSsingCoefs[j + i * nsing] = DSsingCoefs[i + j * nsing]; } } } // if ( _ptDA->isSymmetric() ) { else { for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing;j++) { // [dense matrix As] - [sparse matrix Cs] * [dense matrix Am^-1 (Bs)] T stmp(0.0); for (int k = DCsCoefs->ptRow(i); k < DCsCoefs->ptRow(i + 1); k++) { const int icol = DCsCoefs->indCol(k); stmp += DCsCoefs->Coef(k) * DBsCoefs[icol + j * _dim]; } DSsingCoefs[i + j * nsing] -= stmp; } } } // if ( _ptDA->isSymmetric() ) { diss_printf(_verbose, _fp, "%s %d : Schur complement on the singular dofs set %d x %d\n", __FILE__, __LINE__, nsing, nsing); if (flag_trans) { delete [] DSsingCoefs0; } } template void DissectionSolver:: BuildSingCoefs(double *DSsingCoefs, SparseMatrix *DCsCoefs, double *DBsCoefs, vector& singIdx, const bool isTrans); template void DissectionSolver, double>:: BuildSingCoefs(complex *DSsingCoefs, SparseMatrix > *DCsCoefs, complex *DBsCoefs, vector& singIdx, const bool isTrans); template void DissectionSolver:: BuildSingCoefs(quadruple *DSsingCoefs, SparseMatrix *DCsCoefs, quadruple *DBsCoefs, vector& singIdx, const bool isTrans); template void DissectionSolver, quadruple>:: BuildSingCoefs(complex *DSsingCoefs, SparseMatrix > *DCsCoefs, complex *DBsCoefs, vector& singIdx, const bool isTrans); template void DissectionSolver:: BuildSingCoefs(float *DSsingCoefs, SparseMatrix *DCsCoefs, float *DBsCoefs, vector& singIdx, const bool isTrans); template void DissectionSolver, float>:: BuildSingCoefs(complex *DSsingCoefs, SparseMatrix > *DCsCoefs, complex 
*DBsCoefs, vector& singIdx, const bool isTrans); // template void DissectionSolver:: BuildKernels(vector &singIdx_, int n2, SchurMatrix &Schur, KernelMatrix &kernel) { elapsed_t t0_elapsed, t1_elapsed; // struct timespec ts0, ts1; const int n0 = singIdx_.size(); const int n1 = n0 - n2; // regular nodes const int nsing = n2; // singIdx.size(); ColumnMatrix DBsCoefs(_dim, nsing); diss_printf(_verbose, _fp, "%s %d : BuildKernels n0 = %d n1 = %d n2 = %d\n", __FILE__, __LINE__, n0, n1, n2); if (n1 > 0) { //IndiceArray regularVal(n1); vector regularVal; regularVal.resize(n1); // SparseMatrix* ptDA_acol; for (int i = 0; i < n1; i++) { regularVal[i] = singIdx_[i]; } // to sort by increasing-order std::sort(regularVal.begin(), regularVal.end()); //regularVal.renumber(); Schur.getAcol() = _ptDA->PartialCopyCSR(regularVal, n1, true); Schur.getArow() = _ptDA->PartialCopyCSR(regularVal, n1, false); // nullify [ A_12 / A_22 / A_23 ] -> [ A_12 / 0 / 0 ] for (int i = 0; i < n1; i++) { for (int k = Schur.getAcol()->ptRow(i); k < Schur.getAcol()->ptRow(i + 1); k++) { // elimination from the CSR foramt is best for performance for (int m = 0; m < n0; m++) { if (Schur.getAcol()->indCol(k) == singIdx_[m]) { Schur.getAcol()->Coef(k) = _zero; } } } } Schur.getScol().init(_dim, n1); for (int j = 0; j < n1; j++) { for (int i = 0; i < _dim; i++) { Schur.getScol()(i, j) = _zero; // ZeroClear() } for (int k = Schur.getAcol()->ptRow(j); k < Schur.getAcol()->ptRow(j + 1); k++) { const int icol = Schur.getAcol()->indCol(k); Schur.getScol()(icol, j) = Schur.getAcol()->Coef(k); } } //IndiceArray s_list_eq(n1); // local -> global mapping vector s_list_eq; s_list_eq.resize(n1); for (int i = 0; i < n1; i++) { s_list_eq[i] = regularVal[i]; } Schur.getSldu().init(s_list_eq); // ptDA_sldu.init // _Schur.getScol() = [ A_11^-1 A_12 / 0 / 0 ] SolveScaled(Schur.getScol().addrCoefs(), n1, false); //Schur.getScol().addrCoefs(), n1, false); BuildSingCoefs(Schur.getSldu().addrCoefs(), //ptDA_sldu.addrCoefs(), 
Schur.getArow(), Schur.getScol().addrCoefs(), //Schur.getScol().addrCoefs(), regularVal); int *pivot_width = &(Schur.getSldu().getPivotWidth()[0]); //&ptDA_sldu.getPivotWidth()[0]; T *d1 = Schur.getSldu().addr2x2(); int *permute = &Schur.getSldu().getPermute()[0]; #if 0 full_sym_2x2BK(n1, Schur.getSldu().addrCoefs(), d1, pivot_width, permute); // fprintf(_fp, "Bunch-Kaufman permutation : "); diss_printf(_verbose, _fp, "%s %d : permutation for Schur complement: ", __FILE__, __LINE__); for (int i = 0; i < n1; i++) { diss_printf(_verbose, _fp, "%3d ", pivot_width[i]); } diss_printf(_verbose, _fp, "\n"); for (int i = 0; i < n1; i++) { diss_printf(_verbose, _fp, "%3d ", permute[i]); } diss_printf(_verbose, _fp, "\n"); Schur.getSlduList().resize(n1); for (int i = 0; i < n1; i++) { Schur.getSlduList()[i] = permute[i]; // ?? 20 Apr.2016 ?? } int itmp2x2 = 0; for (int i = 0; i < n1; i++) { if (pivot_width[i] == 20) { itmp2x2++; } } Schur.getSldu().getPivot2x2().resize(itmp2x2); itmp2x2 = 0; for (int i = 0; i < n1; i++) { if (pivot_width[i] == 20) { Schur.getSldu().getPivot2x2()[itmp2x2] = i; itmp2x2++; } } // 1x1 + 2x2 // delete ptDA_acol; #else { int nn0, n0 = 0; double fop, pivot_ref = 1.0; // should be automatically defined in ldlt/ldu double eps_piv = machine_epsilon(); if (_ptDA->isSymmetric()) { full_ldlt_permute(&nn0, n0, n1, Schur.getSldu().addrCoefs(), n1, &pivot_ref, permute, eps_piv, &fop); } else { full_ldu_permute(&nn0, n0, n1, Schur.getSldu().addrCoefs(), n1, &pivot_ref, permute, eps_piv, &fop); } diss_printf(_verbose, _fp, "%s %d : factorization with eps_piv = %g : dim kern = %d\n", __FILE__, __LINE__, eps_piv, nn0); diss_printf(_verbose, _fp, "permute[] = "); for (int i = 0; i < n1; i++) { diss_printf(_verbose, _fp, "%d ", permute[i]); } diss_printf(_verbose, _fp, "\n"); } Schur.getSlduList().resize(n1); for (int i = 0; i < n1; i++) { Schur.getSlduList()[i] = permute[i]; } #endif } // if (n1 > 0) else { // if (n1 == 0) Schur.getAcol() = new SparseMatrix(); 
// dummy allocation Schur.getArow() = new SparseMatrix(); // dummy allocation Schur.getSlduList().clear(); } // if (n1 > 0) if (n2 == 0) { kernel.set_dimension(0); kernel.getKernProj().init(0); kernel.getKernBasis().init(0, 0); } else { SparseMatrix *DCsCoefs; vector singIdx; singIdx.resize(n2); for (int i = 0; i < n2; i++) { singIdx[i] = singIdx_[n0 - n2 + i]; } // to sort by increasing-order std::sort(singIdx.begin(), singIdx.end()); // singIdx.renumber(); //DCsCoefs = PartialCopyCSR(_ptDA, singIdx, nsing, true); DCsCoefs = _ptDA->PartialCopyCSR(singIdx, nsing, true); // 28 Dec.2015 // clear element of DBsCoefs belonging to diagnal block for (int i = 0; i < nsing; i++) { for (int k = DCsCoefs->ptRow(i); k < DCsCoefs->ptRow(i + 1); k++) { // elimination from the CSR foramt is best for performance for (int m = 0; m < nsing; m++) { if (DCsCoefs->indCol(k) == singIdx[m]) { DCsCoefs->Coef(k) = _zero; } } } } diss_printf(_verbose, _fp, "%s %d : Cs matrix: row = %d\n", __FILE__, __LINE__, nsing); // generate DBsCoefs[] // copy from lower part (sparse) with transporse to upper part (dense) DBsCoefs.ZeroClear(); for (int j = 0; j < nsing; j++) { for (int k = DCsCoefs->ptRow(j); k < DCsCoefs->ptRow(j + 1); k++) { const int icol = DCsCoefs->indCol(k); DBsCoefs(icol, j) = DCsCoefs->Coef(k); } } // access with stride size _dim for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { DBsCoefs(singIdx[i], j) = _zero; } } diss_printf(_verbose, _fp, "%s %d Bs matrix: singular columns of global matrix %d x %d\n", __FILE__, __LINE__, _dim, nsing); // Compute Am-1*Bs // scaling flag is off : internal solver A_11^-1 [A^12 A^13] if (n1 > 0) { ColumnMatrix yy(n1, nsing); ColumnMatrix yy1(n1, nsing); for (int m = 0; m < nsing; m++) { for (int i = 0; i < n1; i++) { int ii = Schur.getSldu().loc2glob()[i]; // yy[i + m * n1] = DBsCoefs[ii + m * _dim]; save data of singular nds // DBsCoefs[ii + m * _dim] = zero; SolveScaled works for regular nodes yy(i, m) = DBsCoefs(ii, m); 
DBsCoefs(ii, m) = _zero; } } // loop : j get_realtime(&t0_elapsed); SolveScaled(DBsCoefs.addrCoefs(), nsing, false); get_realtime(&t1_elapsed); diss_printf(_verbose, _fp, "%s %d : dissection solve : %d RHS (sec.) = %.6f\n", __FILE__, __LINE__, nsing, convert_time(t1_elapsed, t0_elapsed)); for (int m = 0; m < nsing; m++) { // Block SpMV for (int i = 0; i < n1; i++) { T stmp(0.0); // = _zero; const int ii = Schur.getSlduList()[i]; for (int k = Schur.getArow()->ptRow(ii); k < Schur.getArow()->ptRow(ii + 1); k++) { const int j = Schur.getArow()->indCol(k); stmp += Schur.getArow()->Coef(k) * DBsCoefs(j, m); } yy(i, m) -= stmp; } } // loop : m blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n1, nsing, _one, // alpha Schur.getSldu().addrCoefs(), n1, yy.addrCoefs(), n1); for (int i = 0; i < n1; i++) { T stmp = Schur.getSldu()(i, i); for (int m = 0; m < nsing; m++) { yy1(i, m) = yy(i, m) * stmp; // should be optimized } // loop : m } for (int i = 0; i < Schur.getSldu().getPivot2x2().size(); i++) { const int ii = Schur.getSldu().getPivot2x2()[i]; T *d1 = Schur.getSldu().addr2x2(); for (int m = 0; m < nsing; m++) { yy1(ii, m) += d1[m] * yy((i + 1), m); yy1((ii + 1), m) += d1[m + 1] * yy(i, m); } } yy.copy(yy1); blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, n1, nsing, _one, Schur.getSldu().addrCoefs(), n1, yy.addrCoefs(), n1); for (int i = 0; i < n1; i++) { const int ii = Schur.getSldu().loc2glob()[i]; for (int m = 0; m < nsing; m++) { DBsCoefs(ii, m) = yy(i, m); } // loop : m } blas_gemm(CblasNoTrans, CblasNoTrans, _dim, nsing, n1, _none, // alpha Schur.getScol().addrCoefs(), _dim, yy.addrCoefs(), n1, _one, // beta DBsCoefs.addrCoefs(), _dim); } // if (n1 > 0) else { get_realtime(&t0_elapsed); SolveScaled(DBsCoefs.addrCoefs(), nsing, false); get_realtime(&t1_elapsed); diss_printf(_verbose, _fp, "%s %d : dissection solve : %d RHS (sec.) 
= %.6f\n", __FILE__, __LINE__, nsing, convert_time(t1_elapsed, t0_elapsed)); } // clear diagonal blocks of Bs in the place of Schur complement for (int i = 0; i < nsing; i++) { DBsCoefs(singIdx[i], i) = _none; } kernel.getKernListEq().resize(nsing); // = IndiceArray(nsing); for (int i = 0; i < nsing; i++) { kernel.getKernListEq()[i]= singIdx[i]; } diss_printf(_verbose, _fp, "%s %d : kern_list_eq[] = ", __FILE__, __LINE__); for (int i = 0; i < kernel.getKernListEq().size(); i++) { diss_printf(_verbose, _fp, "%d ", kernel.getKernListEq()[i]); } diss_printf(_verbose, _fp, "\n"); kernel.getKernBasis().init(_dim, nsing); T *kern_basis = kernel.getKernBasis().addrCoefs(); kernel.getKernBasis().copy(DBsCoefs); // normalize each kernel_basis for (int j = 0; j < nsing; j++) { U stmp(0.0); const U one(1.0); stmp = one / blas_l2norm(_dim, (kern_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kern_basis + (j * _dim)), 1); } diss_printf(_verbose, _fp, "%s %d : check kern_basis belonging to the kernel of A\n", __FILE__, __LINE__); VectorArray v(_dim); diss_printf(_verbose, _fp, "%s %d : orthogonality of the kernel vectors\n", __FILE__, __LINE__); for (int j = 0; j < nsing; j++) { _ptDA->prod((kern_basis + (j * _dim)), v.addrCoefs()); double norm_l2, norm_infty; calc_relative_norm(&norm_l2, &norm_infty, v.addrCoefs(), kern_basis + (j * _dim), _dim); diss_printf(_verbose, _fp, "%d-th kernel scaled : l2_norm = %.6e, infty_norm = %.6e / ", j, norm_l2, norm_infty); if(_scaling) { calc_relative_normscaled(&norm_l2, &norm_infty, v.addrCoefs(), kern_basis + (j * _dim), &_precDiag[0], _dim); diss_printf(_verbose, _fp, "original : l2_norm = %.6e, infty_norm = %.6e\n", norm_l2, norm_infty); } // if (_scaling) diss_printf(_verbose, _fp, "\n"); } #ifdef DEBUG_DATA fout.close(); #endif if(_scaling) { for (int j = 0; j < nsing; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kern_basis[i + jtmp] *= _precDiag[i]; } } } #ifdef NORMALIZE_KERNEL_BASIS // normalize each 
kernel_basis for (int j = 0; j < nsing; j++) { U stmp(0.0); const U one(1.0); stmp = one / blas_l2norm(_dim, (kern_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kern_basis + (j * _dim)), 1); } #endif kernel.set_dimension(nsing); kernel.getKernProj().init(nsing); for (int i = 0; i < nsing; i++) { for(int j = i; j < nsing; j++) { kernel.getKernProj()(i, j) = blas_dot(_dim, (kern_basis + (i * _dim)), 1, (kern_basis + (j * _dim)), 1); } // symmetrize for (int j = (i + 1); j < nsing; j++) { kernel.getKernProj()(j, i) = kernel.getKernProj()(i, j); } } full_ldlh(nsing, kernel.getKernProj().addrCoefs(), nsing); // inverse of diagonal part is also storead in the factorized matrix delete DCsCoefs; } // if (n2 == 0) // delete [] DBsCoefs; } template void DissectionSolver:: BuildKernels(vector &singIdx_, int n2, SchurMatrix &Schur, KernelMatrix &kernel); template void DissectionSolver, double>:: BuildKernels(vector &singIdx_, int n2, SchurMatrix > &Schur, KernelMatrix > &kernel); template void DissectionSolver:: BuildKernels(vector &singIdx_, int n2, SchurMatrix &Schur, KernelMatrix &kernel); template void DissectionSolver, quadruple>:: BuildKernels(vector &singIdx_, int n2, SchurMatrix > &Schur, KernelMatrix > &kernel); template void DissectionSolver:: BuildKernels(vector &singIdx_, int n2, SchurMatrix &Schur, KernelMatrix &kernel); template void DissectionSolver, float>:: BuildKernels(vector &singIdx_, int n2, SchurMatrix > &Schur, KernelMatrix > &kernel); // template void DissectionSolver:: BuildKernelsDetection(int &n0, vector &singIdx, vector >& augkern_indexes, vector* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix &Schur, KernelMatrix &kernel, const bool enableDetection) { elapsed_t t0_elapsed, t1_elapsed, t2_elapsed, t3_elapsed; // struct timespec ts0, ts1, ts2, ts3; // vector& singIdx = singIdx; // singIdx is inherited from older version const int nsing = singIdx.size(); ColumnMatrix DBsCoefs(_dim, nsing); ColumnMatrix DCsCoefs(_dim, nsing); 
ColumnMatrix DSsingCoefs(nsing, nsing); ColumnMatrix DSsingCoefs0(nsing, nsing); ColumnMatrix DSsingCoefs1; ColumnMatrix DSsingCoefs2(nsing, nsing); ColumnMatrix DSsingCoefs3(nsing, nsing); ColumnMatrix DS0(nsing, nsing); ColumnMatrix DS2(nsing, nsing); double pivot_ref; int *permute0 = new int[nsing]; VectorArray u(_dim); VectorArray v(_dim); // VectorArray save_diag(dim_augkern); { int i = 0; diss_printf(_verbose, _fp, "%s %d : nullifying diagonal entries\n", __FILE__, __LINE__); typename vector* >::const_iterator kt = diags.begin(); for (vector >::const_iterator it = augkern_indexes.begin(); it != augkern_indexes.end(); ++it, ++kt) { SquareBlockMatrix &diag = *(*kt); const int n0_diag = diag.rank(); diss_printf(_verbose, _fp, "diagonal entries modifed : %d -> %d\n", n0_diag, n0_diag - (int)(*it).size()); for (vector::const_iterator jt = (*it).begin(); jt != (*it).end(); ++jt) { diss_printf(_verbose, _fp, "%d : %s\n", *jt, tostring(diag.diag(*jt)).c_str()); save_diag[i++] = diag.diag((*jt)); diag.diag((*jt)) = _zero; } diag.set_rank(n0_diag - (*it).size()); } } // singIdx is sorted with increasing-order Schur.getAcol() = _ptDA->PartialCopyCSR(singIdx, nsing, true); // 28 Dec.2015 Schur.getArow() = _ptDA->PartialCopyCSR(singIdx, nsing, false); // 28 Dec.2015 // nullify diagonal blocks of singular nodes // [A_21 A_22 A_23 / A_31 A_32 A_33 ] -> [A_21 0 0 / A_31 0 0 ] // elimination of CSR entries is best for performance, but now replacing by 0 { // scope of acol SparseMatrix *acol = Schur.getAcol(); SparseMatrix *arow = Schur.getArow(); for (int i = 0; i < nsing; i++) { for (int k = acol->ptRow(i); k < acol->ptRow(i + 1); k++) { for (int n = 0; n < nsing; n++) { if (acol->indCol(k) == singIdx[n]) { acol->Coef(k) = _zero; } } } for (int k = arow->ptRow(i); k < arow->ptRow(i + 1); k++) { for (int n = 0; n < nsing; n++) { if (arow->indCol(k) == singIdx[n]) { arow->Coef(k) = _zero; } } } } diss_printf(_verbose, _fp, "%s %d : Cs matrix: from global sparse matrix : row = 
%d\n", __FILE__, __LINE__, nsing); // generate DBsCoefs[] // copy from lower part (sparse) with transporse to upper part (dense) DBsCoefs.ZeroClear(); for (int j = 0; j < nsing; j++) { for (int k = acol->ptRow(j); k < acol->ptRow(j + 1); k++) { const int icol = acol->indCol(k); DBsCoefs(icol, j) = acol->Coef(k); } } DCsCoefs.ZeroClear(); for (int j = 0; j < nsing; j++) { for (int k = arow->ptRow(j); k < arow->ptRow(j + 1); k++) { const int icol = arow->indCol(k); DCsCoefs(icol, j) = arow->Coef(k); } } } // scope of acol diss_printf(_verbose, _fp, "%s %d : Bs matrix: from global sparse matrix %d x %d\n", __FILE__, __LINE__, _dim, nsing); // Compute Am-1*Bs // scaling flag is off : internal solver A_11^-1 [A^12 A^13] get_realtime(&t0_elapsed); // clock_gettime(CLOCK_REALdoubleIME, &ts0); SolveScaled(DBsCoefs.addrCoefs(), nsing, false); if (!_ptDA->isSymmetric()) { SolveScaled(DCsCoefs.addrCoefs(), nsing, true); } get_realtime(&t1_elapsed); // clock_gettime(CLOCK_REALTIME, &ts1); diss_printf(_verbose, _fp, "%s %d : dissection solve : %d RHS (sec.) 
= %.6f\n", __FILE__, __LINE__, nsing, convert_time(t1_elapsed, t0_elapsed)); BuildSingCoefs(DSsingCoefs.addrCoefs(), Schur.getArow(), DBsCoefs.addrCoefs(), singIdx); if (!_ptDA->isSymmetric()) { BuildSingCoefs(DSsingCoefs2.addrCoefs(), Schur.getAcol(), DCsCoefs.addrCoefs(), singIdx, true); } // copy DSsingCoefs DS0.copy(DSsingCoefs); DS2.copy(DSsingCoefs2); // set candidate of kernel vectors before applying Schur complement on // regular part of suspicious pivots for (int i = 0; i < nsing; i++) { DBsCoefs(singIdx[i], i) = _none; DCsCoefs(singIdx[i], i) = _none; } get_realtime(&t2_elapsed); DSsingCoefs0.copy(DSsingCoefs); diss_printf(_verbose, _fp, "%s %d : DSsingCoefs0[]\n", __FILE__, __LINE__); for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { diss_printf(_verbose, _fp, "%s ", tostring(DSsingCoefs0(i, j)).c_str()); } diss_printf(_verbose, _fp, "\n"); } if (!_ptDA->isSymmetric()) { diss_printf(_verbose, _fp, "%s %d : transposed DSsingCoefs2[]\n", __FILE__, __LINE__); for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { diss_printf(_verbose, _fp, "%s ", tostring(DSsingCoefs2(j, i)).c_str()); } diss_printf(_verbose, _fp, "\n"); } diss_printf(_verbose, _fp, "%s %d : unsymmetry\n", __FILE__, __LINE__); for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { diss_printf(_verbose, _fp, "%s ", tostring(DSsingCoefs0(i, j) - DSsingCoefs2(j, i)).c_str()); } diss_printf(_verbose, _fp, "\n"); } } bool flag_unsym_permute = false; if (enableDetection) { pivot_ref = 0.0; for (int i = 0; i < nsing; i++) { // find maximum of abs value of diagonal const double stmp = blas_abs(DSsingCoefs0(i, i)); pivot_ref = stmp > pivot_ref ? 
stmp : pivot_ref; } diss_printf(_verbose, _fp, "%s %d : pviot_ref = %.8e\n", __FILE__, __LINE__, pivot_ref); // permute0[] is initialized in ddfull_sym_gauss_part int nn = 0; { int nn0; double fop; if (_ptDA->isSymmetric()) { full_ldlt_permute(&nn0, nn, nsing, DSsingCoefs0.addrCoefs(), nsing, &pivot_ref, permute0, eps_piv, &fop); } else { full_ldu_permute(&nn0, nn, nsing, DSsingCoefs0.addrCoefs(), nsing, &pivot_ref, permute0, eps_piv, &fop); } nn = nn0; } diss_printf(_verbose, _fp, "%s %d : factorization with eps_piv = %g : dim kern = %d\n", __FILE__, __LINE__, eps_piv, nn); diss_printf(_verbose, _fp, "permute[] = "); for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d ", permute0[i]); } diss_printf(_verbose, _fp, "\n"); #if 0 diss_printf(_verbose, _fp, "%s %d : DSsingCoefs0[]\n", __FILE__, __LINE__); for (int i = 0; i < nsing; i++) { for (int j = 0; j < nsing; j++) { diss_printf(_verbose, _fp, "%s ", tostring(DSsingCoefs0(i, j)).c_str()); } diss_printf(_verbose, _fp, "\n"); } #endif if (nn == 0) { n0 = 0; } else { const int nsing1 = nn + dim_augkern; const int nsing0 = nsing - nsing1; DSsingCoefs1.init(nsing1, nsing1); if (nsing0 == 0) { DSsingCoefs1.copy(DSsingCoefs); } else{ // recompute smaller Schur complement with size nsing1 // nullify rows and columns for (int j = nsing0; j < nsing; j++) { for (int i = 0; i < nsing; i++) { DSsingCoefs0(i, j) = _zero; DSsingCoefs0(j, i) = _zero; } } // copy upper block for (int j = nsing0; j < nsing; j++) { for (int i = 0; i < nsing0; i++) { DSsingCoefs0(i, j) = DSsingCoefs(permute0[i], permute0[j]); } } if (_ptDA->isSymmetric()) { int ii = nsing0; for (int i = 0; i < nsing1; i++, ii++) { int jj = nsing0; for (int j = 0; j <= i; j++, jj++) { DSsingCoefs1(i, j) = DSsingCoefs(permute0[ii], permute0[jj]); } // symmetrize for (int j = 0; j < i; j++) { DSsingCoefs1(j, i) = DSsingCoefs1(i, j); } } } else { int ii = nsing0; for (int i = 0; i < nsing1; i++, ii++) { int jj = nsing0; for (int j = 0; j < nsing1; j++, jj++) 
{ DSsingCoefs1(i, j) = DSsingCoefs(permute0[ii], permute0[jj]); } } // copy lower block for (int j = 0; j < nsing0; j++) { for (int i = nsing0; i < nsing; i++) { DSsingCoefs0(i, j) = DSsingCoefs(permute0[i], permute0[j]); } } } blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, nsing0, nsing1, _one, // alpha DSsingCoefs0.addrCoefs(), nsing, DSsingCoefs0.addrCoefs() + (nsing0 * nsing), nsing); if (_ptDA->isSymmetric()) { for (int j = nsing0; j < nsing; j++) { for (int i = 0; i < nsing0; i++) { DSsingCoefs0(j, i) = DSsingCoefs0(i, j); } } } else { blas_trsm(CblasRight, CblasUpper, CblasNoTrans, CblasUnit, nsing1, nsing0, _one, // alpha DSsingCoefs0.addrCoefs(), nsing, DSsingCoefs0.addrCoefs() + nsing0, nsing); } // if (_ptDA->isSymmetric()) // scaling upper block by diagonal for (int j = nsing0; j < nsing; j++) { for (int i = 0; i < nsing0; i++) { DSsingCoefs0(i, j) *= DSsingCoefs0(i, i); } } blas_gemm(CblasNoTrans, CblasNoTrans, nsing1, nsing1, nsing0, _none, // alpha DSsingCoefs0.addrCoefs() + nsing0, nsing, DSsingCoefs0.addrCoefs() + (nsing0 * nsing), nsing, _one, // beta DSsingCoefs1.addrCoefs(), nsing1); if (_ptDA->isSymmetric()) { // symmetrize for (int j = 0; j < nsing1; j++) { for (int i = 0; i < j; i++) { // DSsingCoefs1[j + i * nsing1] = DSsingCoefs1[i + j * nsing1]; DSsingCoefs1(j, i) = DSsingCoefs1(i, j); } } } diss_printf(_verbose, _fp, "%s %d : DSsingCoefs1[]\n", __FILE__, __LINE__); for (int i = 0; i < nsing1; i++) { for (int j = 0; j < nsing1; j++) { diss_printf(_verbose,_fp, "%s ", tostring(DSsingCoefs1(i, j)).c_str()); } diss_printf(_verbose, _fp, "\n"); } } // if (nsing0 <= 0) bool flag; const U eps_machine = machine_epsilon(); DSsingCoefs3.copy(DSsingCoefs2); int n0n, n0t; flag = ComputeDimKernel(&n0n, &flag_unsym_permute, DSsingCoefs1.addrCoefs(), nsing1, _ptDA->isSymmetric(), dim_augkern, eps_machine, eps_piv, _verbose, _fp); flag = ComputeDimKernel(&n0t, &flag_unsym_permute, DSsingCoefs3.addrCoefs(), nsing1, _ptDA->isSymmetric(), 
dim_augkern, eps_machine, eps_piv, _verbose, _fp); n0 = n0t > n0n ? n0t : n0n; // n0 = n0n; if (flag == false) { fprintf(stderr, "%s %d : ERROR: kernel detection failed!\n", __FILE__, __LINE__); //exit(-1); } if (_assume_invertible) { n0 = 0; } } // if (nn > 0) delete [] permute0; get_realtime(&t3_elapsed); // clock_gettime(CLOCK_REALTIME, &ts3); diss_printf(_verbose, _fp, "%s %d : detection of the _dim. of the kernel (sec.) = %.6f\n", __FILE__, __LINE__, convert_time(t3_elapsed, t2_elapsed)); } // if (enableDetection) // LDLt factorization of DS0(i,j) vector permute, permute_left, permute_tright, permute_tleft; permute.resize(nsing); const double machine_eps_double = machine_epsilon(); const int n1 = nsing - n0; // flag_unsym_permute = false; Schur.setFullPivoting(flag_unsym_permute); if (flag_unsym_permute) { int nn0; double fop; double last_pivot = 1.0; permute_left.resize(nsing); ldu_full_permute(&nn0, n0, nsing, DSsingCoefs.addrCoefs(), nsing, &last_pivot, &permute[0], &permute_left[0], machine_eps_double, &fop); #if 0 last_pivot = 1.0; permute_tright.resize(nsing); permute_tleft.resize(nsing); ldu_full_permute(&nn0, n0, nsing, DSsingCoefs2.addrCoefs(), nsing, &last_pivot, &permute_tright[0], &permute_tleft[0], machine_eps_double, &fop); #endif n0 = nn0; } else { int nn0; double fop; double last_pivot = 1.0; if (_ptDA->isSymmetric()) { full_ldlt_permute(&nn0, n0, nsing, DSsingCoefs.addrCoefs(), nsing, &last_pivot, &permute[0], machine_eps_double, &fop); } else { full_ldu_permute(&nn0, n0, nsing, DSsingCoefs.addrCoefs(), nsing, &last_pivot, &permute[0], machine_eps_double, &fop); } n0 = nn0; permute_left = permute; } if (n0 != (nsing - n1)) { diss_printf(_verbose, _fp, "%s %d : factroization of suspicious pivots fails : %d -> %d\n", __FILE__, __LINE__, (nsing - n1), n0); } else { diss_printf(_verbose, _fp, "%s %d : pivot = %s after n0 = %d\n", __FILE__, __LINE__, flag_unsym_permute ? 
"full" : "symmetric", n0); } // keep offdiagonal part corresponding to the last Schur complement Schur.getScol().init(_dim, n1); for (int j = 0; j < n1; j++) { blas_copy(_dim, DBsCoefs.addrCoefs() + (permute[j] * _dim), 1, Schur.getScol().addrCoefs() + (j * _dim), 1); } if (n0 > 0) { Schur.getSchur().init(n1, n1); // copy with permutation for debugging for (int j = 0; j < n1; j++) { for (int i = 0; i < n1; i++) { Schur.getSchur()(i, j) = DS0(permute_left[i], permute[j]); } } // d23 = (S_22)^{-1} (-S_23) ColumnMatrix d23(nsing, n0); ColumnMatrix d32(nsing, n0); d23.ZeroClear(); for (int j = 0; j < n0; j++) { for (int i = 0; i < n1; i++) { d23(i, j) = DS0(permute_left[i], permute[j + n1]); } } d32.ZeroClear(); for (int j = 0; j < n0; j++) { for (int i = 0; i < n1; i++) { d32(i, j) = DS0(permute_left[j + n1], permute[i]); //d32(i, j) = DS2(permute_tleft[i], permute_tright[j + n1]); } } // alpha = 1.0; blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n1, n0, _one, DSsingCoefs.addrCoefs(), nsing, d23.addrCoefs(), nsing); { for (int i = 0; i < n1; i++) { const T stmp = DSsingCoefs(i, i); for (int j = 0; j < n0; j++) { d23(i, j) *= stmp; } } } blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, n1, n0, _one, DSsingCoefs.addrCoefs(), nsing, d23.addrCoefs(), nsing); for (int i = 0; i < n0; i++) { const int ii = i + n1; d23(ii, i) = _none; } blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, n1, n0, _one, DSsingCoefs.addrCoefs(), nsing, //blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, //n1, n0, _one, DSsingCoefs2.addrCoefs(), nsing, d32.addrCoefs(), nsing); { for (int i = 0; i < n1; i++) { const T stmp = DSsingCoefs(i, i); //const T stmp = DSsingCoefs2(i, i); for (int j = 0; j < n0; j++) { d32(i, j) *= stmp; } } } blas_trsm(CblasLeft, CblasLower, CblasTrans, CblasUnit, n1, n0, _one, DSsingCoefs.addrCoefs(), nsing, // blas_trsm(CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, // n1, n0, _one, DSsingCoefs2.addrCoefss(), nsing, d32.addrCoefs(), 
nsing); for (int i = 0; i < n0; i++) { const int ii = i + n1; d32(ii, i) = _none; } // generating the kernel vectors // keep the vectors from suspicious pivots kernel.getKernBasis().free(); // the 2nd allocation _dim * nsing -> _dim * n kernel.getKernBasis().init(_dim, n0); kernel.getTKernBasis().free(); // kernel.getTKernBasis().init(_dim, n0); T *kern_basis = kernel.getKernBasis().addrCoefs(); T *kernt_basis = kernel.getTKernBasis().addrCoefs(); ColumnMatrix d23_permuted(nsing, n0); #if 1 ColumnMatrix dtest(nsing, n0); // for debugging d23_permuted.ZeroClear(); for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { d23_permuted(permute[i], j) = d23(i, j); } } blas_gemm(CblasNoTrans, CblasNoTrans, nsing, n0, nsing, _one, DS0.addrCoefs(), nsing, d23_permuted.addrCoefs(), nsing, _zero, dtest.addrCoefs(), nsing); diss_printf(_verbose, _fp, "%s %d verify kernel : %d\n", __FILE__, __LINE__, n0); for (int j = 0; j < n0; j++) { U stmp = blas_l2norm(nsing, dtest.addrCoefs() + j * nsing, 1); for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d : %d %s %s\n", i, permute[i], tostring(d23_permuted(i, j)).c_str(), tostring(dtest[i + j * nsing]).c_str()); } diss_printf(_verbose, _fp, "%d %s\n", j, tostring(stmp).c_str()); } if (!_ptDA->isSymmetric()) { d23_permuted.ZeroClear(); for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { d23_permuted(permute_left[i], j) = d32(i, j); //d23_permuted(permute_tright[i], j) = d32(i, j); } } blas_gemm(CblasTrans, CblasNoTrans, nsing, n0, nsing, _one, DS0.addrCoefs(), nsing, //DS2.addrCoefs(), nsing, d23_permuted.addrCoefs(), nsing, _zero, dtest.addrCoefs(), nsing); diss_printf(_verbose, _fp, "%s %d verify transposed kernel : %d\n", __FILE__, __LINE__, n0); for (int j = 0; j < n0; j++) { U stmp = blas_l2norm(nsing, dtest.addrCoefs() + j * nsing, 1); for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d : %d %s %s\n", i, permute_left[i], //i, permute_tright[i], tostring(d23_permuted(i, j)).c_str(), 
tostring(dtest[i + j * nsing]).c_str()); } diss_printf(_verbose, _fp, "%d %s\n", j, tostring(stmp).c_str()); } } #endif // d23_permuted.ZeroClear(); if (flag_unsym_permute) { for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { d23_permuted(permute[i], j) = d23(i, j); } } // A11^-1 [ A_13 A_12] permute_right [S22^-1 S_23] - p_r [S22^-1 S_23] // [ -I ] [ -I ] kernel.getKernBasis().ZeroClear(); for (int i = 0; i < nsing; i++) { DBsCoefs(singIdx[i], i) = _zero; } blas_gemm(CblasNoTrans, CblasNoTrans, _dim, n0, nsing, _one, // alpha DBsCoefs.addrCoefs(), _dim, // d23_permuted.addrCoefs(), nsing, _zero, // beta kern_basis, _dim); for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { kernel.getKernBasis()(singIdx[i], j) = -d23_permuted(i, j); } } // compute transposed kernel d23_permuted.ZeroClear(); for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { d23_permuted(permute_left[i], j) = d32(i, j); //d23_permuted(permute_tright[i], j) = d32(i, j); } } // A11^-T [ A_31^T A_12^T] permute_l [S22^-T S_32^T] - p_l [S22^-T S_23^T] // [ -I ] [ -I ] kernel.getTKernBasis().ZeroClear(); for (int i = 0; i < nsing; i++) { DCsCoefs(singIdx[i], i) = _zero; } blas_gemm(CblasNoTrans, CblasNoTrans, _dim, n0, nsing, _one, // alpha DCsCoefs.addrCoefs(), _dim, // d23_permuted.addrCoefs(), nsing, _zero, // beta kernt_basis, _dim); for (int j = 0; j < n0; j++) { for (int i = 0; i < nsing; i++) { kernel.getTKernBasis()(singIdx[i], j) = -d23_permuted(i, j); } } // d23_permuted.free(); } else { // if (flag_unsym_permute) for (int i = 0; i < nsing; i++) { DBsCoefs(singIdx[i], i) = _none; } for (int j = 0; j < n0; j++) { for (int i = 0; i < _dim; i++) { kernel.getKernBasis()(i, j) = DBsCoefs(i, permute[j + n1]); } } // update the kernel using singular part of the Schur complement, d23 // kern_basis = - kern_basis - scol * d23 // - [ A11^-1 A_13] + [A_11^-1 A_12] [S22^-1 S_23] // [ 0 ] [ -I ] // [ -I ] [ 0 ] blas_gemm(CblasNoTrans, CblasNoTrans, _dim, n0, n1, _one, 
// alpha Schur.getScol().addrCoefs(), _dim, // none d23.addrCoefs(), nsing, _none, // beta kern_basis, _dim); // kernel of the transposed matrix ColumnMatrix DCsCoefs12(_dim, n1); for (int i = 0; i < nsing; i++) { DCsCoefs(singIdx[i], i) = _none; } for (int j = 0; j < n1; j++) { blas_copy(_dim, DCsCoefs.addrCoefs() + (permute[j] * _dim), 1, DCsCoefs12.addrCoefs() + (j * _dim), 1); } for (int j = 0; j < n0; j++) { for (int i = 0; i < _dim; i++) { kernel.getTKernBasis()(i, j) = DCsCoefs(i, permute[j + n1]); } } blas_gemm(CblasNoTrans, CblasNoTrans, _dim, n0, n1, _one, // alpha DCsCoefs12.addrCoefs(), _dim, // none d32.addrCoefs(), nsing, _none, // beta kernt_basis, _dim); DCsCoefs12.free(); } // if (flag_unsym_permute) // normalize each kernel_basis for (int j = 0; j < n0; j++) { U stmp(0.0); const U one(1.0); stmp = one / blas_l2norm(_dim, (kern_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kern_basis + (j * _dim)), 1); } diss_printf(_verbose, _fp, "%s %d : == ptDA * kernel\n", __FILE__, __LINE__); for (int j = 0; j < n0; j++) { _ptDA->prod((kern_basis + (j * _dim)), v.addrCoefs()); double norm_l2, norm_infty; calc_relative_norm(&norm_l2, &norm_infty, v.addrCoefs(), (kern_basis + (j * _dim)), _dim); diss_printf(_verbose, _fp, "%d -th kernel scaled : norm_l2 = %.6e, norm_infty = %.6e / ", j, norm_l2, norm_infty); if(_scaling) { calc_relative_normscaled(&norm_l2, &norm_infty, v.addrCoefs(), kern_basis + (j * _dim), &_precDiag[0], _dim); diss_printf(_verbose, _fp, "original : norm_l2 = %.6e, norm_infty = %.6e\n", norm_l2, norm_infty); } else { diss_printf(_verbose, _fp, "\n"); }// if (_scaling) for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d %d %s\n", i, singIdx[i], tostring(v(singIdx[i], j)).c_str()); } } if (!_ptDA->isSymmetric()) { for (int j = 0; j < n0; j++) { U stmp(0.0); const U one(1.0); stmp = one / blas_l2norm(_dim, (kernt_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kernt_basis + (j * _dim)), 1); } diss_printf(_verbose, _fp, "%s %d : 
== ptDA^T * transposed kernel\n", __FILE__, __LINE__); for (int j = 0; j < n0; j++) { _ptDA->prodt((kernt_basis + (j * _dim)), v.addrCoefs()); double norm_l2, norm_infty; calc_relative_norm(&norm_l2, &norm_infty, v.addrCoefs(), (kernt_basis + (j * _dim)), _dim); diss_printf(_verbose, _fp, "%d -th scaled : norm_l2 = %.6e, norm_infty = %.6e / ", j, norm_l2, norm_infty); if(_scaling) { calc_relative_normscaled(&norm_l2, &norm_infty, v.addrCoefs(), kernt_basis + (j * _dim), &_precDiag[0], _dim); diss_printf(_verbose, _fp, "original : norm_l2 = %.6e, norm_infty = %.6e\n", norm_l2, norm_infty); } else { diss_printf(_verbose, _fp, "\n"); } diss_printf(_verbose, _fp, "%s %d\n", __FILE__, __LINE__); for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d %d %s\n", i, singIdx[i], tostring(v(singIdx[i], j)).c_str()); } } } // if (!_ptDA->isSymmetric()) if(_scaling) { for (int j = 0; j < n0; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kern_basis[i + jtmp] *= _precDiag[i]; } } for (int j = 0; j < n0; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kernt_basis[i + jtmp] *= _precDiag[i]; } } } #ifdef NORMALIZE_KERNEL_BASIS // normalize each kernel_basis for (int j = 0; j < n0; j++) { U stmp(0.0); const U one(1.0); stmp = one / blas_l2norm(_dim, (kern_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kern_basis + (j * _dim)), 1); stmp = one / blas_l2norm(_dim, (kernt_basis + (j * _dim)), 1); blas_scal2(_dim, stmp, (kernt_basis + (j * _dim)), 1); } #endif kernel.getKernProj().free(); // second alloctation : nsing -> n0 kernel.set_dimension(n0); kernel.getKernProj().init(n0); kernel.getTKernProj().free(); // second alloctation : nsing -> n0 if (!_ptDA->isSymmetric()) { kernel.getTKernProj().init(n0); kernel.getNTKernProj().init(n0); } for (int i = 0; i < n0; i++) { for(int j = i; j < n0; j++) { kernel.getKernProj()(i, j) = blas_dot(_dim, (kern_basis + (i * _dim)), 1, (kern_basis + (j * _dim)), 1); } // symmetrize for (int j = (i + 
1); j < n0; j++) { kernel.getKernProj()(j, i) = kernel.getKernProj()(i, j); } } full_ldlh(n0, kernel.getKernProj().addrCoefs(), n0); // inverse of diagonal part is also storead in the factorized matrix if (!_ptDA->isSymmetric()) { for (int i = 0; i < n0; i++) { for(int j = i; j < n0; j++) { kernel.getTKernProj()(i, j) = blas_dot(_dim, (kernt_basis + (i * _dim)), 1, (kernt_basis + (j * _dim)), 1); } // symmetrize for (int j = (i + 1); j < n0; j++) { kernel.getTKernProj()(j, i) = kernel.getTKernProj()(i, j); } } full_ldlh(n0, kernel.getTKernProj().addrCoefs(), n0); // for oblique projection for (int i = 0; i < n0; i++) { for(int j = 0; j < n0; j++) { kernel.getNTKernProj()(i, j) = blas_dot(_dim, (kernt_basis + (i * _dim)), 1, (kern_basis + (j * _dim)), 1); } } diss_printf(_verbose, _fp, "%s %d : orthogonality matrix kernels of A and A^T\n", __FILE__, __LINE__); for (int i = 0; i < n0; i++) { diss_printf(_verbose, _fp, "%d : ", i); for(int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.16e ", blas_abs(kernel.getNTKernProj()(i, j))); } diss_printf(_verbose, _fp, "\n"); } full_ldu(n0, kernel.getNTKernProj().addrCoefs(), n0); } } // if (n0 > 0) else { kernel.set_dimension(0); } // diss_printf(_verbose, _fp, "%s %d : nsing = %d dim_augkern = %d n1 = %d n0 = %d\n", __FILE__, __LINE__, nsing, dim_augkern, n1, n0); // for some applications which need index of singular entries kernel.setFullPivoting(flag_unsym_permute); kernel.getKernListEq().resize(n0); kernel.getKernListEqLeft().resize(n0); for (int i = 0; i < n0; i++) { kernel.getKernListEq()[i]= singIdx[permute[i + n1]]; kernel.getKernListEqLeft()[i]= singIdx[permute_left[i + n1]]; } diss_printf(_verbose, _fp, "%s %d : kern_list_eq[] = ", __FILE__, __LINE__); for (int i = 0; i < kernel.getKernListEq().size(); i++) { diss_printf(_verbose,_fp, "%d ", kernel.getKernListEq()[i]); } diss_printf(_verbose, _fp, "\n"); if (flag_unsym_permute) { diss_printf(_verbose, _fp, "%s %d : kern_list_eq_left[] = ", __FILE__, __LINE__); 
for (int i = 0; i < kernel.getKernListEqLeft().size(); i++) { diss_printf(_verbose,_fp, "%d ", kernel.getKernListEqLeft()[i]); } diss_printf(_verbose, _fp, "\n"); } if (n1 == dim_augkern && (!flag_unsym_permute)) { diss_printf(_verbose, _fp, "%s %d dimension of detected kernel == suspicous one = %d\n", __FILE__, __LINE__, n0); { int i = 0; typename vector* >::const_iterator kt = diags.begin(); for (vector >::const_iterator it = augkern_indexes.begin(); it != augkern_indexes.end(); ++it, ++kt) { SquareBlockMatrix &diag = *(*kt); const int n0_diag = diag.rank(); for (vector::const_iterator jt = (*it).begin(); jt != (*it).end(); ++jt, i++) { diag.diag((*jt)) = save_diag[i]; // i++ } diag.set_rank(n0_diag + (*it).size()); // restore } } Schur.getSlduList().resize(0); } else { // if (n1 == dim_augkern) { Schur.getSlduList().resize(n1); Schur.getSlduListLeft().resize(n1); for (int i = 0; i < n1; i++) { Schur.getSlduList()[i] = permute[i]; Schur.getSlduListLeft()[i] = permute_left[i]; } vector s_list_eq, s_list_eq_left; s_list_eq.resize(n1); s_list_eq_left.resize(n1); // local2global index for the last Schur complement for (int i = 0; i < n1; i++) { s_list_eq[i] = singIdx[permute[i]]; s_list_eq_left[i] = singIdx[permute_left[i]]; } diss_printf(_verbose, _fp, "%s %d : s_list_eq[] = ", __FILE__, __LINE__); for (int i = 0; i < s_list_eq.size(); i++) { diss_printf(_verbose, _fp, "%d ", s_list_eq[i]); } diss_printf(_verbose, _fp, "\n"); if (flag_unsym_permute) { diss_printf(_verbose, _fp, "%s %d : s_list_eq_left[] = ", __FILE__, __LINE__); for (int i = 0; i < s_list_eq_left.size(); i++) { diss_printf(_verbose, _fp, "%d ", s_list_eq_left[i]); } diss_printf(_verbose, _fp, "\n"); } Schur.getSldu().init(flag_unsym_permute, s_list_eq, s_list_eq_left); // copy regular part n1*n1 from nsing*nsing matrix DSsingCoefs[] for (int j = 0; j < n1; j++) { blas_copy(n1, DSsingCoefs.addrCoefs() + j * nsing, 1, Schur.getSldu().addrCoefs() + j * n1, 1); } // <---- n1 ----> : given by pemrute[j] 
// [ A_11^-1 A_12 ] -> [ A_11^-1 A_12 ] // [ -I ] [ 0 ] : singIdx[permute[i]] // [ 0 ] [ 0 ] : 0<= i < nsing for (int i = 0; i < n1; i++) { Schur.getScol()(s_list_eq[i], i) = _zero; } } // if (n1 == dim_augkern) { // delete ptDA_acol; // 28 Dec.2015 // delete [] save_diag; // delete [] DBsCoefs; // delete [] DBsCoefs0; // delete [] DSsingCoefs; // delete [] DSsingCoefs0; // delete [] DS0; // delete [] DS2; } template void DissectionSolver:: BuildKernelsDetection(int &n0, vector &singIdx, vector > &augkern_indexes, vector* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix &Schur, KernelMatrix &kernel, const bool enableDetection); template void DissectionSolver, double>:: BuildKernelsDetection(int &n0, vector &singIdx, vector >& augkern_indexes, vector >* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix > &Schur, KernelMatrix > &kernel, const bool enableDetection); template void DissectionSolver:: BuildKernelsDetection(int &n0, vector &singIdx, vector > &augkern_indexes, vector* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix &Schur, KernelMatrix &kernel, const bool enableDetection); template void DissectionSolver, quadruple>:: BuildKernelsDetection(int &n0, vector &singIdx, vector >& augkern_indexes, vector >* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix > &Schur, KernelMatrix > &kernel, const bool enableDetection); template void DissectionSolver:: BuildKernelsDetection(int &n0, vector &singIdx, vector > &augkern_indexes, vector* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix &Schur, KernelMatrix &kernel, const bool enableDetection); template void DissectionSolver, float>:: BuildKernelsDetection(int &n0, vector &singIdx, vector >& augkern_indexes, vector >* >& diags, const double eps_piv, const int dim_augkern, SchurMatrix > &Schur, KernelMatrix > &kernel, const bool enableDetection); // template int DissectionSolver:: kern_dimension(void) { int itmp = 0; for (int m = 0; 
m < _graph_colors; m++) { itmp += _kernel[m].dimension(); } return itmp; } // template int DissectionSolver::kern_dimension(void); template int DissectionSolver::kern_dimension(void); template int DissectionSolver, double>::kern_dimension(void); template int DissectionSolver, quadruple>::kern_dimension(void); template int DissectionSolver::kern_dimension(void); template int DissectionSolver::kern_dimension(void); template int DissectionSolver::kern_dimension(void); template int DissectionSolver, float>::kern_dimension(void); // // template int DissectionSolver:: postponed_pivots(void) { int itmp = 0; for (int m = 0; m < _graph_colors; m++) { itmp += _singIdx[m].size(); } return itmp; } // template int DissectionSolver::postponed_pivots(void); template int DissectionSolver::postponed_pivots(void); template int DissectionSolver, double>::postponed_pivots(void); template int DissectionSolver, quadruple>::postponed_pivots(void); template int DissectionSolver::postponed_pivots(void); template int DissectionSolver::postponed_pivots(void); template int DissectionSolver::postponed_pivots(void); template int DissectionSolver, float>::postponed_pivots(void); // template int DissectionSolver:: ComputeTransposedKernels(void) { // int n0_max = 0; if (_ptDA->isSymmetric()) { diss_printf(_verbose, _fp, "%s %d : ComputeTransposedKernels : matrix is symmetric\n", __FILE__, __LINE__); return (-1); } for (int m = 0; m < _graph_colors; m++) { int n0 = _kernel[m].dimension(); #if 0 if (!_tridiagQueue[m]->tridiagSolver()) { _kernel[m].getTKernBasis().init(_dim, n0); } #endif _kernel[m].getTKernProj().init(n0); _kernel[m].getNTKernProj().init(n0); } for (int m = 0; m < _graph_colors; m++) { int n0 = _kernel[m].dimension(); T *kernt_basis = _kernel[m].getTKernBasis().addrCoefs(); T *kernn_basis = _kernel[m].getKernBasis().addrCoefs(); #if 0 if (!_tridiagQueue[m]->tridiagSolver()) { _kernel[m].getTKernBasis().ZeroClear(); for (int i = 0; i < n0; i++) { const int ii = 
_kernel[m].getKernListEq()[i]; for (int k = _ptDA->ptRow(ii); k < _ptDA->ptRow(ii + 1); k++) { _kernel[m].getTKernBasis()(_ptDA->indCol(k), i) = _ptDA->Coef(k); } } for (int j = 0; j < n0; j++) { for (int i = 0; i < n0; i++) { _kernel[m].getTKernBasis()(_kernel[m].getKernListEq()[i], j) = _zero; } } if (n0 > 1) { SolveMulti(kernt_basis, n0, false, true, false); } else { // porjection, isTrans, isScaling SolveSingle(kernt_basis, false, true, false); } // nullify singular block for (int j = 0; j < n0; j++) { for (int i = 0; i < n0; i++) { _kernel[m].getTKernBasis()(_kernel[m].getKernListEqLeft()[i], j) = _zero; } } // set -I on the diagonal entries of singular block for (int i = 0; i < n0; i++) { _kernel[m].getTKernBasis()(_kernel[m].getKernListEqLeft()[i], i) = _none; } if(_scaling) { for (int j = 0; j < n0; j++) { const int jtmp = j * _dim; for (int i = 0; i < _dim; i++) { kernt_basis[i + jtmp] *= _precDiag[i]; } } } } // if (_tridiagQueue[m]->tridiagSolver()) else { diss_printf(_verbose, _fp, "%s %d : transposed kernel from tridiag_kernelt_basis()\n", __FILE__, __LINE__); } #endif VectorArray vn(_dim); VectorArray vt(_dim); diss_printf(_verbose, _fp, "%s %d : check kern_basis belonging to the kernel of A\n", __FILE__, __LINE__); for (int j = 0; j < n0; j++) { SpMV(&kernn_basis[j * _dim], vn.addrCoefs()); //&vn[j][0]); double norm_l2, norm_infty; calc_relative_norm(&norm_l2, &norm_infty, vn.addrCoefs(), //&vn[j][0], &kernn_basis[j * _dim], _dim); diss_printf(_verbose, _fp, "%s %d : %d-th : l2_norm = %.6e, infty_norm = %.6e\n", __FILE__, __LINE__, j, norm_l2, norm_infty); } diss_printf(_verbose, _fp, "%s %d : check kern_basis belonging to the kernel of A^T\n", __FILE__, __LINE__); for (int j = 0; j < n0; j++) { SpMtV(&kernt_basis[j * _dim], vt.addrCoefs()); double norm_l2, norm_infty; calc_relative_norm(&norm_l2, &norm_infty, vt.addrCoefs(), //&vt[j][0], &kernt_basis[j * _dim], _dim); diss_printf(_verbose, _fp, "%s %d : %d-th : l2_norm = %.6e, infty_norm = %.6e\n", 
__FILE__, __LINE__, j, norm_l2, norm_infty); } // loop : j vn.free(); vt.free(); #ifdef NORMALIZE_KERNEL_BASIS2 // normalize each kernel_basis for (int j = 0; j < n0; j++) { U stmp(0.0); const U one(1.0); stmp = blas_l2norm(_dim, (kernt_basis + (j * _dim)), 1); stmp = one / stmp; blas_scal2(_dim, stmp, (kernt_basis + (j * _dim)), 1); } #endif for (int i = 0; i < n0; i++) { for(int j = i; j < n0; j++) { _kernel[m].getTKernProj()(i, j) = blas_dot(_dim, (kernt_basis + (i * _dim)), 1, (kernt_basis + (j * _dim)), 1); } // symmetrize for (int j = (i + 1); j < n0; j++) { _kernel[m].getTKernProj()(j, i) = _kernel[m].getTKernProj()(i, j); } } full_ldlh(n0, _kernel[m].getTKernProj().addrCoefs(), n0); // for oblique projection for (int i = 0; i < n0; i++) { for(int j = 0; j < n0; j++) { _kernel[m].getNTKernProj()(i, j) = blas_dot(_dim, (kernt_basis + (i * _dim)), 1, (kernn_basis + (j * _dim)), 1); } } diss_printf(_verbose, _fp, "%s %d : orthogonality matrix kernels of A and A^T : %d\n", __FILE__, __LINE__, m); for (int i = 0; i < n0; i++) { diss_printf(_verbose, _fp, "%d : ", i); for(int j = 0; j < n0; j++) { diss_printf(_verbose, _fp, "%.16e ", blas_abs(_kernel[m].getNTKernProj()(i, j))); } diss_printf(_verbose, _fp, "\n"); } full_ldu(n0, _kernel[m].getNTKernProj().addrCoefs(), n0); } // loop : m return 1; } template int DissectionSolver::ComputeTransposedKernels(void); template int DissectionSolver, double>::ComputeTransposedKernels(void); template int DissectionSolver::ComputeTransposedKernels(void); template int DissectionSolver, quadruple>:: ComputeTransposedKernels(void); template int DissectionSolver::ComputeTransposedKernels(void); template int DissectionSolver, float>::ComputeTransposedKernels(void); FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionSolver.hpp000664 000000 000000 00000024301 14037356732 026320 0ustar00rootroot000000 000000 /*! 
\file DissectionSolver.hpp \brief task management of dissection algorithm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _DISSECTION_SOLVER_ #define _DISSECTION_SOLVER_ #include #include #include "Compiler/OptionCompiler.hpp" #include "Driver/DissectionVersion.hpp" #include "Driver/C_threads_tasks.hpp" #include "Driver/DissectionMatrix.hpp" #include "Driver/DissectionQueue.hpp" #include "Driver/TridiagBlockMatrix.hpp" #include "Driver/TridiagQueue.hpp" #include "Algebra/SparseMatrix.hpp" #include "Driver/DissectionDefault.hpp" // #include #include #include #include "Splitters/BisectionTree.hpp" #ifdef BLAS_MKL #include "mkl_lapack.h" #include "mkl_types.h" #endif # include #include using std::vector; using std::list; void SaveMMMatrix_(const int dim, const int nnz, const bool isSymmetric, const bool isUpper, const int *ptrows, const int *indcols, const int called, const double *coefs_); void SaveMMMatrix_(const int dim, const int nnz, const bool isSymmetric, const bool isUpper, const int *ptrows, const int *indcols, const int called, const complex *coefs_); // example on usage of template // T : complex, U : double, W : complex, Z : quadruple // T : complex, U : quadruple, W : complex, Z : double // T : complex, U : quadruple, W = T, Z = U // T : quadruple, U = T, W = T, Z = U template class DissectionSolver { public: DissectionSolver(int num_threads, bool verbose, int called, FILE *fp) : _precDiag(NULL), _num_threads(num_threads), _status_factorized(false), _called(called) { if (fp == NULL) { _verbose = false; } else { _verbose = verbose; _fp = fp; } } ~DissectionSolver() { Destroy(); } std::string 
Version(void) { string version = (to_string(DISSECTION_VERSION) + "." + to_string(DISSECTION_RELEASE) + "." + to_string(DISSECTION_PATCHLEVEL)); return version; } void SaveCSRMatrix(const int called, const T *coefs); void SaveMMMatrix(const int called, const T *coefs); void Destroy(void); void NumericFree(void); void SymbolicFact_(const int dim_, const int nz_, const bool flagint64, const int *ptRows, const int *indCols, const long long int *ptRows64, const long long int *indCols64, const bool sym, const bool upper, const bool isWhole, const int decomposer, const int nbLevels, const int minNodes); void SymbolicFact(const int dim, const int *ptRows, const int *indCols, const bool sym, const bool upper, const bool isWhole = false, const int decomposer = SCOTCH_DECOMPOSER, const int nbLevels = (-1), const int minNodes = MINNODES); void SymbolicFact(const int dim, const long long int *ptRows64, const long long int *indCols64, const bool sym, const bool upper, const bool isWhole = false, const int decomposer = SCOTCH_DECOMPOSER, const int nbLevels = (-1), const int minNodes = MINNODES); void NumericFact(const int called, T *coefs, const int scaling = KKT_SCALING, //useful for the Stokes eqs. 
const double eps_pivot = EPS_PIVOT, const bool kernel_detection_all = false, const int dim_augkern = DIM_AUG_KERN, const double machine_eps_ = -1.0, const bool assume_invertible = false, const bool higher_precision = false); bool getFactorized(void) { return _status_factorized; }; void CopyQueueFwBw(DissectionSolver &qdslv); int GetMaxColors(); void GetNullPivotIndices(int *pivots); void GetSmallestPivotIndices(const int n, int *pivots); void GetKernelVectors(T *kern_basis); void GetTransKernelVectors(T *kernt_basis); void GetMatrixScaling(Z *weight); void ProjectionImageSingle(T *x, string name = string("")); void ProjectionKernelOrthSingle(T *x, string name = string(""), bool isTrans = false); void ProjectionImageMulti(T *x, int nrhs); void SpMV(const T *x, T *y, bool scaling_flag = true); void SpMtV(const T *x, T *y, bool scaling_flag = true); void SolveScaled(T *x, int nrhs, bool isTrans); void SolveScaledRefinement(T *x, int nrhs, vector &singIdx, bool isTrans); void QueueFwBw(T **x, int *nrhs); void SolveSingle(T *x, bool projection, bool isTrans, bool isScaling, const int nexcls = 0); void SolveSingle2(T *x, bool isScaling, bool pseudo = false); void SolveMulti(T *x, int nrhs, bool projection, bool isTrans, bool isScaling, const int nexcls = 0); void SolveSingleDebug(T *x, T* y); void BuildSingCoefs(T *DSsingCoefs, SparseMatrix *DCsCoefs, T *DBsCoefs, vector &singIdx, const bool isTrans = false); void BuildKernels(vector &singIdx_, int n2, SchurMatrix &Schur, KernelMatrix &kernel); void BuildKernelsDetection(int &n0, vector &singIdx, vector >& augkern_indexes, vector* >& diags, const double eps, const int dim_augkern, SchurMatrix &Schur, KernelMatrix &kernel, const bool enableDetection = true); Dissection::Tree** btree() { return _btree; } SparseMatrix* ptDA() { return _ptDA; } int dimension(void) { return _dim; } int kern_dimension(void); int postponed_pivots(void); int ComputeTransposedKernels(void); int get_num_threads(void) { return _num_threads; } int 
scaling(void) const { return _scaling; } bool verbose(void) const { return _verbose; } int num_threads(void) const { return _num_threads; } bool isScaled(void) { return (_scaling > 0 ? true : false); } int nsing(void) const { return _nsing; } void SetNsing(int nsing) { _nsing = nsing; } int graph_colors() const { return _graph_colors; } FILE* get_filedescriptor() { return _fp; } U* addrPrecDiag() { return _precDiag; } DissectionQueue** getDissectionQueue() { return _dissectionQueue; } TridiagQueue** getTridiagQueue() { return _tridiagQueue; } vector* >* getDissectionMatrix() { return _dissectionMatrix; } TridiagBlockMatrix** getTridiagBlockMatrix() { return _tridiagMatrix; } KernelMatrix* getKernelMatrix() { return _kernel; } vector* getSingVal() { return _singIdx; } vector& getIndexIsolated() { return _index_isolated; } DissectionSolver(const DissectionSolver &s) { _ptDA = s._ptDA; _btree = s._btree; _dissectionMatrix = s._dissectionMatrix; _dissectionQueue = s._dissectionQueue; _kernel = s._kernel; _singIdx = s._singIdx; _precDiag = s._precDiag; _scaling = s._scaling; _verbose = s._verbose; _num_threads = s._num_threads; } private: SparseMatrix* _ptDA; Dissection::Tree** _btree; vector* >* _dissectionMatrix; DissectionQueue** _dissectionQueue; TridiagBlockMatrix** _tridiagMatrix; TridiagQueue** _tridiagQueue; SchurMatrix* _Schur; KernelMatrix* _kernel; vector* _singIdx; U* _precDiag; vector _index_isolated; int _scaling; bool _verbose; int _num_threads; bool _assume_invertible; bool _status_factorized; int _dim; int _graph_colors; int _dim_augkern; int _nsing; int _called; FILE *_fp; bool _with_btree; static const T _one; // (1.0); static const T _zero; // (0.0); static const T _none; // (-1.0); }; // End class DissictionSolver #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionVersion.cpp000664 000000 000000 00000005353 14037356732 026474 0ustar00rootroot000000 000000 /*! 
\file DissectionVersion.cpp \brief definition of default value for factorization \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 24th 2015 \date Sep. 29th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "DissectionVersion.hpp" void DissectionVersion ( int * const vern, int * const rels, int * const ptch) { *vern = DISSECTION_VERSION; *rels = DISSECTION_RELEASE; *ptch = DISSECTION_PATCHLEVEL; } FreeFem-sources-4.9/3rdparty/dissection/src/Driver/DissectionVersion.hpp000664 000000 000000 00000005405 14037356732 026477 0ustar00rootroot000000 000000 /*! \file DissectionDefault.hpp \brief definition of default value for factorization \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 24th 2015 \date Sep. 29th 2015 \date Nov. 30th 2016 \date Oct. 15th 2017 \date Apr. 24th 2018 \date Jun. 15th 2018 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. 
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _DRIVER_DISSECTIONVERSION_HPP # define _DRIVER_DISSECTIONVERSION_HPP #include #define DISSECTION_VERSION 1 #define DISSECTION_RELEASE 2 #define DISSECTION_PATCHLEVEL 2 #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/Make.inc000664 000000 000000 00000005510 14037356732 023661 0ustar00rootroot000000 000000 #! \file Make.inc # \brief source files in Driver/ directory and depending libraries # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 
30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. 
# # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # OBJS = $(SOURCESCPP:%.cpp=%.o) $(SOURCESF77:%.for=%.o) $(SOURCESF90:%.f90=%.o) all: $(OBJS) .SUFFIXES: .for .f90 .cpp .cpp.o: $(CXX) $(CCFLAGS) -c $< -o $@ %.d: %.cpp $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.o *.so core *.d *.mod *.a *.i90 FreeFem-sources-4.9/3rdparty/dissection/src/Driver/QueueRuntime.cpp000664 000000 000000 00000510713 14037356732 025453 0ustar00rootroot000000 000000 /*! \file QueueRuntime.cpp \brief management of threads for Dissection Matrix \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 \date Apr. 17th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #include #include "Driver/QueueRuntime.hpp" #include "Compiler/DissectionIO.hpp" #define RATIO_STATIC 0.7 // for slow DGEMM' #define RATIO_QUEUE_GREEDY 0.8 //#define DEBUG_THREAD_TIME //#define DEBUG_EXEC_THREAD #define DEBUG_DEADLOCK // DEBUG_DEADLOCK only works without setting DEBUG_EXEC_THREAD_IDLE #ifdef DEBUG_DEADLOCK #ifdef DEBUG_EXEC_THREAD_IDLE #undef DEBUG_EXEC_THREAD_IDLE #endif // DEBUG_EXEC_THREAD_IDLE #endif // DEBUG_DEADLOCK // constructor QueueRuntime::QueueRuntime(int nb_doms, int num_threads, const bool isSym, const bool verbose, FILE *fp) { _nb_doms = nb_doms; _num_threads = num_threads; _isSym = isSym; _verbose = verbose; _fp = fp; _zone_entered = new int[DIST_TASK_CRITICAL]; _zone_finished = new int[DIST_TASK_CRITICAL]; _zone_static_assigned = new int[DIST_TASK_CRITICAL]; allocate_int2d(_begins, num_threads); allocate_int2d(_ends, num_threads); allocate_int3d(_group_entered, num_threads); allocate_int3d(_group_finished, num_threads); allocate_int3d(_group_task_ends, num_threads); allocate_int3d(_group_static_assigned, num_threads); _group_task_id = new int[num_threads]; allocate_int3d(_begins_group, num_threads); allocate_int3d(_ends_group, num_threads); allocate_unsigned2d(_group_nops, num_threads); _mutex_group = new QueueRuntime_mutex[num_threads]; } QueueRuntime::~QueueRuntime() { // initialize state to controle tasks delete [] _zone_entered; delete [] _zone_finished; delete [] _zone_static_assigned; deallocate_int2d(_begins); deallocate_int2d(_ends); deallocate_int3d(_group_entered); deallocate_int3d(_group_finished); deallocate_int3d(_group_task_ends); deallocate_int3d(_group_static_assigned); deallocate_int3d(_begins_group); deallocate_int3d(_ends_group); deallocate_unsigned2d(_group_nops); delete [] _group_task_id; delete [] _mutex_group; } void allocate_int2d(int **&array, const int num_threads) { array = new int*[num_threads]; array[0] = new int[num_threads * DIST_TASK_CRITICAL]; int p, pp; p = 1; pp = DIST_TASK_CRITICAL; for ( ; p 
< num_threads; p++, pp += DIST_TASK_CRITICAL) { array[p] = &array[0][pp]; } } void allocate_unsigned2d(long **&array, const int num_threads) { array = new long*[num_threads]; array[0] = new long[num_threads * DIST_TASK_CRITICAL]; int p, pp; p = 1; pp = DIST_TASK_CRITICAL; for ( ; p < num_threads; p++, pp += DIST_TASK_CRITICAL) { array[p] = &array[0][pp]; } } void allocate_int3d(int ***&array, const int num_threads) { array = new int**[num_threads]; array[0] = new int*[num_threads * DIST_TASK_CRITICAL]; array[0][0] = new int[(num_threads * DIST_TASK_CRITICAL * DIST_TASK_CRITICAL)]; int p, q, pp; p = 1; pp = DIST_TASK_CRITICAL; for ( ; p < num_threads; p++, pp += DIST_TASK_CRITICAL) { array[p] = &array[0][pp]; } p = 0; pp = 0; for ( ; p < num_threads; p++, pp += DIST_TASK_CRITICAL) { for (q = 0; q < DIST_TASK_CRITICAL; q++) { if (!((p == 0) && (q == 0))) { array[p][q] = &array[0][0][(q + pp) * DIST_TASK_CRITICAL]; } } } } void deallocate_int2d(int **&array) { delete [] array[0]; delete [] array; } void deallocate_unsigned2d(long **&array) { delete [] array[0]; delete [] array; } void deallocate_int3d(int ***&array) { delete [] array[0][0]; delete [] array[0]; delete [] array; } int IndexTask(vector &queue, int pos) { int j = 0; int pos1 = pos > queue.size() ? queue.size() : pos; for (int i = 0; i < pos1; i++) { j += queue[j]->atomic_size; } return j; } // #define DEBUG_NULL_TASK void copytask_list2seq(list &queue_static, list queue_lists, list &queue_null, // list &queue_dummy, string task_name, int task_id, int mutex_id, int parallel_single, int num_threads, int level, int phase) { // #define DEBUG_ASSIGN_TASK #ifdef DEBUG_ASSIGN_TASK cerr << endl << "** " << task_name << " ** " << queue_lists.size() << endl; #endif int num_tasks = 0; for (list::const_iterator it = queue_lists.begin(); it != queue_lists.end(); ++it) { // int itmp = 0; for (int j = (*it)->begin; j < (*it)->end; j++) { if (*((*(*it)->queue)[j]->ops_complexity) != 0L) {//cf. 
-1L : 16 Sep.2014 num_tasks++; // itmp++; } else { (*(*it)->queue)[j]->status = TASK_DONE; #ifdef DEBUG_NULL_TASK fprintf(stderr, "%s %d : copytask_list2seq() null task = %s\n", __FILE__, __LINE__, (*(*it)->queue)[j]->task_name); #endif queue_null.push_back((*(*it)->queue)[j]); } } #ifdef DEBUG_ASSIGN_TASK cerr << (*it)->task_id << " [ " << (*it)->end << " " << (*it)->begin << " ] "; #endif } #ifdef DEBUG_ASSIGN_TASK cerr << endl; #endif vector *tasks_tmp = new vector; long nops = 0L; tasks_tmp->resize(num_tasks); // tasks_tmp->reserve(num_tasks); int k = 0; for (list::const_iterator it = queue_lists.begin(); it != queue_lists.end(); ++it) { for (int j = (*it)->begin; j < (*it)->end; j++) { if (*((*(*it)->queue)[j]->ops_complexity) != 0L) { //cf. -1L : 16 Sep.2014 (*tasks_tmp)[k++] = (*(*it)->queue)[j]; if (*((*(*it)->queue)[j]->ops_complexity) > 0L) { nops += *((*(*it)->queue)[j]->ops_complexity); } } } } C_task_seq* seq_tmp = new C_task_seq(task_id, task_name, mutex_id, parallel_single, num_threads, level, phase, tasks_tmp, 0, // begin num_tasks, // end nops); queue_static.push_back(seq_tmp); } void task_assign_diag1(list * &queue_static, vector* &tasks_queue, list &queue_null, // list &queue_dummy, vector &nrow_DFullLDLt, vector &isMergedDTRSM, string queue_symbol, int queue_id, int level_id, int num_threads, int level_last, int level, int begdom, int enddom, long *nops_sum, vector starts, list* &queue_lists, bool queue_lists_clear, Dissection::Tree* btree, vector* children) { int num_tmp_shared; list tmp_dynamic; list task_seq_tmp; long nops_block_total; vector *tmp_shared = new vector; if (queue_lists_clear) { for (int p = 0; p < num_threads; p++) { #if 1 for (list::iterator it = queue_lists[p].begin(); it != queue_lists[p].end(); ++it) { delete (*it); (*it) = NULL; } #endif queue_lists[p].clear(); } } task_seq_tmp.clear(); nops_block_total = 0L; for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const 
int jc = children[j][ic]; if ((queue_id == C_DTRSM) && (nrow_DFullLDLt[jc] > 2)) { continue; } long nops = 0L; const int begin = starts[jc]; int end; if (queue_id == C_DTRSM) { if (!isMergedDTRSM[j]) { // looking the father node end = IndexTask(tasks_queue[jc], tasks_queue[jc][0]->parallel_max); } else { end = tasks_queue[jc].size(); } } else { int itmp = tasks_queue[jc][0]->parallel_max; int jtmp = tasks_queue[jc].size(); // if (itmp < (_isSym ? 3 : 4)) { if (!isMergedDTRSM[j]) { end = itmp; } else { if (itmp == jtmp) { end = jtmp; } else { end = itmp + tasks_queue[jc][itmp]->parallel_max; } } } if (level == 0) { end = (int)((double)end * RATIO_STATIC); const int ktmp = tasks_queue[jc][end]->atomic_id; end -= ktmp; } for (int i = begin; i < end; i++) { nops += *(tasks_queue[jc][i]->ops_complexity); } string task_name = to_string(jc); if (nops > 0L) { C_task_seq* tmp = new C_task_seq(queue_id, task_name, // for debugging (-1), // mutex_id TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + level_id, &tasks_queue[jc], begin, end, nops); task_seq_tmp.push_back(tmp); nops_block_total += nops; } else { for (int i = begin; i < end; i++) { queue_null.push_back(tasks_queue[jc][i]); } } } // loop : ic } // loop : j num_tmp_shared = 0; tmp_dynamic.clear(); if (task_seq_tmp.size() > 0) { task_seq_tmp.sort(C_task_seq_complexity_greater); string queue_name; if (level == 0) { queue_name = queue_symbol + "% "; } else { queue_name = queue_symbol + "* "; } assign_tasks_statically(queue_lists, tmp_dynamic, nops_sum, task_seq_tmp, queue_id, queue_name.c_str(), (level + 1), (level_last - 1 - level) * 6 + level_id, nops_block_total, num_threads); // task_seq_tmp.clear(); moved in assign_tasks_statically if (tmp_dynamic.size() > 0) { num_tmp_shared += tmp_dynamic.size(); } } if (level == 0) { for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; int parallel_max; if ((queue_id == C_DTRSM) && 
(nrow_DFullLDLt[jc] > 2)) { continue; } if (queue_id == C_DTRSM) { if (!isMergedDTRSM[j]) { // looking father node parallel_max = IndexTask(tasks_queue[jc], tasks_queue[jc][0]->parallel_max); } else { parallel_max = tasks_queue[jc].size(); } } else { int itmp = tasks_queue[jc][0]->parallel_max; int jtmp = tasks_queue[jc].size(); // if (itmp < _isSym ? 3 : 4) { if (!isMergedDTRSM[j]) { parallel_max = itmp; } else { parallel_max = ((itmp == jtmp) ? jtmp : (itmp + tasks_queue[jc][itmp]->parallel_max)); } } const int end = (int)((double)parallel_max * RATIO_STATIC); const int ktmp = tasks_queue[jc][end]->atomic_id; num_tmp_shared += parallel_max - end + ktmp; } } } tmp_shared->reserve(num_tmp_shared); if (tmp_dynamic.size() > 0) { long nops = 0L; for (list::iterator mt = tmp_dynamic.begin(); mt != tmp_dynamic.end(); ++mt) { nops += (*mt)->ops_complexity; } #ifdef DEBUG_PREPARE_THREAD cout << "static -> dynamic : " << tmp_dynamic.size() << " nops_sum = " << nops << " " ; #endif for (list::iterator mt = tmp_dynamic.begin(); mt != tmp_dynamic.end(); ++mt) { for (int i = (*mt)->begin; i < (*mt)->end; i++) { tmp_shared->push_back((*(*mt)->queue)[i]); } delete (*mt); (*mt) = NULL; } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif } // if (tmp_dynamic.size() > 0) if (level == 0) { for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; int parallel_max; if ((queue_id == C_DTRSM) && (nrow_DFullLDLt[jc] > 2)) { continue; } if (queue_id == C_DTRSM) { if (!isMergedDTRSM[j]) { // looking father node parallel_max = IndexTask(tasks_queue[jc], tasks_queue[jc][0]->parallel_max); } else { parallel_max = tasks_queue[jc].size(); } } else { int itmp = tasks_queue[jc][0]->parallel_max; int jtmp = tasks_queue[jc].size(); // if (itmp <= (_isSym ? 3 : 4)) { if (!isMergedDTRSM[j]) { parallel_max = itmp; } else { parallel_max = ((itmp == jtmp) ? 
jtmp : (itmp + tasks_queue[jc][itmp]->parallel_max)); } } int end = (int)((double)parallel_max * RATIO_STATIC); const int ktmp = tasks_queue[jc][end]->atomic_id; end -= ktmp; for (int i = end; i < parallel_max; i++) { // exclude null tasks if (*(tasks_queue[jc][i]->ops_complexity) > 0L) { tmp_shared->push_back(tasks_queue[jc][i]); } else { queue_null.push_back(tasks_queue[jc][i]); } // cyclic by index jc is better ? } } // loop : ic } } { string task_name = queue_symbol + " " + to_string(level + 1); for (int p = 0; p < num_threads; p++) { copytask_list2seq(queue_static[p], queue_lists[p], queue_null, // queue_dummy, task_name, queue_id, p, TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + level_id); for (list::iterator it = queue_lists[p].begin(); it != queue_lists[p].end(); ++it) { delete (*it); (*it) = NULL; } } } #ifdef DEBUG_PREPARE_THREAD cout << queue_symbol << num_tmp_shared << " : " << tmp_shared->size() << endl; #endif if (tmp_shared->size() > 0) { string task_name = queue_symbol + "+ " + to_string(level + 1); long nops = 0L; for (vector::const_iterator it = tmp_shared->begin(); it != tmp_shared->end(); ++it) { nops += *((*it)->ops_complexity); } C_task_seq* tmp = new C_task_seq(queue_id, task_name, 0, TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + level_id, tmp_shared, 0, tmp_shared->size(), nops); for (int p = 0; p < num_threads; p++) { queue_static[p].push_back(tmp); } } // if (num_tmp_shared > 0) else { delete tmp_shared; } // delete tmp_shared; } void task_assign_diag2(list *&queue_static, vector* &tasks_queue, list &queue_null, // list &queue_dummy, vector &isMergedDTRSM, string queue_symbol, int queue_id, int level_id, int num_threads, int level_last, int level, int begdom, int enddom, long *nops_sum, vector starts, vector starts_sub, list* &queue_lists, bool queue_lists_clear, Dissection::Tree* btree, vector* children) { int num_tmp_shared; list tmp_dynamic; listtask_seq_tmp; long nops_block_total; vector 
*tmp_shared = new vector; task_seq_tmp.clear(); nops_block_total = 0L; if (queue_lists_clear) { for (int p = 0; p < num_threads; p++) { #if 1 for (list::iterator it = queue_lists[p].begin(); it != queue_lists[p].end(); ++it) { delete (*it); (*it) = NULL; } #endif queue_lists[p].clear(); } } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); long nops = 0L; const int begin = starts_sub[j]; int end; // if (tasks_queue0[j].size() <= 4) { if (!isMergedDTRSM[j]) { end = tasks_queue[j][0]->parallel_max; } else { end = tasks_queue[j].size(); } if (level == 0) { end = (int)((double)end * RATIO_STATIC); } for (int i = begin; i < end; i++) { nops += *(tasks_queue[j][i]->ops_complexity); } string task_name = to_string(j); if (nops > 0L) { C_task_seq* tmp = new C_task_seq(queue_id, task_name, // for debugging (-1), // mutex_id TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + level_id, &tasks_queue[j], begin, end, nops); task_seq_tmp.push_back(tmp); nops_block_total += nops; } else { for (int i = begin; i < end; i++) { queue_null.push_back(tasks_queue[j][i]); } } } // loop : j tmp_dynamic.clear(); num_tmp_shared = 0; if (task_seq_tmp.size() > 0) { task_seq_tmp.sort(C_task_seq_complexity_greater); string queue_name; if (level == 0) { queue_name = queue_symbol + "% " ; } else { queue_name = queue_symbol + "* " ; } assign_tasks_statically(queue_lists, tmp_dynamic, nops_sum, task_seq_tmp, queue_id, queue_name.c_str(), (level + 1), (level_last - 1 - level) * 6 + level_id, nops_block_total, num_threads); if (tmp_dynamic.size() > 0) { num_tmp_shared += tmp_dynamic.size(); } } if (level == 0) { for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); const int parallel_max = tasks_queue[j].size(); const int end = (int)((double)parallel_max * RATIO_STATIC); num_tmp_shared += parallel_max - end; } } tmp_shared->reserve(num_tmp_shared); if (tmp_dynamic.size() > 0) { long nops = 0L; for (list::const_iterator mt = tmp_dynamic.begin(); mt 
!= tmp_dynamic.end(); ++mt) { nops += (*mt)->ops_complexity; } for (list::iterator mt = tmp_dynamic.begin(); mt != tmp_dynamic.end(); ++mt) { for (int i = (*mt)->begin; i < (*mt)->end; i++) { tmp_shared->push_back((*(*mt)->queue)[i]); } delete (*mt); (*mt) = NULL; } } if (level == 0) { for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); int parallel_max; if (!isMergedDTRSM[j]) { parallel_max = tasks_queue[j][0]->parallel_max; } else { parallel_max = tasks_queue[j].size(); } const int end = (int)((double)parallel_max * RATIO_STATIC); for (int i = end; i < parallel_max; i++) { if (*(tasks_queue[j][i]->ops_complexity) > 0L) { // exclude null tasks tmp_shared->push_back(tasks_queue[j][i]); } else { queue_null.push_back(tasks_queue[j][i]); } // cyclic by index jc is better ? } } } { string task_name = queue_symbol + " " + to_string(level + 1); for (int p = 0; p < num_threads; p++) { copytask_list2seq(queue_static[p], queue_lists[p], queue_null, // queue_dummy, task_name, queue_id, p, TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + level_id); // 3 Jul 2016 for (list::iterator it = queue_lists[p].begin(); it != queue_lists[p].end(); ++it) { delete (*it); (*it) = NULL; } } } #ifdef DEBUG_PREPARE_THREAD cout << queue_symbol << num_tmp_shared << " : " << tmp_shared->size() << endl; #endif if (tmp_shared->size() > 0) { string task_name = queue_symbol + "+ " + to_string(level + 1); long nops = 0L; for (vector::const_iterator it = tmp_shared->begin(); it != tmp_shared->end(); ++it) { nops += *((*it)->ops_complexity); } C_task_seq* tmp = new C_task_seq(queue_id, task_name, 0, TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + level_id, tmp_shared, 0, tmp_shared->size(), nops); for (int p = 0; p < num_threads; p++) { queue_static[p].push_back(tmp); } } // if (tmp_shared->size() > 0) else { delete tmp_shared; } } void QueueRuntime::generate_queue(C_task_seq* &queue_symb_, list* & queue_static_, vector* &queue_dynamic_, list 
&queue_null_, // list &queue_dummy_, Dissection::Tree* btree, vector* children, vector* tasks_SparseSymb, vector* tasks_SparseNum, vector* tasks_SparseLocalSchur, vector* tasks_DFillSym, vector* tasks_DFullLDLt, vector* tasks_DTRSMScale, vector* tasks_DGEMM, vector** tasks_Dsub, vector* tasks_deallocLower, vector* tasks_deallocLocalSchur, long **nops_queue, vector > all_fathersIndex, vector nrow_DFullLDLt, vector isMergedDTRSM, vector isDividedDTRSM, const int level_last) { // begin : task squence for symmbolic factorization const int num_threads = _num_threads; const int nb_doms_dense = (1U << level_last) - 1; { //begin scope : task_name list queue_tmp; string task_name = "l " + to_string(level_last); const int begdom = 1U << level_last; const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { const int j = d - begdom; C_task_seq* tmp = new C_task_seq(C_SPARSESYMBFACT, _null_name, // dummy (-1), // mutex_id TASK_SINGLE, 1, level_last, (-1), // phase &tasks_SparseSymb[j], 0, 1, *(tasks_SparseSymb[j][0]->ops_complexity)); queue_tmp.push_back(tmp); } // queue_tmp.sort(C_task_seq_complexity_greater); vector *task_tmp = new vector; task_tmp->resize(begdom); int k = 0; for (list::iterator it = queue_tmp.begin(); it != queue_tmp.end(); ++it, k++) { (*task_tmp)[k] = (*it)->queue[0][0]; } queue_symb_ = new C_task_seq(C_SPARSESYMBFACT, task_name, // dummy (-1), // mutex_id TASK_SINGLE, 1, level_last, (-1), // phase task_tmp, 0, begdom, 0); // dummy _queue_symb = queue_symb_; for (list::iterator it = queue_tmp.begin(); it != queue_tmp.end(); ++it) { delete (*it); (*it) = NULL; } } //end scope : task_name queue_static_ = new list[num_threads]; _queue_static = queue_static_; queue_dynamic_ = new vector; _queue_dynamic = queue_dynamic_; // end : task squence for symmbolic factorization list* queue_dynamic = new list[level_last + 1]; list* queue_dynamic0 = new list; list* queue_dynamic1 = new list[level_last + 1]; list* queue_dynamic2 = new list[level_last + 1]; // 
list* queue_dynamic3 = new list[level_last + 1]; list* queue_lists = new list[num_threads]; list queue_tmp0, queue_tmp1; // assuming that num_leaves(level_last) > num_threads // num_leaves(level_last) = _begLevel[level_last] - _begLevel[level_last + 1] // // Sparse numeric factorization : tasks_SparseNum[] -> _queue_static[] queue_tmp0.clear(); #define SPARSE_MERGED #ifdef SPARSE_MERGED { // sparse subdomains : tasks_SparseNum[] // all dense subdomains : tasks_DFillSym[] // the begining of the dense subdomains const int begdom = 1U << (level_last - 1); const int enddom = begdom * 2; for (int d = begdom; d < enddom; d++) { int j; vector* tasks = new vector; j = btree->selfIndex(2 * d); long nops = 0L; tasks->push_back(tasks_SparseNum[j][0]); nops += *(tasks_SparseNum[j][0]->ops_complexity); tasks->push_back(tasks_SparseLocalSchur[j][0]); nops += *(tasks_SparseLocalSchur[j][0]->ops_complexity); j = btree->selfIndex(2 * d + 1); tasks->push_back(tasks_SparseNum[j][0]); nops += *(tasks_SparseNum[j][0]->ops_complexity); tasks->push_back(tasks_SparseLocalSchur[j][0]); nops += *(tasks_SparseLocalSchur[j][0]->ops_complexity); j = btree->selfIndex(d); for (vector::const_iterator it = tasks_DFillSym[j].begin(); it != tasks_DFillSym[j].end(); ++it) { tasks->push_back(*it); nops += *((*it)->ops_complexity); } // task complexity of DFillSym[] in higher level is very large int dd; if (d % 2 == 0) { dd = (d - begdom) / 2; } else { dd = begdom - (d - begdom + 1) / 2; } if (dd > 0) { j = btree->selfIndex(dd); for (vector::const_iterator it = tasks_DFillSym[j].begin(); it != tasks_DFillSym[j].end(); ++it) { tasks->push_back(*it); nops += *((*it)->ops_complexity); } } C_task_seq* tmp = new C_task_seq(C_SPARSESOLVER, _null_name, (-1), // mutex_id (-1), //TASK_SINGLE, (-1), // 1, (-1), // level_last, (-1), // phase tasks, 0, tasks->size(), nops); queue_tmp0.push_back(tmp); } // loop : d(j) string task_name = "z " + to_string(level_last); copytask_list2seq(queue_dynamic[level_last], 
queue_tmp0, queue_null_, // queue_dummy_, task_name, C_SPARSESOLVER, // C_FILLMATRIX1, 0, // shared by all threads TASK_PARALLEL, num_threads, level_last, 2); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it)->queue; // deallocate a container : tasks in L880 delete (*it); (*it) = NULL; } } #else { // sparse subdomains : tasks_SparseNum[] // all dense subdomains : tasks_DFillSym[] // the begining of the dense subdomains const int begdom = 1U << level_last; const int enddom = begdom * 2; queue_tmp0.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); C_task_seq* tmp = new C_task_seq(C_SPARSENUMFACT, _null_name, (-1), // mutex_id (-1), //TASK_SINGLE, (-1), // 1, (-1), // level_last, (-1), // phase &tasks_SparseNum[j], 0, 1, *(tasks_SparseNum[j][0]->ops_complexity)); queue_tmp0.push_back(tmp); } // loop : d(j) queue_tmp0.sort(C_task_seq_complexity_greater); { int itmp = 0; for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, itmp++) { queue_lists[itmp % num_threads].push_back(*it); } } // loop : it, itmp { string task_name = "u " + to_string(level_last); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], queue_null_, // queue_dummy_, task_name, C_SPARSENUMFACT, p, TASK_SINGLE, 1, level_last, 0); } for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // Sparse local Schur complement : tasks_SparseLocalSchur[] -> queue_dynamic[] queue_tmp0.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); C_task_seq* tmp = new C_task_seq(C_SPARSELOCALSCHUR, _null_name, //(char *)NULL, // dummy (-1), // mutex_id (-1), //TASK_SINGLE, (-1), // 1, (-1), // level_last, (-1), // phase &tasks_SparseLocalSchur[j], 0, 1, *(tasks_SparseLocalSchur[j][0]->ops_complexity)); queue_tmp0.push_back(tmp); 
} // loop : d(j) queue_tmp0.sort(C_task_seq_complexity_greater); { string task_name = "w " + to_string(level_last); // C_SPARSELOCALSCHUR is simple greedy estimated complexity by // assuming blocks are dense // C_SPARSELOCALSCHUR1 uses estimated complexity by C_SPARSENUMFACT copytask_list2seq(*queue_dynamic0, queue_tmp0, queue_null_, // queue_dummy_, task_name, C_SPARSELOCALSCHUR, // C_SPARSELOCALSCHUR1 0, // shared by all threads TASK_PARALLEL, num_threads, level_last, 1); // SparseLDLt : 0 // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic0->begin(); it != queue_dynamic0->end(); ++it) { _queue_static[p].push_back(*it); } } } // -- C_FILLMATRIX diagonal - // without using children queue_tmp0.clear(); { // sparse subdomains : tasks_SparseNum[] // all dense subdomains : tasks_DFillSym[] // the begining of the dense subdomains const int begdom = 1U << (level_last - 1); const int enddom = begdom * 2; for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } { string task_name = "v " + to_string(level_last) + " : "; queue_tmp0.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); const int begin = 0; // better that all (diag/offdiag) in greedy // const int begin = tasks_DFillSym[j][0]->parallel_max; // -- C_FILLMATRIX offdiagonal -- const int end = tasks_DFillSym[j].size(); if ((end - begin) > 0) { // always true task_name += to_string(d) + " "; C_task_seq* tmp = new C_task_seq(C_FILLMATRIX, _null_name, // task_name_cstr, (-1), // mutex_id (-1), // TASK_PARALLEL, (-1), // num_threads, (-1), // level (-1), // phase &tasks_DFillSym[j], begin, end, 0L // nops; ); queue_tmp0.push_back(tmp); } } // loop : d(j) // -- C_FILLMATRIX diag/offdiagonal for grand fathers -- for (int d = 1; d < begdom; d++) { const int j = 
btree->selfIndex(d); task_name += to_string(d) + " "; C_task_seq* tmp = new C_task_seq(C_FILLMATRIX, _null_name, // task_name_cstr, (-1), // mutex_id (-1), // TASK_PARALLEL, (-1), // num_threads, (-1), // level (-1), // phase &tasks_DFillSym[j], 0, tasks_DFillSym[j].size(), 0L // nops; ); queue_tmp0.push_back(tmp); } // loop : d(j) if (queue_tmp0.size() > 0) { // only the last block copytask_list2seq(queue_dynamic[level_last], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_FILLMATRIX, // C_FILLMATRIX1, 0, // shared by all threads TASK_PARALLEL, num_threads, level_last, 2); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } } } #endif { for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic[level_last].begin(); it != queue_dynamic[level_last].end(); ++it) { _queue_static[p].push_back(*it); } } } // for dense subdomains for (int level = (level_last - 1); level >=0; level--) { const int begdom = 1U << level; const int enddom = begdom * 2; const int num_leaves = 1 << (level + 1); // 1 << level if (level < (level_last - 1)) { // better management by mixed Assignment of blocks of DTRSM/DGEMM // -- DTRSM offdiagonal -- { string task_name = "q offdiag " + to_string(level + 1) + " : "; queue_tmp0.clear(); // off-diagonal parallel block between children and grandfather, ancenstors for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; const int end = tasks_DTRSMScale[jc].size(); int begin = tasks_DTRSMScale[jc].size(); if (!isDividedDTRSM[jc] && (end > 0) && !isMergedDTRSM[j]) { begin = IndexTask(tasks_DTRSMScale[jc], tasks_DTRSMScale[jc][0]->parallel_max); } if ((end - begin) > 0) { task_name += to_string(jc) + " "; C_task_seq* tmp = new C_task_seq(C_DTRSM, _null_name, // task_name_cstr, (-1), // mutex_id (-1), // 
TASK_PARALLEL, (-1), // num_threads, (-1), // level (-1), // phase &tasks_DTRSMScale[jc], begin, end, 0L // nops; ); queue_tmp0.push_back(tmp); } // if ((end - begin) > 0) } // loop : ic } // loop : j if (queue_tmp0.size() > 0) { // only the last block copytask_list2seq(queue_dynamic[level], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_DTRSM1, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 1); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } } // dependency // -- D_GEMM offdiagonal -- { string task_name = "r " + to_string(level + 1) + " : "; queue_tmp0.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; const int end = tasks_DGEMM[jc].size(); if (end > 0) { int begin; const int itmp = tasks_DGEMM[jc][0]->parallel_max; if (!isMergedDTRSM[j]) { begin = itmp; } else { begin = ((itmp == end) ? 
end : (itmp + tasks_DGEMM[jc][itmp]->parallel_max)); } if ((end - begin) > 0) { task_name += to_string(jc) + " "; C_task_seq* tmp = new C_task_seq(C_DGEMM, _null_name, // task_name_cstr, (-1), // mutex_id (-1), // TASK_PARALLEL, (-1), // num_threads, (-1), // (level + 1), (-1), // (level_last - 1 - level) * 5 + 2, &tasks_DGEMM[jc], begin, end, 0L// nops); ); queue_tmp0.push_back(tmp); } } // if (tasks_DGEMM[jc].size() > 0) } // loop : ic } // loop j if (queue_tmp0.size() > 0) { // only the last part copytask_list2seq(queue_dynamic[level], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_DGEMM1, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 2); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // if (queue_tmp0.size() > 0) } // scope of task_name } // if (level < (level_last - 1)) // -- DEALLACATE { string task_name = "X " + to_string(level + 1) + " : "; queue_tmp0.clear(); C_task_seq* tmp = new C_task_seq(C_DEALLOCATE, _null_name, (-1), // mutex_id (-1), //TASK_PARALLEL, (-1),//num_threads, (-1),//(level + 1), (-1),//(level_last - 1 - level) * 5 + 3, &tasks_deallocLower[level + 1], 0, tasks_deallocLower[level + 1].size(), 0L// nops); ); queue_tmp0.push_back(tmp); copytask_list2seq(queue_dynamic[level], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_DEALLOCATE1, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 3); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // -- DSUB offdiagonal -- { string task_name = "s " + to_string(level + 1) + " : "; queue_tmp0.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); if (!isMergedDTRSM[j]) { const int end = 
tasks_Dsub[level + 1][j].size(); if (end > 0) { // to skip non allocated tasks_Dsub[level + 1][j][0] const int begin = tasks_Dsub[level + 1][j][0]->parallel_max; if ((end - begin) > 0) { task_name += to_string(j) + " "; C_task_seq* tmp = new C_task_seq(C_DSUB, _null_name, (-1), // mutex_id (-1), //TASK_PARALLEL, (-1),//num_threads, (-1),//(level + 1), (-1),//(level_last - 1 - level) * 5 + 3, &tasks_Dsub[level + 1][j], begin, end, 0L// nops); ); queue_tmp0.push_back(tmp); } // if ((end - begin) > 0) } // if (end > 0) } // if (!isMergedDTRSM[j]) } // loop : d / j for (vector::const_iterator it = all_fathersIndex[level].begin(); it != all_fathersIndex[level].end(); ++it) { const int size = tasks_Dsub[level + 1][*it].size(); if (size > 0) { task_name += to_string(*it) + " "; C_task_seq* tmp = new C_task_seq(C_DSUB, _null_name, (-1), // mutex_id (-1), //TASK_PARALLEL, (-1), //num_threads, (-1), //(level + 1), (-1), //(level_last - 1 - level) * 5 + 3, &tasks_Dsub[level + 1][*it], 0, size, 0L// nops); ); queue_tmp0.push_back(tmp); } } // loop : it if (queue_tmp0.size() > 0) { // only the last part copytask_list2seq(queue_dynamic[level], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_DSUB1, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 4); // erase temporary C_task_seq whose elements are copied to // queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } } // -- DEALLACATE { string task_name = "x " + to_string(level + 1) + " : "; queue_tmp0.clear(); const int begdom1 = 1U << (level + 1); const int enddom1 = begdom1 * 2; for (int d = begdom1; d < enddom1; d++) { const int j = btree->selfIndex(d); C_task_seq* tmp = new C_task_seq(C_DEALLOCATE, _null_name, (-1), // mutex_id (-1), //TASK_PARALLEL, (-1),//num_threads, (-1),//(level + 1), (-1),//(level_last - 1 - level) * 5 + 3, &tasks_deallocLocalSchur[j], 0, tasks_deallocLocalSchur[j].size(), 0L// 
nops); ); queue_tmp0.push_back(tmp); } copytask_list2seq(queue_dynamic2[level], queue_tmp0, queue_null_, // queue_dummy_, task_name, C_DEALLOCATE1, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 5); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // level == 0 <==> num_leaves = 2 if (num_leaves <= num_threads) { // trick to merge DFullLDLt into previous stage of DTRSM to follow // the critical path long *nops_sum = new long[num_threads]; for (int i = 0; i < num_threads; i++) { nops_sum[i] = 0L; } vector starts(nb_doms_dense, 0); vector starts_dtrsm(nb_doms_dense, 0); vector starts_sub(nb_doms_dense, 0); long nops_block_total = 0L; // -- STARTING BLOCK : DTSRM + DGEMM + DSUB + DFULLLDLT -- list *> diag_starting; list task_name_strs; for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); #ifdef DEBUG_PREPARE_THREAD cout << "+ level = " << (level + 1) << " j = " << j << " children = " << children[j][0] << " " << children[j][1] << " : " << "# DFullLDLt @ " << level << " : " << j << " / " << tasks_DFullLDLt[j].size() << endl; #endif // 3 * 4 * 5 / 6 + 1 = 11 // 3 * 4 * 5 / 6 + 2 * 3 * 4 / 6 + 3 = 17 // if (tasks_DFullLDLt[j].size() > 4) { // with parallelizing if(nrow_DFullLDLt[j] > 2) { vector *tmp = new vector; tmp->reserve(4 + 2 * (_isSym ? 
3 : 6)); // 3 = 2 + 1 for (int ic = 0; ic < 2; ic++) { const int jj = children[j][ic]; if ((nrow_DFullLDLt[jj] <= 2) && !isMergedDTRSM[jj]) { // the case DFullLDLt and DTRSMScale are concatenated can be skipped const int itmp = tasks_DTRSMScale[jj][0]->atomic_size; for (int k = 0; k < itmp; k++) { tmp->push_back(tasks_DTRSMScale[jj][k]); } if (_isSym) { starts_dtrsm[jj] = tasks_DTRSMScale[jj][0]->atomic_size; } else { for (int k = 0; k < tasks_DTRSMScale[jj][itmp]->atomic_size; k++) { tmp->push_back(tasks_DTRSMScale[jj][itmp + k]); } starts_dtrsm[jj] = (itmp + tasks_DTRSMScale[jj][itmp]->atomic_size); } // if (_isSym) } // if ((nrow_DFullLDLt[jj] <= 2) && !isMergedDTRSM[jj]) tmp->push_back(tasks_DGEMM[jj][0]); starts[jj] = 1; } // loop : ic tmp->push_back(tasks_Dsub[level + 1][j][0]); // null for many cases tmp->push_back(tasks_DFullLDLt[j][0]); diag_starting.push_back(tmp); string task_name = "p " + to_string(level + 1) + " " + to_string(j); task_name_strs.push_back(task_name); starts_sub[j] = 1; } // if(nrow_DFullLDLt[j] > 2) } // loop : d list::const_iterator kt = task_name_strs.begin(); { int itmp = 0; for (list *>::const_iterator it = diag_starting.begin(); it != diag_starting.end(); ++it, ++kt, itmp++){ long nops = 0L; for (vector::const_iterator jt = (*it)->begin(); jt != (*it)->end(); ++jt) { nops += *((*jt)->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DIAG_START, (*kt), TASK_SINGLE, (-1), // mutex_id 1, (level + 1), (level_last - 1 - level) * 6 + 1, (*it), 0, (*it)->size(), nops); queue_lists[itmp % num_threads].push_back(tmp); nops_sum[itmp % num_threads] = nops; nops_block_total += nops; #ifdef DEBUG_PREPARE_THREAD cout << "ll = " << itmp % num_threads << " critical path " << *kt << " nops = " << nops << endl; #endif } // loop : it, kt, itmp } // scope of itmp // -- DTRSM -- diangonal task_assign_diag1(_queue_static, tasks_DTRSMScale, queue_null_, // queue_dummy_, nrow_DFullLDLt, isMergedDTRSM, "q", C_DTRSM, 1, // int level_id, num_threads, 
level_last, level, begdom, enddom, nops_sum, starts_dtrsm, queue_lists, false, btree, children); // erase STL container defined in line 1499 for (list *>::iterator it = diag_starting.begin(); it != diag_starting.end(); ++it) { delete (*it); (*it) = NULL; } { // begin : scope queue_tmp2 list queue_tmp2; queue_tmp2.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; long nops = 0L; if (isDividedDTRSM[jc]) { const int end = tasks_DTRSMScale[jc].size(); for (int i = 0; i < end; i++) { nops += *(tasks_DTRSMScale[jc][i]->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DTRSM, _null_name, (-1), // mutex_id (-1), // TASK_SINGLE, (-1), // 1, (-1), // (level + 1), (-1), // (level_last - 1 - level) * 5 + 1, &tasks_DTRSMScale[jc], 0, end, nops); queue_tmp2.push_back(tmp); } // if (isDividedDTRSM[jc]) } // loop : ic } // loop : d { // begin : scope task_name string task_name = "q Offdiag " + to_string(level + 1); if (queue_tmp2.size() > 0) { // only the last block copytask_list2seq(queue_dynamic1[level], queue_tmp2, queue_null_, // queue_dummy_, task_name, C_DTRSM, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 1); for (list::iterator it = queue_tmp2.begin(); it != queue_tmp2.end(); ++it) { delete (*it); (*it) = NULL; } // loop : it } // if (queue_tmp2.size() > 0) } // end : scope task_name } // end : scope queue_tmp2 if (level >= 0) { for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic1[level].begin(); it != queue_dynamic1[level].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p // the case queue_dynamic consists of only C_task_seq == 'x', copied here for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic2[level + 1].begin(); it != queue_dynamic2[level + 1].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p } // -- DGEMM -- diagonal 
vector null_idx; task_assign_diag1(_queue_static, tasks_DGEMM, queue_null_, // queue_dummy_, null_idx, // not used isMergedDTRSM, "r", C_DGEMM, 2, // int level_id, num_threads, level_last, level, begdom, enddom, nops_sum, starts, queue_lists, true, btree, children); // -- DSUB -- diagonal task_assign_diag2(_queue_static, tasks_Dsub[level + 1], queue_null_, // queue_dummy_, isMergedDTRSM, //tasks_DFullLDLt, "s", C_DSUB, 3, // int level_id, num_threads, level_last, level, begdom, enddom, nops_sum, starts, starts_sub, queue_lists, true, btree, children); if (level == 0) { // the case queue_dynamic consists of only C_task_seq == 'x', copied here for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic2[level].begin(); it != queue_dynamic2[level].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p } // -- C_DFULL -- diagonal queue_tmp0.clear(); queue_tmp1.clear(); for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); long nops = 0L; for (int i = starts_sub[j]; i < tasks_DFullLDLt[j].size(); i++) { nops += *(tasks_DFullLDLt[j][i]->ops_complexity); } string task_name = "t& " + to_string(level) + " : " + to_string(j); C_task_seq* tmp = new C_task_seq(C_DFULL, task_name, 0, // mutex_id TASK_SINGLE, 1, level, (level_last - level) * 6, &tasks_DFullLDLt[j], starts_sub[j], tasks_DFullLDLt[j].size(), nops); if (starts_sub[j] > 0) { queue_tmp0.push_back(tmp); } else { queue_tmp1.push_back(tmp); } } const int size_task0 = queue_tmp0.size(); const int size_threads = num_threads - queue_tmp1.size(); #ifdef DEBUG_PREPARE_THREAD cout << "*** C_DFULL : parallel = " << size_task0 << " : serial " << queue_tmp1.size() << " / "; #endif if (size_task0 > 0 && size_threads > 0) { queue_tmp0.sort(C_task_seq_complexity_greater); int mm0 = size_threads % size_task0; const int mm = (size_threads / size_task0) + (mm0 != 0); if (mm0 == 0) { mm0 = size_task0; } int jj = 0; 
for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, jj++) { const int multplcty = (jj < mm0) ? mm : (mm - 1); #ifdef DEBUG_PREPARE_THREAD cout << (*it)->task_id << " : " << (*it)->task_name << " : < " << multplcty << " > "; #endif const int parallel_single = (multplcty == 1) ? TASK_SINGLE : TASK_PARALLEL; // copy raw task into a task sequence whose length is one list queue_tmp2, queue_tmp3; queue_tmp2.push_back(*it); copytask_list2seq(queue_tmp3, queue_tmp2, queue_null_, // queue_dummy_, (*it)->task_name, (*it)->task_id, jj, parallel_single, multplcty, (*it)->level, (*it)->phase); // threads share one task (*it) for (int m = 0; m < mm; m++) { const int n = jj + m * size_task0; if (n < size_threads) { _queue_static[n % num_threads].push_back(queue_tmp3.front()); // _queue_static[n % num_threads].push_back(seq_tmp); #ifdef DEBUG_PREPARE_THREAD cout << "[ " << jj << " @ " << n << " : " << n % num_threads << " ]"; #endif } } } // loop : j #ifdef DEBUG_PREPARE_THREAD cout << "/ "; #endif jj = size_threads; for (list::const_iterator it = queue_tmp1.begin(); it != queue_tmp1.end(); ++it, jj++) { // copy for each thread list queue_tmp2, queue_tmp3; queue_tmp2.push_back(*it); copytask_list2seq(queue_tmp3, queue_tmp2, queue_null_, // queue_dummy_, (*it)->task_name, (*it)->task_id, 0, TASK_SINGLE, 1, (*it)->level, (*it)->phase); _queue_static[jj % num_threads].push_back(queue_tmp3.front()); #ifdef DEBUG_PREPARE_THREAD cout << jj << " ; " << jj % num_threads << " / "; #endif } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif // delete original C_task_seq after copying to all threads for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } for (list::iterator it = queue_tmp1.begin(); it != queue_tmp1.end(); ++it) { delete (*it); (*it) = NULL; } } // if (size_task0 > 0) else { // (size_task0 == 0) || (size_threads <= 0) #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif { int itmp = 0; 
queue_tmp1.sort(C_task_seq_complexity_greater); for (list::const_iterator it = queue_tmp1.begin(); it != queue_tmp1.end(); ++it, itmp++) { (*it)->parallel_single = TASK_SINGLE; queue_lists[itmp % num_threads].push_back(*it); } // loop : it, itmp } { string task_name = "t " + to_string(level); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], queue_null_, // queue_dummy_, task_name, C_DFULL, p, TASK_SINGLE, 1, level, (level_last - level) * 6); } // loop : p // (queue_tmp0.size() == 0) ? for safty of non-memory-leak for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } for (list::iterator it = queue_tmp1.begin(); it != queue_tmp1.end(); ++it) { delete (*it); (*it) = NULL; } } } // if (size_task > 0) if (level > 0) { for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic[level].begin(); it != queue_dynamic[level].end(); ++it) { _queue_static[p].push_back(*it); } } } delete [] nops_sum; } // else { // if (num_leaves <= num_threads) if (level < (level_last - 1)) { // -- DTRSM -- diagonal queue_tmp0.clear(); for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } list queue_tmp2; queue_tmp2.clear(); for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); #ifdef DEBUG_PREPARE_THREAD cout << "* level = " << (level + 1) << " j = " << j << " children = " << children[j][0] << " " << children[j][1] << " " << endl; #endif for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; if (tasks_DTRSMScale[jc].size() > 0) { long nops = 0L; if (isDividedDTRSM[jc]) { const int end = tasks_DTRSMScale[jc].size(); for (int i = 0; i < end; i++) { nops += *(tasks_DTRSMScale[jc][i]->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DTRSM, _null_name, (-1), // mutex_id (-1), // TASK_SINGLE, (-1), // 1, (-1), // (level + 1), (-1), // (level_last - 1 - level) * 5 + 1, &tasks_DTRSMScale[jc], 0, end, nops); queue_tmp2.push_back(tmp); } 
// if (isDividedDTRSM[jc]) else { int end; if (!isMergedDTRSM[j]) { // looking the father node end = IndexTask(tasks_DTRSMScale[jc], tasks_DTRSMScale[jc][0]->parallel_max); } else { end = tasks_DTRSMScale[jc].size(); } for (int i = 0; i < end; i++) { nops += *(tasks_DTRSMScale[jc][i]->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DTRSM, _null_name, (-1), // mutex_id (-1), // TASK_SINGLE, (-1), // 1, (-1), // (level + 1), (-1), // (level_last - 1 - level) * 5 + 1, &tasks_DTRSMScale[jc], 0, end, nops); queue_tmp0.push_back(tmp); } // if (isDividedDTRSM[jc]) } // if (tasks_DTRSMScale[jc].size() > 0) } // loop : ic } // loop : d queue_tmp0.sort(C_task_seq_complexity_smaller); { // begin : scope itmp int itmp = 0; for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, itmp++) { queue_lists[itmp % num_threads].push_back(*it); } // loop : it } // end : scope itmp { // begin : scope task_name string task_name = "q diag " + to_string(level + 1); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], queue_null_, // queue_dummy_, task_name, C_DTRSM, p, TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + 1); } // erase temporary C_task_seq whose elements are copied to // queue_static[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // end : scope task_name { // begin : scope task_name string task_name = "q Offdiag " + to_string(level + 1); if (queue_tmp2.size() > 0) { // only the last block copytask_list2seq(queue_dynamic1[level], queue_tmp2, queue_null_, // queue_dummy_, task_name, C_DTRSM, 0, // shared by all threads TASK_PARALLEL, num_threads, (level + 1), (level_last - 1 - level) * 6 + 1); // erase temporary C_task_seq whose elements are copied to queue_dynamic[] for (list::iterator it = queue_tmp2.begin(); it != queue_tmp2.end(); ++it) { delete (*it); (*it) = NULL; } } // if (queue_tmp2.size() > 0) } // end : scope task_name if 
(level >= 0) { for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic1[level].begin(); it != queue_dynamic1[level].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p // C_task_seq == 'x', copied here for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic2[level + 1].begin(); it != queue_dynamic2[level + 1].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p } // if (level >= 0) // -- DGEMM -- diagonal queue_tmp0.clear(); for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); for (int ic = 0; ic < 2; ic++) { const int jc = children[j][ic]; const int tasks_size = tasks_DGEMM[jc].size(); if (tasks_size > 0) { long nops = 0L; int end; int itmp = tasks_DGEMM[jc][0]->parallel_max; // if (itmp <= (_isSym ? 3 : 4)) { #if 1 if (!isMergedDTRSM[j]) { end = itmp; } else { end = ((itmp == tasks_size) ? tasks_size : itmp + tasks_DGEMM[jc][itmp]->parallel_max); } #else end = ((itmp == tasks_size) ? 
tasks_size : itmp + tasks_DGEMM[jc][itmp]->parallel_max); #endif for (int i = 0; i < end; i++) { nops += *(tasks_DGEMM[jc][i]->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DGEMM, _null_name, (-1), // mutex_id (-1), // TASK_SINGLE, (-1), //1, (-1), //(level + 1), (-1), //(level_last - 1 - level) * 5 + 2, &tasks_DGEMM[jc], 0, end, nops); queue_tmp0.push_back(tmp); } // if (tasks_DGEMM[jc].size() > 0) } // loop : ic } // loop : d queue_tmp0.sort(C_task_seq_complexity_greater); { int itmp = 0; for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, itmp++) { queue_lists[itmp % num_threads].push_back(*it); } // loop : it } { string task_name = "r " + to_string(level + 1); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], queue_null_, // queue_dummy_, task_name, C_DGEMM, p, TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + 2); } // erase temporary C_task_seq whose elements are copied to // queue_static[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } } // if (level == (level_last - 1)) // -- DSUB -- diagonal queue_tmp0.clear(); for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); long nops = 0L; if (tasks_Dsub[level + 1][j].size() > 0) { int end; // if (tasks_DFullLDLt[j].size() <= 4) { // if(nrow_DFullLDLt[j] <= 2) { if (!isMergedDTRSM[j]) { end = tasks_Dsub[level + 1][j][0]->parallel_max; } else { end = tasks_Dsub[level + 1][j].size(); } for (int i = 0; i < end; i++) { nops += *(tasks_Dsub[level + 1][j][i]->ops_complexity); } C_task_seq* tmp = new C_task_seq(C_DSUB, _null_name, (-1), // mutex_id (-1), // TASK_SINGLE, (-1), //1, (-1), //(level + 1), (-1), //(level_last - 1 - level) * 5 + 3, &tasks_Dsub[level + 1][j], 0, end, // tasks_Dsub[level + 1][j][0]->parallel_max, nops); queue_tmp0.push_back(tmp); } } // loop : d 
queue_tmp0.sort(C_task_seq_complexity_greater); { int itmp = 0; for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, itmp++) { queue_lists[itmp % num_threads].push_back(*it); } // loop : it } { // begin : scope for task_name; string task_name = "s " + to_string(level + 1); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], queue_null_, // queue_dummy_, task_name, C_DSUB, p, TASK_SINGLE, 1, (level + 1), (level_last - 1 - level) * 6 + 3); } // erase temporary C_task_seq whose elements are copied to // queue_static[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // end : scope for task_name; if (level == 0) { // the case queue_dynamic consists of only C_task_seq == 'x', copied here for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic2[level].begin(); it != queue_dynamic2[level].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p } // -- C_DFULL -- queue_tmp0.clear(); for (int p = 0; p < num_threads; p++) { queue_lists[p].clear(); } for (int d = begdom; d < enddom; d++) { const int j = btree->selfIndex(d); long nops = 0L; for (int i = 0; i < tasks_DFullLDLt[j].size(); i++) { nops += *(tasks_DFullLDLt[j][i]->ops_complexity); } string task_name = "t " + to_string(level) + " : " + to_string(j); C_task_seq* tmp = new C_task_seq(C_DFULL, task_name, (-1), // mutex_id TASK_SINGLE, 1, level, (level_last - level) * 6, &tasks_DFullLDLt[j], 0, tasks_DFullLDLt[j].size(), nops); queue_tmp0.push_back(tmp); } // loop : d queue_tmp0.sort(C_task_seq_complexity_greater); { int itmp = 0; for (list::const_iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it, itmp++) { queue_lists[itmp % num_threads].push_back(*it); } // loop : it } // begin : scope for task_name; { string task_name = "t " + to_string(level); for (int p = 0; p < num_threads; p++) { copytask_list2seq(_queue_static[p], queue_lists[p], 
queue_null_, // queue_dummy_, task_name, C_DFULL, p, TASK_SINGLE, 1, level, (level_last - level) * 6); } // erase temporary C_task_seq whose elements are copied to // queue_static[] for (list::iterator it = queue_tmp0.begin(); it != queue_tmp0.end(); ++it) { delete (*it); (*it) = NULL; } } // end : scope for task_name if ((level > 0)) { // && (level < (level_last - 1))) { for (int p = 0; p < num_threads; p++) { for(list::const_iterator it = queue_dynamic[level].begin(); it != queue_dynamic[level].end(); ++it) { _queue_static[p].push_back(*it); } } // loop : p } // dependency } // if (num_leaves < num_threads) } // loop : level // begin : counting task size { for (int p = 0; p < num_threads; p++) { #ifdef DEBUG_PREPARE_THREAD cout << "thread = " << p << " @ " << _queue_static[p].size() << " / "; #endif for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { if ((*it)->parallel_single == TASK_SINGLE) { #ifdef DEBUG_PREPARE_THREAD cout << (*it)->task_name << " [ " << (*it)->begin << " : " << (*it)->end << " ] ; " << (*it)->ops_complexity << " / "; #endif nops_queue[p][(*it)->level] += (*it)->ops_complexity; } if ((*it)->parallel_single == TASK_PARALLEL) { #ifdef DEBUG_PREPARE_THREAD cout << (*it)->task_name << " [[ " << (*it)->begin << " : " << (*it)->end << " ]] ; " << (*it)->ops_complexity << " / "; #endif if (((*it)->num_threads > 0) && ((*it)->task_id == C_DFULL)) { // cout << "mutex_id = " << (*it)->mutex_id; nops_queue[p][(*it)->level] += (*it)->ops_complexity / (long)(*it)->num_threads; } if ((*it)->level == 1) { nops_queue[num_threads][(*it)->level] += (*it)->ops_complexity; } } } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif } #ifdef DEBUG_PREPARE_THREAD cout << "shared / "; #endif for (int level = level_last; level >=0; level--) { #ifdef DEBUG_PREPARE_THREAD cout << "level = " << level << endl; #endif for (list::const_iterator it = queue_dynamic[level].begin(); it != queue_dynamic[level].end(); ++it) { #ifdef 
DEBUG_PREPARE_THREAD cout << (*it)->task_name << " [ " << (*it)->begin << " : " << (*it)->end << " ] ; " << (*it)->ops_complexity << " / "; #endif nops_queue[num_threads][(*it)->level] += (*it)->ops_complexity; } #ifdef DEBUG_PREPARE_THREAD cout << endl; #endif } #ifdef DEBUG_PREPARE_THREAD for (int level = (level_last + 1); level >= 0; level--) { cout << "level = " << level << " : "; for (int j = 0; j <= num_threads; j++) { cout << nops_queue[j][level] << " "; } cout << endl; } #endif } // end : counting task size // copy tasks in reverse order { int itmp = 0; for (int i = 0; i <= level_last; i++) { itmp += queue_dynamic[i].size(); itmp += queue_dynamic1[i].size(); itmp += queue_dynamic2[i].size(); // C_DEALLOCATE // itmp += queue_dynamic3[i].size(); // C_DEALLOCATE } _queue_dynamic->reserve(itmp); } for (int i = 0; i <= level_last; i++) { const int level = level_last - i; for (list::const_iterator it = queue_dynamic[level].begin(); it != queue_dynamic[level].end(); ++it) { _queue_dynamic->push_back(*it); } for (list::const_iterator it = queue_dynamic1[level].begin(); it != queue_dynamic1[level].end(); ++it) { _queue_dynamic->push_back(*it); } for (list::const_iterator it = queue_dynamic2[level].begin(); it != queue_dynamic2[level].end(); ++it) { _queue_dynamic->push_back(*it); } } // begin : scope for fout if (false) { char fname[256]; int pid = get_process_id(); sprintf(fname, "tasks-created.%d.data", pid); FILE *fp; if ((fp = fopen(fname, "a")) != NULL) { write_dependency(fp); } else { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, fname); exit(-1); } } queue_dynamic0->clear(); for (int i = 0; i < (level_last + 1); i++) { queue_dynamic[i].clear(); queue_dynamic1[i].clear(); queue_dynamic2[i].clear(); // queue_dynamic3[i].clear(); } delete [] queue_dynamic; delete [] queue_dynamic1; delete [] queue_dynamic2; // delete [] queue_dynamic3; delete queue_dynamic0; for (int i = 0; i < num_threads; i++) { queue_lists[i].clear(); } delete [] 
queue_lists; } void QueueRuntime::write_dependency(FILE *fp) { for (int p = 0; p < _num_threads; p++) { fprintf(fp, "*** thread = %d @ %d ***\n", p, (int)_queue_static[p].size()); for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { fprintf(fp, "** %d C_task_seq = %d %s %s **\n", p, (*it)->task_id, (*it)->task_name, (((*it)->parallel_single == TASK_SINGLE) ? " single " : " shared ")); for (int j = (*it)->begin; j < (*it)->end; j++) { fprintf(fp, "%s : ", (*(*it)->queue)[j]->task_name); if ((*(*it)->queue)[j]->atomic_size > 1) { fprintf(fp, "atomic %d / %d ", (*(*it)->queue)[j]->atomic_id, (*(*it)->queue)[j]->atomic_size); } // list& parents = *(*(*it)->queue)[j]->parents_work; list& parents = *(*(*it)->queue)[j]->parents; fprintf(fp, "parents = %d / ", (int)parents.size()); for (list::const_iterator jt = parents.begin(); jt != parents.end(); ++jt) { fprintf(fp, "[%p] %s / ", (*jt), (*jt)->task_name); } fprintf(fp, "\n"); } } // loop : it } // loop : p fprintf(fp, "*** dynamic : %d ***\n", (int)_queue_dynamic->size()); for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { fprintf(fp, "** C_task_seq = %d : %s [ %d %d] %d **\n", (*it)->task_id, (*it)->task_name, (*it)->begin, (*it)->end, (*it)->pos); for (int j = (*it)->begin; j < (*it)->end; j++) { fprintf(fp, "%s : ", (*(*it)->queue)[j]->task_name); if ((*(*it)->queue)[j]->atomic_size > 1) { fprintf(fp, " atomic %d / %d ", (*(*it)->queue)[j]->atomic_id, (*(*it)->queue)[j]->atomic_size); } // list& parents = *(*(*it)->queue)[j]->parents_work; list& parents = *(*(*it)->queue)[j]->parents; fprintf(fp, "parents = %d / ", (int)parents.size()); for (list::const_iterator jt = parents.begin(); jt != parents.end(); ++jt) { fprintf(fp, "[%p] %s / ", (*jt), (*jt)->task_name); } fprintf(fp, "\n"); } } // loop : it } // end : scope for fout void QueueRuntime::exec_symb_fact() { const int num_threads = _num_threads; #ifdef POSIX_THREADS void* 
results; pthread_attr_t th_attr; #endif vector threads; //#else // vector threads; //#endif clock_t t0_cpu, t1_cpu; elapsed_t t0_elapsed, t1_elapsed; // struct timespec ts0, ts1; int ierr; diss_printf(_verbose, _fp, "symbolic factorization of sparse matrices with %d threads\n", num_threads); #ifdef POSIX_THREADS // threads = new pthread_t[num_threads]; threads.resize(num_threads); ierr = pthread_mutex_init(&_mutex_root, NULL); if (ierr != 0) { diss_printf(_verbose, _fp, "%s %d : pthread_mutex_init(&_mutex_root, NULL) : %d\n", __FILE__, __LINE__, ierr); } #endif #ifdef POSIX_THREADS #ifdef DEBUG_EXEC_THREAD ierr = pthread_mutex_init(&_mutex_debug, NULL); #endif if (ierr != 0) { diss_printf(_verbose, _fp, "%s %d : pthread_mutex_init(&_mutex_debug, NULL) %d\n", __FILE__, __LINE__, ierr); } pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); #endif #ifdef DEBUG_EXEC_THREAD_FILE { int pid = get_process_id(); char fname[256]; sprintf(fname, "task-s.%d.data", pid); _fout.open(fname); } #endif t0_cpu = clock(); get_realtime(&t0_elapsed); #ifdef POSIX_THREADS THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] = new THREAD_QUEUE_EXEC(p, num_threads, this); int pid = pthread_create(&threads[p], &th_attr, &thread_queue_symb_factorize_, (void *)params[p]); if (pid != 0) { fprintf(stderr, "bad thread creation ? : %d\n", pid); exit(0); } } pthread_attr_destroy(&th_attr); for (int p = 0; p < num_threads; p++) { int pid = pthread_join(threads[p], &results); if (pid != 0) { fprintf(stderr, "bad thread creation ? 
: %d\n", pid); exit(0); } delete params[p]; } delete [] params; #else THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] = new THREAD_QUEUE_EXEC(p, num_threads, this); threads.push_back(std::thread(thread_queue_symb_factorize_, (void *)params[p])); } for (int p = 0; p < num_threads; p++) { threads[p].join(); delete params[p]; } delete [] params; #endif t1_cpu = clock(); get_realtime(&t1_elapsed); diss_printf(_verbose, _fp, "execution of symb queue : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); #ifdef DEBUG_EXEC_THREAD_FILE _fout.close(); #endif #ifdef DEBUG_THREAD_TIME { int pid = get_process_id(); char filename[256]; FILE *fp; sprintf(filename, "threadtime-symb.%d.data", pid); if ((fp = fopen(filename, "w")) == NULL) { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, filename); exit(-1); } else { for (int j = _queue_symb->begin; j < _queue_symb->end; j++) { fprintf(fp, "%s\t%d\t%d\t%d\t%d\t%d\t%ld\n", (*_queue_symb->queue)[j]->task_name, (*_queue_symb->queue)[j]->thread_id, convert_sec((*_queue_symb->queue)[j]->t0), convert_microsec((*_queue_symb->queue)[j]->t0), convert_sec((*_queue_symb->queue)[j]->t1), convert_microsec((*_queue_symb->queue)[j]->t1), *((*_queue_symb->queue)[j]->ops_complexity)); } fclose(fp); } // if fopen() } #endif #ifdef POSIX_THEADS pthread_mutex_destroy(&_mutex_root); #ifdef DEBUG_EXEC_THREAD pthread_mutex_destroy(&_mutex_debug); #endif #endif threads.clear(); } void QueueRuntime::exec_num_fact_debug() { diss_printf(_verbose, _fp, "numerical factorization with single threads :: DEBUG\n"); int *permute_block = new int[SIZE_B1]; list::const_iterator *its = new list::const_iterator[_num_threads]; diss_printf(_verbose, _fp, "*** thread = 0 @ %d ***\n", (int)_queue_static[0].size()); for (int p = 0; p < _num_threads; p++) { its[p] = _queue_static[p].begin(); } for 
(list::const_iterator it = _queue_static[0].begin(); it != _queue_static[0].end(); ++it) { diss_printf(_verbose, _fp, "** C_task_seq = %d : %s, %s **\n", (*it)->task_id, (*it)->task_name , (((*it)->parallel_single == TASK_SINGLE) ? " single " : " shared ")); for (int j = (*it)->begin; j < (*it)->end; j++) { diss_printf(_verbose, _fp, "%s : ", (*(*it)->queue)[j]->task_name); if ((*(*it)->queue)[j]->atomic_size > 1) { diss_printf(_verbose, _fp, "atomic %d / %d ", (*(*it)->queue)[j]->atomic_id, (*(*it)->queue)[j]->atomic_size); } else { diss_printf(_verbose, _fp, "\n"); } execute_task_debug(*it, j, permute_block, 0); if ((*(*it)->queue)[j]->quit_queue) { return; } } // loop : j for (int p = 1 ; p < _num_threads; p++) { diss_printf(_verbose, _fp, "** C_task_seq = %d : %s %s **\n", (*its[p])->task_id, (*its[p])->task_name, (((*its[p])->parallel_single == TASK_SINGLE) ? " single " : " shared ")); if ((*its[p])->parallel_single == TASK_SINGLE) { for (int j = (*its[p])->begin; j < (*its[p])->end; j++) { diss_printf(_verbose, _fp, "%s : ", (*(*its[p])->queue)[j]->task_name); if ((*(*its[p])->queue)[j]->atomic_size > 1) { diss_printf(_verbose, _fp, " atomic %d / %d\n", (*(*its[p])->queue)[j]->atomic_id, (*(*its[p])->queue)[j]->atomic_size); } else { diss_printf(_verbose, _fp, "\n"); } execute_task_debug(*its[p], j, permute_block, 0); if ((*(*its[p])->queue)[j]->quit_queue) { return; } } // loop : j } // if ((*its[p])->parallel_single == TASK_SINGLE) ++its[p]; } // loop : p > 0 } // loop : it } void QueueRuntime::execute_task_debug(C_task_seq *seq, int pos, int *permute_block, int pid) { C_task *task = (*seq->queue)[pos]; #ifdef DEBUG_CHECKPARENTS_DONE // debugging int waiting; waiting = check_parents_done(task); if (waiting > 0) { { cerr << pid << " parents of task " << task->task_name << " not finished : "; for(list::const_iterator nt = task->parents_work->begin(); nt != task->parents_work->end(); ++nt) { cerr << (*nt)->task_name << " "; } cerr << endl; } } // #endif // 
accuessing to unsigned char does not need mutex task->status = TASK_WORKING; // #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t0)); // clock_gettime(CLOCK_MONOTONIC, &(task->t0)); #endif // debugging : 13 :Apr.2012 Atsushi task->func(task->func_arg); if (task->task_id == C_DFULL_SYM_GAUSS) { // task->quit_queue = ((C_dfull_gauss_arg*)task->func_arg)->quit; for (int i = (pos + 1); i <= (pos + task->to_next_task); i++) { (*seq->queue)[i]->status = TASK_DONE; (*seq->queue)[i]->quit_queue = true; } } // accuessing to unsigned char does not neet mutex task->status = TASK_DONE; // } void QueueRuntime::exec_num_fact(const int called) { // unsigned int ui; const int num_threads = _num_threads; #ifdef POSIX_THREADS void* results; pthread_attr_t th_attr; vector threads; #else vector threads; #endif clock_t t0_cpu, t1_cpu; elapsed_t t0_elapsed, t1_elapsed; // struct timespec ts0, ts1; #ifdef DEBUG_PRINT_TASK _fps = new FILE*[num_threads]; char fname[256]; for (int p = 0; p < num_threads; p++) { sprintf(fname, "log.%04d.data", p); _fps[p] = fopen(fname, "w"); } #endif t0_cpu = clock(); get_realtime(&t0_elapsed); // clock_gettime(CLOCK_REALTIME, &ts0); diss_printf(_verbose, _fp, "numerical factorization with %d threads\n", num_threads); _queue_dynamic_pos_start = (int)((double)(_queue_dynamic->size()) * RATIO_QUEUE_GREEDY); _queue_dynamic_pos = _queue_dynamic_pos_start; _queue_dynamic_notcopied = num_threads; // reset status for numeric queue execution _waiting_root = 0; _phase_dynamic = 0; for (int i = 0; i < DIST_TASK_CRITICAL; i++) { _zone_entered[i] = 0; _zone_finished[i] = 0; } for (int i = 0; i < DIST_TASK_CRITICAL; i++) { _zone_static_assigned[i] = 0; } for (int p = 0; p < num_threads; p++) { for (int i = 0; i < DIST_TASK_CRITICAL; i++) { _begins[p][i] = 0; _ends[p][i] = 0; } for (int q = 0; q < DIST_TASK_CRITICAL; q++) { for (int i = 0; i < DIST_TASK_CRITICAL; i++) { _group_entered[p][q][i] = 0; _group_finished[p][q][i] = 0; _group_task_ends[p][q][i] = (-1); 
_group_static_assigned[p][q][i] = (-1); } } // loop : q } // loop : p #ifdef POSIX_THREADS pthread_mutex_init(&_mutex_file, NULL); pthread_mutex_init(&_mutex_dependency, NULL); pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); for (int p = 0; p < num_threads; p++) { pthread_mutex_init(&_mutex_group[p], NULL); } // threads = new pthread_t[num_threads]; threads.resize(num_threads); pthread_mutex_init(&_mutex_root, NULL); pthread_mutex_init(&_mutex_debug, NULL); pthread_cond_init(&_cond_root, NULL); #endif #ifdef DEBUG_EXEC_THREAD_FILE { int pid = get_process_id(); char fname[256]; sprintf(fname, "task-n.%d.data", pid); cerr << "== task log == " << fname << endl; _fout.open(fname); } #endif #if 1 { // copy dependency data : parents -> parents_work for (int p = 0; p < _num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { vector &queue = *(*it)->queue; for (int j = (*it)->begin; j < (*it)->end; j++) { list& parents_work = *(queue[j]->parents_work); if (parents_work.size() > 0) { // to avoid double free parents_work.clear(); } } // loop : j } // loop : it } for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { vector &queue = *((*it)->queue); for (int j = (*it)->begin; j < (*it)->end; j++) { list& parents_work = *(queue[j]->parents_work); if (parents_work.size() > 0) { // to avoid double free parents_work.clear(); } } // loop : j } // loop : it for (int p = 0; p < _num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { vector &queue = *(*it)->queue; for (int j = (*it)->begin; j < (*it)->end; j++) { queue[j]->quit_queue = false; list& parents = *(queue[j]->parents); list& parents_work = *(queue[j]->parents_work); if (parents_work.size() == 0) { #ifdef SX_ACE for (list::const_iterator lt = parents.begin(); lt != parents.end(); ++lt) { parents_work.push_back(*lt); } #else 
// SX-ACE C++ rev 110 with C++98/03 does not understand back_insertera std::copy(parents.begin(), parents.end(), back_inserter(parents_work)); #endif } queue[j]->status = TASK_WAITING; // reset status } // loop : j (*it)->pos = (*it)->begin; } // loop : it } for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { vector &queue = *((*it)->queue); for (int j = (*it)->begin; j < (*it)->end; j++) { queue[j]->quit_queue = false; list& parents = *(queue[j]->parents); list& parents_work = *(queue[j]->parents_work); if (parents_work.size() == 0) { #ifdef SX_ACE for (list::const_iterator lt = parents.begin(); lt != parents.end(); ++lt) { parents_work.push_back(*lt); } #else // SX-ACE C++ rev 110 with C++98/03 does not understand back_inserter std::copy(parents.begin(), parents.end(), back_inserter(parents_work)); #endif } queue[j]->status = TASK_WAITING; // reset status } // loop : j (*it)->pos = (*it)->begin; } // loop : it } #endif // begin : scope for fout if (0) { char fname[256]; int pid = get_process_id(); sprintf(fname, "tasks-before.%d.%d.data", pid, called); FILE *fp; if ((fp = fopen(fname, "a")) != NULL) { write_dependency(fp); fclose(fp); } else { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, fname); exit(-1); } } // end : scope for fout t0_cpu = clock(); get_realtime(&t0_elapsed); #ifdef POSIX_THREADS THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] = new THREAD_QUEUE_EXEC(p, num_threads, this); int pid = pthread_create(&threads[p], &th_attr, &thread_queue_num_factorize_, (void *)params[p]); if (pid != 0) { fprintf(_fp, "bad thread creation ? : %d\n", pid); exit(0); } } pthread_attr_destroy(&th_attr); for (int p = 0; p < num_threads; p++) { int pid = pthread_join(threads[p], &results); if (pid != 0) { fprintf(_fp, "bad thread creation ? 
: %d\n", pid); exit(0); } delete params[p]; } delete [] params; #else THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] = new THREAD_QUEUE_EXEC(p, num_threads, this); threads.push_back(std::thread(thread_queue_num_factorize_, (void *)params[p])); } for (int p = 0; p < num_threads; p++) { threads[p].join(); delete params[p]; } delete [] params; #endif t1_cpu = clock(); get_realtime(&t1_elapsed); diss_printf(_verbose, _fp, "execution of num queue : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); #ifdef DEBUG_EXEC_THREAD_FILE _fout.close(); #endif // begin : scope for fout if (0) { char fname[256]; int ppid = get_process_id(); sprintf(fname, "tasks-copied.%d.data", ppid); FILE *fp; if ((fp = fopen(fname, "a")) != NULL) { write_dependency(fp); fclose(fp); } else { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, fname); exit(-1); } } #ifdef DEBUG_THREAD_TIME { int pid = get_process_id(); char filename[256]; FILE *fp; for (int p = 0; p < num_threads; p++) { for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { (*it)->status = TASK_WAITING; } } for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { (*it)->status = TASK_WAITING; } sprintf(filename, "threadtime-num.%d.data", pid); if ((fp = fopen(filename, "w")) == NULL) { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, filename); exit(-1); } else { fprintf(fp, "**** numeric factorization start ****\n"); for (int p = 0; p < num_threads; p++) { fprintf(fp, "*** thread = %d @ %d ** ", p, (int)_queue_static[p].size()); for (list::const_iterator it = _queue_static[p].begin(); it != _queue_static[p].end(); ++it) { fprintf(fp, "** %d C_task_seq = %d : %s %s **\n", p, (*it)->task_id, ((*it)->parallel_single == TASK_SINGLE ? 
"distributed " : "shared "), (*it)->task_name); if ((*it)->status == TASK_DONE) { continue; } for (int j = (*it)->begin; j < (*it)->end; j++) { if ((*(*it)->queue)[j]->task_id == C_SPARSESCHUR) { elapsed_t t0, t1; t0 = (*(*it)->queue)[j]->t0; t1 = ((C_SparseNumFact_arg *)(*(*it)->queue)[j]->func_arg)->tt[0]; string task_name = "o0 : "; fprintf(fp, "%s\t%d\t%d\t%d\t%d\t%d\t%ld\n", task_name.c_str(), // (*(*it)->queue)[j]->task_name, (*(*it)->queue)[j]->thread_id, convert_sec(t0), convert_microsec(t0), convert_sec(t1), convert_microsec(t1), 0L); for (int i = 0; i < 4; i++) { string task_name = "o" + to_string(i + 1) + " : "; t0 = ((C_SparseNumFact_arg *)(*(*it)->queue)[j]->func_arg)->tt[i]; t1 = ((C_SparseNumFact_arg *)(*(*it)->queue)[j]->func_arg)->tt[i + 1]; fprintf(fp, "%s\t%d\t%d\t%d\t%d\t%d\t%ld\n", task_name.c_str(), // (*(*it)->queue)[j]->task_name, (*(*it)->queue)[j]->thread_id, convert_sec(t0), convert_microsec(t0), convert_sec(t1), convert_microsec(t1), 0L); } } else { fprintf(fp, "%s\t%d\t%d\t%d\t%d\t%d\t%ld\n", (*(*it)->queue)[j]->task_name, (*(*it)->queue)[j]->thread_id, convert_sec((*(*it)->queue)[j]->t0), convert_microsec((*(*it)->queue)[j]->t0), convert_sec((*(*it)->queue)[j]->t1), convert_microsec((*(*it)->queue)[j]->t1), *((*(*it)->queue)[j]->ops_complexity)); } //*((C_SparseNumFact_arg *)(*(*it)->queue)[j]->func_arg)->nops : } (*it)->status = TASK_DONE; } } fprintf(fp, "*** dynamic : %d ***", (int)_queue_dynamic->size()); for (vector::const_iterator it = _queue_dynamic->begin(); it != _queue_dynamic->end(); ++it) { fprintf(fp, "** C_task_seq = %s : [%d, %d] %d **\n", (*it)->task_name, (*it)->begin, (*it)->end,(*it)->pos); if ((*it)->status == TASK_DONE) { continue; } for (int j = (*it)->begin; j < (*it)->end; j++) { fprintf(fp, "%s\t%d\t%d\t%d\t%d\t%d\t%ld\n", (*(*it)->queue)[j]->task_name, (*(*it)->queue)[j]->thread_id, convert_sec((*(*it)->queue)[j]->t0), convert_microsec((*(*it)->queue)[j]->t0), convert_sec((*(*it)->queue)[j]->t1), 
convert_microsec((*(*it)->queue)[j]->t1), *((*(*it)->queue)[j]->ops_complexity)); } (*it)->status = TASK_DONE; } fprintf(fp, "**** numeric factorization end ****\n"); fclose(fp); } } // if fopen() #endif // DEBUG_THREAD_TIME #ifdef DEBUG_PRINT_TASK for (int p = 0; p < num_threads; p++) { fclose(_fps[p]); } delete [] _fps; #endif threads.clear(); } void *thread_queue_symb_factorize_(void *arg) { THREAD_QUEUE_EXEC *params = (THREAD_QUEUE_EXEC *)arg; params->dissectionRuntime->thread_queue_symb_factorize(params->id, params->num_threads); #ifdef POSIX_THREADS pthread_exit(arg); #endif return (void *)NULL; } void QueueRuntime::thread_queue_symb_factorize(const int pid, const int num_threads) { // const int pid = params->id; // const int num_threads = params->num_threads; // greedy -- better to be in seperated function int pos, end; C_task_seq* it = _queue_symb; while(1) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); pos = it->pos; end = it->end; if (pos < end) { it->pos++; } MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (pos >= end) { break; } C_task *task = (*it->queue)[pos]; task->status = TASK_WORKING; #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t0)); // clock_gettime(CLOCK_MONOTONIC, &(task->t0)); #endif task->func(task->func_arg); #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t1)); // clock_gettime(CLOCK_MONOTONIC, &(task->t1)); task->thread_id = pid; #endif // accuessing to unsigned char does not neet mutex task->status = TASK_DONE; #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << pid << " " << pos << " ) " ; MUTEX_UNLOCK(_mutex_debug); } #endif } // while (1) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << it->task_name << " @ " << it->task_id << " greedy end." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif } void *thread_queue_num_factorize_(void *arg) { THREAD_QUEUE_EXEC *params = (THREAD_QUEUE_EXEC *)arg; params->dissectionRuntime->thread_queue_num_factorize(params->id, params->num_threads); #ifdef POSIX_THREADS pthread_exit(arg); #endif return (void *)NULL; } void QueueRuntime::thread_queue_num_factorize(const int pid, const int num_threads) { int *permute_block = new int[SIZE_B1]; list::const_iterator it = _queue_static[pid].begin(); int zone, zone_idxn, zone_idxp; bool zone_flag; int zone_first_entered = 0; int zone_last_entered = 0; int zone_first_finished = 0; int zone_last_finished = 0; int cnt_cdfull = 0; // zone = 0; while(it != _queue_static[pid].end()) { #ifdef DEBUG_THREAD_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " : " << (*it)->task_name << " _phase_dynamic = " << _phase_dynamic << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // zone_idxp = (zone + DIST_TASK_CRITICAL - 1) % DIST_TASK_CRITICAL; zone_idxn = zone % DIST_TASK_CRITICAL; zone_first_entered = 0; zone_last_entered = 0; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); if (_zone_entered[zone_idxn] == 0) { zone_first_entered = 1; _zone_static_assigned[zone_idxn] = 0; } _zone_entered[zone_idxn]++; zone_flag = (_zone_entered[zone_idxn] == num_threads); if (zone_flag) { // clear cyclic buffer _zone_entered[zone_idxp] = 0; _zone_static_assigned[zone_idxp] = 0; // only for safety zone_last_entered = 1; } MUTEX_UNLOCK(_mutex_root); } // scope of mutex zone++; // if ((*it)->parallel_single == TASK_PARALLEL) { switch((*it)->task_id) { case C_DFULL : #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " C_DFULL " << (*it)->task_name << " : " << (*it)->task_id << " @ " << zone << " : " << zone_flag << " : " << " zone ( " << zone_first_entered << " : " << zone_last_entered << " )" << " : " << (*it)->pos << " " << (*it)->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif thread_queue_C_DFULL(pid, num_threads, *it, 
permute_block, cnt_cdfull, zone_first_entered, zone_idxn); #ifdef DEBUG_THREAD_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " C_DFULL " << (*it)->task_name << " end." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; case C_SPARSELOCALSCHUR: case C_DTRSM: case C_DGEMM: case C_DSUB: case C_FILLMATRIX: case C_DEALLOCATE: case C_SPARSESOLVER: #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " remaining diagonal " << (*it)->task_name << " : " << (*it)->task_id << " @ " << zone << " : " << zone_flag << " : " << " zone ( " << zone_first_entered << " : " << zone_last_entered << " )" << " : " << (*it)->pos << " " << (*it)->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif thread_queue_parallel_dynamic(pid, num_threads, *it, permute_block, zone_idxn); #ifdef DEBUG_THREAD_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " remaining diagonal " << (*it)->task_name << " end." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; case C_SPARSELOCALSCHUR1: case C_DTRSM1: case C_DGEMM1: case C_DSUB1: case C_FILLMATRIX1: case C_DEALLOCATE1: #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " remaining off-diagonal " << (*it)->task_name << " : " << (*it)->task_id << " @ " << zone << " : " << zone_flag << " : " << " zone ( " << zone_first_entered << " : " << zone_last_entered << " )" << " : " << (*it)->pos << " " << (*it)->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif thread_queue_parallel_static(pid, num_threads, *it, permute_block, zone_idxp, zone_idxn, zone_first_entered, zone_last_entered); #ifdef DEBUG_THREADS_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " remaining off-diagonal " << (*it)->task_name << " end." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; } } else { // if ((*it)->parallel_single == TASK_SINGLE) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " single " << (*it)->task_name << " : " << (*it)->task_id << " @ " << zone << " : " << zone_flag << " : " << " zone ( " << zone_first_entered << " : " << zone_last_entered << " )" << " : " << (*it)->pos << " " << (*it)->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif thread_queue_single(pid, num_threads, *it, permute_block, zone_idxn); #ifdef DEBUG_THREAD_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " single " << (*it)->task_name << " end." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } // if ((*it)->parallel_single == TASK_SINGLE) int clear_flag = 0; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); if (_zone_finished[zone_idxn] == 0) { zone_first_finished = 1; } _zone_finished[zone_idxn]++; if (_zone_finished[zone_idxn] == num_threads) { zone_last_finished = 1; // clear status of cyclic buffer _zone_finished[zone_idxp] = 0; // the last task (*it)->status = TASK_DONE; if((*it)->task_id == C_DFULL) { clear_flag = 1; } #ifdef DEBUG_THREAD_LOOP { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << (*it)->task_name << " end." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif } MUTEX_UNLOCK(_mutex_root); } // scope of mutex // without mutex if((*it)->task_id == C_DFULL) { cnt_cdfull = (cnt_cdfull + 1) % DIST_TASK_CRITICAL; if (clear_flag) { const int cnt_cdfull_p = ((cnt_cdfull + DIST_TASK_CRITICAL - 1) % DIST_TASK_CRITICAL); for (int p = 0; p < num_threads; p++) { for (int i = 0; i < DIST_TASK_CRITICAL; i++) { _group_entered[p][cnt_cdfull_p][i] = 0; _group_finished[p][cnt_cdfull_p][i] = 0; _group_task_ends[p][cnt_cdfull_p][i] = (-1); } } } } // if((*it)->task_id == C_DFULL) { ++it; } // while (it != _queue_static[pid].end()) { delete [] permute_block; } void QueueRuntime::thread_queue_C_DFULL(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int cnt_cdfull, const int zone_first_entered, const int zone_idxn) { const int pid_g = it->mutex_id; int group = 0; int group_first_entered = 0, group_last_entered = 0; int group_first_finished = 0; int pos; int pid0; int *_grp_entered = _group_entered[pid_g][cnt_cdfull]; int *_grp_finished = _group_finished[pid_g][cnt_cdfull]; int *_grp_task_ends = _group_task_ends[pid_g][cnt_cdfull]; int *_grp_static_assigned = _group_static_assigned[pid_g][cnt_cdfull]; bool zone_flag; // get status other threads entered the same zone { // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " cnt_cdfull = " << cnt_cdfull << " parallel : c_dfull " << it->task_name << " pos = " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); if (_grp_entered[0] == 0) { // reset flags to keep status of entered/finshed tasks for (int i = 1; i < DIST_TASK_CRITICAL; i++) { _grp_entered[i] = 0; _grp_finished[i] = 0; } for (int i = 0; i < it->num_threads; i++) { _group_nops[pid_g][i] = 0L; } group_first_entered 
= 1; _grp_static_assigned[0] = 0; // 12 Feb.2013 Atsusti : for safety? #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " : " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } // pid0 is decided by arrived order pid0 = _grp_entered[0]++; if (_grp_entered[0] == it->num_threads) { group_last_entered = 1; } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex // while (1) { // global loop of task queue // const int gidxp = (group + DIST_TASK_CRITICAL - 1) % DIST_TASK_CRITICAL; const int gidxn = group % DIST_TASK_CRITICAL; int finish_group = 0; int skip_flag = 0; if (group > 0) { { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); _begins_group[pid_g][pid0][gidxn] = (-1); _ends_group[pid_g][pid0][gidxn] = (-1); group_first_entered = 0; group_last_entered = 0; if (_grp_entered[gidxn] == 0) { group_first_entered = 1; // assuming other jobs are assiged by static for (int i = 0; i < it->num_threads; i++) { _group_nops[pid_g][i] = 0L; } #if 1 pos = it->pos; int jtmp = pos; for (int i = (*it->queue)[pos]->parallel_id; i < (*it->queue)[pos]->parallel_max; i++) { jtmp += (*it->queue)[jtmp]->atomic_size; } _grp_task_ends[gidxn] = jtmp; _grp_task_ends[gidxp] = (-1); _grp_static_assigned[gidxn] = 0; #else _group_task_ends[pid_g][cnt_cdfull][gidxn] = (-1); #endif } // if (_grp_entered[gidxn] == 0) _grp_entered[gidxn]++; if (_grp_entered[gidxn] == it->num_threads) { // clear status of cyclic buffer _grp_entered[gidxp] = 0; group_last_entered = 1; } if ((_grp_finished[gidxn] > 0) && (group_first_finished == 0)) { _grp_finished[gidxn]++; if (_grp_finished[gidxn] == it->num_threads) { // clear status of cyclic buffer _grp_finished[gidxp] = 0; _group_task_ends[pid_g][cnt_cdfull][gidxp] = (-1); } skip_flag = 1; } // if ((_grp_finished[gidxn] > 0) && (group_first_finished == 0)) MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex } // if (group > 0) else { { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); if 
((_grp_finished[gidxn] > 0) && (group_first_finished == 0)) { _grp_finished[gidxn]++; if (_grp_finished[gidxn] == it->num_threads) { // clear status of cyclic buffer _grp_finished[gidxp] = 0; _group_task_ends[pid_g][cnt_cdfull][gidxp] = (-1); } skip_flag = 1; } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope mutex } // if (group > 0) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pid_g = " << pid_g << " pid0 " << pid0 << " group " << group << " entered " << group_first_entered << " / " << group_last_entered << " (" << _grp_entered[gidxp] << "@" << gidxp << ") / (" << _grp_entered[gidxn] << "@" << gidxn << ") finished " << " (" << _grp_finished[gidxp] << "@" << gidxp << ") / ( " << _grp_finished[gidxn] << "@" << gidxn << ") " << it->pos << " : " << it->end << " skip_flag = " << skip_flag; if (skip_flag) { cerr << " task is already finished : my job is skipped"; } cerr << " zone_flag = " << zone_flag << " zone_idxn = " << zone_idxn << endl; MUTEX_UNLOCK(_mutex_debug); } #endif group++; // if (skip_flag) { continue; } #if 1 // wait until other threads enter the same zone while (!zone_flag) { // looking for dynamic queue int flag = 1; while ((flag == 1) && (!zone_flag)) { flag = execute_task_dynamic_buffer(permute_block, pid, _mutex_dependency); { // scope of mutex // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } } // while ((flag == 1) && (!zone_flag)) if (zone_flag) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pid_g = " << pid_g << " pid0 " << pid0 << " zone_idxn = " << zone_idxn << " zone_flag = " << zone_flag << endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; } // get tasks with atomic_size from the queue : *it int itmp, jtmp; int exit_flag = 0; { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; // do not increase queue more than its size if (pos == it->end) 
{ exit_flag = 1; } else { // there might be segments with more than 1 automic size // between parallel_id and parallel_max itmp = pos + (*it->queue)[pos]->atomic_size; jtmp = (itmp - 1 + ((*it->queue)[pos]->parallel_max - (*it->queue)[pos]->parallel_id)); #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " +atomic_size = " << itmp << " parallel_max " << jtmp << " : " << it->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif if (itmp <= jtmp) { it->pos = itmp; } } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (exit_flag) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << "it->end = " << it->end << " : " << it->task_name << " already finished.." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } if (itmp == jtmp) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " jtmp = " << jtmp << endl; MUTEX_UNLOCK(_mutex_debug); } #endif finish_group = 1; } // reserve tasks and conunt estimated job size { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { _group_nops[pid_g][pid0] += *((*it->queue)[pos + i]->ops_complexity); } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope mutex #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); if ((*it->queue)[pos]->atomic_size > 1) { C_task *kt = (*it->queue)[pos]; cerr << pid << " b-c-a " << pos << " : " << kt->task_name << " : " << kt->parents_work->size() << " / "; for (list::const_iterator mt = kt->parents_work->begin(); mt != kt->parents_work->end(); ++mt) { cerr << (int)(*mt)->status << "@" << (*mt)->task_name << " / "; } cerr << "before statically assigned task : with status check." 
<< endl; } MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { C_task *kt = (*it->queue)[pos + i]; int waiting1 = check_parents_done(kt, _mutex_dependency); while(waiting1 > 0) { // { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = "<< waiting1 << " : " << (int)_waiting_root << " " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); // #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << "\t" << pid << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif _waiting_root--; // other thread broacast wake up and decreasing _waiting_root waiting1 = check_parents_done(kt, _mutex_dependency); MUTEX_UNLOCK(_mutex_root); } // scope of mutex //#endif } // while (waiting > 0) if (waiting1 == (-1)) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << " : " << (*it->queue)[pos + i]->task_name << " : " << it->task_name << " quitted..." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } execute_task(it, (pos + i), permute_block, pid, _mutex_dependency); if ((*it->queue)[pos + i]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " atomic_size= " << (*it->queue)[pos]->atomic_size << " : " << it->task_name << " finished..." 
<< "skip " << (*it->queue)[pos + i]->to_next_task << " : " << __FILE__ << " : " << __LINE__ << endl; MUTEX_UNLOCK(_mutex_debug); } #endif pos += (*it->queue)[pos + i]->to_next_task; return; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "{ " << (*it->queue)[pos + i]->task_name << " @ " << pid << " " << (pos + i) << " }"; MUTEX_UNLOCK(_mutex_debug); } #endif } // loop : i // static tasks are assigned by other thread if (finish_group) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " C_FULL : the first greedy end the queue." << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; for (int p = 0; p < it->num_threads; p++) { _begins_group[pid_g][p][gidxn] = pos; _ends_group[pid_g][p][gidxn] = pos; } _grp_static_assigned[gidxn] = 1; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex continue; } { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // while (zone_flag) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pid_g = " << pid_g << " pid0 " << pid0 << " group " << (group - 1) // after increment of group << " zone_flag = " << zone_flag << endl; MUTEX_UNLOCK(_mutex_debug); } #endif #endif if (group_last_entered) { // long nops, nops_total, nops_static, nops_per_thread; nops = 0L; nops_total = 0L; { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (pos == it->end) { break; } const int queue_size = ((*it->queue)[pos]->parallel_max - (*it->queue)[pos]->parallel_id); if (queue_size < it->num_threads) { { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; for (int p = 0; p < it->num_threads; p++) { _begins_group[pid_g][p][gidxn] = pos; _ends_group[pid_g][p][gidxn] = pos; } 
_grp_static_assigned[gidxn] = 1; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex } else { { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; int jj = pos; for (int i = (*it->queue)[pos]->parallel_id; i < (*it->queue)[pos]->parallel_max; i++) { // atomic_size const int itmp = jj; for (int j = 0; j < (*it->queue)[itmp]->atomic_size; j++, jj++) { nops_total += *((*it->queue)[jj]->ops_complexity); } } nops_static = (long)((double)nops_total * RATIO_QUEUE_GREEDY); nops_per_thread = (nops_static / (long)it->num_threads); #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pos << " " << _grp_task_ends[gidxn] << " " << nops_total << " " << nops_per_thread << endl; MUTEX_UNLOCK(_mutex_debug); } #endif long ntmp = 0L; for (int p = 0; p < it->num_threads; p++) { if (ntmp < _group_nops[pid_g][p]) { ntmp = _group_nops[pid_g][p]; } } if (ntmp >= nops_per_thread) { // greedy #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pos << " max _group_nops[pid_g][] = " << ntmp << " nops_per_threads = " << nops_per_thread << endl; MUTEX_UNLOCK(_mutex_debug); } #endif for (int p = 0; p < it->num_threads; p++) { _begins_group[pid_g][p][gidxn] = pos; _ends_group[pid_g][p][gidxn] = pos; } // _group_task_ends[pid_g][cnt_cdfull][gidxn] = end_queue; } else { // if (ntmp < nops_per_thread) _begins_group[pid_g][0][gidxn] = pos; // for fail-safe //_ends_group[pid_g][it->num_threads - 1][gidxn] = end_queue; _ends_group[pid_g][it->num_threads - 1][gidxn] = _grp_task_ends[gidxn]; nops = 0L; int p = 0; for (int j = pos; j < _grp_task_ends[gidxn]; ) { for (int k = 0; k < (*it->queue)[j]->atomic_size; k++) { nops += *((*it->queue)[j + k]-> ops_complexity); } j += (*it->queue)[j]->atomic_size; // do not devide atomic operation if (nops > nops_per_thread) { _ends_group[pid_g][p][gidxn] = j; p++; if (p == it->num_threads) { break; } _begins_group[pid_g][p][gidxn] = j; nops = 0L; } // if } // loop : j if (p < it->num_threads) { // 
static assignment is failed. #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << "pid_g = " << pid_g << " gidxn = " << gidxn << " : pos = " << pos << " static assignment is failed" << endl; MUTEX_UNLOCK(_mutex_debug); } #endif for (int j = 0; j < it->num_threads; j++) { _begins_group[pid_g][j][gidxn] = pos; _ends_group[pid_g][j][gidxn] = pos; } } it->pos = _ends_group[pid_g][it->num_threads - 1][gidxn]; // cerr << pid << " pos = " << it->pos << endl; // _group_task_ends[pid_g][cnt_cdfull][gidxn] = end_queue; } // if (ntmp < nops_per_thread) _grp_static_assigned[gidxn] = 1; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex } // if (queue_size < it->num_threads) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " : "; for (int p = 0; p < it->num_threads; p++) { cerr << "[ " << _begins_group[pid_g][p][gidxn] << " : " << _ends_group[pid_g][p][gidxn] << " ] "; } cerr << _grp_task_ends[gidxn] << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } // if (group_last_entered) else { int group_task_end, group_static_assigned; { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); group_task_end = _grp_task_ends[gidxn]; group_static_assigned = _grp_static_assigned[gidxn]; pos = it->pos; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " gidxn = " << gidxn << " group_task_end " << group_task_end << " group_static_assigned " << group_static_assigned << endl; MUTEX_UNLOCK(_mutex_debug); } #endif if (pos == it->end && (group_static_assigned == 0)) { // not yet static assigned but queue is exhausted #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << "it->end = " << it->end << " : " << it->task_name << " finished." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } // include group_static_assigned == (-1) : 21 Jun.2012, Atsushi while (group_static_assigned != 1) { // greedy int exit_flag = 0; // to skip un-necessary access to _dynamic_queue { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; if (pos == it->end) { exit_flag = 1; } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (exit_flag) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << "it->end = " << it->end << " : " << it->task_name << " already finished." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } // looking for dynamic queue int flag = 1; // include group_static_assigned == (-1) : 21 Jun.2012, Atsushi while ((flag == 1) && (group_static_assigned != 1)) { flag = execute_task_dynamic_buffer(permute_block, pid, _mutex_dependency); { // scope of mutex // std::unique_locklck_root(_mutex_group[pid_g]); MUTEX_LOCK(_mutex_group[pid_g], lck_root); group_static_assigned = _grp_static_assigned[gidxn]; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex } // while ((flag == 1) && (group_static_assigned != 0)) if (group_static_assigned) { break; } // get tasks with atomic_size from the queue : *it int itmp, jtmp; { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; // do not increase queue more than its size if (pos == it->end) { exit_flag = 1; } else { // there might be segments with more than 1 automic size // between parallel_id and parallel_max itmp = pos + (*it->queue)[pos]->atomic_size; jtmp = (itmp - 1 + ((*it->queue)[pos]->parallel_max - (*it->queue)[pos]->parallel_id)); #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " +atomic_size = " << itmp << " parallel_max " << jtmp << " : " << it->end << endl; MUTEX_UNLOCK(_mutex_debug); } #endif if (itmp <= jtmp) { it->pos = itmp; } } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (exit_flag) 
{ #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << "it->end = " << it->end << " : " << it->task_name << " already finished.." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } if (itmp == jtmp) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " jtmp = " << jtmp << endl; MUTEX_UNLOCK(_mutex_debug); } #endif finish_group = 1; } // reserve tasks and conunt estimated job size { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { _group_nops[pid_g][pid0] += *((*it->queue)[pos + i]->ops_complexity); } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); if ((*it->queue)[pos]->atomic_size > 1) { C_task *kt = (*it->queue)[pos]; cerr << pid << " b-c-a " << pos << " : " << kt->task_name << " : " << kt->parents_work->size() << " / "; for (list::const_iterator mt = kt->parents_work->begin(); mt != kt->parents_work->end(); ++mt) { cerr << (int)(*mt)->status << "@" << (*mt)->task_name << " / "; } cerr << "before statically assigned task : with status check." 
<< endl; } MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { C_task *kt = (*it->queue)[pos + i]; int waiting = check_parents_done(kt, _mutex_dependency); while(waiting > 0) { // { // scope of mutex // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = "<< waiting << " : " << (int)_waiting_root << " " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); // #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << "\t" << pid << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif _waiting_root--; // other thread broacast wake up and decreasing _waiting_root waiting = check_parents_done(kt, _mutex_dependency); MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // while (waiting > 0) if (waiting == (-1)) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos + i << " : " << (*it->queue)[pos + i]->task_name << " : " << it->task_name << " quitted..." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } execute_task(it, (pos + i), permute_block, pid, _mutex_dependency); if ((*it->queue)[pos + i]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " atomic_size= " << (*it->queue)[pos]->atomic_size << " : " << it->task_name << " finished..." 
<< "skip " << (*it->queue)[pos + i]->to_next_task << " : " << __FILE__ << " : " << __LINE__ << endl; MUTEX_UNLOCK(_mutex_debug); } #endif pos += (*it->queue)[pos + i]->to_next_task; return; // break; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "{ " << (*it->queue)[pos + i]->task_name << " @ " << pid << " " << (pos + i) << " }"; MUTEX_UNLOCK(_mutex_debug); } #endif } // loop : i // static tasks are assigned by other thread { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); group_static_assigned = _grp_static_assigned[gidxn]; if ((finish_group == 1) && (group_static_assigned == 0)) { // static task is not assigned but queue becomes empty for (int p = 0; p < it->num_threads; p++) { _begins_group[pid_g][p][gidxn] = (-1); _ends_group[pid_g][p][gidxn] = (-1); } _grp_task_ends[gidxn] = jtmp; } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (finish_group) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " C_FULL : the first greedy end the queue." << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; for (int p = 0; p < it->num_threads; p++) { _begins_group[pid_g][p][gidxn] = pos; _ends_group[pid_g][p][gidxn] = pos; } _grp_static_assigned[gidxn] = 1; MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex continue; } // just for debugging : group_task_end >=0 <=> braek while ( < 0 ) #ifdef DEBUG_EXEC_THREAD if (group_static_assigned) { { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " end the first greedy " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } } #endif } // while (group_static_assigned != 1) } // if (group_last_entered) // static assignment int _begin_grp, _end_grp; if ( _grp_static_assigned[gidxn] == (-1)) { // this never happen? 
_begin_grp = _end_grp = (-1); #ifdef DEBUG_EXEC_THREAD_21Jun2012 { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << ": pid0 = " << pid0 << " gidxn = " << gidxn << " static assignment is not yet done! " << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } else { _begin_grp = _begins_group[pid_g][pid0][gidxn]; _end_grp = _ends_group[pid_g][pid0][gidxn]; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << ": pid0 = " << pid0 << " gidxn = " << gidxn << " < " << _begin_grp << " " << _end_grp << " > assigned" << endl; MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = _begin_grp; i < _end_grp; i++) { // no need to check dependency for static assignement execute_task(it, i, permute_block, pid); if ((*it->queue)[i]->quit_queue) { fprintf(stderr, "%d i = %d : %s finished ... skip %d\n", pid, i, it->task_name, (*it->queue)[i]->to_next_task); i += (*it->queue)[i]->to_next_task; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << (*it->queue)[i]->task_name << " @ " << pid << " " << i << " )"; MUTEX_UNLOCK(_mutex_debug); } #endif } // loop : i // greedy while(1) { int group_task_end; { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); pos = it->pos; group_task_end = _grp_task_ends[gidxn]; if (pos < group_task_end) { const int itmp = (it->pos + (*it->queue)[pos]->atomic_size); if (itmp <= group_task_end) { it->pos = itmp; } } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex if (pos >= group_task_end) { break; } if (_begin_grp < _end_grp) { // <==> statically assigned block is computed #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); if ((*it->queue)[pos]->atomic_size > 1) { C_task *kt = (*it->queue)[pos]; cerr << pid << " b-c-a " << pos << " : " << kt->task_name << " : " << kt->parents_work->size() << " / "; for (list::const_iterator mt = kt->parents_work->begin(); mt != kt->parents_work->end(); ++mt) { cerr << (int)(*mt)->status << "@" << (*mt)->task_name << " / "; } cerr << "within 
statically assigned task : without status check." << endl; } MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { execute_task(it, (pos + i), permute_block, pid, _mutex_group[pid_g]); if ((*it->queue)[pos + i]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " atomic_size= " << (*it->queue)[pos]->atomic_size << " : " << it->task_name << " finished..." << "skip " << (*it->queue)[pos + i]->to_next_task << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // const int ibegin = pos + i + 1; pos += (*it->queue)[pos + i]->to_next_task; return; // break; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << (*it->queue)[pos + i]->task_name << " @ " << pid << " " << (pos + i) << " )"; MUTEX_UNLOCK(_mutex_debug); } #endif } // loop : i } // if (_begins_grp == _ends_grp) else { // <==> no statically assigned block #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); if ((*it->queue)[pos]->atomic_size > 1) { C_task *kt = (*it->queue)[pos]; cerr << pid << " b-c-a " << pos << " : " << kt->task_name << " : " << kt->parents_work->size() << " / "; for (list::const_iterator mt = kt->parents_work->begin(); mt != kt->parents_work->end(); ++mt) { cerr << (int)(*mt)->status << "@" << (*mt)->task_name << " / "; } cerr << "no statically assigned task : with status check." 
<< endl; } MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = 0; i < (*it->queue)[pos]->atomic_size; i++) { C_task *kt = (*it->queue)[pos + i]; int waiting = check_parents_done(kt, _mutex_dependency); while(waiting > 0) { { // scope of mutex // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = " << waiting << " : " << (int)_waiting_root << " : " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); // _waiting_root--; #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << endl << pid << " " << kt->task_name << " " << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif waiting = check_parents_done(kt, _mutex_dependency); MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // while (waiting > 0) if (waiting == (-1)) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos + i << " : " << (*it->queue)[pos + i]->task_name << " : " << it->task_name << " quitted..." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } // if (wating == (-1)) execute_task(it, (pos + i), permute_block, pid, _mutex_group[pid_g]); if ((*it->queue)[pos + i]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " i = " << i << " : " << it->task_name << " finished... 
skip " << (*it->queue)[pos + i]->to_next_task << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // const int ibegin = pos + i + 1; i += (*it->queue)[pos + i]->to_next_task; return; } // if (quit_queue) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << (*it->queue)[pos + i]->task_name << " @ " << pid << " " << (pos + i) << " )"; MUTEX_UNLOCK(_mutex_debug); } #endif } // loop : i } // // if (_begin_grp <_end_grp) } // while (1) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " finish " << group << " pos = " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif if (pos >= it->end) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pod = " << pos << "it->end = " << it->end << " : " << it->task_name << " finished..." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif return; } { // scope of mutex MUTEX_LOCK(_mutex_group[pid_g], lck_root); group_first_finished = 0; if (_grp_finished[gidxn] == 0) { group_first_finished = 1; } _grp_finished[gidxn]++; if (_grp_finished[gidxn] == it->num_threads) { // clear status of cyclic buffer _grp_finished[gidxp] = 0; } MUTEX_UNLOCK(_mutex_group[pid_g]); } // scope of mutex } // while (1) } void QueueRuntime::thread_queue_parallel_dynamic(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int zone_idxn) { // completely greedy int pos, end, atomic_size, atomic_id, update_flag; while(1) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); pos = it->pos; end = it->end; if (pos < end) { atomic_size = (*it->queue)[pos]->atomic_size; atomic_id = (*it->queue)[pos]->atomic_id; it->pos += atomic_size; // it->pos++; if (it->pos >= end) { update_flag = 1; } } MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (pos >= end) { break; } C_task *kt = (*it->queue)[pos]; // [pos + atomic_size - 1]; int waiting = check_parents_done(kt, _mutex_dependency); while (waiting > 0) { // looking for dynamic queue int flag = 
execute_task_dynamic_buffer(permute_block, pid, _mutex_dependency); if (flag != 1) { while(waiting > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = "<< waiting << " : " << (int)_waiting_root << " : " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); // #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << endl << pid << " " << kt->task_name << " " << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif _waiting_root--; // other thread broacast wake up and decreasing _waiting_root MUTEX_UNLOCK(_mutex_root); } // scope of mutex waiting = check_parents_done(kt, _mutex_dependency); } // while (waiting > 0) } // if (flag != 1) waiting = check_parents_done(kt, _mutex_dependency); } // while (waiting > 0) for (int m = 0; m < atomic_size; m++) { execute_task(it, (pos + m), permute_block, // loop with atomic_size pid, _mutex_dependency); } if ((*it->queue)[pos]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " : " << it->task_name << " finished..." << "skip " << (*it->queue)[pos]->to_next_task << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // const int ibegin = pos + 1; pos += (*it->queue)[pos]->to_next_task; } // if (quit_queue) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << pid << " " << pos << " ) " ; MUTEX_UNLOCK(_mutex_debug); } #endif } // while (1) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << it->task_name << " @ " << it->task_id << " greedy end." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif } void QueueRuntime::thread_queue_parallel_static(const int pid, const int num_threads, C_task_seq* it, int *permute_block, const int zone_idxp, const int zone_idxn, const int zone_first_entered, const int zone_last_entered) { // the last thread which turns into the zone assigns jobs statiscally int pos, end, atomic_size, atomic_id; int finished_status = 0; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); pos = it->pos; MUTEX_UNLOCK(_mutex_root); } // scope of mutex // reach the end before all threads entered if (pos == it->end) { return; } if (zone_last_entered) { long nops, nops_total, nops_static, nops_per_thread; int begin, end; // this mutex is relatively larage // : lock out other threads until assingnment is finished { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); begin = it->pos; end = it->end; if ((end - begin) < num_threads) { // execute tasks by greedy way for (int q = 0; q < num_threads; q++) { _begins[q][zone_idxn] = _ends[q][zone_idxn] = pos; } _zone_static_assigned[zone_idxn] = 1; } else { // nops_total = it->ops_complexity; nops_total = 0L; for (int j = begin; j < end; j++) { nops_total += *((*it->queue)[j]->ops_complexity); // 28 May 2012 : Atsushi // cerr << (*it->queue)[j]->task_name // << *((*it->queue)[j]->ops_complexity) << " "; } //cerr << endl; nops_static = (long)((double)nops_total * RATIO_QUEUE_GREEDY); nops_per_thread = nops_static / (long)num_threads; #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " begin = " << begin << " end = " << end << " " << nops_total << " " << nops_static << " " << nops_per_thread << endl; MUTEX_UNLOCK(_mutex_debug); } #endif _begins[0][zone_idxn] = begin; _ends[num_threads - 1][zone_idxn] = end; nops = 0L; int p = 0; for (int j = begin; j < end; j++) { nops += *((*it->queue)[j]->ops_complexity); if (nops > nops_per_thread) { // && ((*it->queue)[j]->atomic_size == // 13 May 2014 // 
((*it->queue)[j]->atomic_id + 1))) { const int jj = j - (*it->queue)[j]->atomic_id; _ends[p][zone_idxn] = jj; p++; if (p == num_threads) { break; } _begins[p][zone_idxn] = jj; nops = 0L; } } // loop : j if (p < num_threads) { // equally assignment is failed // execute tasks by greedy way for (int q = 0; q < num_threads; q++) { _begins[q][zone_idxn] = _ends[q][zone_idxn] = pos; } } else { it->pos = _ends[num_threads - 1][zone_idxn]; } _zone_static_assigned[zone_idxn] = 1; } MUTEX_UNLOCK(_mutex_root); } // scope of mutex #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); for (int p = 0; p < num_threads; p++) { cerr << "[ " << _begins[p][zone_idxn] << " : " << _ends[p][zone_idxn] << " ] "; } cerr << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // } // if (zone_last_entered) else { // if (zone_last_entered) int zone_static_assigned = 0; while(zone_static_assigned == 0) { // similar to the routine : execute_task_dynamic_buffer() { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); zone_static_assigned = _zone_static_assigned[zone_idxn]; if (zone_static_assigned == 0) { pos = it->pos; end = it->end; if (pos < end) { atomic_size = (*it->queue)[pos]->atomic_size; atomic_id = (*it->queue)[pos]->atomic_id; it->pos += atomic_size; // pos++; if (it->pos >= end) { finished_status = 1; // _phase_dynamic++; if (_phase_dynamic == _queue_dynamic->size()) { _phase_dynamic = (-1); } // queue is exhaused by this step and enforcing other tasks to quit for (int p = 0; p < num_threads; p++) { _begins[p][zone_idxn] = _ends[p][zone_idxn] = end; } } // if (it->pos == end) { } } // if (zone_static_assigned == 0) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " pos = " << pos << " zone_idxn = " << zone_idxn << " zone_static_assigned " << zone_static_assigned << " : " << _zone_static_assigned[zone_idxn] << endl; MUTEX_UNLOCK(_mutex_debug); } #endif MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (zone_static_assigned == 1) { break; } if 
(pos >= end) { // skip my task _begins[pid][zone_idxn] = _ends[pid][zone_idxn] = end; #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " queue is already exhausted " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; } C_task *kt = (*it->queue)[pos]; // [pos + atomic_size - 1]; int waiting = check_parents_done(kt, _mutex_dependency); if (waiting > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); while (waiting > 0) { // real sleeping _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = "<< waiting << " : " << (int)_waiting_root << " : " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); _waiting_root--; #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << endl << pid << " " << kt->task_name << " " << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif // other thread broacast wake up and decreasing _waiting_root waiting = check_parents_done(kt, _mutex_dependency); } // while (waiting > 0) MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // if (waiting > 0) atomic_size = (*it->queue)[pos]->atomic_size; for (int m = 0; m < atomic_size; m++) { execute_task(it, (pos + m), permute_block, // loop with atomic_size pid, _mutex_dependency); } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "{ " << pid << " " << pos << " }"; MUTEX_UNLOCK(_mutex_debug); } #endif if (finished_status) { // && (zone_static_assigned == 0)) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " queue is exhausted in the first greedy" << it->pos << 
endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; } { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); it->ops_complexity -= *((*it->queue)[pos]->ops_complexity); zone_static_assigned = _zone_static_assigned[zone_idxn]; MUTEX_UNLOCK(_mutex_root); } // scope of mutex #ifdef DEBUG_EXEC_THREAD if (zone_static_assigned == 1) { { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " end the first greedy " << it->pos << endl; MUTEX_UNLOCK(_mutex_debug); } } #endif } // while (zone_static_assigned == 0) } // if (zone_last_entered) // static #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "pid = " << pid << " zone_idxn = " << zone_idxn << "< " << _begins[pid][zone_idxn] << " " << _ends[pid][zone_idxn] << " > assigned" << endl; MUTEX_UNLOCK(_mutex_debug); } #endif for (int i = _begins[pid][zone_idxn]; i < _ends[pid][zone_idxn]; i++) { execute_task(it, i, permute_block, pid); } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << ": " << "< " << _begins[pid][zone_idxn] << " " << _ends[pid][zone_idxn] << " > done." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif // greedy #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << it->task_name << " @ " << it->task_id << " greedy begin: " << endl; MUTEX_UNLOCK(_mutex_debug); } #endif while(1) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); pos = it->pos; end = it->end; if (pos < end) { atomic_size = (*it->queue)[pos]->atomic_size; atomic_id = (*it->queue)[pos]->atomic_id; it->pos += atomic_size; // pos++ if (it->pos >= end) { finished_status = 1; _phase_dynamic++; if (_phase_dynamic == _queue_dynamic->size()) { _phase_dynamic = (-1); } // queue is exhausted #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " _phase_dynamic = " << _phase_dynamic << " @ " << _queue_dynamic->size() << endl; if (_phase_dynamic >= 0) { cerr << pid << " next = " << (*_queue_dynamic)[_phase_dynamic]->task_name << endl; } MUTEX_UNLOCK(_mutex_debug); } #endif } } // if (pos < end) MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (pos >= end) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " queue is already exhausted " << endl; MUTEX_UNLOCK(_mutex_debug); } #endif break; } atomic_size = (*it->queue)[pos]->atomic_size; for (int m = 0; m < atomic_size; m++) { execute_task(it, (pos + m), permute_block, // loop with atomic_size pid, _mutex_dependency); } #ifdef DEBUG_EXEC_THREAD1 { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << pid << " " << pos << " : " << (*it->queue)[pos]->task_name << " ) "; MUTEX_UNLOCK(_mutex_debug); } #endif if (finished_status) { break; } } // while(1) // #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " greedy end : " << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } void QueueRuntime::thread_queue_single(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int zone_idxn) { // tasks are excuted by a single thread : atomic operation will not be divided. 
int start = it->begin; bool zone_flag; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } // scope of mutex #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " @ " << num_threads << " single " << it->task_name << " : " << it->task_id << " zone_flag = " << zone_flag << endl; MUTEX_UNLOCK(_mutex_debug); } #endif while ((start < it->end) && (!zone_flag)) { C_task *kt = (*it->queue)[start]; int waiting = check_parents_done(kt, _mutex_dependency); while(waiting > 0) { // looking for dynamic queue int flag = execute_task_dynamic_buffer(permute_block, pid, _mutex_dependency); if (flag != 1) { // int waiting2; waiting2 = check_parents_done(kt, _mutex_dependency); while(waiting2 > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); _waiting_root++; #ifdef DEBUG_DEADLOCK if (_waiting_root > num_threads) { fprintf(stderr, "dead lock occured : %s %d\n", __FILE__, __LINE__); exit(-1); } #endif #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << kt->task_name << " " << "waiting = "<< waiting << " : " << (int)_waiting_root << " : " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = kt->parents_work->begin(); jt != kt->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; } #endif COND_WAIT(_mutex_root, _cond_root, lck_root); #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << endl << pid << " " << kt->task_name << " " << " waked up : _waiting_root = " << (int)_waiting_root << endl; } #endif _waiting_root--; // other thread broacast wake up and decreasing _waiting_root waiting2 = check_parents_done(kt, _mutex_dependency); MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // while (waiting2) } // if (flag != 1) waiting = check_parents_done(kt, _mutex_dependency); { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // 
std::unique_locklck_root(_mutex_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (zone_flag) { break; } } // while (waiting > 0) execute_task(it, start, permute_block, pid, _mutex_dependency); if ((*it->queue)[start]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " start = " << start << " : " << it->task_name << " finished... skip " << (*it->queue)[start]->to_next_task << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // const int ibegin = start + 1; start += (*it->queue)[start]->to_next_task; } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << "( " << pid << " : " << (*it->queue)[start]->task_name << " ) "; MUTEX_UNLOCK(_mutex_debug); } #endif start++; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); zone_flag = (_zone_entered[zone_idxn] == num_threads); MUTEX_UNLOCK(_mutex_root); } // scope of mutex } // while ((start < it->end) || (!zone_flag)) for (int j = start; j < it->end; j++) { execute_task(it, j, permute_block, pid); if ((*it->queue)[j]->quit_queue) { #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " j = " << j << " : " << it->task_name << " finished... 
skip " << (*it->queue)[j]->to_next_task << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // const int ibegin = j + 1; j += (*it->queue)[j]->to_next_task; } } // loop : j #ifdef DEBUG_EXEC_THREAD2 { MUTEX_LOCK(_mutex_debug, lck_debug); for (int j = start; j < it->end; j++) { cerr << "( " << pid << " : " << (*it->queue)[j]->task_name << " ) "; } MUTEX_UNLOCK(_mutex_debug); } #endif // } int QueueRuntime::execute_task_dynamic_buffer(int *permute_block, int pid, QueueRuntime_mutex &mutex_dependency) { int ph_dynmc, pos, end, atomic_size, atomic_id; int waiting; int update_status = 0; C_task *kt; { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); ph_dynmc = _phase_dynamic; if (ph_dynmc >= 0) { pos = (*_queue_dynamic)[ph_dynmc]->pos; end = (*_queue_dynamic)[ph_dynmc]->end; if (pos < end) { atomic_size = (*(*_queue_dynamic)[ph_dynmc]->queue)[pos]->atomic_size; atomic_id = (*(*_queue_dynamic)[ph_dynmc]->queue)[pos]->atomic_id; kt = (*(*_queue_dynamic)[ph_dynmc]->queue)[pos]; // to avoid other thread takes the same pos waiting = check_parents_done(kt, mutex_dependency); if (waiting <= 0) { (*_queue_dynamic)[ph_dynmc]->pos += atomic_size; // pos++ if ((*_queue_dynamic)[ph_dynmc]->pos == end) { update_status = 1; _phase_dynamic++; if (_phase_dynamic == _queue_dynamic->size()) { _phase_dynamic = (-1); } } // if ((*_queue_dynamic)[ph_dynmc]->pos == end) } } else { ph_dynmc = (-1); } } // if (ph_dynmc > 0) MUTEX_UNLOCK(_mutex_root); } // scope of mutex if (ph_dynmc < 0) { return (-1); // queue exhausted } if (waiting > 0) { // waiting == (-1) <=> quit_queue == true return 0; // the first task in the queue is not ready } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " from dynamic " << (*_queue_dynamic)[ph_dynmc]->task_name << " " << pos << " @ " << ph_dynmc << " " << (*_queue_dynamic)[ph_dynmc]->end << " " << kt->task_name << " : "; MUTEX_UNLOCK(_mutex_debug); } #endif for (int m = 0; m < atomic_size; m++) { 
execute_task((*_queue_dynamic)[ph_dynmc], (pos + m), // pos permute_block, pid, mutex_dependency); } #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << kt->task_name << " : " << pos << " done" << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // ops_complexity is recalculated in each routine :- { // scope of mutex // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); (*_queue_dynamic)[ph_dynmc]->ops_complexity -= *(kt->ops_complexity); if (update_status) { (*_queue_dynamic)[ph_dynmc]->status = TASK_DONE; } MUTEX_UNLOCK(_mutex_root); } // scope of mutex return 1; // success } int QueueRuntime::check_parents_done(C_task *it, QueueRuntime_mutex &mutex_dependency) { unsigned char status; int dependency = 0; bool quit_queue = false; // cerr << it->task_name << " size = " << it->parents_work->size() << " || "; for(list::iterator nt = it->parents_work->begin(); nt != it->parents_work->end(); ) { // cerr << (*nt)->task_name << " "; #ifdef TASKSTATUS_MUTEX { MUTEX_LOCK(_mutex_dependency, lck_dependency); status = (*nt)->status; MUTEX_UNLOCK(_mutex_dependency); } #else status = (*nt)->status; #endif if (status == TASK_DONE) { if ((*nt)->quit_queue) { quit_queue = true; } // 5 Jul.2015 Atsushi nt = it->parents_work->erase(nt); // ++nt; } else { dependency++; ++nt; } } // cerr << " >> " << it->parents_work->size() << " . 
" << endl; if (quit_queue && (dependency == 0)) { dependency = (-1); } return dependency; } int QueueRuntime::check_parents_done(C_task *it) { int dependency = 0; bool quit_queue = false; // cerr << it->task_name << " size = " << it->parents_work->size() << " | "; for(list::iterator nt = it->parents_work->begin(); nt != it->parents_work->end(); ) { // cerr << (*nt)->task_name << " "; if ((*nt)->status == TASK_DONE) { if ((*nt)->quit_queue) { quit_queue = true; } // 5 Jul.2015 Atsushi nt = it->parents_work->erase(nt); // ++nt; } else { dependency++; ++nt; } } if (quit_queue && (dependency == 0)) { dependency = (-1); } return dependency; } void QueueRuntime::execute_task(C_task_seq *seq, int pos, int *permute_block, int pid, QueueRuntime_mutex &mutex_depenency) { C_task *task = (*seq->queue)[pos]; // debugging #ifdef DEBUG_CHECKPARENTS_DONE int waiting; waiting = check_parents_done(task, mutex_dependency); if (waiting > 0) { { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " parents of task " << task->task_name << " not finished : "; for(list::const_iterator nt = task->parents_work->begin(); nt != task->parents_work->end(); ++nt) { cerr << (*nt)->task_name << " "; } cerr << endl; MUTEX_UNLOCK(_mutex_debug); } } #endif // #ifdef DEBUG_PRINT_TASK switch (task->task_id) { case C_FILLMATRIX: case C_DFULL_SYM_GAUSS: case C_DTRSMSCALE: // case C_SPARSESCHUR: case C_DGEMM_LOCAL_MULT: case C_DGEMM_LOCAL_TWO: case C_DGEMM_DIRECT_TWO: diss_printf(_verbose, _fps[pid], "%s\n", task->task_name); *(task->fp) = &_fps[pid]; break; } #endif #ifdef TASKSTATUS_MUTEX { MUTEX_LOCK(_mutex_dependency, lck_dependency); task->status = TASK_WORKING; MUTEX_UNLOCK(_mutex_dependency); } #else task->status = TASK_WORKING; #endif // #ifdef TASKSTATUS_MUTEX #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t0)); // clock_gettime(CLOCK_MONOTONIC, &(task->t0)); #endif task->func(task->func_arg); #ifdef DEBUG_THREAD_DONE_PRINT { MUTEX_LOCK(_mutex_debug, lck_debug); fprintf(stderr, "pid = %d : %s\n", 
pid, task->task_name); MUTEX_UNLOCK(_mutex_debug); } #endif if (task->task_id == C_DFULL_SYM_GAUSS) { // task->quit_queue = ((C_dfull_gauss_arg*)task->func_arg)->quit; #ifdef TASKSTATUS_MUTEX { // scope of mutex MUTEX_LOCK(_mutex_dependency, lck_dependency); // std::unique_locklck_dependency(mutex_dependency); for (int i = (pos + 1); i <= (pos + task->to_next_task); i++) { (*seq->queue)[i]->status = TASK_DONE; (*seq->queue)[i]->quit_queue = true; } MUTEX_UNLOCK(_mutex_dependency); } // scope of mutex #endif // #ifdef TASKSTATUS_MUTEX { for (int i = (pos + 1); i <= (pos + task->to_next_task); i++) { (*seq->queue)[i]->status = TASK_DONE; (*seq->queue)[i]->quit_queue = true; #ifdef DEBUG_EXEC_THREAD2 { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << (*seq->queue)[i]->task_name << " "; MUTEX_UNLOCK(_mutex_debug); } #endif } } } // if (task->task_id == C_DFULL_SYM_GAUSS) #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t1)); // clock_gettime(CLOCK_MONOTONIC, &(task->t1)); task->thread_id = pid; #endif #ifdef DEBUG_EXEC_THREAD_FILE { MUTEX_LOCK(_mutex_file, lck_file); const int sec_n0 = convert_sec(task->t0); const int sec_m0 = convert_microsec(task->t0); _fout << task->task_name << " / "; _fout << sec_n0 << " " << sec_m0 << "\t"; for(list::const_iterator nt = task->parents->begin(); nt != task->parents->end(); ++nt) { int sec_n1 = convert_sec((*nt)->t1); int sec_m1 = convert_microsec((*nt)->t1); if ((sec_n0 < sec_n1) || ((sec_n0 == sec_n1) && (sec_m0 < sec_m1))) { _fout << "*"; } _fout << (*nt)->task_name << " / "; _fout << sec_n1 << " " << sec_m1 << "\t"; } _fout << endl; // " : " << pid << endl; MUTEX_UNLOCK(_mutex_file); } #endif #ifdef TASKSTATUS_MUTEX { // scope of mutex MUTEX_LOCK(_mutex_dependency, lck_dependency); task->status = TASK_DONE; MUTEX_UNLOCK(_mutex_dependecny); } // scope of mutex #else task->status = TASK_DONE; #endif // if (_waiting_root > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); COND_BROADCAST(_cond_root); MUTEX_UNLOCK(_mutex_root); } 
// scope of mutex #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << task->task_name << "_waiting_root = " << (int)_waiting_root << " broadcast." << endl; } #endif } } void QueueRuntime::execute_task(C_task_seq *seq, int pos, int *permute_block, int pid) { C_task *task = (*seq->queue)[pos]; #ifdef DEBUG_CHECKPARENTS_DONE // debugging int waiting; waiting = check_parents_done(task); if (waiting > 0) { { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " parents of task " << task->task_name << " not finished : " << task->parents_work->size() << " : "; // debug : 5 Jul. 2015, Atsushi for(list::const_iterator nt = task->parents_work->begin(); nt != task->parents_work->end(); ++nt) { cerr << (*nt)->task_name << " / "; } cerr << endl; MUTEX_UNLOCK(_mutex_debug); } } // #endif #ifdef DEBUG_PRINT_TASK switch (task->task_id) { case C_FILLMATRIX: case C_DFULL_SYM_GAUSS: case C_DTRSMSCALE: // case C_SPARSESCHUR: case C_DGEMM_LOCAL_MULT: case C_DGEMM_LOCAL_TWO: case C_DGEMM_DIRECT_TWO: diss_printf(_verbose, _fps[pid], "%s\n", task->task_name); *(task->fp) = &_fps[pid]; break; } #endif // accuessing to unsigned char does not need mutex task->status = TASK_WORKING; // #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t0)); // clock_gettime(CLOCK_MONOTONIC, &(task->t0)); #endif // debugging : 13 :Apr.2012 Atsushi task->func(task->func_arg); #ifdef DEBUG_THREAD_DONE_PRINT { MUTEX_LOCK(_mutex_debug, lck_debug); fprintf(stderr, "pid = %d : %s\n", pid, task->task_name); MUTEX_UNLOCK(_mutex_debug); } #endif if (task->task_id == C_DFULL_SYM_GAUSS) { for (int i = (pos + 1); i <= (pos + task->to_next_task); i++) { (*seq->queue)[i]->status = TASK_DONE; (*seq->queue)[i]->quit_queue = true; #ifdef DEBUG_EXEC_THREAD2 { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << (*seq->queue)[i]->task_name << " "; MUTEX_UNLOCK(_mutex_debug); } #endif } } #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t1)); // clock_gettime(CLOCK_MONOTONIC, &(task->t1)); task->thread_id = pid; #endif // accessing to 
unsigned char does not neet mutex task->status = TASK_DONE; // if (_waiting_root > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); // std::unique_locklck_root(_mutex_root); COND_BROADCAST(_cond_root); // _cond_root.notify_all(); MUTEX_UNLOCK(_mutex_root); } //scope of mutex #ifdef DEBUG_EXEC_THREAD_IDLE { cerr << pid << " " << task->task_name << "_waiting_root = " << (int)_waiting_root << " broadcast." << endl; } #endif } // #ifdef DEBUG_EXEC_THREAD_FILE { MUTEX_LOCK(_mutex_file, lck_file); const int sec_n0 = convert_sec(task->t1); const int sec_m0 = convert_microsec(task->t1); _fout << task->task_name << " / "; _fout << sec_n0 << " " << sec_m0 << "\t"; for(list::const_iterator nt = task->parents->begin(); nt != task->parents->end(); ++nt) { int sec_n1 = convert_sec((*nt)->t1); int sec_m1 = convert_microsec((*nt)->t1); if ((sec_n0 < sec_n1) || ((sec_n0 == sec_n1) && (sec_m0 < sec_m1))) { _fout << "*"; } _fout << (*nt)->task_name << " / "; _fout << sec_n1 << " " << sec_m1 << "\t"; } _fout << endl; // " : " << pid << endl; MUTEX_UNLOCK(_mutex_file); } #endif task->status = TASK_DONE; } void QueueRuntime::exec_fwbw_seq() { clock_t t0_cpu, t1_cpu; elapsed_t t0_elapsed, t1_elapsed; t0_cpu = clock(); get_realtime(&t0_elapsed); for (vector::const_iterator it = _queue_fwbw->queue->begin(); it != _queue_fwbw->queue->end(); ++it) { (*it)->func((*it)->func_arg); } t1_cpu = clock(); get_realtime(&t1_elapsed); diss_printf(_verbose, _fp, "execution of fw/bw : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); } void QueueRuntime::exec_fwbw() { const int num_threads = _num_threads; //_num_threads_symb; #ifdef POSIX_THREADS void* results; pthread_attr_t th_attr; #endif vector threads; clock_t t0_cpu, t1_cpu; elapsed_t t0_elapsed, t1_elapsed; // struct timespec ts0, ts1; int ierr; diss_printf(_verbose, _fp, "fwbw with %d threads\n", num_threads); // copy dependency data : parents -> 
parents_work { for (vector::const_iterator it = _queue_fwbw->queue->begin(); it != _queue_fwbw->queue->end(); ++it) { list& parents_work = *((*it)->parents_work); if (parents_work.size() > 0) { // to avoid double free parents_work.clear(); } } // loop : it for (vector::const_iterator it = _queue_fwbw->queue->begin(); it != _queue_fwbw->queue->end(); ++it) { list& parents = *((*it)->parents); list& parents_work = *((*it)->parents_work); (*it)->status = TASK_WAITING; // reset status (*it)->quit_queue = false; if (parents_work.size() == 0) { #ifdef SX_ACE for (list::const_iterator lt = parents.begin(); lt != parents.end(); ++lt) { parents_work.push_back(*lt); } #else // SX-ACE C++ rev 110 with C++98/03 does not understand back_inserter std::copy(parents.begin(), parents.end(), back_inserter(parents_work)); #endif } (*it)->broadcast_deadlock = 0; } // loop : it _queue_fwbw->pos = _queue_fwbw->begin; } #ifdef POSIX_THREADS // threads = new pthread_t[num_threads]; threads.resize(num_threads); #ifdef DEBUG_EXEC_THREAD pthread_mutex_init(&_mutex_debug, NULL); #endif pthread_mutex_init(&_mutex_dependency, NULL); pthread_cond_init(&_cond_root, NULL); ierr = pthread_mutex_init(&_mutex_root, NULL); if (ierr != 0) { fprintf(stderr, " pthread_mutex_init(&_mutex_root, NULL) %s %d : %d\n", __FILE__, __LINE__, ierr); } #ifdef DEBUG_EXEC_THREAD ierr = pthread_mutex_init(&_mutex_debug, NULL); if (ierr != 0) { diss_printf(_verbose, _fp, "%s %d : pthread_mutex_init(&_mutex_debug, NULL) %d\n", __FILE__, __LINE__, ierr); } #endif pthread_attr_init(&th_attr); pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_JOINABLE); #endif #ifdef DEBUG_EXEC_THREAD_FILE { int pid = get_process_id(); char fname[256]; sprintf(fname, "task-s.%d.data", pid); _fout.open(fname); } #endif t0_cpu = clock(); get_realtime(&t0_elapsed); // clock_gettime(CLOCK_REALTIME, &ts0); #ifdef POSIX_THREADS THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] 
= new THREAD_QUEUE_EXEC(p, num_threads, this); int pid = pthread_create(&threads[p], &th_attr, &thread_queue_fwbw_, (void *)params[p]); if (pid != 0) { fprintf(stderr, "bad thread creation ? : %d\n", pid); exit(0); } } pthread_attr_destroy(&th_attr); for (int p = 0; p < num_threads; p++) { int pid = pthread_join(threads[p], &results); if (pid != 0) { fprintf(stderr, "bad thread creation ? : %d\n", pid); exit(0); } delete params[p]; } delete [] params; #else THREAD_QUEUE_EXEC **params = new THREAD_QUEUE_EXEC*[num_threads]; for (int p = 0; p < num_threads; p++) { params[p] = new THREAD_QUEUE_EXEC(p, num_threads, this); threads.push_back(std::thread(thread_queue_fwbw_, (void *)params[p])); } for (int p = 0; p < num_threads; p++) { threads[p].join(); delete params[p]; } delete [] params; #endif t1_cpu = clock(); get_realtime(&t1_elapsed); // clock_gettime(CLOCK_REALTIME, &ts1); diss_printf(_verbose, _fp, "execution of fw/bw : cpu time = %.4e elapsed time = %.4e\n", (double)(t1_cpu - t0_cpu) / (double)CLOCKS_PER_SEC, convert_time(t1_elapsed, t0_elapsed)); #ifdef DEBUG_EXEC_THREAD_FILE _fout.close(); #endif #ifdef DEBUG_THREAD_TIME { int pid = get_process_id(); char filename[256]; FILE *fp; sprintf(filename, "threadtime-fwbw.%d.data", pid); if ((fp = fopen(filename, "w")) == NULL) { fprintf(stderr, "%s %d : fail to open %s\n", __FILE__, __LINE__, filename); exit(-1); } else { fprintf(fp, "queue_fwbw\n"); for (int j = _queue_fwbw->begin; j < _queue_fwbw->end; j++) { double t0 = (convert_sec((*_queue_fwbw->queue)[j]->t0) + convert_microsec((*_queue_fwbw->queue)[j]->t0) * 1.0e-6); fprintf(fp, "%2d:\t%-16s\t%2d\t%4d\t%7d\t%7d\t%7d\t%7d", (*_queue_fwbw->queue)[j]->thread_id, (*_queue_fwbw->queue)[j]->task_name, (int)(*_queue_fwbw->queue)[j]->parents_work->size(), (*_queue_fwbw->queue)[j]->broadcast_deadlock, convert_sec((*_queue_fwbw->queue)[j]->t0), convert_microsec((*_queue_fwbw->queue)[j]->t0), convert_sec((*_queue_fwbw->queue)[j]->t1), 
convert_microsec((*_queue_fwbw->queue)[j]->t1)); for (list::const_iterator it = (*_queue_fwbw->queue)[j]->parents->begin(); it != (*_queue_fwbw->queue)[j]->parents->end(); ++it) { double tt1 = (convert_sec((*it)->t1) + convert_microsec((*it)->t1) * 1.0e-6); if (t0 < tt1) { fprintf(fp, " [ %10s ]", (*it)->task_name); } } fprintf(fp, "\n"); } fclose(fp); } // if ((fp = fopen(filename, "a")) == NULL) } #endif #ifdef POSIX_THREADS pthread_mutex_destroy(&_mutex_root); #ifdef DEBUG_EXEC_THREAD pthread_mutex_destroy(&_mutex_debug); #endif threads.clear(); #endif } void *thread_queue_fwbw_(void *arg) { THREAD_QUEUE_EXEC *params = (THREAD_QUEUE_EXEC *)arg; params->dissectionRuntime->thread_queue_fwbw(params->id, params->num_threads); #ifdef POSIX_THREADS pthread_exit(arg); #endif return (void *)NULL; } // #define DEBUG_EXEC_THREAD_IDLEa void QueueRuntime::thread_queue_fwbw(const int pid, const int num_threads) { // const int pid = params->id; // const int num_threads = params->num_threads; // greedy -- better to be in seperated function int pos, end; C_task_seq* it = _queue_fwbw; // cerr << "pid = " << pid // << " _queue_symb : pos = " << it->pos << " end = " << it->end << endl; while (1) { { // std::unique_locklck_root(_mutex_root); MUTEX_LOCK(_mutex_root, lck_root); pos = it->pos; end = it->end; if (pos < end) { it->pos = it->pos + (*it->queue)[pos]->atomic_size; } MUTEX_UNLOCK(_mutex_root); } // destruction of lck_root if (pos >= end) { break; } C_task *task_p = (*it->queue)[pos]; for (int i = 0; i < task_p->atomic_size; i++) { C_task *task = (*it->queue)[pos + i]; int waiting = check_parents_done(task, _mutex_dependency); while (waiting > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); _waiting_root++; if (_waiting_root == num_threads) { #ifdef DEBUG_EXEC_THREAD_IDLEa { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << task->task_name << " _waiting_root = " << (int)_waiting_root << " dead locked! : emergency broadcast." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif // DEBUG_EXEC_THREAD_IDLEa COND_BROADCAST(_cond_root); task->broadcast_deadlock++; } #ifdef DEBUG_EXEC_THREAD_IDLEa { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << task->task_name << " " << "waiting = "<< waiting << " : " << (int)_waiting_root << " " << __FILE__ << " " << __LINE__ << " "; for (list::const_iterator jt = task->parents_work->begin(); jt != task->parents_work->end(); ++jt) { cerr << (*jt)->task_name << " "; } cerr << " : sleeping" << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // DEBUG_EXEC_THREAD_IDLEa COND_WAIT(_mutex_root, _cond_root, lck_root); _waiting_root--; #ifdef DEBUG_EXEC_THREAD_IDLEa { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << task->task_name << " waked up : _waiting_root = " << (int)_waiting_root << endl; MUTEX_UNLOCK(_mutex_debug); } #endif // DEBUG_EXEC_THREAD_IDLEa // other thread broacast wake up and decreasing _waiting_root waiting = check_parents_done(task, _mutex_dependency); MUTEX_UNLOCK(_mutex_root); } // destruction of lck_root } // while (waiting > 0) { MUTEX_LOCK(_mutex_dependency, lck_dependency); task->status = TASK_WORKING; MUTEX_UNLOCK(_mutex_dependency); } // destruction of lck_dependency #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t0)); #endif task->func(task->func_arg); #ifdef DEBUG_THREAD_TIME get_realtime(&(task->t1)); task->thread_id = pid; #endif { MUTEX_LOCK(_mutex_dependency, lck_depenency); task->status = TASK_DONE; MUTEX_UNLOCK(_mutex_dependency); } if (_waiting_root > 0) { { // scope of mutex MUTEX_LOCK(_mutex_root, lck_root); #ifdef DEBUG_EXEC_THREAD_IDLEa { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << task->task_name << " _waiting_root = " << (int)_waiting_root << " broadcast." 
<< endl; MUTEX_UNLOCK(_mutex_debug); } #endif // DEBUG_EXEC_THREAD_IDLEa COND_BROADCAST(_cond_root); MUTEX_UNLOCK(_mutex_root); } // destruction of lck_root } } // loop : i } // while (1) #ifdef DEBUG_EXEC_THREAD { MUTEX_LOCK(_mutex_debug, lck_debug); cerr << pid << " " << it->task_name << " @ " << it->task_id << " greedy end." << endl; MUTEX_UNLOCK(_mutex_debug); } #endif } FreeFem-sources-4.9/3rdparty/dissection/src/Driver/QueueRuntime.hpp000664 000000 000000 00000023342 14037356732 025455 0ustar00rootroot000000 000000 /*! \file QueueRuntime.hpp \brief management of threads for Dissection Matrix \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _QUEUE_RUNTIME_ # define _QUEUE_RUNTIME_ #include "Compiler/OptionLibrary.hpp" #include "Driver/C_threads_tasks.hpp" #include "Compiler/elapsed_time.hpp" #include #include #include #ifdef POSIX_THREADS #include #else #include #include #include #endif #ifdef POSIX_THREADS typedef pthread_t QueueRuntime_thread; typedef pthread_mutex_t QueueRuntime_mutex; typedef pthread_cond_t QueueRuntime_cond; #define MUTEX_LOCK(a, b) pthread_mutex_lock(&(a)) #define COND_BROADCAST(b) pthread_cond_broadcast(&(b)) #define MUTEX_UNLOCK(b) pthread_mutex_unlock(&(b)) #define COND_WAIT(a, b, c) pthread_cond_wait(&(b), &(a)) #else typedef std::thread QueueRuntime_thread; typedef std::mutex QueueRuntime_mutex; typedef std::condition_variable QueueRuntime_cond; #define MUTEX_LOCK(a, b) std::unique_lock(b)((a)) #define COND_BROADCAST(b) (b).notify_all() #define MUTEX_UNLOCK(b) #define COND_WAIT(a, b, c) (b).wait((c)) #endif class QueueRuntime { public: QueueRuntime(int nb_doms, int num_threads, const bool isSym, const bool verbose, FILE *fp); ~QueueRuntime(); void generate_queue(C_task_seq* &_queue_symb, list* &_queue_static, vector* &_queue_dynamic, list &queue_null, // list &queue_dummy, Dissection::Tree* btree, vector* children, vector* tasks_SparseSymb, vector* tasks_SparseNum, vector* tasks_SparseLocalSchur, vector* tasks_DFillSym, vector* tasks_DFullLDLt, vector* tasks_DTRSMScale, vector* tasks_DSymmGEMM, vector** tasks_Dsub, vector* tasks_deallocLower, vector* tasks_deallocLocalSchur, long **nops_queue, vector > all_fathersIndex, vector nrow_DFullLDLt, vector isMergedDTRSM, vector isDividedDTRSM, const int level_last); void write_dependency(FILE *fp); void exec_symb_fact(); void exec_num_fact(const int called); void exec_num_fact_debug(); void execute_task_debug(C_task_seq *seq, int pos, int *permute_block, int pid); void exec_fwbw(); void exec_fwbw_seq(); void thread_queue_symb_factorize(const int pid, const int num_threads); void thread_queue_num_factorize(const int pid, 
const int num_threads); void thread_queue_fwbw(const int pid, const int num_threads); void thread_queue_C_DFULL(const int pid, const int num_thraeds, C_task_seq *it, int *permute_block, const int cnt_cdfull, const int zone_first_entered, const int zone_idxn); void thread_queue_parallel_dynamic(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int zone_idxn); void thread_queue_parallel_static(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int zone_idxp, const int zone_idxn, const int zone_first_entered, const int zone_last_entered); void thread_queue_single(const int pid, const int num_threads, C_task_seq *it, int *permute_block, const int zone_idxn); int execute_task_dynamic_buffer(int *permute_block, int pid, QueueRuntime_mutex &mutex_depenency); void execute_task(C_task_seq *seq, int pos, int *permute_block, int pid, QueueRuntime_mutex &mutex_depenency); void execute_task(C_task_seq *seq, int pos, int *permute_block, int pid); int check_parents_done(C_task *it, QueueRuntime_mutex &mutex_depenency); int check_parents_done(C_task *it); void set_queue_fwbw(C_task_seq* &queue_fwbw) { _queue_fwbw = queue_fwbw; } QueueRuntime(const QueueRuntime &s) { } // copy constrcutor : dummy private: int _nb_doms; int _num_threads; C_task_seq* _queue_symb; C_task_seq* _queue_fwbw; list *_queue_static; vector *_queue_dynamic; // list* _queue_dummy; QueueRuntime_mutex _mutex_root; QueueRuntime_mutex _mutex_dependency; QueueRuntime_mutex *_mutex_group; QueueRuntime_mutex _mutex_debug; QueueRuntime_mutex _mutex_file; QueueRuntime_cond _cond_root; int *_zone_entered; int *_zone_finished; int ***_group_entered; int ***_group_finished; int ***_group_task_ends; int ***_group_static_assigned; int *_group_task_id; int *_zone_static_assigned; int **_begins; int **_ends; int ***_begins_group; int ***_ends_group; long **_group_nops; unsigned char _waiting_root; // num. 
of threads should be less than 255 int _phase_dynamic; int _queue_dynamic_pos_start; int _queue_dynamic_pos; int _queue_dynamic_notcopied; // // ofstream _fout; // forward-backward substitutions bool _verbose; FILE *_fp; bool _isSym; }; // End class DissictionQueue struct THREAD_QUEUE_EXEC { int id; int num_threads; QueueRuntime* dissectionRuntime; THREAD_QUEUE_EXEC(int id_, int num_threads_, QueueRuntime* dissectionRuntime_) : id(id_), num_threads(num_threads_), dissectionRuntime(dissectionRuntime_) {} }; void copytask_list2seq(list &queue_static, list queue_lists, list &queue_null, string task_name, int task_id, int mutex_id, int parallel_single, int num_threads, int level, int phase); void task_assign_diag1(list * &queue_static, vector* &tasks_queue, list &queue_null, // list &queue_dummy, vector &nrow_DFullLDLt, vector &isMergedDTRSM, string queue_symbol, int queue_id, int level_id, int num_threads, int level_last, int level, int begdom, int enddom, long *nops_sum, vector starts, list* &queue_lists, bool queue_lists_clear, Dissection::Tree* btree, vector* children); void task_assign_diag2(list *&queue_static, vector* &tasks_queue, vector &queue_null, // vector &queue_dummy, vector &isMergedDTRSM, string queue_symbol, int queue_id, int level_id, int num_threads, int level_last, int level, int begdom, int enddom, long *nops_sum, vector starts, vector starts_sub, list* &queue_lists, bool queue_lists_clear, Dissection::Tree* btree, vector* children); void allocate_int2d(int **&array, const int num_threads); void allocate_unsigned2d(long **&array, const int num_threads); void allocate_int3d(int ***&array, const int num_threads); void deallocate_int2d(int **&array); void deallocate_unsigned2d(long **&array); void deallocate_int3d(int ***&array); void *thread_queue_num_factorize_(void *arg); void *thread_queue_symb_factorize_(void *arg); void *thread_queue_fwbw_(void *arg); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/TridiagBlockMatrix.cpp000664 000000 
000000 00000315552 14037356732 026552 0ustar00rootroot000000 000000 /*! \file TridiagBlockMatrix.cpp \brief tridiagonal factorization algorithm with Cuthill-McKee \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. 
If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include "Compiler/OptionCompiler.hpp" #include "Compiler/OptionLibrary.hpp" #include #include #include "Driver/TridiagBlockMatrix.hpp" #include "Driver/C_BlasRoutines.hpp" #include "Driver/C_KernDetect.hpp" #include "Driver/DissectionDefault.hpp" #include "Compiler/arithmetic.hpp" #include "Compiler/elapsed_time.hpp" #include "Algebra/SparseRenumbering.hpp" #include "Algebra/VectorArray.hpp" #include "Compiler/DissectionIO.hpp" template const T TridiagBlockMatrix::_one = T(1.0); template const T TridiagBlockMatrix::_none = T(-1.0); template const T TridiagBlockMatrix::_zero = T(0.0); template void TridiagBlockMatrix::SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_) { long long n1, n2; // const void *fp = (void *)fp_; int dim1, dim2, dim3; _color = color; _color_max = color_max; // need count _dim and _nnz _nnz = 0; dim1 = 0; for (int i = 0; i < dim_; i++) { if (color_mask[i] == color) { _nnz += prow_[i + 1] - prow_[i]; dim1++; } } // isolated entries are dealt by color 1 dim3 = 0; if (color == 1) { for (int i = 0; i < dim_; i++) { if (color_mask[i] == 0) { _nnz++; dim3++; } } } dim2 = 0; for (int i = 0; i < dim_; i++) { // treatment fused block whose size is // less than 
DIM_AUG_KERN if (color_mask[i] == (-color)) { _nnz += prow_[i + 1] - prow_[i]; dim2++; } } _dim = dim1 + dim2 + dim3; _new2old.resize(_dim); vector remap_eqn, map_eqn, prow1, indcols1; remap_eqn.resize(dim_); map_eqn.resize(dim_); prow1.resize(dim_ + 1); indcols1.resize(nnz_); { int i0, i1, i2, i3; i0 = 0; i1 = dim1; i2 = dim1 + dim2; i3 = _dim; // dim1 + dim2 + dim3 for (int i = 0; i < dim_; i++) { if (color_mask[i] == color) { remap_eqn[i0++] = i; } else if (color_mask[i] == (-color)) { remap_eqn[i1++] = i; } else if ((color == 1) && (color_mask[i] == 0)) { remap_eqn[i2++] = i; } else { remap_eqn[i3++] = i; } } } for (int i = 0; i < dim_; i++) { map_eqn[remap_eqn[i]] = i; // map : old(original) to new (mapped) index } RenumberCSR(dim_, remap_eqn, map_eqn, prow_, indcols_, prow1, indcols1, _verbose, _fp); vector new2old, old2new; new2old.resize(dim_); old2new.resize(dim_); if (dim1 > MIN_TRIDIAG_SIZE) { CMK_number(dim1, &prow1[0], &indcols1[0], new2old, _verbose, _fp); for (int i = 0; i < dim3; i++) { _new2old[i] = remap_eqn[i + dim1 + dim2]; } for (int i = 0; i < dim1; i++) { _new2old[i + dim3] = remap_eqn[new2old[i]]; } for (int i = 0; i < dim2; i++) { _new2old[i + dim1 + dim3] = remap_eqn[i + dim1]; } _nfront = point_front(dim1, &prow1[0], &indcols1[0], new2old, _p_front, _verbose, _fp); } else { _nfront = 1; _p_front.resize(2); _p_front[0] = 0; _p_front[1] = dim1 + dim2; for (int i = 0; i < _dim; i++) { _new2old[i] = remap_eqn[i]; } } if (dim3 > 0) { for (int i = 1; i < _nfront; i++) { _p_front[i] += dim3; } } _p_front[_nfront] = _dim; // replace the end of block prow1.clear(); indcols1.clear(); // remap_eqn.clear(); map_eqn.clear(); diss_printf(_verbose, _fp, "%s %d : SymbolicFact : %d ", __FILE__, __LINE__, _nb); diss_printf(_verbose, _fp, "color = %d dim = %d = %d + %d + %d nfront = %d : ", color, _dim, dim1, dim2, dim3, _nfront); for (int i = 0; i <= _nfront; i++) { diss_printf(_verbose, _fp, "%d ", _p_front[i]); } diss_printf(_verbose, _fp, "\n"); 
_ptRows.resize(_dim + 1); _indCols.resize(_nnz); _indVals.resize(_nnz); for (int i = 0; i < _dim; i++) { new2old[i] = _new2old[i]; } for (int i = _dim; i < dim_; i++) { new2old[i] = remap_eqn[i]; } for (int i = 0; i < dim_; i++) { old2new[new2old[i]] = i; } { bool shrink_flag = (_dim < dim_); RenumberCSR(shrink_flag, _dim, nnz_, new2old, old2new, prow_, indcols_, indvals_, _ptRows, _indCols, _indVals, _verbose, _fp); } new2old.clear(); old2new.clear(); _p_diag.resize(_dim); _p_upper.resize(_dim); TridiagStruct(_ptRows, _indCols, _nfront, _p_front, _p_diag, _p_upper); if (_isSymmetric) { _nop = 0.0; n2 = (long long)(_p_front[1] - _p_front[0]); _nop += (n2 * n2 * n2) / 3.0; for (int n = 1; n < _nfront; n++) { n1 = n2; n2 = (long long)(_p_front[n + 1] - _p_front[n]); _nop += (n1 * n1 * n2) / 3.0; _nop += (n1 * n2 * n2) / 3.0; _nop += (n2 * n2 * n2) / 3.0; } } else { _nop = 0.0; n2 = (long long)(_p_front[1] - _p_front[0]); _nop += (2.0 * n2 * n2 * n2) / 3.0; for (int n = 1; n < _nfront; n++) { n1 = n2; n2 = (long long)(_p_front[n + 1] - _p_front[n]); _nop += (2.0 * n1 * n1 * n2) / 3.0; _nop += (2.0 * n1 * n2 * n2) / 3.0; _nop += (2.0 * n2 * n2 * n2) / 3.0; } } _diag_blocks = new ColumnMatrix[_nfront]; #ifndef SPARSE_OFFDIAG _upper_blocks = new ColumnMatrix[_nfront]; // 27 Nov.2015 _lower_blocks = new ColumnMatrix[_nfront]; // 27 Nov.2015 #endif #ifdef STORE_WHOLE_FACTORIZED _factorized_whole.init(_dim, _dim); _factorized_whole.ZeroClear(); #endif _diag_block_alloc_status = true; _maxdim = 0; for (int n = 0; n < _nfront; n++) { const int itmp = _p_front[n + 1] - _p_front[n]; _maxdim = _maxdim > itmp ? 
_maxdim : itmp; } _n0 = 0; // initialization _nscol = 0; remap_eqn.clear(); } template void TridiagBlockMatrix::SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); template void TridiagBlockMatrix::SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); template void TridiagBlockMatrix::SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); template void TridiagBlockMatrix, double>:: SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); template void TridiagBlockMatrix, quadruple>:: SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); template void TridiagBlockMatrix, float>:: SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz_, const int *prow_, const int *indcols_, const int *indvals_); // template void TridiagBlockMatrix:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { fprintf(stderr, "%s %d : only specialized template version is impelemneted\n", __FILE__, __LINE__); } template<> void TridiagBlockMatrix:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { #if 1 ColumnMatrix* diag_blocks_high; list high_blocks; diag_blocks_high = new ColumnMatrix[_nfront]; TridiagNumericFact_(pivot, #else ColumnMatrix* diag_blocks_high; list 
high_blocks; diag_blocks_high = new ColumnMatrix[_nfront]; TridiagNumericFact_(pivot, #endif eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { // fprintf(stderr, "%s %d : %d\n", __FILE__, __LINE__, (*it)); diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); #if 0 fprintf(stderr, "%s %d : _num_null[%d]= ", __FILE__, __LINE__, _nfront); for (int i = 0; i < _nfront; i++) { fprintf(stderr, "%d ", _num_null[i]); } fprintf(stderr, "\n"); #endif } template<> void TridiagBlockMatrix, double>:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix > *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { ColumnMatrix >* diag_blocks_high; list high_blocks; diag_blocks_high = new ColumnMatrix >[_nfront]; TridiagNumericFact_, double, complex, quadruple>(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); } template<> void TridiagBlockMatrix:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector 
&num_null_aug, double *nopd, const bool higher_precision) { ColumnMatrix* diag_blocks_high; list high_blocks; diag_blocks_high = new ColumnMatrix[_nfront]; TridiagNumericFact_(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, false, // higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); } template<> void TridiagBlockMatrix, quadruple>:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix >*diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { ColumnMatrix >* diag_blocks_high; list high_blocks; diag_blocks_high = new ColumnMatrix >[_nfront]; TridiagNumericFact_, quadruple, complex, quadruple>(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, false, // higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); } template<> void TridiagBlockMatrix:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { ColumnMatrix* diag_blocks_high; list high_blocks; diag_blocks_high = new 
ColumnMatrix[_nfront]; TridiagNumericFact_(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { // fprintf(stderr, "%s %d : %d\n", __FILE__, __LINE__, (*it)); diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); #if 0 fprintf(stderr, "%s %d : _num_null[%d]= ", __FILE__, __LINE__, _nfront); for (int i = 0; i < _nfront; i++) { fprintf(stderr, "%d ", _num_null[i]); } fprintf(stderr, "\n"); #endif } template<> void TridiagBlockMatrix, float>:: TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix > *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision) { ColumnMatrix >* diag_blocks_high; list high_blocks; diag_blocks_high = new ColumnMatrix >[_nfront]; TridiagNumericFact_, float, complex, double>(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, higher_precision, _verbose, _fp, _dim, _isSymmetric, _nfront, _p_front, _p_diag, _p_upper, _coef, _ptRows, _indCols, // column_numb _indVals, // column_numb _num_null, _permute, _permute_ginv, _diag_blocks, diag_blocks_high, _upper_blocks, _lower_blocks, high_blocks #ifdef STORE_WHOLE_FACTORIZED , _factorized_whole #endif ); for (list::iterator it = high_blocks.begin(); it != high_blocks.end(); ++it) { diag_blocks_high[(*it)].free(); } delete [] diag_blocks_high; high_blocks.clear(); } template void TridiagBlockMatrix::NumericFact(const T* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const U eps_machine, double *nopd) 
{ int nsing, n0, nn0; ColumnMatrix a12, a21, a22, s22, b12, b21, b22; double eps_piv, pivot_val; bool flag_repeat_piv; vector perm; ColumnMatrix* diag_block_save; vector num_null_aug; vector list_sing; const double machine_eps = todouble(eps_machine); _coef = coef; diag_block_save = new ColumnMatrix[_nfront]; TridiagNumericFact(pivot, eps_pivot, dim_aug_kern, diag_block_save, num_null_aug, nopd, higher_precision); nsing = 0; for (int n = 0; n < _nfront; n++) { nsing += _num_null[n]; } if (nsing == 0) { _nscol = 0; _detected = true; // without using kernel detection _n0 = 0; _list_elim.clear(); _list_schur.clear(); _a12.free(); _a21.free(); _s22.free(); } else { // if (nsing > 0) _detected = false; // initialization : detection is not activated diss_printf(_verbose, _fp, "%s %d nsing = %d nfront=%d : ", __FILE__, __LINE__, nsing, _nfront); for (int n = 0; n < _nfront; n++) { diss_printf(_verbose, _fp, "%d/%d ", _num_null[n], (_p_front[n + 1] - _p_front[n])); if (num_null_aug[n] > 0) { diss_printf(_verbose, _fp, "(+%d)", num_null_aug[n]); } } diss_printf(_verbose, _fp, "\n"); s22.init(nsing, nsing); perm.resize(nsing); list_sing.resize(nsing); nsing = 0; for (int n = 0; n < _nfront; n++) { const int offset = _p_front[n]; const int dim1 = _p_front[n + 1] - offset; const int num_nonsing = dim1 - _num_null[n]; for (int i = num_nonsing; i < dim1; i++, nsing++) { // singular entries list_sing[nsing] = _permute[i + offset] + offset; } } a12.init(_dim, nsing); a21.init(_dim, nsing); a22.init(nsing, nsing); ComputeSchurComplement(nsing, list_sing, a12, a21, a22); if (higher_precision) { eps_piv = machine_epsilon(); } else { eps_piv = eps_pivot; // initial is given by user and then repeated } flag_repeat_piv = true; int count_repeat = 0; while (flag_repeat_piv) { s22.copy(a22); // find maximum diagonal from s22[] pivot_val = 0.0; for (int i = 0; i < nsing; i++) { // pivot value in double const double tmp = blas_abs(s22(i, i)); pivot_val = tmp > pivot_val ? 
tmp : pivot_val; } n0 = 0; // by assuming the matrix is invertible if (_isSymmetric) { double fop; full_ldlt_permute(&nn0, n0, nsing, s22.addrCoefs(), nsing, &pivot_val, &perm[0], eps_piv, &fop); } else { double fop; full_ldu_permute(&nn0, n0, nsing, s22.addrCoefs(), nsing, &pivot_val, &perm[0], eps_piv, &fop); } diss_printf(_verbose, _fp, "%s %d factorize dim = %d eps_piv = %g -> sing = %d\n", __FILE__, __LINE__, nsing, eps_piv, nn0); // if (((nsing - dim_aug_kern) >= nn0) || (eps_piv < TOL_PIVOT)) { flag_repeat_piv = false; break; } else { eps_piv /= 10.0; count_repeat++; } } // while (flag_repeat_piv) n0 = nn0; diss_printf(_verbose, _fp, "%s %d : n0 = %d count_repeat = %d\n", __FILE__, __LINE__, n0, count_repeat); if ((n0 == 0) && (count_repeat == 0)) { _n0 = 0; _detected = true; // without kernel check } else if (!kernel_detection) { _n0 = (-1); // for safety _detected = false; } else { // if ((n0 == 0) && (count_repeat == 0)) && kernel_detection int nsing1, nsing2; if (n0 == 0) { nsing1 = nsing; b22.init(nsing1, nsing1); b22.copy(a22); // } else { // if (n0 > 0) { // compute Schur complement again nsing1 = n0 + dim_aug_kern; nsing2 = nsing - nsing1; if (nsing2 > 0) { b21.init(nsing2, nsing1); b12.init(nsing2, nsing1); b22.init(nsing1, nsing1); if (_isSymmetric) { for (int j = 0; j < nsing1; j++) { const int jj = perm[nsing2 + j]; for (int i = 0; i <= j; i++) { const int ii = perm[nsing2 + i]; b22(i, j) = a22(ii, jj); } } // symmetrize for (int j = 0 ; j < nsing1; j++) { for (int i = 0; i < j; i++) { b22(j, i) = b22(i, j); } } for (int i = 0; i < nsing2; i++) { const int ii = perm[i]; for (int j = 0; j < nsing1; j++) { const int jj = perm[nsing2 + j]; b12(i, j) = a22(ii, jj); } } // alpha = 1 blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, nsing2, nsing1, _one, s22.addrCoefs(), nsing, b12.addrCoefs(), nsing2); for (int i = 0; i < nsing2; i++) { for (int j = 0; j < nsing1; j++) { b21(i, j) = b12(i, j) * s22(i, i); } } } // _isSymmetric else { for (int 
j = 0; j < nsing1; j++) { const int jj = perm[nsing2 + j]; for (int i = 0; i < nsing1; i++) { const int ii = perm[nsing2 + i]; b22(i, j) = a22(ii, jj); } } for (int i = 0; i < nsing2; i++) { const int ii = perm[i]; for (int j = 0; j < nsing1; j++) { const int jj = perm[nsing2 + j]; b12(i, j) = a22(ii, jj); b21(i, j) = a22(jj, ii); } } // alpha = T(1) blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, nsing2, nsing1, _one, s22.addrCoefs(), nsing, b12.addrCoefs(), nsing2); blas_trsm(CblasLeft, CblasLower, CblasTrans, CblasUnit, nsing2, nsing1, _one, s22.addrCoefs(), nsing, b21.addrCoefs(), nsing2); for (int i = 0; i < nsing2; i++) { for (int j = 0; j < nsing1; j++) { b12(i, j) *= s22(i, i); } } } // _isSymmetric // alpha = -1 // beta = 1 blas_gemm(CblasTrans, CblasNoTrans, nsing1, nsing1, nsing2, _none, b21.addrCoefs(), nsing2, b12.addrCoefs(), nsing2, _one, b22.addrCoefs(), nsing1); b21.free(); b12.free(); } // if (nsing2 > 0) else { b22.init(nsing1, nsing1); b22.copy(a22); // } } // if (n0 > 0) bool flag, flag_2x2; flag = ComputeDimKernel(&nn0, &flag_2x2, b22.addrCoefs(), nsing1, _isSymmetric, dim_aug_kern, eps_machine, eps_piv, _verbose, _fp); if (!flag) { _n0 = (-1); // for safety _detected = false; } else { _n0 = nn0; _detected = true; } } // if ((n0 == 0) && (count_repeat == 0)) && kernel_detection for (int n = 0; n < _nfront; n++) { if (diag_block_save[n].size() > 0) { _diag_blocks[n].copy(diag_block_save[n]); _num_null[n] -= num_null_aug[n]; num_null_aug[n] = 0; // for safety } diag_block_save[n].free(); // release memory } // recounting "nsing" after determinated singularity with restoring aug_dim nsing = 0; for (int n = 0; n < _nfront; n++) { const int offset = _p_front[n]; const int dim1 = _p_front[n + 1] - offset; const int num_nonsing = dim1 - _num_null[n]; for (int i = num_nonsing; i < dim1; i++, nsing++) { list_sing[nsing] = _permute[i + offset] + offset; } } a12.free(); a21.free(); a21.free(); s22.free(); if ((nsing == _n0) || (!_detected)) { 
// nsing >= _n0 _nscol = 0; _list_schur.clear(); _list_elim.resize(nsing); for (int i = 0; i < nsing; i++){ _list_elim[i] = list_sing[i]; } _a12.free(); _a21.free(); _s22.free(); if (!_detected) { _n0 = nsing; } } else { // if ((nsing < _n0) && _detected) a12.init(_dim, nsing); a21.init(_dim, nsing); a22.init(nsing, nsing); ComputeSchurComplement(nsing, list_sing, a12, a21, a22); // debug : end pivot_val = 0.0; for (int i = 0; i < nsing; i++) { // pivot value in double const double tmp = blas_abs(a22(i, i)); pivot_val = tmp > pivot_val ? tmp : pivot_val; } diss_printf(_verbose, _fp, "%s %d : _n0 = %d nsing = %d\n", __FILE__, __LINE__, _n0, nsing); // factorize Schur complement by knowing the dimension of the kernle : _n0 if (_isSymmetric) { double fop; bool flag; flag = full_ldlt_permute(&nn0, _n0, nsing, a22.addrCoefs(), nsing, &pivot_val, &perm[0], machine_eps, &fop); if (!flag) { diss_printf(_verbose, _fp, "%s %d : full_ldlt_permute fails : %d != %d\n", __FILE__, __LINE__, _n0, nn0); } } else { double fop; bool flag; flag = full_ldu_permute(&nn0, _n0, nsing, a22.addrCoefs(), nsing, &pivot_val, &perm[0], machine_eps, &fop); if (!flag) { diss_printf(_verbose, _fp, "%s %d : full_ldu_permute fails : %d != %d\n", __FILE__, __LINE__, _n0, nn0); } } _nscol = nsing - _n0; diss_printf(_verbose, _fp, "%s %d : detected = %s : _nscol = %d _n0 = %d : perm[] = ", __FILE__, __LINE__, (_detected ? 
"true" : "false"), _nscol, _n0); for (int i = 0; i < nsing; i++) { diss_printf(_verbose, _fp, "%d ", perm[i]); } diss_printf(_verbose, _fp, "\n"); _list_schur.resize(_nscol); if (_n0 > 0) { _list_elim.resize(_n0); } for (int i = 0; i < _nscol; i++){ _list_schur[i] = _permute_ginv[list_sing[perm[i]]]; // regular part } for (int i = 0; i < _n0; i++){ _list_elim[i] = list_sing[perm[i + _nscol]]; } diss_printf(_verbose, _fp, "%s %d : _nscol = %d : _list_schur[] = ", __FILE__, __LINE__, _nscol); for (int i = 0; i < _nscol; i++) { diss_printf(_verbose, _fp, "%d ", _list_schur[i]); } diss_printf(_verbose, _fp, "\n"); if (_n0 > 0) { diss_printf(_verbose, _fp, "%s %d : n0 = %d : _list_elim[] = ", __FILE__, __LINE__, _n0); for (int i = 0; i < _n0; i++) { diss_printf(_verbose, _fp, "%d ", _list_elim[i]); } diss_printf(_verbose, _fp, "\n"); diss_printf(_verbose, _fp, "%s %d : n0 = %d : _new2old[_list_elim[]] = ", __FILE__, __LINE__, _n0); for (int i = 0; i < _n0; i++) { diss_printf(_verbose, _fp, "%d ", _new2old[_list_elim[i]]); } diss_printf(_verbose, _fp, "\n"); } // if (_n0 > 0) _a12.init(_dim, _nscol); for (int j = 0; j < _nscol; j++) { for (int i = 0; i < _dim; i++) { _a12(_permute_ginv[i], j) = a12(i, perm[j]); } } if (!_isSymmetric) { _a21.init(_dim, _nscol); for (int j = 0; j < _nscol; j++) { for (int i = 0; i < _dim; i++) { _a21(_permute_ginv[i], j) = a21(i, perm[j]); } } } else { _a21.free(); } _s22.init(_nscol, _nscol); // a22 is already permuted by perm[] during LDLt/LDU, // _s22 is smaller than a22 for (int i = 0; i < _nscol; i++) { for (int j = 0; j < _nscol; j++) { _s22(i, j) = a22(i, j); } } a12.free(); a21.free(); a22.free(); } perm.clear(); } // if (nsing > 0) delete [] diag_block_save; } template void TridiagBlockMatrix:: NumericFact(const double* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const double eps_machine, double *nopd); template void TridiagBlockMatrix:: 
NumericFact(const quadruple* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const quadruple eps_machine, double *nopd); template void TridiagBlockMatrix, double>:: NumericFact(const complex* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const double eps_machine, double *nopd); template void TridiagBlockMatrix, quadruple>:: NumericFact(const complex* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const quadruple eps_machine, double *nopd); template void TridiagBlockMatrix:: NumericFact(const float* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const float eps_machine, double *nopd); template void TridiagBlockMatrix, float>:: NumericFact(const complex* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const float eps_machine, double *nopd); // template void TridiagBlockMatrix::ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix& a12, ColumnMatrix& a21, ColumnMatrix& a22) { // _nscol should be set 0 before calling a12.ZeroClear(); a21.ZeroClear(); a22.ZeroClear(); for (int j = 0; j < nsing; j++) { extract_column(list_sing[j], a12.addrCoefs() + (j * _dim)); } for (int j = 0; j < nsing; j++) { for (int i = 0; i < nsing; i++) { a22(i, j) = a12(list_sing[i], j); } } for (int j = 0; j < nsing; j++) { for (int i = 0; i < nsing; i++) { a12(list_sing[i], j) = _zero; } } if (_isSymmetric) { a21.copy(a12); } else { for (int j = 0; j < nsing; j++) { extract_row(list_sing[j], a21.addrCoefs() + (j * _dim)); } for (int j = 0; j < nsing; j++) { for (int i = 0; i < nsing; i++) { a21(list_sing[i], j) = _zero; } } } SolveMulti(false, false, nsing, a12, 0); // alpha = -1 // 
beta = 1 blas_gemm(CblasTrans, CblasNoTrans, nsing, nsing, _dim, _none, a21.addrCoefs(), _dim, a12.addrCoefs(), _dim, _one, a22.addrCoefs(), nsing); if (_isSymmetric) { // symmetrize for (int i = 0; i < nsing; i++) { for (int j = i + 1; j < nsing; j++) { a22(j, i) = a22(i, j); // acess lower } } } else { SolveMulti(false, true, nsing, a21, 0); } } template void TridiagBlockMatrix:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix& a12, ColumnMatrix& a21, ColumnMatrix& a22); template void TridiagBlockMatrix:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix& a12, ColumnMatrix& a21, ColumnMatrix& a22); template void TridiagBlockMatrix, double>:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix >& a12, ColumnMatrix >& a21, ColumnMatrix >& a22); template void TridiagBlockMatrix, quadruple>:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix >& a12, ColumnMatrix >& a21, ColumnMatrix >& a22); template void TridiagBlockMatrix:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix& a12, ColumnMatrix& a21, ColumnMatrix& a22); template void TridiagBlockMatrix, float>:: ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix >& a12, ColumnMatrix >& a21, ColumnMatrix >& a22); // template void TridiagBlockMatrix::SingularNode(vector &list_sing) { if (_n0 >= 0) { list_sing.resize(_n0); for (int i = 0; i < _n0; i++) { list_sing[i] = _new2old[_list_elim[i]]; } } else { diss_printf(_verbose, _fp, "%s %d : _n0 = %d\n", __FILE__, __LINE__, _n0); } } template void TridiagBlockMatrix:: SingularNode(vector &list_sing); template void TridiagBlockMatrix:: SingularNode(vector &list_sing); template void TridiagBlockMatrix, double>:: SingularNode(vector &list_sing); template void TridiagBlockMatrix, quadruple>:: SingularNode(vector &list_sing); template void TridiagBlockMatrix:: SingularNode(vector &list_sing); template void TridiagBlockMatrix, float>:: 
SingularNode(vector &list_sing); // //#define DEBUG_FACTORIZATION template void TridiagNumericFact_(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const T* &_coef, vector &_ptRows, vector &_indCols, // column_numb vector &_indVals, // column_numb vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix* &_diag_blocks, ColumnMatrix* &_diag_blocks_high, ColumnMatrix* &_upper_blocks, ColumnMatrix* &_lower_blocks, list &_high_blocks #ifdef STORE_WHOLE_FACTORIZED , ColumnMatrix &_factorized_whole #endif ) { int dim1, dim2, nn0; const int n0 = 0; int n0_total = 0; ColumnMatrix diag_works, diag_origs; // supposing "dim2 <= size_b2" #ifdef SPARSE_OFFDIAG ColumnMatrix upper, lower; ColumnMatrix upper_high, lower_high; #endif int upper_size_max; int diag_size_max; int *permute_diag, *permute_offdiag, *permute_work; int *permute_diag_inv, *permute_offdiag_inv; double fop; //const double eps_pivot_high = machine_epsilon(); const double eps_pivot_high = machine_epsilon(); const T _zero(0.0); double pivot_save; _num_null.resize(_nfront); // _null_lists_local.resize(_nfront); _permute.resize(_dim); *pivot = 1.0; upper_size_max = 0; for (int n = 1; n < _nfront; n++) { const int itmp = ((_p_front[n] - _p_front[n - 1]) * (_p_front[n + 1] - _p_front[n])); upper_size_max = itmp > upper_size_max ? 
itmp : upper_size_max; } #ifdef SPARSE_OFFDIAG VectorArray upper_work(upper_size_max); VectorArray lower_work(upper_size_max); if (higher_precision) { VectorArray upper_high_work(upper_size_max); VectorArray lower_high_work(upper_size_max); } #else VectorArray permute_vals(upper_size_max); #endif bool high_factorization = false; bool high_schur_complement = false; diag_size_max = 0; for (int n = 0; n < _nfront; n++) { const int itmp = _p_front[n + 1] - _p_front[n]; _diag_blocks[n].init(itmp, itmp); diag_size_max = itmp > diag_size_max ? itmp : diag_size_max; } permute_offdiag = new int[diag_size_max]; permute_offdiag_inv = new int[diag_size_max]; permute_diag_inv = new int[diag_size_max]; permute_work = new int[diag_size_max]; { // n = 0 dim2 = _p_front[1] - _p_front[0]; ColumnMatrix &diag = _diag_blocks[0]; diag.ZeroClear(); permute_diag = &_permute[0] + _p_front[0]; for (int i = _p_front[0]; i < _p_front[1]; i++) { for (int k = _p_diag[i]; k < _p_upper[i]; k++) { const int ii = i - _p_front[0]; const int jj = _indCols[k] - _p_front[0]; diag(ii, jj) = _coef[_indVals[k]]; } } if (higher_precision) { // copy to higher precision ColumnMatrix &diag_high = _diag_blocks_high[0]; diag_high.init(dim2, dim2); for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim2; i++) { diag_high(i, j) = conv_prec(diag(i, j)); } } pivot_save = *pivot; } if (_isSymmetric) { bool flag; flag = full_ldlt_permute(&nn0, n0, dim2, diag.addrCoefs(), dim2, pivot, permute_diag, eps_pivot, &fop); } else { bool flag; flag = full_ldu_permute(&nn0, n0, dim2, diag.addrCoefs(), dim2, pivot, permute_diag, eps_pivot, &fop); } if (higher_precision) { high_factorization = false; if (nn0 > 0) { // if (true) { fprintf(stderr, "%s %d : switch to quadruple at %d %d/%d ", __FILE__, __LINE__, 0, nn0, dim2); high_factorization = true; _high_blocks.push_back(0); ColumnMatrix &diag_high = _diag_blocks_high[0]; *pivot = pivot_save; if (_isSymmetric) { bool flag; flag = full_ldlt_permute(&nn0, n0, dim2, 
diag_high.addrCoefs(), dim2, pivot, permute_diag, eps_pivot_high, &fop); } else { bool flag; flag = full_ldu_permute(&nn0, n0, dim2, diag_high.addrCoefs(), dim2, pivot, permute_diag, eps_pivot_high, &fop); } fprintf(stderr, "%d\n", nn0); #if 0 for (int i = 0; i < dim2; i++) { fprintf(stderr, "%d %s %s\n", i, tostring(diag(i,i)).c_str(), tostring(diag_high(i,i)).c_str()); } #endif for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim2; i++) { diag(i, j) = conv_prec(diag_high(i, j)); } } } } { _num_null[0] = nn0; *nopd += fop; for (int i = 0; i < nn0; i++) { // _null_lists_local[0].push_back(i + (dim2 - nn0)); // nullifying rows and columns for (int i = (dim2 - nn0); i < dim2; i++) { for (int j = 0; j < dim2; j++) { diag(i, j) = _zero; diag(j, i) = _zero; } } } n0_total += nn0; } } // n = 0 for (int n = 1; n < _nfront; n++) { ColumnMatrix &diag1 = _diag_blocks[n - 1]; vector i0; dim1 = dim2; dim2 = _p_front[n + 1] - _p_front[n]; #ifdef SPARSE_OFFDIAG upper.init(dim1, dim2, upper_work.addrCoefs(), false); lower.init(dim1, dim2, lower_work.addrCoefs(), false); upper_high.init(dim1, dim2, upper_high_work.addrCoefs(), false); lower_high.init(dim1, dim2, lower_high_work.addrCoefs(), false); #else ColumnMatrix &upper = _upper_blocks[n]; // 27 Nov.2015 ColumnMatrix &lower = _lower_blocks[n]; // 27 Nov.2015 upper.init(dim1, dim2); lower.init(dim1, dim2); ColumnMatrix upper_high; ColumnMatrix lower_high; if (high_factorization) { upper_high.init(dim1, dim2); lower_high.init(dim1, dim2); } #endif // ColumnMatrix upper, lower // permute_diag[] is defined in the previous step n-1 // generate permute_offdiag[] from previous permute_diag[] GenPermuteOffdiag(_p_front[n - 1], _p_front[n], _p_front[n + 1], _ptRows, _p_diag, _indCols, permute_diag, permute_diag_inv, permute_offdiag, permute_offdiag_inv, i0); upper.ZeroClear(); if (_isSymmetric) { for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int jj = permute_offdiag_inv[i - _p_front[n]]; for (int k = _ptRows[i]; k < 
_p_diag[i]; k++) { // access lower block to create upper block const int ii = permute_diag_inv[_indCols[k] - _p_front[n - 1]]; upper(ii, jj) = _coef[_indVals[k]]; } } } else { lower.ZeroClear(); for (int i = _p_front[n - 1]; i < _p_front[n]; i++) { const int ii = permute_diag_inv[i - _p_front[n - 1]]; for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) { const int jj = permute_offdiag_inv[_indCols[k] - _p_front[n]]; upper(ii, jj) = _coef[_indVals[k]]; } } for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int jj = permute_offdiag_inv[i - _p_front[n]]; for (int k = _ptRows[i]; k < _p_diag[i]; k++) { // same as upper() of _isSymmetric const int ii = permute_diag_inv[_indCols[k] - _p_front[n - 1]]; lower(ii, jj) = _coef[_indVals[k]]; } } } ColumnMatrix &diag = _diag_blocks[n]; if (high_factorization) { ColumnMatrix &diag1_high = _diag_blocks_high[n - 1]; for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { upper_high(i, j)=conv_prec(upper(i,j)); lower_high(i, j)=conv_prec(lower(i,j)); } } full_fw_multiprofile(false, dim1, _num_null[n - 1], dim2, diag1_high.addrCoefs(), dim1, upper_high.addrCoefs(), dim1, i0, &fop); if (_isSymmetric) { // reduce arithmeic by setting T(0) for 0 <=i< i0[j] for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { lower_high(i, j) = upper_high(i, j) * diag1_high(i, i); } } } else { full_fw_multiprofile(true, dim1, _num_null[n - 1], dim2, diag1_high.addrCoefs(), dim1, lower_high.addrCoefs(), dim1, i0, &fop); for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { lower_high(i, j) *= diag1_high(i, i); } } } ColumnMatrix &diag_high = _diag_blocks_high[n]; diag_high.init(dim2, dim2); // alpha = -1, beta = 0 SparseSchur(_isSymmetric, dim2, dim1, i0, upper_high, lower_high, diag_high, &fop); for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { for (int k = _p_diag[i]; k < _p_upper[i]; k++) { const int ii = permute_offdiag_inv[i - _p_front[n]]; const int jj = permute_offdiag_inv[_indCols[k] - _p_front[n]]; 
diag_high(ii, jj) += conv_prec(_coef[_indVals[k]]); } } for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim2; i++) { diag(i, j) = conv_prec(diag_high(i, j)); } } #ifndef SPARSE_OFFDIAG for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { upper_high(i, j) *= diag1_high(i, i); } } #endif for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { upper(i, j)=conv_prec(upper_high(i,j)); lower(i, j)=conv_prec(lower_high(i,j)); } } #ifndef SPARSE_OFFDIAG upper_high.free(); lower_high.free(); #endif high_schur_complement = true; // diag_high.free(); } // if (high_factorization) else { full_fw_multiprofile(false, dim1, _num_null[n - 1], dim2, diag1.addrCoefs(), dim1, upper.addrCoefs(), dim1, i0, &fop); if (_isSymmetric) { // reduce arithmeic by setting T(0) for 0 <=i< i0[j] for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { lower(i, j) = upper(i, j) * diag1(i, i); } } } else { full_fw_multiprofile(true, dim1, _num_null[n - 1], dim2, diag1.addrCoefs(), dim1, lower.addrCoefs(), dim1, i0, &fop); for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { lower(i, j) *= diag1(i, i); } } } // alpha = -1, beta = 0 SparseSchur(_isSymmetric, dim2, dim1, i0, upper, lower, diag, &fop); for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { for (int k = _p_diag[i]; k < _p_upper[i]; k++) { const int ii = permute_offdiag_inv[i - _p_front[n]]; const int jj = permute_offdiag_inv[_indCols[k] - _p_front[n]]; diag(ii, jj) += _coef[_indVals[k]]; } } #ifndef SPARSE_OFFDIAG for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { upper(i, j) *= diag1(i, i); } } #endif } // if (high_factorization) if (higher_precision) { // copy to higher precision ColumnMatrix &diag_high = _diag_blocks_high[n]; if (!high_schur_complement) { diag_high.init(dim2, dim2); for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim2; i++) { diag_high(i, j) = conv_prec(diag(i, j)); } } } pivot_save = *pivot; } if (_isSymmetric) { bool flag; flag = 
full_ldlt_permute(&nn0, n0, dim2, diag.addrCoefs(), dim2, pivot, permute_work, eps_pivot, &fop); } else { bool flag; flag = full_ldu_permute(&nn0, n0, dim2, diag.addrCoefs(), dim2, pivot, permute_work, eps_pivot, &fop); } if (higher_precision) { high_factorization = false; if (nn0 > 0) { // if (true) { fprintf(stderr, "%s %d : switch to quadruple at %d %d/%d : ", __FILE__, __LINE__, n, nn0, dim2); high_factorization = true; _high_blocks.push_back(n); ColumnMatrix &diag_high = _diag_blocks_high[n]; *pivot = pivot_save; if (_isSymmetric) { bool flag; flag = full_ldlt_permute(&nn0, n0, dim2, diag_high.addrCoefs(), dim2, pivot, permute_work, eps_pivot_high, &fop); } else { bool flag; flag = full_ldu_permute(&nn0, n0, dim2, diag_high.addrCoefs(), dim2, pivot, permute_work, eps_pivot_high, &fop); } fprintf(stderr, "%d\n", nn0); #if 0 for (int i = 0; i < dim2; i++) { fprintf(stderr, "%d %s %s\n", i, tostring(diag(i,i)).c_str(), tostring(diag_high(i,i)).c_str()); } #endif for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim2; i++) { diag(i, j) = conv_prec(diag_high(i, j)); } } } else { #if 0 fprintf(stderr, "%s %d : not using quadruple at %d %d/%d : ", __FILE__, __LINE__, n, nn0, dim2); fprintf(stderr, "%d\n", nn0); // if (nn0 > 0) { { for (int i = 0; i < dim2; i++) { fprintf(stderr, "%d %s\n", i, tostring(diag(i,i)).c_str()); } } #endif ColumnMatrix &diag_high = _diag_blocks_high[n]; diag_high.free(); } high_schur_complement = false; } // if (higher_precision) permute_diag = &_permute[0] + _p_front[n]; // generate permute_diag from pemrute_work + perumte_upper for (int i = 0; i < dim2; i++) { permute_diag[i] = permute_offdiag[permute_work[i]]; } #ifndef SPARSE_OFFDIAG for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { permute_vals[i + j * dim1] = upper(i, permute_work[j]); } } for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { upper(i, j) = permute_vals[i + j * dim1]; } } for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { 
permute_vals[i + j * dim1] = lower(i, permute_work[j]); } } for (int j = 0; j < dim2; j++) { for (int i = 0; i < dim1; i++) { lower(i, j) = permute_vals[i + j * dim1]; } } #endif { _num_null[n] = nn0; *nopd += fop; for (int i = 0; i < nn0; i++) { // nullifying rows and columns for (int i = (dim2 - nn0); i < dim2; i++) { for (int j = 0; j < dim2; j++) { diag(i, j) = _zero; diag(j, i) = _zero; } } } // loop : i n0_total += nn0; } } // loop : n if ((n0_total > 0) && (dim_aug_kern > 0)) { int count = 0; bool flag = false; num_null_aug.clear(); num_null_aug.resize(_nfront, 0); for (int n = (_nfront - 1); n >= 0; n--) { dim1 = _p_front[n + 1] - _p_front[n]; for (int k = (dim1 - _num_null[n] - 1); k >= 0; k--) { num_null_aug[n]++; count++; if (count >= dim_aug_kern) { flag = true; break; } } if (flag) { break; } } if (flag == false) { diss_printf(_verbose, _fp, "%s %d : dim_aug_kern = %d > dim = %d?\n", __FILE__, __LINE__, dim_aug_kern, _dim); } for (int n = (_nfront - 1); n >= 0; n--) { diag_block_save[n].free(); if (num_null_aug[n] > 0) { dim1 = _p_front[n + 1] - _p_front[n]; diag_block_save[n].init(dim1, dim1); diag_block_save[n].copy(_diag_blocks[n]); _num_null[n] += num_null_aug[n]; // for (int j = 0; j < num_null_aug[n]; j++) { const int jj = dim1 - _num_null[n] + j; for (int i = 0; i < dim1; i++) { _diag_blocks[n](i, jj) = _zero; _diag_blocks[n](jj, i) = _zero; } } } } // loop : n } // if ((n0_total > 0) && kernel_detection) { _permute_ginv.resize(_dim); for (int n = 0; n < _nfront; n++) { const int offset = _p_front[n]; for (int i = offset; i < _p_front[n + 1]; i++) { _permute_ginv[_permute[i] + offset] = i; } } //#ifdef SPARSE_OFFDIAG // delete [] upper_work; // delete [] lower_work; //#else // delete [] permute_vals; //#endif delete [] permute_offdiag; delete [] permute_diag_inv; delete [] permute_offdiag_inv; delete [] permute_work; //#ifdef DEBUG_FACTORIZATION // delete [] diag_orig; //#endif } template void TridiagNumericFact_(double *pivot, const double 
eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const double* &_coef, vector &_ptRows, vector &_indCols, // column_numb vector &_indVals, // column_numb vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix* &_diag_blocks, ColumnMatrix* &_diag_blocks_high, ColumnMatrix* &_upper_blocks, ColumnMatrix* &_lower_blocks, list &_high_blocks #ifdef STORE_WHOLE_FACTORIZED , ColumnMatrix &_factorized_whole #endif ); template void TridiagNumericFact_, double, complex, quadruple>(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix > *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const complex* &_coef, vector &_ptRows, vector &_indCols, // column_numb vector &_indVals, // column_numb vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix >* &_diag_blocks, ColumnMatrix >* &_diag_blocks_high, ColumnMatrix >* &_upper_blocks, ColumnMatrix >* &_lower_blocks, list &_high_blocks #ifdef STORE_WHOLE_FACTORIZED , ColumnMatrix > &_factorized_whole #endif ); template void TridiagNumericFact_ (double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const float* &_coef, vector &_ptRows, vector &_indCols, // column_numb vector &_indVals, // column_numb vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix* &_diag_blocks, ColumnMatrix* &_diag_blocks_high, ColumnMatrix* &_upper_blocks, ColumnMatrix* &_lower_blocks, list 
&_high_blocks
#ifdef STORE_WHOLE_FACTORIZED
 , ColumnMatrix &_factorized_whole
#endif
 );

template void TridiagNumericFact_, float, complex, double> (double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix > *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const complex* &_coef, vector &_ptRows, vector &_indCols, // column_numb
 vector &_indVals, // column_numb
 vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix >* &_diag_blocks, ColumnMatrix >* &_diag_blocks_high, ColumnMatrix >* &_upper_blocks, ColumnMatrix >* &_lower_blocks, list &_high_blocks
#ifdef STORE_WHOLE_FACTORIZED
 , ColumnMatrix > &_factorized_whole
#endif
 );

// Computes this tridiagonal block's contribution to the Schur complement stored in local_s.
//
// Steps, as implemented below:
//   1. split the dim_ equations into those selected by color_mask/_color (first) and the rest
//      (stored from position _dim on) and build the inverse map remap_eqn;
//   2. let GenPermuteUpper produce the permutations and the per-column start rows i0;
//   3. fill the dense blocks b (and c in the unsymmetric case) with FillBlockSparse;
//   4. run ForwardUpper on b (and, transposed, on c); when _nscol > 0 the rows listed in
//      _list_schur are handled separately through y, z, _a12/_a21 and _s22;
//   5. accumulate c^T b (plus z^T y when _nscol > 0) into local_s. The result overwrites local_s
//      when _color == 1 and is added to it otherwise.
// tt[0] .. tt[4] receive timestamps: entry, after filling, after the forward step, after the
// products, end of routine.
// NOTE(review): nopd is never written here, and `fop` is only passed on in the PERMUTE_UPPER
// build - confirm whether the operation count is meant to be reported.
// NOTE(review): template parameter lists (on `template`, `vector`, `ColumnMatrix`,
// `SquareBlockMatrix`, ...) appear to be missing in this copy - confirm against upstream.
template void TridiagBlockMatrix::ComputeSchur(const int dim_, int* color_mask, const int ncol_, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const T *coef, const int size_b1, SquareBlockMatrix &local_s, double *nopd, elapsed_t *tt) {
  //input : CSR data of upper block : ptrow1, indcols1, indvals1, indvals2, coef
  //output: Schur complement : -c^T a^-1 b
  //      : permutation of upper block : old2new_j
  ColumnMatrix b, c, y, z;
  vector dscale;
  // const int nrow = _dim;
  vector i0;
  vector permute_diag_inv, permute_upper, permute_upper_inv;
  // T *s;
  int ncol;
  vector map_eqn, remap_eqn;
  get_realtime(&tt[0]); // entering
  map_eqn.resize(dim_);
  remap_eqn.resize(dim_);
  // selected equations fill map_eqn from index 0, all others from index _dim
  int jtmp = _dim;
  int itmp = 0;
  for (int i = 0; i < dim_; i++) {
    if ((color_mask[i] == _color) || (color_mask[i] == (-_color)) || ((_color == 1) && color_mask[i] == 0)) {
      map_eqn[itmp++] = i;
    } else {
      map_eqn[jtmp++] = i;
    }
  }
  for (int i = 0; i < dim_; i++) {
    remap_eqn[map_eqn[i]] = i;
  }
  const int nrow = _dim;
  GenPermuteUpper(nrow, remap_eqn, ncol_, ptrow1, indcols1, _nfront, _p_front, _new2old, _permute, permute_diag_inv, permute_upper, permute_upper_inv, i0, _verbose, _fp);
  // ncol = number of entries of i0 up to and including the first one that is >= nrow
  // (all of them if there is none)
  ncol = 0;
  for (vector::const_iterator it = i0.begin(); it != i0.end(); ++it) {
    ncol++;
    if (*it >= nrow) {
      break;
    }
  }
#ifdef PERMUTE_UPPER
  ColumnMatrix s(ncol, ncol);
#endif
  b.init(nrow, ncol);
  c.init(nrow, ncol);
  dscale.resize(nrow);
  b.ZeroClear();
  FillBlockSparse(coef, nrow, map_eqn, ptrow1, indcols1, indvals1, permute_diag_inv, permute_upper_inv, b);
  if (!_isSymmetric) {
    // the unsymmetric case needs a second block, filled through indvals2
    c.ZeroClear();
    FillBlockSparse(coef, nrow, map_eqn, ptrow1, indcols1, indvals2, permute_diag_inv, permute_upper_inv, c);
  }
  get_realtime(&tt[1]); // after filling entries
  // A_12' = A_11^-1 A_12    A_21'^T = A_11^-T A_21^T
  //[L_11      ][D_11     ][U_11 A_12']  forward : x_1 = L_11^-1 b_1
  //[A_21' L_22][     D_22][     U_22 ]            x_2 = L_22^-1 (b_2 - A_21' x_1)
  //                                     backward: x_1 = U_11^-T b_1
  //                                               x_2 = U_22^-1 (b_2 - A_12'^T x_1)
  // symmetric => A_21' = A_12'^T
  // unsymmetric A_21'^T is stored ==> DGEMM('T','N',...,(tri%a12) or (tri%a21),
  if (_nscol > 0) {
    y.init(_nscol, ncol);
    z.init(_nscol, ncol);
    for (int j = 0; j < ncol; j++) {
      for (int i = 0; i < _nscol; i++) {
        y(i, j) = b(_list_schur[i], j);
        // nullifying
        b(_list_schur[i], j) = _zero;
      }
    }
    // alpha = -1; beta = 1;
    if (_isSymmetric) {
      // y -= A_12 ^T b
      blas_gemm(CblasTrans, CblasNoTrans, _nscol, ncol, nrow, _none, _a12.addrCoefs(), nrow, b.addrCoefs(), nrow, _one, y.addrCoefs(), _nscol);
    } else {
      // y -= A_21 ^T b
      blas_gemm(CblasTrans, CblasNoTrans, _nscol, ncol, nrow, _none, _a21.addrCoefs(), nrow, b.addrCoefs(), nrow, _one, y.addrCoefs(), _nscol);
    }
  } // if (_nscol > 0)
  ForwardUpper(false, ncol, b, i0, dscale); // normal
  if (_nscol > 0) {
    // alpha = 1.0
    blas_trsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, _nscol, ncol, _one, _s22.addrCoefs(), _nscol, y.addrCoefs(), _nscol);
    if (_isSymmetric) {
      for (int i = 0; i < _nscol; i++) {
        // exclude _2 entries
        dscale[_list_schur[i]] = _zero;
      }
      for (int j = 0; j < ncol; j++) {
        for (int i = 0; i < _nscol; i++) {
          z(i, j) = y(i, j) * _s22(i, i);
        }
      }
    }
  }
  if (_isSymmetric) {
    // ddtimesu : c = diag(dscale) * b, with rows above i0[j] set to zero
    for (int j = 0; j < ncol; j++) {
      for (int i = 0; i < i0[j]; i++) {
        c(i, j) = _zero;
      }
      for (int i = i0[j]; i < nrow; i++) {
        c(i, j) = dscale[i] * b(i, j);
      }
      // fop += (nrow-j0(j)-1)
    }
  } else { // if (isSymmetric)
    if (_nscol > 0) {
      for (int j = 0; j < ncol; j++) {
        for (int i = 0; i < _nscol; i++) {
          z(i, j) = c(_list_schur[i], j);
          // nullifying : easier than changing entries in dscale[]
          c(_list_schur[i], j) = _zero;
        }
      }
      // alpha = -1; beta = 1;
      // z -= A_12 ^T c
      blas_gemm(CblasTrans, CblasNoTrans, _nscol, ncol, nrow, _none, _a12.addrCoefs(), nrow, c.addrCoefs(), nrow, _one, z.addrCoefs(), _nscol);
    }
    ForwardUpper(true, ncol, c, i0, dscale); // Transposed
    if (_nscol > 0) {
      // alpha = 1.0
      blas_trsm(CblasLeft, CblasUpper, CblasTrans, CblasUnit, _nscol, ncol, _one, _s22.addrCoefs(), _nscol, z.addrCoefs(), _nscol);
      for (int i = 0; i < _nscol; i++) {
        dscale[_list_schur[i]] = _zero;
      }
      for (int j = 0; j < ncol; j++) {
        for (int i = 0; i < _nscol; i++) {
          z(i, j) *= _s22(i, i);
        }
      }
    }
    // ddtimesu : scale c in place by dscale, with rows above i0[j] set to zero
    for (int j = 0; j < ncol; j++) {
      for (int i = 0; i < i0[j]; i++) {
        c(i, j) = _zero;
      }
      for (int i = i0[j]; i < nrow; i++) {
        c(i, j) *= dscale[i];
      }
      // fop += (nrow-j0(j)-1)
    }
  } // if (_isSymmetric)
  //nops += fop;
  double fop;
  get_realtime(&tt[2]); // after tridiag forward
#ifdef PERMUTE_UPPER
  // s = b * c : alpha = 1.0; beta = 0.0
  if (_isSymmetric) {
    SchurProfileSym(nrow, ncol, i0, b, c, s.addrCoefs(), size_b1, &fop);
  } else {
    SchurProfileUnSym(nrow, ncol, i0, b, c, s.addrCoefs(), size_b1, &fop);
  }
  if (_nscol > 0) {
    // s += z y : alpha = 1.0; beta = 1.0;
    blas_gemm(CblasTrans, CblasNoTrans, ncol, ncol, _nscol, _one, z.addrCoefs(), _nscol, y.addrCoefs(), _nscol, _one, s.addrCoefs(), ncol);
  }
  get_realtime(&tt[3]); // after sparse dgemm
  // nops = tri%nop
  // scatter s into local_s through permute_upper: overwrite for _color == 1, accumulate otherwise;
  // in the symmetric case only entries with row index <= column index are written
  if (_color == 1) {
    local_s.ZeroClear();
    if (_isSymmetric) {
      for (int i = 0; i < ncol; i++) {
        const int ii = permute_upper[i];
        for (int j = i; j < ncol; j++) {
          const int jj = permute_upper[j];
          const int iii = (ii <= jj ? ii : jj);
          const int jjj = (ii <= jj ? jj : ii);
          local_s(iii, jjj) = s(i, j);
        }
      }
    } else {
      for (int i = 0; i < ncol; i++) {
        const int ii = permute_upper[i];
        for (int j = 0; j < ncol; j++) {
          const int jj = permute_upper[j];
          local_s(ii, jj) = s(i, j);
        }
      }
    }
    // local_s.copyFromArrayPermute(s, ncol, &permute_upper_inv[0]);
  } else {
    if (_isSymmetric) {
      for (int i = 0; i < ncol; i++) {
        const int ii = permute_upper[i];
        for (int j = i; j < ncol; j++) {
          const int jj = permute_upper[j];
          const int iii = (ii <= jj ? ii : jj);
          const int jjj = (ii <= jj ? jj : ii);
          local_s(iii, jjj) += s(i, +j);
        }
      }
    } else {
      for (int i = 0; i < ncol; i++) {
        const int ii = permute_upper[i];
        for (int j = 0; j < ncol; j++) {
          const int jj = permute_upper[j];
          local_s(ii, jj) += s(i, j);
        }
      }
    }
    // local_s.addFromArrayPermute(s, ncol, &permute_upper_inv[0]);
  } // if (_color == 1)
#else
  // s = b * c : alpha = 1.0; beta = 0.0
  // block-wise products written straight into local_s; beta is _zero for _color == 1 (overwrite)
  // and _one otherwise (accumulate)
  if (_isSymmetric) {
    for (int j = 0; j < local_s.num_blocks(); j++) {
      // strictly upper blocks (i < j)
      for (int i = 0; i < j; i++) {
        const int nnrow = local_s.nrowBlock(i, j);
        const int nncol = local_s.ncolBlock(i, j);
        const int ishift = local_s.IndexBlock(i) * nrow;
        const int jshift = local_s.IndexBlock(j) * nrow;
        blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, nrow, _one, c.addrCoefs() + ishift, nrow, b.addrCoefs() + jshift, nrow, (_color == 1 ? _zero : _one), local_s.addrCoefBlock(i, j), nnrow);
        if (_nscol > 0) {
          const int ishift1 = local_s.IndexBlock(i) * _nscol;
          const int jshift1 = local_s.IndexBlock(j) * _nscol;
          // s += z y : alpha = 1.0; beta = 1.0;
          blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, _nscol, _one, z.addrCoefs() + ishift1, _nscol, y.addrCoefs() + jshift1, _nscol, _one, local_s.addrCoefBlock(i, j), nnrow);
        }
      }
      // diagonal block (j, j) goes through C_gemm_symm
      {
        const int nnrow = local_s.nrowBlock(j, j);
        const int jshift = local_s.IndexBlock(j) * nrow;
        C_gemm_symm(nnrow, nrow, _one, c.addrCoefs() + jshift, nrow, b.addrCoefs() + jshift, nrow, (_color == 1 ? _zero : _one), local_s.addrCoefBlock(j, j), nnrow);
        if (_nscol > 0) {
          const int jshift1 = local_s.IndexBlock(j) * _nscol;
          // s += z y : alpha = 1.0; beta = 1.0;
          C_gemm_symm(nnrow, _nscol, _one, z.addrCoefs() + jshift1, _nscol, y.addrCoefs() + jshift1, _nscol, _one, local_s.addrCoefBlock(j, j), nnrow);
        }
      }
    } // loop : j
  } // if (_isSymmetric)
  else {
    for (int j = 0; j < local_s.num_blocks(); j++) {
      // upper blocks including the diagonal (i <= j)
      for (int i = 0; i <= j; i++) {
        const int nnrow = local_s.nrowBlock(i, j);
        const int nncol = local_s.ncolBlock(i, j);
        const int ishift = local_s.IndexBlock(i) * nrow;
        const int jshift = local_s.IndexBlock(j) * nrow;
        blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, nrow, _one, c.addrCoefs() + ishift, nrow, b.addrCoefs() + jshift, nrow, (_color == 1 ? _zero : _one), local_s.addrCoefBlock(i, j), nnrow);
        if (_nscol > 0) {
          const int ishift1 = local_s.IndexBlock(i) * _nscol;
          const int jshift1 = local_s.IndexBlock(j) * _nscol;
          // s += z y : alpha = 1.0; beta = 1.0;
          blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, _nscol, _one, z.addrCoefs() + ishift1, _nscol, y.addrCoefs() + jshift1, _nscol, _one, local_s.addrCoefBlock(i, j), nnrow);
        }
      }
      // lower part of local_s stores data in transposed way
      // (hence the operands b/c and y/z are swapped relative to the upper part)
      for (int i = (j + 1); i < local_s.num_blocks(); i++) {
        const int nnrow = local_s.nrowBlock(i, j);
        const int nncol = local_s.ncolBlock(i, j);
        const int ishift = local_s.IndexBlock(i) * nrow;
        const int jshift = local_s.IndexBlock(j) * nrow;
        blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, nrow, _one, b.addrCoefs() + jshift, nrow, c.addrCoefs() + ishift, nrow, (_color == 1 ? _zero : _one), local_s.addrCoefBlock(i, j), nnrow);
        if (_nscol > 0) {
          const int ishift1 = local_s.IndexBlock(i) * _nscol;
          const int jshift1 = local_s.IndexBlock(j) * _nscol;
          // s += z y : alpha = 1.0; beta = 1.0;
          blas_gemm(CblasTrans, CblasNoTrans, nnrow, nncol, _nscol, _one, y.addrCoefs() + jshift1, _nscol, z.addrCoefs() + ishift1, _nscol, _one, local_s.addrCoefBlock(i, j), nnrow);
        }
      }
    }
  } // if (_isSymmetric)
  get_realtime(&tt[3]); // after sparse dgemm
#endif
  // release the work arrays
  b.free();
  c.free();
  y.free();
  z.free();
  dscale.clear();
  permute_diag_inv.clear();
  permute_upper_inv.clear();
#ifdef PERMUTE_UPPER
  s.free(); // delete [] s;
#endif
  get_realtime(&tt[4]); // end of the routine
}

template void TridiagBlockMatrix::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const double *coef, const int size_b1, SquareBlockMatrix &local_s, double *nopd, elapsed_t *tt);

template void TridiagBlockMatrix::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const quadruple *coef, const int size_b1, SquareBlockMatrix &local_s, double *nopd, elapsed_t *tt);

template void TridiagBlockMatrix, double>::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const complex *coef, const int size_b1, SquareBlockMatrix > &local_s, double *nopd, elapsed_t *tt);

template void TridiagBlockMatrix, quadruple>::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const complex *coef, const int size_b1, SquareBlockMatrix > &local_s, double *nopd, elapsed_t *tt);

template void TridiagBlockMatrix::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const float
*coef, const int size_b1, SquareBlockMatrix &local_s, double *nopd, elapsed_t *tt);

template void TridiagBlockMatrix, float>::
ComputeSchur(const int dim_, int* color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const complex *coef, const int size_b1, SquareBlockMatrix > &local_s, double *nopd, elapsed_t *tt);
//

// Solves, in place, for the nrhs right-hand sides stored as the columns of x, using the factors
// kept in _diag_blocks and _upper_blocks/_lower_blocks (or _coef under SPARSE_OFFDIAG).
//
//   flag_new2old  when true, x is read and written through _new2old[i]; otherwise by plain index i.
//   isTrans       selects the transposed variants of the kernels and of the coupling blocks.
//   nrhs          number of columns of x.
//   nscol_        number of entries of _list_schur to treat through _a12/_a21/_s22;
//                 -1 means "use the member _nscol".
//
// The work copy xx is ordered through _permute_ginv. The solve is a forward sweep over the fronts,
// a diagonal scaling (non-SPARSE_OFFDIAG build) and a backward sweep. When nscol > 0 it is
// preceded and followed by the corrections involving y.
// NOTE(review): xx and zz are never free()d explicitly here (only y is) - presumably ColumnMatrix
// releases its storage on destruction; confirm.
// NOTE(review): template parameter lists (on `template`, `ColumnMatrix`, ...) appear to be missing
// in this copy - confirm against upstream.
template void TridiagBlockMatrix::SolveMulti(const bool flag_new2old, const bool isTrans, const int nrhs, ColumnMatrix& x, const int nscol_) {
  ColumnMatrix xx, y, zz;
  double fop;
  const int nscol = (nscol_ == (-1) ? _nscol : nscol_);
  xx.init(_dim, nrhs);
  if (flag_new2old) {
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < _dim; i++) {
        xx(_permute_ginv[i], m) = x(_new2old[i], m);
      }
    }
  } else {
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < _dim; i++) {
        xx(_permute_ginv[i], m) = x(i, m);
      }
    }
  }
  // x(, m) is reordered to follow the permutation generated by
  // TridiagBlockFactorization
  if (nscol > 0) {
    y.init(nscol, nrhs);
    // move the _list_schur rows out of xx into y
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < nscol; i++) {
        y(i, m) = xx(_list_schur[i], m);
        xx(_list_schur[i], m) = _zero; // nullifying
      }
    }
    // alpha = (-1.0); beta = 1.0
    if (_isSymmetric) {
      blas_gemm(CblasTrans, CblasNoTrans, nscol, nrhs, _dim, _none, _a12.addrCoefs(), _dim, xx.addrCoefs(), _dim, _one, y.addrCoefs(), nscol);
    } else {
      if (isTrans) {
        blas_gemm(CblasTrans, CblasNoTrans, nscol, nrhs, _dim, _none, _a12.addrCoefs(), _dim, xx.addrCoefs(), _dim, _one, y.addrCoefs(), nscol);
      } else {
        blas_gemm(CblasTrans, CblasNoTrans, nscol, nrhs, _dim, _none, _a21.addrCoefs(), _dim, xx.addrCoefs(), _dim, _one, y.addrCoefs(), nscol);
      }
    }
    // unit-triangular solve, scaling by the diagonal of _s22, second unit-triangular solve
    blas_trsm(CblasLeft, isTrans ? CblasUpper : CblasLower, isTrans ? CblasTrans : CblasNoTrans, CblasUnit, nscol, nrhs, _one, _s22.addrCoefs(), nscol, y.addrCoefs(), nscol);
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < nscol; i++) {
        y(i, m) *= _s22(i, i);
      }
    }
    blas_trsm(CblasLeft, isTrans ? CblasLower : CblasUpper, isTrans ? CblasTrans : CblasNoTrans, CblasUnit, nscol, nrhs, _one, _s22.addrCoefs(), nscol, y.addrCoefs(), nscol);
  } // if (nscol > 0)
  zz.init(_maxdim, nrhs);
  // forward substitution
  for (int n = 0; n < (_nfront - 1); n++) {
    // copy to working array zz
    const int dim1 = _p_front[n + 1] - _p_front[n];
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < dim1; i++) {
        zz(i, m) = xx(_p_front[n] + i, m);
      }
    }
#ifdef SPARSE_OFFDIAG
    full_fwbw_multi(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, nrhs, zz.addrCoefs(), _maxdim);
    // sparse matrix * dense matrix
    if (isTrans) {
      for (int m = 0; m < nrhs; m++) {
        for (int i = _p_front[n]; i < _p_front[n + 1]; i++) {
          const int ii = _permute_ginv[i] - _p_front[n];
          for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) {
            const int jj = _permute_ginv[_indCols[k]];
            xx(jj, m) -= _coef[_indVals[k]] * zz(ii, m);
          }
        }
      }
    } else {
      for (int m = 0; m < nrhs; m++) {
        for (int i = _p_front[n + 1]; i < _p_front[n + 2]; i++) {
          const int ii = _permute_ginv[i];
          for (int k = _ptRows[i]; k < _p_diag[i]; k++) {
            const int jj = _permute_ginv[_indCols[k]] - _p_front[n];
            xx(ii, m) -= _coef[_indVals[k]] * zz(jj, m);
          }
        }
      }
    } // if (isTrans)
#else
    full_fw_multi(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, nrhs, zz.addrCoefs(), _maxdim, &fop);
    int dim2 = _p_front[n + 2] - _p_front[n + 1];
    // update the rows of front n + 1: xx -= block^T * zz, where block is the stored upper
    // (transposed solve) or lower coupling block
    if (isTrans) {
      ColumnMatrix &upper = _upper_blocks[n + 1];
      blas_gemm(CblasTrans, CblasNoTrans, dim2, nrhs, dim1, _none, upper.addrCoefs(), upper.nbRows(), zz.addrCoefs(), _maxdim, _one, xx.addrCoefs() + _p_front[n + 1], _dim);
    } else {
      ColumnMatrix &lower = _lower_blocks[n + 1];
      blas_gemm(CblasTrans, CblasNoTrans, dim2, nrhs, dim1, _none, lower.addrCoefs(), lower.nbRows(), zz.addrCoefs(), _maxdim, _one, xx.addrCoefs() + _p_front[n + 1], _dim);
    }
    // store the forward result scaled by the diagonal entries of the factorized block
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < dim1; i++) {
        xx(_p_front[n] + i, m) = zz(i, m) * _diag_blocks[n](i, i);
      }
    }
#endif
  }
#ifndef SPARSE_OFFDIAG
  // last front: forward solve and diagonal scaling only, there is no following front to update
  {
    const int n = _nfront - 1;
    const int dim1 = _p_front[n + 1] - _p_front[n];
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < dim1; i++) {
        zz(i, m) = xx(_p_front[n] + i, m);
      }
    }
    full_fw_multi(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, nrhs, zz.addrCoefs(), _maxdim, &fop);
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < dim1; i++) {
        xx(_p_front[n] + i, m) = zz(i, m) * _diag_blocks[n](i, i);
      }
    }
  }
#endif
  // backward substitution
  for (int n = (_nfront - 1); n > 0; n--) {
    const int dim1 = _p_front[n + 1] - _p_front[n];
#ifdef SPARSE_OFFDIAG
    full_fwbw_multi(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, nrhs, xx.addrCoefs() + _p_front[n], _dim);
    if (_isSymmetric) {
      // Transposed sparse matrix * dense matrix
      for (int m = 0; m < nrhs; m++) {
        for (int i = _p_front[n]; i < _p_front[n + 1]; i++) {
          const int ii = _permute_ginv[i];
          for (int k = _ptRows[i]; k < _p_diag[i]; k++) {
            const int jj = _permute_ginv[_indCols[k]];
            xx(jj, m) -= _coef[_indVals[k]] * xx(ii, m);
          }
        }
      }
    } else {
      if (isTrans) {
        for (int m = 0; m < nrhs; m++) {
          for (int i = _p_front[n]; i < _p_front[n + 1]; i++) {
            const int ii = _permute_ginv[i];
            for (int k = _ptRows[i]; k < _p_diag[i]; k++) {
              const int jj = _permute_ginv[_indCols[k]];
              xx(jj, m) -= _coef[_indVals[k]] * xx(ii, m);
            }
          }
        }
      } else {
        for (int m = 0; m < nrhs; m++) {
          for (int i = _p_front[n - 1]; i < _p_front[n]; i++) {
            const int ii = _permute_ginv[i];
            for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) {
              const int jj = _permute_ginv[_indCols[k]];
              xx(ii, m) -= _coef[_indVals[k]] * xx(jj, m);
            }
          }
        }
      } // if (isTrans)
    }
#else
    full_bw_multi(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, nrhs, xx.addrCoefs() + _p_front[n], _dim, &fop);
    int dim2 = _p_front[n] - _p_front[n - 1];
    // update the rows of front n - 1: xx -= block * (solution of front n), where block is the
    // stored lower (transposed solve) or upper coupling block
    if (isTrans) {
      ColumnMatrix &lower = _lower_blocks[n];
      blas_gemm(CblasNoTrans, CblasNoTrans, dim2, nrhs, dim1, _none, lower.addrCoefs(), lower.nbRows(), xx.addrCoefs() + _p_front[n], _dim, _one, xx.addrCoefs() + _p_front[n - 1], _dim);
    } else {
      ColumnMatrix &upper = _upper_blocks[n];
      blas_gemm(CblasNoTrans, CblasNoTrans, dim2, nrhs, dim1, _none, upper.addrCoefs(), upper.nbRows(), xx.addrCoefs() + _p_front[n], _dim, _one, xx.addrCoefs() + _p_front[n - 1], _dim);
    }
#endif
  } // loop : n
  // first front: only its own backward solve remains
  {
    const int dim1 = _p_front[1] - _p_front[0];
#ifdef SPARSE_OFFDIAG
    full_fwbw_multi(isTrans, dim1, _num_null[0], _diag_blocks[0].addrCoefs(), dim1, nrhs, xx.addrCoefs() + _p_front[0], _dim);
#else
    full_bw_multi(isTrans, dim1, _num_null[0], _diag_blocks[0].addrCoefs(), dim1, nrhs, xx.addrCoefs() + _p_front[0], _dim, &fop);
#endif
  }
  if (nscol > 0) {
    // alpha = -1.0; beta = 1.0
    if (isTrans) {
      blas_gemm(CblasNoTrans, CblasNoTrans, _dim, nrhs, nscol, _none, _a21.addrCoefs(), _dim, y.addrCoefs(), nscol, _one, xx.addrCoefs(), _dim);
    } else {
      blas_gemm(CblasNoTrans, CblasNoTrans, _dim, nrhs, nscol, _none, _a12.addrCoefs(), _dim, y.addrCoefs(), nscol, _one, xx.addrCoefs(), _dim);
    }
    // put the separately solved _list_schur entries back into xx
    for (int m = 0; m < nrhs; m++) {
      for (int i = 0; i < nscol; i++) {
        xx(_list_schur[i], m) = y(i, m);
      }
    }
    y.free();
  }
  // copy the result back into x, undoing the _permute_ginv ordering
  if (flag_new2old) {
    for (int j = 0; j < nrhs; j++) {
      for (int i = 0; i < _dim; i++) {
        x(_new2old[i], j) = xx(_permute_ginv[i], j);
      }
    }
  } else {
    for (int j = 0; j < nrhs; j++) {
      for (int i = 0; i < _dim; i++) {
        x(i, j) = xx(_permute_ginv[i], j);
      }
    }
  }
}

template void TridiagBlockMatrix::
SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix& x, const int nscol_);

template void TridiagBlockMatrix::
SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix& x, const int nscol_);

template void TridiagBlockMatrix, double>::
SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix >& x, const int nscol_);

template void TridiagBlockMatrix, quadruple>::
SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix >& x, const int nscol_);

template void TridiagBlockMatrix::
SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix& x, const
int nscol_); template void TridiagBlockMatrix, float>:: SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix >& x, const int nscol_); // template void TridiagBlockMatrix::SolveSingle(const bool flag_new2old, const bool isTrans, T* x_, const int nscol_) { const int nscol = (nscol_ == (-1) ? _nscol : nscol_); double fop; // zz.resize(_maxdim); VectorArray xx(_dim); VectorArray zz(_maxdim); VectorArray y; // allocated only for nscol > 0 if (flag_new2old) { for (int i = 0; i < _dim; i++) { xx[_permute_ginv[i]] = x_[_new2old[i]]; } } else { for (int i = 0; i < _dim; i++) { xx[_permute_ginv[i]] = x_[i]; } } // xx[ ] is reorderd to follow the permutation generated by // TridiagBlockFactorization if (nscol > 0) { y.init(nscol); for (int i = 0; i < nscol; i++) { y[i] = xx[_list_schur[i]]; xx[_list_schur[i]] = _zero; } if (_isSymmetric) { blas_gemv(CblasTrans, _dim, nscol, _none, _a12.addrCoefs(), _dim, xx.addrCoefs(), 1, _one, y.addrCoefs(), 1); } else { if (isTrans) { blas_gemv(CblasTrans, _dim, nscol, _none, _a12.addrCoefs(), _dim, xx.addrCoefs(), 1, _one, y.addrCoefs(), 1); } else { blas_gemv(CblasTrans, _dim, nscol, _none, _a21.addrCoefs(), _dim, xx.addrCoefs(), 1, _one, y.addrCoefs(), 1); } } blas_trsv(isTrans ? CblasUpper : CblasLower, isTrans ? CblasTrans : CblasNoTrans, CblasUnit, nscol, _s22.addrCoefs(), nscol, y.addrCoefs(), 1); for (int i = 0; i < nscol; i++) { y[i] *= _s22(i, i); } blas_trsv(isTrans ? CblasLower : CblasUpper, isTrans ? 
CblasTrans : CblasNoTrans, CblasUnit, nscol, _s22.addrCoefs(), nscol, y.addrCoefs(), 1); } // if (nscol > 0) // forward substitution for (int n = 0; n < (_nfront - 1); n++) { // copy to working array zz const int dim1 = _p_front[n + 1] - _p_front[n]; for (int i = 0; i < dim1; i++) { zz[i] = xx[_p_front[n] + i]; } #ifdef SPARSE_OFFDIAG full_fwbw_single(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, &zz[0]); // sparse matrix * vector if (isTrans) { for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int ii = _permute_ginv[i] - _p_front[n]; for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) { const int jj = _permute_ginv[_indCols[k]]; xx[jj] -= _coef[_indVals[k]] * zz[ii]; } } } else { for (int i = _p_front[n + 1]; i < _p_front[n + 2]; i++) { const int ii = _permute_ginv[i]; for (int k = _ptRows[i]; k < _p_diag[i]; k++) { const int jj = _permute_ginv[_indCols[k]] - _p_front[n]; xx[ii] -= _coef[_indVals[k]] * zz[jj]; } } } #else full_fw_single(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, &zz[0], &fop); int dim2 = _p_front[n + 2] - _p_front[n + 1]; if (isTrans) { ColumnMatrix &upper = _upper_blocks[n + 1]; blas_gemv(CblasTrans, dim1, dim2, _none, upper.addrCoefs(), upper.nbRows(), &zz[0], 1, _one, xx.addrCoefs() + _p_front[n + 1], 1); } else { ColumnMatrix &lower = _lower_blocks[n + 1]; blas_gemv(CblasTrans, dim1, dim2, _none, lower.addrCoefs(), lower.nbRows(), &zz[0], 1, _one, xx.addrCoefs() + _p_front[n + 1], 1); } for (int i = 0; i < dim1; i++) { xx[_p_front[n] + i] = zz[i] * _diag_blocks[n](i, i); } #endif } #ifndef SPARSE_OFFDIAG { const int n = _nfront - 1; const int dim1 = _p_front[n + 1] - _p_front[n]; for (int i = 0; i < dim1; i++) { zz[i] = xx[_p_front[n] + i]; } full_fw_single(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, &zz[0], &fop); for (int i = 0; i < dim1; i++) { xx[_p_front[n] + i] = zz[i] * _diag_blocks[n](i, i); } } #endif // backward substitution for (int n = (_nfront - 1); n > 0; n--) { 
const int dim1 = _p_front[n + 1] - _p_front[n]; #ifdef SPARSE_OFFDIAG full_fwbw_single(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, xx.addrCoefs() + _p_front[n]); if (_isSymmetric) { // Transposed sparse matrix * vector for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int ii = _permute_ginv[i]; for (int k = _ptRows[i]; k < _p_diag[i]; k++) { const int jj = _permute_ginv[_indCols[k]]; xx[jj] -= _coef[_indVals[k]] * xx[ii]; } } } else { if (isTrans) { for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int ii = _permute_ginv[i]; for (int k = _ptRows[i]; k < _p_diag[i]; k++) { const int jj = _permute_ginv[_indCols[k]]; xx[jj] -= _coef[_indVals[k]] * xx[ii]; } } } else { for (int i = _p_front[n - 1]; i < _p_front[n]; i++) { const int ii = _permute_ginv[i]; for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) { const int jj = _permute_ginv[_indCols[k]]; xx[ii] -= _coef[_indVals[k]] * xx[jj]; } } } } #else full_bw_single(isTrans, dim1, _num_null[n], _diag_blocks[n].addrCoefs(), dim1, xx.addrCoefs() + _p_front[n], &fop); int dim2 = _p_front[n] - _p_front[n - 1]; if (isTrans) { ColumnMatrix &lower = _lower_blocks[n]; blas_gemv(CblasNoTrans, dim2, dim1, _none, lower.addrCoefs(), lower.nbRows(), xx.addrCoefs() + _p_front[n], 1, _one, xx.addrCoefs() + _p_front[n - 1], 1); } else { ColumnMatrix &upper = _upper_blocks[n]; blas_gemv(CblasNoTrans, dim2, dim1, _none, upper.addrCoefs(), upper.nbRows(), xx.addrCoefs() + _p_front[n], 1, _one, xx.addrCoefs() + _p_front[n - 1], 1); } #endif } // loop : n { const int dim1 = _p_front[1] - _p_front[0]; #ifdef SPARSE_OFFDIAG full_fwbw_single(isTrans, dim1, _num_null[0], _diag_blocks[0].addrCoefs(), dim1, xx.addrCoefs() + _p_front[0]); #else full_bw_single(isTrans, dim1, _num_null[0], _diag_blocks[0].addrCoefs(), dim1, xx.addrCoefs() + _p_front[0], &fop); #endif } if (nscol > 0) { // alpha = -1.0; beta = 1.0 if (isTrans) { blas_gemv(CblasNoTrans, _dim, nscol, _none, _a21.addrCoefs(), _dim, y.addrCoefs(), 
1, _one, xx.addrCoefs(), 1); } else { blas_gemv(CblasNoTrans, _dim, nscol, _none, _a12.addrCoefs(), _dim, y.addrCoefs(), 1, _one, xx.addrCoefs(), 1); } for (int i = 0; i < nscol; i++) { xx[_list_schur[i]] = y[i]; } } if (flag_new2old) { for (int i = 0; i < _dim; i++) { x_[_new2old[i]] = xx[_permute_ginv[i]]; } } else { for (int i = 0; i < _dim; i++) { x_[i] = xx[_permute_ginv[i]]; } } // delete [] xx; } template void TridiagBlockMatrix:: SolveSingle(const bool flag_new2old, const bool isTrans, double *x, const int nscol_); template void TridiagBlockMatrix:: SolveSingle(const bool flag_new2old, const bool isTrans, quadruple *x, const int nscol_); template void TridiagBlockMatrix, double>:: SolveSingle(const bool flag_new2old, const bool isTrans, complex *x, const int nscol_); template void TridiagBlockMatrix, quadruple>:: SolveSingle(const bool flag_new2old, const bool isTrans, complex *x, const int nscol_); template void TridiagBlockMatrix:: SolveSingle(const bool flag_new2old, const bool isTrans, float *x, const int nscol_); template void TridiagBlockMatrix, float>:: SolveSingle(const bool flag_new2old, const bool isTrans, complex *x, const int nscol_); // template void TridiagBlockMatrix::ForwardUpper(bool isTransposed, int ncol, ColumnMatrix &b, vector& i0, vector &dscale) { int ncol0; double fop; #ifdef STORE_WHOLE_FACTORIZED blas_trsm(CblasLeft, (isTransposed ? CblasUpper : CblasLower), (isTransposed ? 
CblasTrans : CblasNoTrans), CblasUnit, _dim, ncol, _one, _factorized_whole.addrCoefs(), _dim, b.addrCoefs(), b.nbRows()); for (int i = 0; i < _dim; i++) { dscale[i] = _factorized_whole(i, i); } #else // #ifdef STORE_WHOLE_FACTORIZED ColumnMatrix zz; vector i0_local; i0_local.resize(ncol); zz.init(_maxdim, ncol); // zz.ZeroClear(); for (int n = 0; n < (_nfront - 1); n++) { const int dim1 = _p_front[n + 1] - _p_front[n]; ColumnMatrix &diag = _diag_blocks[n]; ncol0 = ncol; for (int j = 0; j < ncol; j++) { int itmp = std::max((int)(i0[j] - _p_front[n]), 0); if (i0[j] >= _p_front[n + 1]) { ncol0 = j; break; } i0_local[j] = itmp; } if (ncol0 > 0) { full_fw_multiprofile(isTransposed, dim1, _num_null[n], ncol0, diag.addrCoefs(), dim1, b.addrCoefs() + _p_front[n], _dim, i0_local, &fop); } #ifdef SPARSE_OFFDIAG // dddiag_times_s1 for (int j = 0; j < ncol0; j++) { for (int i = 0; i < i0_local[j]; i++) { zz(i, j) = _zero; } for (int i = i0_local[j]; i < dim1; i++) { zz(i, j) = b(i + _p_front[n], j) * diag(i, i); } fop += (double)(dim1 - i0[j]); } if (ncol0 > 0) { full_bw_multi(isTransposed, dim1, _num_null[n], diag.addrCoefs(), dim1, ncol0, zz.addrCoefs(), _maxdim, &fop); } // sparse matrix * dense matrix if (isTransposed) { for (int j = 0; j < ncol0; j++) { for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int i1 = _permute_ginv[i] - _p_front[n]; for (int k = _p_upper[i]; k < _ptRows[i + 1]; k++) { const int ii = _permute_ginv[_indCols[k]]; b(ii, j) -= _coef[_indVals[k]] * zz(i1, j); } } } } else { for (int j = 0; j < ncol0; j++) { for (int i = _p_front[n + 1]; i < _p_front[n + 2]; i++) { const int ii = _permute_ginv[i]; for (int k = _ptRows[i]; k < _p_diag[i]; k++) { const int i1 = _permute_ginv[_indCols[k]] - _p_front[n]; b(ii, j) -= _coef[_indVals[k]] * zz(i1, j); } } } } #else // #ifdef SPARSE_OFFDIAG int dim2 = _p_front[n + 2] - _p_front[n + 1]; if (isTransposed) { ColumnMatrix &upper = _upper_blocks[n + 1]; blas_gemm(CblasTrans, CblasNoTrans, dim2, ncol0, 
dim1, _none, upper.addrCoefs(), upper.nbRows(), b.addrCoefs() + _p_front[n], b.nbRows(), _one, b.addrCoefs() + _p_front[n + 1], b.nbRows()); } else { ColumnMatrix &lower = _lower_blocks[n + 1]; blas_gemm(CblasTrans, CblasNoTrans, dim2, ncol0, dim1, _none, lower.addrCoefs(), lower.nbRows(), b.addrCoefs() + _p_front[n], b.nbRows(), _one, b.addrCoefs() + _p_front[n + 1], b.nbRows()); } #endif // #ifdef SPARSE_OFFDIAG } // loop : n { int n = (_nfront - 1); const int dim1 = _p_front[n + 1] - _p_front[n]; ColumnMatrix &diag = _diag_blocks[n]; ncol0 = ncol; for (int j = 0; j < ncol; j++) { int itmp = std::max((int)(i0[j] - _p_front[n]), 0); if (i0[j] >= _p_front[n + 1]) { ncol0 = j; break; } i0_local[j] = itmp; } if (ncol0 > 0) { full_fw_multiprofile(isTransposed, dim1, _num_null[n], ncol0, diag.addrCoefs(), dim1, b.addrCoefs() + _p_front[n], _dim, i0_local, &fop); } } for (int n = 0; n < _nfront; n++) { const int dim1 = _p_front[n + 1] - _p_front[n]; ColumnMatrix &diag = _diag_blocks[n]; for (int i = 0; i < dim1; i++) { dscale[_p_front[n] + i] = diag(i, i); } } zz.free(); #endif // #ifdef STORE_WHOLE_FACTORIZED } template void TridiagBlockMatrix:: ForwardUpper(bool isTransposed, int ncol, ColumnMatrix &b, vector& i0, vector &dscale); template void TridiagBlockMatrix::ForwardUpper(bool isTransposed, int ncol, ColumnMatrix &b, vector& i0, vector &dscale); template void TridiagBlockMatrix, double>:: ForwardUpper(bool isTransposed, int ncol, ColumnMatrix > &b, vector& i0, vector > &dscale); template void TridiagBlockMatrix, quadruple>:: ForwardUpper(bool isTransposed, int ncol, ColumnMatrix > &b, vector& i0, vector > &dscale); template void TridiagBlockMatrix:: ForwardUpper(bool isTransposed, int ncol, ColumnMatrix &b, vector& i0, vector &dscale); template void TridiagBlockMatrix, float>:: ForwardUpper(bool isTransposed, int ncol, ColumnMatrix > &b, vector& i0, vector > &dscale); // void RenumberCSR(const bool shrink_flag, const int dim, const int nnz, vector &b2a, vector 
&a2b, const int *aptrows, const int *aindcols, const int *aindvals, vector &bptrows, vector &bindcols, vector &bindvals, const bool verbose, FILE *fp) { vector *bbindcols, *bbindvals; if (shrink_flag) { bbindcols = new vector; bbindvals = new vector; (*bbindcols).resize(nnz); (*bbindvals).resize(nnz); } else { bbindcols = &bindcols; bbindvals = &bindvals; } bptrows[0] = 0; for (int i = 0; i < dim; i++) { const int ii = b2a[i]; bptrows[i + 1] = bptrows[i] + (aptrows[ii + 1] - aptrows[ii]); } for (int i = 0; i < dim; i++) { const int ii = b2a[i]; list local_indcols; list local_indvals; for (int kk = aptrows[ii]; kk < aptrows[ii + 1]; kk++) { const int jj = aindcols[kk]; const int j = a2b[jj]; const int ll = aindvals[kk]; list::iterator ic = local_indcols.begin(); list::iterator iv = local_indvals.begin(); bool flag = false; for ( ; ic != local_indcols.end(); ++ic, ++iv) { if (*ic > j) { local_indcols.insert(ic, j); local_indvals.insert(iv, ll); flag = true; break; } } if (flag == false) { local_indcols.push_back(j); local_indvals.push_back(ll); } } // loop : kk { list::const_iterator ic = local_indcols.begin(); list::const_iterator iv = local_indvals.begin(); int k = bptrows[i]; for ( ; k < bptrows[i + 1]; k++, ++ic, ++iv) { (*bbindcols)[k] = *ic; (*bbindvals)[k] = *iv; } } local_indcols.clear(); local_indvals.clear(); } // loop : i if (shrink_flag) { int nnz1 = bptrows[dim]; if (nnz1 != bindcols.size()) { fprintf(fp, "%s %d : %d != %d\n", __FILE__, __LINE__, nnz1, (int)bindcols.size()); } for (int i = 0; i < nnz1; i++) { bindcols[i] = (*bbindcols)[i]; bindvals[i] = (*bbindvals)[i]; } (*bbindcols).clear(); (*bbindvals).clear(); delete bbindcols; delete bbindvals; } } void RenumberCSR(const int dim, vector &b2a, vector &a2b, const int *aptrows, const int *aindcols, vector &bptrows, vector &bindcols, const bool verbose, FILE *fp) { // const int dim = b2a.size(); bptrows[0] = 0; for (int i = 0; i < dim; i++) { const int ii = b2a[i]; bptrows[i + 1] = bptrows[i] + 
(aptrows[ii + 1] - aptrows[ii]); } for (int i = 0; i < dim; i++) { const int ii = b2a[i]; list local_indcols; for (int kk = aptrows[ii]; kk < aptrows[ii + 1]; kk++) { const int jj = aindcols[kk]; const int j = a2b[jj]; list::iterator ic = local_indcols.begin(); bool flag = false; for ( ; ic != local_indcols.end(); ++ic) { if (*ic > j) { local_indcols.insert(ic, j); flag = true; break; } } if (flag == false) { local_indcols.push_back(j); } } // loop : kk { list::const_iterator ic = local_indcols.begin(); int k = bptrows[i]; for ( ; k < bptrows[i + 1]; k++, ++ic) { bindcols[k] = *ic; } } local_indcols.clear(); } // loop : i } void TridiagStruct(vector &ptrow, vector &indcols, const int nfront, vector &pfront, vector &p_diag, vector &p_upp) //, vector &p_dia_blo) { for (int n = 0; n < nfront; n++) { const int i1 = pfront[n]; const int i2 = pfront[n + 1]; for (int i = i1; i < i2; i ++) { for (int k = ptrow[i]; k < ptrow[i + 1]; k++) { if (indcols[k] >= i1) { p_diag[i] = k; break; } } } // loop : i } for (int n = 0; n < nfront; n++) { const int i1 = pfront[n]; const int i2 = pfront[n + 1]; for (int i = i1; i < i2; i ++) { bool flag = false; for (int k = ptrow[i]; k < ptrow[i + 1]; k++) { if (indcols[k] >= i2) { p_upp[i] = k; flag = true; break; } } if (flag == false) { p_upp[i] = ptrow[i + 1]; } } // loop : i } } void GenPermuteOffdiag(const int pfront0, const int pfront1, const int pfront2, vector &ptrow, vector &ptdiag, vector &indcols, const int *permute_diag, int *permute_diag_inv, int *permute_offdiag, int *permute_offdiag_inv, vector &i0) { const int dim1 = pfront1 - pfront0; const int dim2 = pfront2 - pfront1; vector i0_loc; i0_loc.resize(dim2); i0.resize(dim2); for (int i = 0; i < dim1; i++) { permute_diag_inv[permute_diag[i]] = i; } list perm_offdg; for (int j = 0; j < dim2; j++) { const int jj = j + pfront1; int itmp = dim1; for (int k = ptrow[jj]; k < ptdiag[jj]; k++) { const int i = permute_diag_inv[indcols[k] - pfront0]; itmp = itmp > i ? 
i : itmp; } bool flag = false; i0_loc[j] = itmp; for (list::iterator it = perm_offdg.begin(); it != perm_offdg.end(); ++it) { if (itmp < i0_loc[(*it)]) { perm_offdg.insert(it, j); flag = true; break; } } if (flag == false) { perm_offdg.push_back(j); } } // loop : j { // copy list to vector list::const_iterator jt = perm_offdg.begin(); int j = 0; for (; j < dim2; j++, ++jt) { permute_offdiag[j] = (*jt); i0[j] = i0_loc[(*jt)]; } for (int i = 0 ; i < dim2; i++) { permute_offdiag_inv[permute_offdiag[i]] = i; } } perm_offdg.clear(); i0_loc.clear(); } void GenPermuteUpper(const int nrow, vector &remap_eqn, const int ncol, const int *ptrow, const int *indcols, const int nfront, vector &p_fronts, vector &new2old, vector &permute_diag, vector &permute_diag_inv, vector &permute_upper, vector &permute_upper_inv, vector &i0, const bool verbose, FILE *fp) { vector colflag; vector permute_tmp; colflag.resize(ncol, false); permute_diag_inv.resize(nrow); permute_tmp.resize(nrow); permute_upper.resize(ncol); permute_upper_inv.resize(ncol); i0.resize(ncol); for (int n = 0; n < nfront; n++) { for (int i = p_fronts[n]; i < p_fronts[n + 1]; i++) { permute_tmp[i] = new2old[permute_diag[i] + p_fronts[n]]; } } #ifdef PERMUTE_UPPER int l = 0; for (int i = 0; i < nrow; i++) { const int ii = permute_tmp[i]; for (int k = ptrow[ii]; k < ptrow[ii + 1]; k++) { const int jj = indcols[k]; if (colflag[jj] == false) { colflag[jj] = true; i0[l] = i; permute_upper[l++] = jj; } } } for(int j = 0; j < ncol; j++) { if (colflag[j] == false) { i0[l] = nrow; permute_upper[l++] = j; } } for (int j = 0; j < ncol; j++) { permute_upper_inv[permute_upper[j]] = j; } #else for(int j = 0; j < ncol; j++) { permute_upper[j] = j; permute_upper_inv[j] = j; } #endif for (int i = 0; i < nrow; i++) { permute_diag_inv[remap_eqn[permute_tmp[i]]] = i; } colflag.clear(); permute_tmp.clear(); } template void TridiagBlockMatrix::extract_column(const int jcol, T *dcol) { for (int i = 0; i < _dim; i++) { for (int k = _ptRows[i]; 
k < _ptRows[i + 1]; k++) { if (_indCols[k] == jcol) { dcol[i] = _coef[_indVals[k]]; break; } } } } template void TridiagBlockMatrix:: extract_column(const int jcol, double *dcol); template void TridiagBlockMatrix:: extract_column(const int jcol, quadruple *dcol); template void TridiagBlockMatrix, double>:: extract_column(const int jcol, complex *dcol); template void TridiagBlockMatrix, quadruple>:: extract_column(const int jcol, complex *dcol); template void TridiagBlockMatrix:: extract_column(const int jcol, float *dcol); template void TridiagBlockMatrix, float>:: extract_column(const int jcol, complex *dcol); // template void TridiagBlockMatrix::extract_row(const int irow, T *drow) { for (int k = _ptRows[irow]; k < _ptRows[irow + 1]; k++) { const int jj = _indCols[k]; drow[jj] = _coef[_indVals[k]]; } } template void TridiagBlockMatrix:: extract_row(const int irow, double *drow); template void TridiagBlockMatrix::extract_row(const int irow, quadruple *drow); template void TridiagBlockMatrix, double>:: extract_row(const int irow, complex *drow); template void TridiagBlockMatrix, quadruple>:: extract_row(const int irow, complex *drow); template void TridiagBlockMatrix:: extract_row(const int irow, float *drow); template void TridiagBlockMatrix, float>:: extract_row(const int irow, complex *drow); // template void FillBlockSparse(const T *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix &b) { for (int i = 0; i < dim; i++) { const int ii = old2new_i[i]; const int i0 = map_eqn[i]; for (int k = ptrow[i0]; k < ptrow[i0 + 1]; k++) { const int jj = old2new_j[indcols[k]]; b(ii, jj) = coef[indvals[k]]; } } // loop : i } template void FillBlockSparse(const double *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix &b); template void FillBlockSparse(const quadruple *coef, const int dim, 
vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix &b); template void FillBlockSparse >(const complex *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix > &b); template void FillBlockSparse >(const complex *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix > &b); template void FillBlockSparse(const float *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix &b); template void FillBlockSparse >(const complex *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &old2new_i, vector &old2new_j, ColumnMatrix > &b); // bool isnegative(const double x) { return (x < 0.0); } bool isnegative(const quadruple x) { return (x < quadruple(0.0)); } bool isnegative(const complex &x) { return ((x.real() < 0.0) && (x.imag() == 0.0)); } bool isnegative(const complex &x) { return (x.real() < quadruple(0.0)) && (x.imag() == quadruple(0.0)); } bool isnegative(const float x) { return (x < 0.0); } bool isnegative(const complex &x) { return ((x.real() < 0.0) && (x.imag() == 0.0)); } template int TridiagBlockMatrix::NumNegativeDiags() { int count = 0 ; for (int n = 0; n < _nfront; n++) { ColumnMatrix &diag = _diag_blocks[n]; for (int i = _p_front[n]; i < _p_front[n + 1]; i++) { const int ii = i - _p_front[n]; if (isnegative(diag(ii, ii))) { count++; } } } for (int i = 0; i < _nscol; i++) { if (isnegative(_s22(i, i))) { count++; } } return count; } template int TridiagBlockMatrix:: NumNegativeDiags(); template int TridiagBlockMatrix::NumNegativeDiags(); template int TridiagBlockMatrix, double>:: NumNegativeDiags(); template int TridiagBlockMatrix, quadruple>::NumNegativeDiags(); // 
template int TridiagBlockMatrix:: NumNegativeDiags(); template int TridiagBlockMatrix, float>:: NumNegativeDiags(); template void TridiagBlockMatrix::KernelBasis(const bool isTrans, ColumnMatrix &a12) { // const T zero(0.0); // const T none(-1.0); vector v; v.resize(_dim); a12.free(); a12.init(_dim, _n0); a12.ZeroClear(); for (int j = 0; j < _n0; j++) { if (isTrans) { extract_row(_list_elim[j], a12.addrCoefs() + (j * _dim)); } else { extract_column(_list_elim[j], a12.addrCoefs() + (j * _dim)); } for (int i = 0; i < _n0; i++) { a12(_list_elim[i], j) = _zero; } } if (_n0 > 1) { SolveMulti(false, isTrans, _n0, a12); } else { SolveSingle(false, isTrans, a12.addrCoefs()); } for (int j = 0; j < _n0; j++) { for (int i = 0; i < _dim; i++) { v[i] = a12(i, j); } for (int i = 0; i < _n0; i++) { v[_list_elim[i]] = _zero; } v[_list_elim[j]] = _none; for (int i = 0; i < _dim; i++) { a12(_new2old[i], j) = v[i]; } } v.clear(); } template void TridiagBlockMatrix:: KernelBasis(const bool isTrans, ColumnMatrix & a12); template void TridiagBlockMatrix::KernelBasis(const bool isTrans, ColumnMatrix &a12); template void TridiagBlockMatrix, double>:: KernelBasis(const bool isTrans, ColumnMatrix > & a12); template void TridiagBlockMatrix, quadruple >:: KernelBasis(const bool isTrans, ColumnMatrix > & a12); template void TridiagBlockMatrix:: KernelBasis(const bool isTrans, ColumnMatrix & a12); template void TridiagBlockMatrix, float>:: KernelBasis(const bool isTrans, ColumnMatrix > & a12); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/TridiagBlockMatrix.hpp000664 000000 000000 00000027603 14037356732 026554 0ustar00rootroot000000 000000 /*! \file TridiagBlockMatrix.hpp \brief tridiagonal factorization algorithm with Cuthill-McKee \author François-Xavier Roux, ONERA, Laboratoire Jacques-Louis Lions \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _ALGEBRA_TRIDIAGBLOCKMATRIX_HPP_ #define _ALGEBRA_TRIDIAGBLOCKMATRIX_HPP_ #include #include #include "Compiler/blas.hpp" #include "Compiler/elapsed_time.hpp" #include "Algebra/ColumnMatrix.hpp" #include "Algebra/SquareBlockMatrix.hpp" #include "Algebra/CSR_matrix.hpp" // setting for SX-ACE to reduce small tridiagonal blocks during DTRSM // #define SPARSE_OFFDIAG // CSR data is used in offdiag blocks of tridiag // #define STORE_WHOLE_FACTORIZED // setting for superscalar CPUs #define PERMUTE_UPPER using std::vector; using std::list; template class TridiagBlockMatrix { public: TridiagBlockMatrix(int dim, int size_b1, bool isSymmetric, const int nb, const bool verbose, FILE *fp) : _diag_block_alloc_status(false), _diag_block_alloc_status_high(false), _nb(nb), _verbose(verbose), _fp(fp) { init(dim, size_b1, isSymmetric); } void init(int dim, int size_b1, bool isSymmetric) { _dim = dim; _size_b1 = size_b1; _isSymmetric = isSymmetric; } ~TridiagBlockMatrix() { free(); if(_diag_block_alloc_status) { delete [] _diag_blocks; #ifndef SPARSE_OFFDIAG delete [] _upper_blocks; // 27 Nov.2015 delete [] _lower_blocks; // 27 Nov.2015 #endif #ifdef STORE_WHOLE_FACTORIZED _factorized_whole.free(); #endif _diag_block_alloc_status = false; } } void free() { for (int n = 0; n < _nfront; n++) { _diag_blocks[n].free(); #ifndef SPARSE_OFFDIAG _upper_blocks[n].free(); // 27 Nov.2015 _lower_blocks[n].free(); // 27 Nov.2015 #endif } _a21.free(); _a12.free(); _s22.free(); } bool isSym() const { return 
_isSymmetric; } void SymbolicFact(const int color, const int color_max, int *color_mask, const int dim_, const int nnz, const int *prow_, const int *indcols_, const int *indvals_); void NumericFact(const T* coef, const double eps_pivot, double *pivot, const bool kernel_detection, const bool higher_precision, const int dim_aug_kern, const U eps_machine, double *nopd); void ComputeSchurComplement(const int nsing, vector &list_sing, ColumnMatrix& a12, ColumnMatrix& a21, ColumnMatrix& a22); void TridiagNumericFact(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, const bool higher_precision = false); void ComputeSchur(const int dim_, int *color_mask, const int ncol, const int *ptrow1, const int *indcols1, const int *indvals1, const int *indvals2, const T *coef, const int size_b1, SquareBlockMatrix & local_s, double *nopd, elapsed_t *tt); void SolveMulti(const bool flag_new2old, const bool isTrans, const int nhrs, ColumnMatrix& x, const int nscol_ = (-1)); void SolveSingle(const bool flag_new2old, const bool isTrans, T* x, const int nscol_ = (-1)); void ForwardUpper(bool isTransposed, int ncol, ColumnMatrix &b, vector& i0, vector &dscale); void extract_column(const int jcol, T *dcol); void extract_row(const int irow, T *drow); void SingularNode(vector &list_sing); int NumNegativeDiags(); // void KernelIndex(vector &list); void KernelBasis(const bool isTrans, ColumnMatrix &a12); int dimension() const {return _dim; } int block_size() const {return _size_b1; } int nrow() const { return _dim; } int rank() const { return (_dim - _n0); } int nsing() const { return _n0; } bool detected() const { return _detected; } double nop() const {return _nop; } bool diag_block_alloc_status() const {return _diag_block_alloc_status; } bool diag_block_alloc_status_high() const { return _diag_block_alloc_status_high; } int maxdim() const { return _maxdim; } int nnz() const { return _nnz; } int nscol() const 
{return _nscol; } int Nfront() const {return _nfront; } vector &getPtRows() { return _ptRows; } vector &getIndCols() { return _indCols; } vector &getIndVals() { return _indVals; } vector &getNew2old() { return _new2old; } vector &getP_front() { return _p_front; } vector &getPermute() { return _permute; } vector &getPermute_ginv() { return _permute_ginv; } vector &getP_diag() { return _p_diag; } vector &getP_upper() { return _p_upper; } vector &getList_schur() { return _list_schur; } vector &getList_elim() { return _list_elim; } vector &getNum_null() { return _num_null; } void setCoef(const T *coef) { _coef = coef; } ColumnMatrix &getA12() { return _a12; } ColumnMatrix &getA21() { return _a21; } ColumnMatrix &getS22() { return _s22; } void setNfront(int nfront) { _nfront = nfront; } void setMaxdim(int maxdim) { _maxdim = maxdim; } void setNnz(int nnz) { _nnz = nnz; } void setNop(double nop) { _nop = nop; } void setDiag_block_alloc_status(int diag_block_alloc_status) { _diag_block_alloc_status = diag_block_alloc_status; } void setDiag_block_alloc_status_high(int diag_block_alloc_status) { _diag_block_alloc_status_high = diag_block_alloc_status; } void setNscol(int nscol) { _nscol = nscol; } void setNsing(int n0) { _n0 = n0; } void setDetected(bool detected) { _detected = detected; } // void setNb(int nb) { _nb = nb; } // for debug int getNb() { return _nb; } // for debug ColumnMatrix*& getaddrDiagMatrix() { return _diag_blocks; } #ifndef SPARSE_OFFDIAG ColumnMatrix*& getaddrLowerMatrix() { return _lower_blocks; } ColumnMatrix*& getaddrUpperMatrix() { return _upper_blocks; } #endif private: int _nb; bool _verbose; FILE *_fp; bool _isSymmetric; const T* _coef; int _color; int _color_max; int _dim; int _size_b1; int _nfront; int _maxdim; int _nnz; double _nop; vector _ptRows; // prow vector _indCols; // column_numb vector _indVals; // column_numb vector _new2old; vector _p_front; vector _permute; vector _permute_ginv; vector _p_diag; vector _p_upper; ColumnMatrix* 
_diag_blocks; #ifndef SPARSE_OFFDIAG ColumnMatrix* _upper_blocks; // 27 Nov.2015 ColumnMatrix* _lower_blocks; // 27 Nov.2015 #endif #ifdef STORE_WHOLE_FACTORIZED ColumnMatrix _factorized_whole; #endif bool _diag_block_alloc_status; bool _diag_block_alloc_status_high; int _nscol; // number of postponed entries int _n0; // kernel dimension bool _detected; ColumnMatrix _a12; ColumnMatrix _a21; ColumnMatrix _s22; vector _list_schur; vector _list_elim; // ? vector _num_null; static const T _one; // (1.0); static const T _zero; // (0.0); static const T _none; // (-1.0); }; void RenumberCSR(const bool shrink_flag, const int dim, const int nnz, vector &b2a, vector &a2b, const int *aptrows, const int *aindcols, const int *aindvals, vector &bptrows, vector &bindcols, vector &bindvals, const bool verbose, FILE *fp); void RenumberCSR(const int dim, vector &b2a, vector &a2b, const int *aptrows, const int *aindcols, vector &bptrows, vector &bindcols, const bool verbose, FILE *fp); void TridiagStruct(vector &ptrow, vector &indcols, const int nfront, vector &pfront, vector &p_diag, vector &p_upp); void GenPermuteOffdiag(const int pfront0, const int pfront1, const int pfront2, vector &ptrow, vector &ptdiag, vector &indcols, const int *permute_diag, int *permute_diag_inv, int *permute_offdiag, int *permute_offdiag_inv, vector &i0); void GenPermuteUpper(const int nrow, vector &remap_eqn, const int ncol, const int *ptrow, const int *indcols, const int n_front, vector &p_fronts, vector &old2new, vector &permute_diag, vector &permute_diag_inv, vector &permute_upper, vector &permute_upper_inv, vector &i0, const bool verbose, FILE *fp); template void FillBlockSparse(const T *coef, const int dim, vector& map_eqn, const int *ptrow, const int *indcols, const int *indvals, vector &new2old_j, vector &old2new_j, ColumnMatrix &b); template void TridiagNumericFact_(double *pivot, const double eps_pivot, const int dim_aug_kern, ColumnMatrix *diag_block_save, vector &num_null_aug, double *nopd, 
const bool higher_precision, bool &_verbose, FILE* &_fp, int &_dim, bool &_isSymmetric, int &_nfront, vector &_p_front, vector &_p_diag, vector &_p_upper, const T* &_coef, vector &_ptRows, vector &_indCols, // column_numb vector &_indVals, // column_numb vector &_num_null, vector &_permute, vector &_permute_ginv, ColumnMatrix* &_diag_blocks, ColumnMatrix* &_diag_blocks_high, ColumnMatrix* &_upper_blocks, ColumnMatrix* &_lower_blocks, list &_high_blocks #ifdef STORE_WHOLE_FACTORIZED , ColumnMatrix &_factorized_whole #endif ); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Driver/TridiagMatrix.hpp000664 000000 000000 00000011304 14037356732 025570 0ustar00rootroot000000 000000 /*! \file TridiagMatrix.hpp \brief management of threads for factorization and Fw/Bw substitution \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. 
You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// # ifndef _DRIVER_TRIDIAGMATRIX_ # define _DRIVER_TRIDIAGMATRIX_ #include #include #include "Driver/C_threads_tasks.hpp" #include "Algebra/SquareBlockMatrix.hpp" using std::vector; template class TridiagMatrix { public: TridiagMatrix(int dim, int nnz, int *ptrows, int *indcols, int *indvals, int *remap_eqn, bool isSym, bool isMapped) : _nrow(dim), _nnz(nnz), _isSym(isSym), _isMapped(isMapped) { _ptrows = new int[dim + 1]; _indcols = new int[nnz]; _indvals = new int[nnz]; _remap_eqn = new int[dim]; for (int i = 0; i < (dim + 1); i++) { _ptrows[i] = ptrows[i] + 1; // C to Fortran } for (int i = 0; i < nnz; i++) { _indcols[i] = indcols[i] + 1; // C to Fortran _indvals[i] = indvals[i] + 1; // C to Fortran } for (int i = 0; i < dim; i++) { _remap_eqn[i] = remap_eqn[i]; } _diag = new SquareBlockMatrix; // to keep pivot information } /** Destructor. */ ~TridiagMatrix() { // FORTRAN_DECL(tridiag_free)(_tridiag_sparse); delete [] _ptrows; delete [] _indcols; delete [] _indvals; delete [] _remap_eqn; delete _diag; } int nrow() { return _nrow; } int nnz() { return _nnz; } void setGlobalNonzero(int nnz) { _nnz_global = nnz; } int nnz_global() { return _nnz_global; } int *ptRows() { return _ptrows; } int *indCols() { return _indcols; } int *indVals() { return _indvals; } int *remap_eqn() { return _remap_eqn; } void setCoef(T *coef) { _coef = coef; } T *getCoef() { return _coef; } SquareBlockMatrix* Diag() { return _diag; } bool isSym() const { return _isSym; } bool isMapped() const { return _isMapped; } void setPtr(void *tridiag_sparse) { _tridiag_sparse = tridiag_sparse; } void* &getPtr() {return _tridiag_sparse; } const void* &getPtr() const {return _tridiag_sparse; } private: // Attributs : // Dissection::Tree _btree; int _nrow; int _nnz; int _nnz_global; int *_ptrows; int *_indcols; int *_indvals; int *_remap_eqn; T* _coef; void *_tridiag_sparse; bool _isSym; bool _isMapped; SquareBlockMatrix *_diag; }; #endif 
FreeFem-sources-4.9/3rdparty/dissection/src/Driver/TridiagQueue.cpp000664 000000 000000 00000024602 14037356732 025410 0ustar00rootroot000000 000000 /*! \file TridiagQueue.cpp \brief task management of tridiagonal factorization algorithm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. 
If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #include #include "Driver/TridiagQueue.hpp" #include "Algebra/VectorArray.hpp" #include "Compiler/DissectionIO.hpp" template void TridiagQueue:: generate_queue(TridiagBlockMatrix *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, T *coef) { _dim = dim; _nnz = nnz; _tridiag =tridiag; _isMapped = isMapped; _remap_eqn = new int[_dim]; for (int i = 0; i < dim; i++) { _remap_eqn[i] = remap_eqn[i]; } _ptRows= new int[_dim + 1]; for (int i = 0; i < (dim + 1); i++) { _ptRows[i] = ptUnsymRows[i]; } _indCols = new int[nnz]; _indVals = new int[nnz]; for (int i = 0; i < nnz; i++) { _indCols[i] = indUnsymCol[i]; _indVals[i] = indVals[i]; } _coef = coef; if (_tridiag_solver == false) { diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n", __FILE__, __LINE__); } _allocated = true; } template void TridiagQueue:: generate_queue(TridiagBlockMatrix *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, double *coef); template void TridiagQueue:: generate_queue(TridiagBlockMatrix *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, quadruple *coef); template void 
TridiagQueue, double>:: generate_queue(TridiagBlockMatrix, double> *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, complex *coef); template void TridiagQueue, quadruple>:: generate_queue(TridiagBlockMatrix, quadruple> *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, complex *coef); template void TridiagQueue:: generate_queue(TridiagBlockMatrix *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, float *coef); template void TridiagQueue, float>:: generate_queue(TridiagBlockMatrix, float> *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, complex *coef); // template void TridiagQueue::generate_queue_fwbw() {} // dummy template void TridiagQueue::generate_queue_fwbw(); template void TridiagQueue::generate_queue_fwbw(); template void TridiagQueue, double>::generate_queue_fwbw(); template void TridiagQueue, quadruple>::generate_queue_fwbw(); template void TridiagQueue::generate_queue_fwbw(); template void TridiagQueue, float>::generate_queue_fwbw(); // template void TridiagQueue::exec_symb_fact() { if (_tridiag_solver == false) { diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n", __FILE__, __LINE__); } vector color_mask(_dim, 1); _tridiag->SymbolicFact(1, 1, &color_mask[0], _dim, // color = color_max = 1 _nnz, _ptRows, _indCols, _indVals); color_mask.clear(); } template void TridiagQueue::exec_symb_fact(); template void TridiagQueue::exec_symb_fact(); template void TridiagQueue, double>::exec_symb_fact(); template void TridiagQueue, quadruple>::exec_symb_fact(); template void TridiagQueue::exec_symb_fact(); template void TridiagQueue, float>::exec_symb_fact(); // template void TridiagQueue::exec_num_fact(const int called, const double eps_pivot, 
const bool kernel_detection, const int aug_dim, const U eps_machine, const bool higher_precision) { double pivot; vector list_sing; double nopd; if (_tridiag_solver == false) { diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n", __FILE__, __LINE__); } _tridiag->NumericFact(_coef, eps_pivot, &pivot, kernel_detection, higher_precision, aug_dim, eps_machine, &nopd); } template void TridiagQueue::exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const double eps_machine, const bool higher_precision); template void TridiagQueue:: exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const quadruple eps_machine, const bool higher_precision); template void TridiagQueue, double>:: exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const double eps_machine, const bool higher_precision); template void TridiagQueue, quadruple>:: exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const quadruple eps_machine, const bool higher_precision); template void TridiagQueue::exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const float eps_machine, const bool higher_precision); template void TridiagQueue, float>:: exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection, const int aug_dim, const float eps_machine, const bool higher_precision); // template void TridiagQueue::exec_fwbw(T *x, const int nrhs, bool isTrans) { if (_tridiag_solver == false) { diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n", __FILE__, __LINE__); } const int nrow = _tridiag->nrow(); if (_isMapped) { if (nrhs == 1) { VectorArray xx(nrow); for (int i = 0; i < nrow; i++) { xx[i] = x[_remap_eqn[i]]; } _tridiag->SolveSingle(true, isTrans, xx.addrCoefs()); for (int i = 0; i < nrow; i++) { 
x[_remap_eqn[i]] = xx[i]; } xx.free(); } else { ColumnMatrix xx(nrow, nrhs); for (int n = 0; n < nrhs; n++) { for (int i = 0; i < nrow; i++) { xx(i, n) = x[_remap_eqn[i] + n * nrow]; } } _tridiag->SolveMulti(true, isTrans, nrhs, xx); for (int n = 0; n < nrhs; n++) { for (int i = 0; i < nrow; i++) { x[_remap_eqn[i] + n * nrow] = xx(i, n); } } xx.free(); } } else { if (nrhs == 1) { _tridiag->SolveSingle(true, isTrans, x); } else { ColumnMatrix xx(nrow, nrhs, x, false); _tridiag->SolveMulti(true, isTrans, nrhs, xx); } } } template void TridiagQueue::exec_fwbw(double *x, const int nrhs, bool isTrans); template void TridiagQueue::exec_fwbw(quadruple *x, const int nrhs, bool isTrans); template void TridiagQueue, double>:: exec_fwbw(complex *x, const int nrhs, bool isTrans); template void TridiagQueue, quadruple>:: exec_fwbw(complex *x, const int nrhs, bool isTrans); template void TridiagQueue::exec_fwbw(float *x, const int nrhs, bool isTrans); template void TridiagQueue, float>:: exec_fwbw(complex *x, const int nrhs, bool isTrans); // FreeFem-sources-4.9/3rdparty/dissection/src/Driver/TridiagQueue.hpp000664 000000 000000 00000010363 14037356732 025414 0ustar00rootroot000000 000000 /*! \file TridiagQueue.hpp \brief task mangemanet of tridiagonal factorization algorithm \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Jun. 20th 2014 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// # ifndef _DRIVER_TRIDIAGQUEUE_ # define _DRIVER_TRIDIAGQUEUE_ #include #include #include "Driver/C_threads_tasks.hpp" // #include "Driver/TridiagMatrix.hpp" using std::vector; template class TridiagQueue { public: TridiagQueue(bool tridiag_solver, bool verbose, FILE *fp) : _tridiag_solver(tridiag_solver), _verbose(verbose), _fp(fp) { _allocated = false; } void generate_queue(TridiagBlockMatrix *tridiag, const int dim, const int nnz, const bool isMapped, int *remap_eqn, int *ptUnsymRows, int *indUnsymCol, int *indVals, T *coef); void exec_symb_fact(); void generate_queue_fwbw(); void exec_num_fact(const int called, const double eps_pivot, const bool kernel_detection_all, const int aug_dim, const U eps_machine, const bool higher_precision = false); void exec_fwbw(T *x, const int nrhs, bool isTrans); bool tridiagSolver() { return _tridiag_solver; } ~TridiagQueue() { if (_allocated) { delete [] _remap_eqn; delete [] _ptRows; delete [] _indCols; delete [] _indVals; } } bool tridiag_solver() const { return _tridiag_solver; } bool isMapped() const {return _isMapped; } int dimension() const {return _dim; } int nnz() const {return _nnz; } int *remap_eqn() {return _remap_eqn; } int *ptRows() { return _ptRows; } int *indCols() { return _indCols; } int *indVals() { return _indVals; } private: bool _tridiag_solver; TridiagBlockMatrix *_tridiag; bool _isMapped; int _dim; int _nnz; int *_remap_eqn; int *_ptRows; int *_indCols; int *_indVals; T *_coef; bool _verbose; FILE *_fp; bool _allocated; }; #endif FreeFem-sources-4.9/3rdparty/dissection/src/Interfaces/000775 000000 000000 00000000000 14037356732 023140 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Interfaces/Dissection.hpp000664 000000 000000 00000013631 14037356732 025761 0ustar00rootroot000000 000000 /*! \file Dissection.hpp \brief Fortran style interface named as Dissectino-fortran interface \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 
12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // #ifndef _INTERFACE_CPPAPI_HPP # define _INTERFACE_CPPAPI_HPP #define _COMPILER_OPTIONCOMPILER_H #define FORTRAN_DECL_WL(x_windows,x_linux) x_linux #define FORTRAN_DECL(x) x##_ #define DISSECTION_API #ifdef BLAS_MKL #include #endif #include # include #define DISSECTION_REAL_MATRIX 1 #define DISSECTION_COMPLEX_MATRIX 2 #define DISS_VERSION FORTRAN_DECL_WL(DISS_VERSION, diss_version) #define DISS_INIT FORTRAN_DECL_WL(DISS_INIT, diss_init) #define DISS_FREE FORTRAN_DECL_WL(DISS_FREE, diss_free) #define DISS_NUMERIC_FREE FORTRAN_DECL_WL(DISS_NUMERIC_FREE, diss_numeric_free) #define DISS_S_FACT FORTRAN_DECL_WL(DISS_S_FACT, diss_s_fact) #define DISS_N_FACT FORTRAN_DECL_WL(DISS_N_FACT, diss_n_fact) #define DISS_GET_COLORS FORTRAN_DECL_WL(DISS_GET_COLORS, diss_get_colors) #define DISS_GET_KERN_DIM FORTRAN_DECL_WL(DISS_GET_KERN_DIM, diss_get_kern_dim) #define DISS_GET_NULLPIVOTS FORTRAN_DECL_WL(DISS_GET_NULLPIVOTS, diss_get_nullpivots) #define DISS_GET_SMALLPIVOTS FORTRAN_DECL_WL(DISS_GET_SMALLPIVOTS, diss_get_smallpivots) #define DISS_GET_KERN_VECS FORTRAN_DECL_WL(DISS_GET_KERN_VECS, diss_get_kern_vecs) #define DISS_GET_KERNT_VECS FORTRAN_DECL_WL(DISS_GET_KERNT_VECS, diss_get_kernt_vecs) #define DISS_PROJECT FORTRAN_DECL_WL(DISS_PROJECT, diss_project) #define DISS_SOLVE_1 FORTRAN_DECL_WL(DISS_SOLVE_1, diss_solve_1) #define DISS_SOLVE_N FORTRAN_DECL_WL(DISS_SOLVE_N, diss_solve_n) #define DISS_MATRIX_PRODUCT FORTRAN_DECL_WL(DISS_MATRIX_PRODUCT, diss_matrix_product) #define COMPUTE_DIM_KERN 
FORTRAN_DECL_WL(COMPUTE_DIM_KERN, compute_dim_kern) extern "C" { DISSECTION_API void DISS_VERSION(int *versn, int *reles, int *patch); DISSECTION_API void DISS_INIT(uint64_t &dslv_, const int &called, const int &real_or_complex, const int &nthreads, const int &verbose); DISSECTION_API void DISS_FREE(uint64_t &dslv_); DISSECTION_API void DISS_NUMERIC_FREE(uint64_t &dslv_); DISSECTION_API void DISS_S_FACT(uint64_t &dslv_, const int &dim, const int *ptRows, const int *indCols, const int &sym, const int &decomposer); DISSECTION_API void DISS_N_FACT(uint64_t &dslv_, const double *coefs, const int &scaling, const double &eps_pivot, const int &indefinite_flag); DISSECTION_API void DISS_GET_COLORS(uint64_t &dslv_, int *n); DISSECTION_API void DISS_GET_KERN_DIM(uint64_t &dslv_, int *n0); DISSECTION_API void DISS_GET_NULLPIVOTS(uint64_t &dslv_, int *pivots); DISSECTION_API void DISS_GET_SMALLPIVOTS(uint64_t &dslv_, const int &n, int *pivots); DISSECTION_API void DISS_GET_KERN_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_GET_KERNT_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_PROJECT(uint64_t &dslv_, double *x); DISSECTION_API void DISS_SOLVE_1(uint64_t &dslv_, double *x, const int &projection, const int &trans); DISSECTION_API void DISS_SOLVE_N(uint64_t &dslv_, double *x, const int &nrhs, const int &projection, const int &trans); DISSECTION_API void DISS_MATRIX_PRODUCT(uint64_t &dslv_, const double* x, double* y); DISSECTION_API void COMPUTE_DIM_KERN(int* flag, int* n0, double *a_ini, const int &n, const int &dim_ag, const double &eps, const double &machine_eps0, const int &flag_sym, const int *print_cntrl); } #endif FreeFem-sources-4.9/3rdparty/dissection/src/Interfaces/Make.inc000664 000000 000000 00000005617 14037356732 024521 0ustar00rootroot000000 000000 #! \file Makefile # \brief source files in Driver/ directory and depending libraries # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Feb. 
29th 2016 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. 
# # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # all: fortranAPI.o CppAPI.o .cpp.o: $(CXX) $(CCFLAGS) -c $< -o $@ fortranAPI.o: fortranAPI.cpp $(CXX) $(CCFLAGS) -DDISSECTION_FORTRAN -c $< -o $@ CppAPI.o: fortranAPI.cpp $(CXX) $(CCFLAGS) -c $< -o CppAPI.o %.d: %.cpp $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.o *.so core *.d *.mod *.a FreeFem-sources-4.9/3rdparty/dissection/src/Interfaces/fortranAPI.cpp000664 000000 000000 00000037504 14037356732 025662 0ustar00rootroot000000 000000 /*! \file fortranAPI.cpp \brief Fortran style interface named as Dissection-fortran interface \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #include #include #include #include #ifdef DISSECTION_FORTRAN #include "Interfaces/fortranAPI.h" #else #include "Interfaces/Dissection.hpp" #endif #include "Driver/DissectionSolver.hpp" struct dissection_solver_ptr { int real_or_complex; DissectionSolver *rptr; DissectionSolver, double> *cptr; DissectionSolver *rqtr; DissectionSolver *rqtr_fwbw; DissectionSolver, quadruple> *cqtr; DissectionSolver, double, complex, quadruple> *cqtr_fwbw; FILE *fp; bool verbose; int called; #ifdef BLAS_MKL int mkl_num_threads; #endif int symbolic; int numeric; }; DISSECTION_API void DISS_VERSION(int *versn, int *reles, int *patch) { *versn = DISSECTION_VERSION; *reles = DISSECTION_RELEASE; *patch = DISSECTION_PATCHLEVEL; } DISSECTION_API void DISS_INIT(uint64_t &dslv_, const int &called, const int &real_or_complex, const int &nthreads, const int &verbose) { int num_threads; dissection_solver_ptr *dslv; dslv_ = (uint64_t)new dissection_solver_ptr; dslv = (dissection_solver_ptr *)dslv_; dslv->real_or_complex = real_or_complex; dslv->called = called; dslv->symbolic = 0; dslv->numeric = 0; { int pid = (int)getpid(); char fname[256]; if (verbose > 0) { dslv->verbose = true; } else { dslv->verbose = false; } #if 1 if (dslv->verbose) { fprintf(stderr, "pid = %d\n", pid); sprintf(fname, "dissection.%04d.%04d.log", pid, called); dslv->fp = fopen(fname, "a"); } else { dslv->fp = stderr; } #else dslv->fp = stderr; #endif } if (dslv->verbose) { fprintf(dslv->fp, "%s %d : diss_init : called = %d\n", __FILE__, __LINE__, dslv->called); } // _called++; // counter for dumping matrix data to debug #ifdef BLAS_MKL if (getenv("MKL_NUM_THREADS")) { sscanf(getenv("MKL_NUM_THREADS"), "%d", &dslv->mkl_num_threads); if (dslv->verbose > 0) { fprintf(dslv->fp, "environmental variable MKL_NUM_THREADS = %d\n", dslv->mkl_num_threads); } } else { dslv->mkl_num_threads = mkl_get_max_threads(); } if (dslv->verbose) { fprintf(dslv->fp, "MKL_NUM_THREADS = %d\n", dslv->mkl_num_threads); } #endif if (nthreads == 
(-1)) { if (getenv("NTHREADS")) { sscanf(getenv("NTHREADS"), "%d", &num_threads); } else { num_threads = 1; } } if (nthreads > 0) { num_threads = nthreads; } { switch(real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr = new DissectionSolver(num_threads, (verbose != 0 ? true : false), dslv->called, dslv->fp); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr = new DissectionSolver, double>(num_threads, (verbose != 0 ? true : false), dslv->called, dslv->fp); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d : unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } } DISSECTION_API void DISS_FREE(uint64_t &dslv_) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: delete dslv->rptr; break; case DISSECTION_COMPLEX_MATRIX: delete dslv->cptr; break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d : unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } delete dslv; dslv_ = (uint64_t)NULL; // _called--; // if ((_called == 0) && (dslv->fp != stderr)) { if (dslv->fp != stderr) { fclose(dslv->fp); } #ifdef VECLIB unsetenv("VECLIB_MAXIMUM_THREADS"); #endif } DISSECTION_API void DISS_NUMERIC_FREE(uint64_t &dslv_) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->NumericFree(); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->NumericFree(); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_S_FACT(uint64_t &dslv_, const int &dim, const int *ptRows, const int *indCols, const int &sym, const int &decomposer) { // sym = 1 : symmetric with upper // = 0 : unsymmetric, // = 3 : symmetric with lower // decomposer = 0 : SCOTCH // = 1 : METIS // = 2 : TRIDAIG without nested bisection dissection_solver_ptr *dslv = 
(dissection_solver_ptr *)dslv_; // int num_levels; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->SymbolicFact(dim, (int *)ptRows, (int *)indCols, (bool)(sym % 2 == 1), (bool)((sym / 2) == 0), (bool)((sym / 4) == 1), decomposer); // using default parameter break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->SymbolicFact(dim, (int *)ptRows, (int *)indCols, (bool)(sym % 2 == 1), (bool)((sym / 2) == 0), (bool)((sym / 4) == 1), decomposer); // using default parameter break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } if (dslv->verbose) { fprintf(dslv->fp, "%s:%d Dissection::SymbolicFact done\n", __FILE__, __LINE__); } dslv->symbolic++; } DISSECTION_API void DISS_N_FACT(uint64_t &dslv_, const double *coefs, const int &scaling, const double &eps_pivot, const int &indefinite_flag) { // scaling = 0 : without scaling // 1 : 1/sqrt(a_ii) or 1/sqrt(max|a_ij|) // 2 : 1/sqrt(a_ii) or Schur complement corresponding to diagonal // kernel_detection_all (for KKT type) // eps_pivot = 1.0e-2 : threshold of pivot, ratio of contiguous diagonal // entries with absolute value // indefinite_flag = 1 : indefinite -> kernel_detection_all = false // indefinite_flag = 0 : semi-definite -> kernel_detection_all = true bool kernel_detection_all = indefinite_flag == 0 ? 
true : false; // FILE *fout; dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; #ifdef BLAS_MKL mkl_set_num_threads(1); #endif #ifdef VECLIB setenv("VECLIB_MAXIMUM_THREADS", "1", true); #endif // dslv->NumericFree(); // for debugging : 20 Nov.2013 switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->NumericFact(dslv->numeric, (double *)coefs, scaling, eps_pivot, kernel_detection_all); if (dslv->rptr->getFactorized() == false) { dslv->rptr->SaveMMMatrix(dslv->called, coefs); } break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->NumericFact(dslv->numeric, (complex *)coefs, scaling, eps_pivot, kernel_detection_all); break; } #ifdef BLAS_MKL mkl_set_num_threads(dslv->mkl_num_threads); #endif if (dslv->verbose) { fprintf(dslv->fp, "%s %d : Dissection::NumericFact done : %d\n", __FILE__, __LINE__, dslv->numeric); } dslv->numeric++; } DISSECTION_API void DISS_GET_COLORS(uint64_t &dslv_, int *n) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: *n = dslv->rptr->GetMaxColors(); break; case DISSECTION_COMPLEX_MATRIX: *n = dslv->cptr->GetMaxColors(); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_GET_KERN_DIM(uint64_t &dslv_, int *n0) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: *n0 = dslv->rptr->kern_dimension(); break; case DISSECTION_COMPLEX_MATRIX: if (dslv->verbose) { fprintf(dslv->fp, "%s %d diss_get_kern_dim() for complex is not yet implemented\n", __FILE__, __LINE__); } *n0 = 0; break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_GET_NULLPIVOTS(uint64_t &dslv_, int *pivots) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; 
switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->GetNullPivotIndices(pivots); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->GetNullPivotIndices(pivots); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_GET_SMALLPIVOTS(uint64_t &dslv_, const int &n, int *pivots) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->GetSmallestPivotIndices(n, pivots); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->GetSmallestPivotIndices(n, pivots); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_GET_KERN_VECS(uint64_t &dslv_, double *vec) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->GetKernelVectors(vec); break; case DISSECTION_COMPLEX_MATRIX: if (dslv->verbose) { fprintf(dslv->fp, "%s %d diss_get_kern_vecs() for complex is not yet implemented\n", __FILE__, __LINE__); } break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_GET_KERNT_VECS(uint64_t &dslv_, double *vec) { dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->GetTransKernelVectors(vec); break; case DISSECTION_COMPLEX_MATRIX: if (dslv->verbose) { fprintf(dslv->fp, "%s %d diss_get_kern_vecs() for complex is not yet implemented\n", __FILE__, __LINE__); } break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_PROJECT(uint64_t &dslv_, double *x) { dissection_solver_ptr 
*dslv = (dissection_solver_ptr *)dslv_; int n0; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: n0 = dslv->rptr->kern_dimension(); if (n0 > 0) { dslv->rptr->ProjectionImageSingle(x); } break; case DISSECTION_COMPLEX_MATRIX: if (dslv->verbose) { fprintf(dslv->fp, "%s %d diss_project() for complex is not yet implemented\n", __FILE__, __LINE__); } break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_SOLVE_1(uint64_t &dslv_, double *x, const int &projection, const int &trans) { const bool isProj = (bool)(projection == 1); const bool isTrans = (bool)(trans == 1); dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->SolveSingle(x, isProj, isTrans, true); break; case DISSECTION_COMPLEX_MATRIX: fprintf(dslv->cptr->get_filedescriptor(), "Dissection::SolveSingle : %p\n", dslv->cptr); dslv->cptr->SolveSingle((complex *)x, isProj, isTrans, true); // isTrans break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_SOLVE_N(uint64_t &dslv_, double *x, const int &nrhs, const int &projection, const int &trans) { const bool isProj = (bool)(projection == 1); const bool isTrans = (bool)(trans == 1); dissection_solver_ptr *dslv = (dissection_solver_ptr *)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->SolveMulti(x, nrhs, isProj, isTrans, true); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->SolveMulti((complex *)x, nrhs, isProj, isTrans, true); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } DISSECTION_API void DISS_MATRIX_PRODUCT(uint64_t &dslv_, const double* x, double* y) { dissection_solver_ptr *dslv = (dissection_solver_ptr 
*)dslv_; switch(dslv->real_or_complex) { case DISSECTION_REAL_MATRIX: dslv->rptr->SpMV(x, y); break; case DISSECTION_COMPLEX_MATRIX: dslv->cptr->SpMV((complex *)x, (complex *)y); break; default: if (dslv->verbose) { fprintf(dslv->fp, "%s %d unknown matrix data type : %d\n", __FILE__, __LINE__, dslv->real_or_complex); } } } FreeFem-sources-4.9/3rdparty/dissection/src/Interfaces/fortranAPI.h000664 000000 000000 00000013122 14037356732 025315 0ustar00rootroot000000 000000 /*! \file fortranAPI.h \brief Fortran style interface named as Dissecotion-fortran interface \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . 
// #ifndef _INTERFACE_FORTRANAPI_H # define _INTERFACE_FORTRANAPI_H #include "Compiler/OptionCompiler.hpp" #ifdef BLAS_MKL #include #endif #include #include #include #include extern "C" { #define DISSECTION_REAL_MATRIX 1 #define DISSECTION_COMPLEX_MATRIX 2 #define DISS_VERSION FORTRAN_DECL_WL(DISS_VERSION, diss_version) #define DISS_INIT FORTRAN_DECL_WL(DISS_INIT, diss_init) #define DISS_FREE FORTRAN_DECL_WL(DISS_FREE, diss_free) #define DISS_NUMERIC_FREE FORTRAN_DECL_WL(DISS_NUMERIC_FREE, diss_numeric_free) #define DISS_S_FACT FORTRAN_DECL_WL(DISS_S_FACT, diss_s_fact) #define DISS_N_FACT FORTRAN_DECL_WL(DISS_N_FACT, diss_n_fact) #define DISS_GET_COLORS FORTRAN_DECL_WL(DISS_GET_COLORS, diss_get_colors) #define DISS_GET_KERN_DIM FORTRAN_DECL_WL(DISS_GET_KERN_DIM, diss_get_kern_dim) #define DISS_GET_NULLPIVOTS FORTRAN_DECL_WL(DISS_GET_NULLPIVOTS, diss_get_nullpivots) #define DISS_GET_SMALLPIVOTS FORTRAN_DECL_WL(DISS_GET_SMALLPIVOTS, diss_get_smallpivots) #define DISS_GET_KERN_VECS FORTRAN_DECL_WL(DISS_GET_KERN_VECS, diss_get_kern_vecs) #define DISS_GET_KERNT_VECS FORTRAN_DECL_WL(DISS_GET_KERNT_VECS, diss_get_kernt_vecs) #define DISS_PROJECT FORTRAN_DECL_WL(DISS_PROJECT, diss_project) #define DISS_SOLVE_1 FORTRAN_DECL_WL(DISS_SOLVE_1, diss_solve_1) #define DISS_SOLVE_N FORTRAN_DECL_WL(DISS_SOLVE_N, diss_solve_n) #define DISS_MATRIX_PRODUCT FORTRAN_DECL_WL(DISS_MATRIX_PRODUCT, diss_matrix_product) //#define COMPUTE_DIM_KERN FORTRAN_DECL_WL(COMPUTE_DIM_KERN, compute_dim_kern) DISSECTION_API void DISS_VERSION(int *versn, int *reles, int *patch); DISSECTION_API void DISS_INIT(uint64_t &dslv_, const int &called, const int &real_or_complex, const int &nthreads, const int &verbose); DISSECTION_API void DISS_FREE(uint64_t &dslv_); DISSECTION_API void DISS_NUMERIC_FREE(uint64_t &dslv_); DISSECTION_API void DISS_S_FACT(uint64_t &dslv_, const int &dim, const int *ptRows, const int *indCols, const int &sym, const int &decomposer); DISSECTION_API void DISS_N_FACT(uint64_t &dslv_, 
const double *coefs, const int &scaling, const double &eps_pivot, const int &indefinite_flag); DISSECTION_API void DISS_GET_COLORS(uint64_t &dslv_, int *n); DISSECTION_API void DISS_GET_KERN_DIM(uint64_t &dslv_, int *n0); DISSECTION_API void DISS_GET_NULLPIVOTS(uint64_t &dslv_, int *pivots); DISSECTION_API void DISS_GET_SMALLPIVOTS(uint64_t &dslv_, const int &n, int *pivots); DISSECTION_API void DISS_GET_KERN_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_GET_KERNT_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_PROJECT(uint64_t &dslv_, double *x); DISSECTION_API void DISS_SOLVE_1(uint64_t &dslv_, double *x, const int &projection, const int &trans); DISSECTION_API void DISS_SOLVE_N(uint64_t &dslv_, double *x, const int &nrhs, const int &projection, const int &trans); DISSECTION_API void DISS_MATRIX_PRODUCT(uint64_t &dslv_, const double* x, double* y); } #endif FreeFem-sources-4.9/3rdparty/dissection/src/Make.inc000664 000000 000000 00000005354 14037356732 022434 0ustar00rootroot000000 000000 #! \file Make.inc # \brief fundamental setting of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. 
# # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . 
# OBJS = $(SOURCESCPP:%.cpp=%.o) all: $(OBJS) .SUFFIXES: .cpp .cpp.o: $(CXX) $(CCFLAGS) -c $< -o $@ %.d: %.cpp $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.o *.so core *.d *.a FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/000775 000000 000000 00000000000 14037356732 022755 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Defs.inc.Intel000664 000000 000000 00000023530 14037356732 025406 0ustar00rootroot000000 000000 #! \file DissectionSolver.hpp # \brief task mangemanet of dissection algorithm # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Mar. 30th 2012 # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. 
# # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . 
# WORK_DIR = $(HOME)/work/Linux PROJ_DIR = $(WORK_DIR)/dissection PROJ_SRC = $(PROJ_DIR)/src PROJ_LIBDIR = $(PROJ_DIR)/lib UNAME := $(shell uname -s) # define BLAS MKL or ATLAS : be careful with no space after the name of variable # QUAD specifies 128bit floating point arithmetic : DD_REAL by qd library # LOND_DOUBLE on Intel specifies 80bit floating point, which may be enough ifeq ($(UNAME), Darwin) EXT_SHAREDLIB = dylib # BLAS = VECLIB BLAS = MKL COMPILER=LLVM QUAD = DD_REAL endif ifeq ($(UNAME), Linux) EXT_SHAREDLIB = so BLAS = MKL # BLAS = GENERIC COMPILER = INTEL17 # QUAD = FLOAT128 QUAD = DD_REAL endif PARALLEL = NONE MAKE = make -j 8 # DEBUGFLAG = -g -O3 DISSECTION = #DISSECTION = NO_METIS #DISSECTION = NO_SCOTCH # COMPILERBASE = $(COMPILER) VER = $(subst INTEL,,$(COMPILER)) ifneq ($(VER), $(COMPILER)) COMPILERBASE=$(subst $(VER),,$(COMPILER)) endif # ifeq ($(COMPILERBASE), INTEL) CXX = icpc -std=gnu++98 -restrict -fPIC -pthread LD = $(CXX) else ifeq ($(COMPILERBASE), GNU) CXX = g++ -std=c++98 -fPIC -pthread -Drestrict=__restrict LD = $(CXX) LDOPT = $(DEBUGFLAG) -shared MAKE_DEP_OPT = -MM DEBUGFLAG += #-pg -fno-omit-frame-pointer ifeq ($(UNAME), Darwin) SHARED_COMPILER = -framework Accelerate else SHARED_COMPILER = endif else ifeq ($(COMPILERBASE), LLVM) ifeq ($(UNAME), Linux) CXX = clang++-3.9 -std=c++03 -pthread -stdlib=libc++ -fPIC -Drestrict=__restrict LD = $(CXX) else CXX = clang -x c++ -std=c++03 -pthread -stdlib=libc++ -fPIC -Drestrict=__restrict LD = clang -fPIC -Drestrict=__restrict endif MAKE_DEP_OPT = -MM ifeq ($(UNAME), Darwin) LDOPT = $(DEBUGFLAG) -dynamiclib -install_name '$(PROJ_LIBDIR)/libDissection.dylib' -current_version 1.0 else LDOPT = $(DEBUGFLAG) -shared SHARED_COMPILER = endif VER = 17 else $(error COMPILER should be specified as one of INTEL14, GNU, LLVM) endif endif endif ifeq ($(UNAME), Darwin) # LDOPT = $(DEBUGFLAG) -dynamiclib -install_name '$(PROJ_LIBDIR)/libDissection.dylib' endif ifeq ($(UNAME), Linux) LDOPT = $(DEBUGFLAG) 
-shared endif MAKE_DEP_OPT = -MM # # metis and scotch are assumed to be compiled as shared library METIS_INC = $(WORK_DIR)/metis-5.1.0/include SCOTCH_INC = $(WORK_DIR)/scotch_6.0.4/include QD_INC = $(WORK_DIR)/include ifeq ($(UNAME), Darwin) LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0/build/Darwin-x86_64/libmetis endif ifeq ($(UNAME), Linux) LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0/build/Linux-x86_64/libmetis endif LIB_DIR_SCOTCH = $(WORK_DIR)/scotch_6.0.4/lib LIB_DIR_QD = $(WORK_DIR)/lib # ifeq ($(BLAS), MKL) # BLAS_DEF = -DBLAS_MKL -DMKL_ILP64 # 64bit (long long) int for CSR data BLAS_DEF = -DBLAS_MKL # 32bit int ifeq ($(VER), 18) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries_2018/linux/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2018/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2018/linux/compiler/lib/intel64 endif else ifeq ($(VER), 17) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries_2017/linux/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2017/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2017/linux/compiler/lib/intel64 endif ifeq ($(UNAME), Darwin) BLAS_INC = /opt/intel/compilers_and_libraries_2017/mac/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries_2017/mac/mkl/lib LIB_DIR_INTEL = /opt/intel/compilers_and_libraries_2017/mac/lib endif else ifeq ($(VER), 16) ifeq ($(UNAME), Linux) BLAS_INC = /opt/intel/compilers_and_libraries/linux/mkl/include/ LIB_DIR_MKL = /opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/compilers_and_libraries/linux/compiler/lib/intel64 endif else ifeq ($(VER), 15) BLAS_INC = /opt/intel/composer_xe_2015/mkl/include ifeq ($(UNAME), Linux) LIB_DIR_MKL = /opt/intel/composer_xe_2015/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/composer_xe_2015/compiler/lib/intel64 else ifeq ($(UNAME), Darwin) LIB_DIR_MKL = /opt/intel/composer_xe_2015/mkl/lib LIB_DIR_INTEL = 
/opt/intel/composer_xe_2015/lib endif endif else ifeq ($(VER), 14) BLAS_INC = /opt/intel/composer_xe_2013_sp1/mkl/include ifeq ($(UNAME), Linux) LIB_DIR_MKL = /opt/intel/composer_xe_2013_sp1/mkl/lib/intel64 LIB_DIR_INTEL = /opt/intel/composer_xe_2013_sp1/compiler/lib/intel64 else ifeq ($(UNAME), Darwin) LIB_DIR_MKL = /opt/intel/composer_xe_2013_sp1/mkl/lib LIB_DIR_INTEL = /opt/intel/composer_xe_2013_sp1/lib endif endif else $(error MKLverion should be 14, 15, 16, 17 or 18) endif endif endif endif endif # ifeq ($(UNAME), Linux) SHARED_COMPILER = -L$(LIB_DIR_INTEL) -lpthread -lrt SHARED_BLAS = -L$(LIB_DIR_MKL) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # SHARED_BLAS = -L$(LIB_DIR_MKL) -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core ## for 64bit integer else ifeq ($(UNAME), Darwin) SHARED_COMPILER = -L$(LIB_DIR_INTEL) -lc++ -lm #-lintlc -lsvml -lc++ -lm SHARED_BLAS = -Wl,-rpath,$(LIB_DIR_MKL) -L$(LIB_DIR_MKL) -lmkl_intel_lp64 -lmkl_sequential -lmkl_core endif endif # else ifeq ($(BLAS), ATLAS) BLAS_DEF = -DBLAS_ATLAS # for ATLAS BLAS ifeq ($(COMPILER), INTEL) BLAS_INC = /usr/local/atlas/intel/include else BLAS_INC = /usr/local/atlas/include endif else ifeq ($(BLAS), OPENBLAS) BLAS_DEF = -DOPENBLAS OPENBLAS_DIR = $(WORK_DIR)/OpenBLAS-0.2.14 BLAS_INC = $(OPENBLAS_DIR) SHARED_BLAS = -L$(OPENBLAS_DIR) -lopenblas else ifeq ($(BLAS), VECLIB) ifeq ($(UNAME), Darwin) BLAS_DEF = -DVECLIB BLAS_INC = /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers SHARED_COMPILER = -framework Accelerate -lc++ -lm endif else ifeq ($(BLAS), GENERIC) BLAS_DEF = -DBLAS_GENERIC BLAS_INC = .. 
SHARED_BLAS = SHARED_COMPILER = -lc++ -lm else $(error BLAS should be specified as one of MKL, VECLIB, OpenBLAS, GENERIC) endif endif endif endif endif # CCFLAGS = $(DEBUGFLAG) -I$(SCOTCH_INC) $(BLAS_DEF) -I$(BLAS_INC) -I$(PROJ_SRC) # to use pthread.h in case of C++03 CCFLAGS += -DPOSIX_THREADS # -DNO_TO_STRING for C++ compiler without to_string() defined in C++11 ifeq ($(COMPILERBASE), INTEL) CCFLAGS += -DNO_TO_STRING endif ifeq ($(DISSECTION), NO_METIS) CCFLAGS += -DNO_METIS LIB_METIS = else CCFLAGS += -I$(METIS_INC) ifeq ($(UNAME), Linux) LIB_METIS = -Xlinker -rpath=$(LIB_DIR_METIS) -L$(LIB_DIR_METIS) -lmetis else LIB_METIS = -L$(LIB_DIR_METIS) -lmetis endif endif # ifeq ($(QUAD), DD_REAL) CCFLAGS += -D$(QUAD) -I$(QD_INC) ifeq ($(UNAME), Linux) LIB_QD = -Xlinker -rpath=$(LIB_DIR_QD) -L$(LIB_DIR_QD) -lqd else LIB_QD = -L$(LIB_DIR_QD) -lqd endif endif ifeq ($(QUAD), LONG_DOUBLE) CCFLAGS += -D$(QUAD) -DNO_OCTRUPLE endif ifeq ($(QUAD), FLOAT128) CCFLAGS += -DNO_OCTRUPLE LIB_QD = -lquadmath endif # # ifeq ($(UNAME), Darwin) CCFLAGS += #-DGETRUSAGE LIB_SCOTCH = -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr endif ifeq ($(UNAME), Linux) CCFLAGS += -DCLOCK_GETTIME LIB_SCOTCH = -Xlinker -rpath=$(LIB_DIR_SCOTCH) -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr endif FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Defs.inc.SX000664 000000 000000 00000007262 14037356732 024671 0ustar00rootroot000000 000000 #! \file DissectionSolver.hpp # \brief task mangemanet of dissection algorithm # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Mar. 30th 2012 # \date Jul. 12th 2015 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. 
Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . 
# PROJ_DIR = $(HOME)/work/sx-ace/dissection PROJ_SRC = $(PROJ_DIR)/src PROJ_LIBDIR = $(PROJ_DIR)/lib UNAME := $(shell uname -s) # define BLAS MKL or ATLAS : be careful with no space after the name of variable # QUAD specifies 128bit floating point arithmetic : DD_REAL by qd library MAKE = make -j 8 # DEBUGFLAG = #-C sopt # CXX = sxc++ -D_REENTRANT -Cvopt -Tnoauto,used -Kstd=c++98 -Kexceptions #-gv -dir,debug MPICXX = sxmpic++ -D_REENTRANT -Cvopt -Tnoauto,used -Kstd=c++98 -Kexceptions #-gv -dir,d ebug CPP = sxc++ -Pmulti LD = $(CXX) -Pmulti MPILD = $(MPICXX) -Pmulti QUAD = DD_REAL QUAD_FLAG = -D$(QUAD) -DBLAS_QUADRUPLE -DBLAS_QUADRUPLE2 #-DBLAS_GENERIC LDOPT = MODULES = -I../Modules MAKE_DEP_OPT = -M # METIS_INC = $(PROJ_DIR)/metis-5.1.0/include SCOTCH_INC = $(PROJ_DIR)/scotch_6.0.4/include QD_INC = $(PROJ_SRC) LIB_DIR_METIS = $(PROJ_DIR)/metis-5.1.0 LIB_DIR_SCOTCH = $(PROJ_DIR)/scotch_6.0.4/lib LINK_QD = #-L$(LIB_DIR_QD) -lqd LDOPT = $(DEBUGFLAG) # BLAS_DEF = -DSX_ACE_BLAS #-DBLAS_FORTRAN SHARED_COMPILER = -lpthread -lm -f90lib # CCFLAGS = $(DEBUGFLAG) -I$(METIS_INC) -I$(SCOTCH_INC) $(BLAS_DEF) \ -I.. -I$(PROJ_SRC)/Compiler $(CCFLAGS_OMP) $(QUAD_FLAG) -I$(QD_INC) \ -DNO_TO_STRING -DPOSIX_THREADS -DSX_ACE FFLAGS = $(DEBUGFLAG) -I.. $(FFFLAGS_OMP) # FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Defs.inc.Solaris000664 000000 000000 00000006751 14037356732 025755 0ustar00rootroot000000 000000 #! \file Defs.inc.Solaris # \brief task mangemanet of dissection algorithm # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Mar. 30th 2012 # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# # Linking Dissection statically or dynamically with other modules is making # a combined work based on Dissection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see <http://www.gnu.org/licenses/>.
# WORK_DIR = $(HOME)/work/Linux PROJ_DIR = $(WORK_DIR)/dissection PROJ_SRC = $(PROJ_DIR)/src PROJ_INC = $(PROJ_DIR)/src PROJ_LIBDIR = $(PROJ_DIR)/lib EXT_SHAREDLIB = so BLAS = SUNPERF QUAD = DD_REAL PARALLEL = NONE MAKE = gmake -j 8 # DEBUGFLAG = -g -O3 # CC = cc -std=c99 -xrestrict -KPIC -m64 CXX = CC -std=c++03 -xrestrict -KPIC -m64 LD = $(CXX) MAKE_DEP_OPT = -xM LDOPT = $(DEBUGFLAG) -shared -library=sunperf -mt=yes SHARED_COMPILER = # METIS_INC = $(WORK_DIR)/metis-5.1.0/include SCOTCH_INC = $(WORK_DIR)/scotch_5.1.12/include QD_INC = $(WORK_DIR)/include LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0/build/SunOS-i86pc/libmetis/ LIB_DIR_SCOTCH = $(WORK_DIR)/scotch_5.1.12/lib LIB_DIR_QD = $(WORK_DIR)/lib # BLAS_DEF = -D$(BLAS) BLAS_INC = SHARED_BLAS = SHARED_COMPILER = -lm -lpthread SHARED_OPT = -Xlinker -rpath $(LIB_DIR_MKL) # CCFLAGS = $(DEBUGFLAG) -I$(SCOTCH_INC) -I$(METIS_INC) $(BLAS_DEF) -I$(PROJ_INC) $(CCFLAGS_OMP) -D$(QUAD) -I$(QD_INC) -library=sunperf -mt=yes # # CCFLAGS += -DCLOCK_GETTIME -DNO_TO_STRING FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Defs.inc.Windows000664 000000 000000 00000006616 14037356732 025773 0ustar00rootroot000000 000000 #! \file Defs.inc # \brief compiler and option settings for intel compiler on Windows # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Mar. 30th 2012 # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. 
# # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see <http://www.gnu.org/licenses/>.
# WORK_DIR = /Users/asuzuki/Desktop PROJ_DIR = $(WORK_DIR)/dissection PROJ_SRC = $(PROJ_DIR)/src EXT_SHAREDLIB = dll BLAS = MKL # COMPILER = INTEL18 QUAD = DD_REAL MAKE = make.exe # DEBUGFLAG = -EHsc #-O3 DISSECTION = # CXX = icl LD = lib LDOPT = /MACHINE:X64 SHARED_COMPILER = # METIS_INC = $(WORK_DIR)/metis-5.1.0/include SCOTCH_INC = $(WORK_DIR)/scotch_6.0.4/include LIB_DIR_METIS = $(WORK_DIR)/metis-5.1.0 LIB_DIR_SCOTCH = $(WORK_DIR)/scotch_6.0.4/lib BLAS_DEF = -DBLAS_MKL BLAS_INC = .. SHARED_BLAS = SHARED_COMPILER = # CCFLAGS = $(DEBUGFLAG) $(BLAS_DEF) -I$(BLAS_INC) -I$(PROJ_SRC) -I$(SCOTCH_INC) \ -I$(METIS_INC) -D$(QUAD) -DCLOCK_GETTIME # ifeq ($(UNAME), Darwin) CCFLAGS += #-DGETRUSAGE endif ifeq ($(UNAME), Linux) CCFLAGS += -DCLOCK_GETTIME endif LIB_SCOTCH = -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr # LIB_SCOTCH = $(LIB_DIR_SCOTCH)/libscotch.lib LIB_METIS = $(LIB_DIR_METIS)/libmetis.lib LIB_MKL = mkl_rt.lib FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Make.inc.Intel000664 000000 000000 00000005354 14037356732 025406 0ustar00rootroot000000 000000 #! \file Make.inc # \brief fundamental setting of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. 
# # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see <http://www.gnu.org/licenses/>.
# OBJS = $(SOURCESCPP:%.cpp=%.o) all: $(OBJS) .SUFFIXES: .cpp .cpp.o: $(CXX) $(CCFLAGS) -c $< -o $@ %.d: %.cpp $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.o *.so core *.d *.a FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Make.inc.Solaris000664 000000 000000 00000005354 14037356732 025747 0ustar00rootroot000000 000000 #! \file Make.inc # \brief fundamental setting of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. 
The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # OBJS = $(SOURCESCPP:%.cpp=%.o) all: $(OBJS) .SUFFIXES: .cpp .cpp.o: $(CXX) $(CCFLAGS) -c $< -o $@ %.d: %.cpp $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.o *.so core *.d *.a FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Make.inc.Windows000664 000000 000000 00000005374 14037356732 025767 0ustar00rootroot000000 000000 #! \file Make.inc # \brief fundamental setting of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. 
Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see <http://www.gnu.org/licenses/>.
# OBJS = $(SOURCESCPP:%.cpp=%.obj) all: $(OBJS) .SUFFIXES: .cpp .obj .cpp.obj: $(CXX) $(CCFLAGS) -c $< -o $@ #%.d: %.cpp # $(CXX) $(MAKE_DEP_OPT) $(CCFLAGS) $< > $@.$$$$; \ # sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ # rm -f $@.$$$$ # include $(SOURCESCPP:%.cpp=%.d) clean: echo "Cleaning the trash..." @rm -fr *~ *.obj *.so core *.d *.a FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Makefile.Intel000664 000000 000000 00000006565 14037356732 025503 0ustar00rootroot000000 000000 #! \file Makefile # \brief top of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Disssection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. 
The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # include ./Defs.inc # @(cd Utilities; $(MAKE) ) all: @(cd Compiler; $(MAKE) ) @(cd Splitters; $(MAKE) ) @(cd Algebra; $(MAKE) ) @(cd Driver; $(MAKE) ) @(cd Interfaces; $(MAKE) ) $(LD) $(LDOPT) -o libDissection.$(EXT_SHAREDLIB) \ Driver/*.o \ Algebra/*.o \ Splitters/*.o \ Compiler/*.o \ Interfaces/*.o \ $(LIB_METIS) \ $(LIB_SCOTCH) \ $(LIB_QD) \ $(SHARED_BLAS) $(SHARED_COMPILER) mv libDissection.$(EXT_SHAREDLIB) $(PROJ_LIBDIR) clean: @(cd Compiler; make clean) @(cd Splitters; make clean) @(cd Algebra; make clean) @(cd Driver; make clean) @(cd Interfaces; make clean) @rm -fr *~ *.o *.so core *.d distclean: -make clean @rm -rf */*.d @(cd C-test; make clean) tar: @(cd ..; tar -zcvf src.tar.gz lib src/Defs.inc.Intel src/Defs.inc src/Make.inc src/Makefile.Intel src/Makefile src/*/*.cpp src/*/*.hpp src/*/*.h src/*/Makefile src/*/Make.inc src/freefem++-interface/ LICENSE COPYRIGHT README qd-2.3.17-for-LLVM.patch) FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Makefile.SX000664 000000 000000 00000007007 14037356732 024752 0ustar00rootroot000000 000000 #! 
\file Makefile # \brief top of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Dissection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version.
# # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # include ./Defs.inc all: @(cd Compiler; $(MAKE) ) @(cd Splitters; $(MAKE) ) @(cd Algebra; $(MAKE) ) @(cd Driver; $(MAKE) ) @(cd qd; $(MAKE) ) @(cd C-test; $(MAKE) ) $(MPILD) $(LDOPT) -o MM-Dissection C-test/MM-Dissection.o \ Driver/*.o \ Algebra/*.o \ Splitters/*.o \ Compiler/*.o \ qd/*.o \ $(ASLQUAD_DIR)/qgemm.o \ $(ASLQUAD_DIR)/qtrsm.o \ $(ASLQUAD_DIR)/qger.o \ $(ASLQUAD_DIR)/qgemv.o \ $(ASLQUAD_DIR)/qtrsv.o \ $(ASLQUAD_DIR)/qscal.o \ $(ASLQUAD_DIR)/xerbla.o \ $(ASLQUAD_DIR)/lsame.o \ -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr \ -L$(LIB_DIR_METIS) -lmetis \ $(SHARED_COMPILER) clean: @(cd Compiler; make clean) @(cd Splitters; make clean) @(cd Algebra; make clean) @(cd Driver; make clean) @(cd Interfaces; make clean) @(cd qd; make clean) @rm -fr *~ *.o *.so *.d distclean: -make clean @rm -rf */*.d */*.ti @(cd C-test; make clean) tar: @tar cvzf src.tar.gz Defs.inc Make.inc Makefile */*.cpp */*.hpp */*.h */Makefile */Make.inc */*/Makefile FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Makefile.Solaris000664 000000 000000 00000006624 14037356732 026040 0ustar00rootroot000000 000000 #! \file Makefile.Solaris # \brief top of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 
30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Linking Dissection statically or dynamically with other modules is making # a combined work based on Dissection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version.
# # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see . # include ./Defs.inc # @(cd Utilities; $(MAKE) ) all: @(cd ../include ; cd ../src ; tar cfp - `find . -name '*.hpp'` | (cd ../include ; tar xvfp -) ) @(tar cfp - `find . -name '*.h'` | (cd ../include ; tar xvfp -) ) @(cd Compiler; $(MAKE) ) @(cd Splitters; $(MAKE) ) @(cd Algebra; $(MAKE) ) @(cd Driver; $(MAKE) ) @(cd Interfaces; $(MAKE) ) @(cd C-test; $(MAKE) MM-Dissection.o ) $(LD) -library=sunperf -o MM-Dissection \ C-test/MM-Dissection.o \ Driver/*.o \ Algebra/*.o \ Splitters/*.o \ Compiler/*.o \ Interfaces/*.o \ -L$(LIB_DIR_SCOTCH) -lscotch -lscotcherr \ -L$(LIB_DIR_METIS) -lmetis \ -Xlinker -rpath=$(LIB_DIR_QD) -L$(LIB_DIR_QD) -lqd \ $(SHARED_BLAS) $(SHARED_COMPILER) clean: @(cd Compiler; make clean) @(cd Splitters; make clean) @(cd Algebra; make clean) @(cd Driver; make clean) @(cd Interfaces; make clean) @rm -fr *~ *.o *.so core *.d *.mod *.a *.i90 distclean: -make clean @rm -rf */*.d @(cd C-test; make clean) FreeFem-sources-4.9/3rdparty/dissection/src/Makefiles/Makefile.Windows000664 000000 000000 00000006370 14037356732 026054 0ustar00rootroot000000 000000 #! \file Makefile # \brief top of Makefile # \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions # \date Jul. 12th 2015 # \date Nov. 30th 2016 # This file is part of Dissection # # Dissection is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# # Linking Dissection statically or dynamically with other modules is making # a combined work based on Dissection. Thus, the terms and conditions of # the GNU General Public License cover the whole combination. # # As a special exception, the copyright holders of Dissection give you # permission to combine Dissection program with free software programs or # libraries that are released under the GNU LGPL and with independent modules # that communicate with Dissection solely through the Dissection-fortran # interface. You may copy and distribute such a system following the terms of # the GNU GPL for Dissection and the licenses of the other code concerned, # provided that you include the source code of that other code when and as # the GNU GPL requires distribution of source code and provided that you do # not modify the Dissection-fortran interface. # # Note that people who make modified versions of Dissection are not obligated # to grant this special exception for their modified versions; it is their # choice whether to do so. The GNU General Public License gives permission to # release a modified version without this exception; this exception also makes # it possible to release a modified version which carries forward this # exception. If you modify the Dissection-fortran interface, this exception # does not apply to your modified version of Dissection, and you must remove # this exception when you distribute your modified version. # # This exception is an additional permission under section 7 of the GNU # General Public License, version 3 ("GPLv3") # # Dissection is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Dissection. If not, see <http://www.gnu.org/licenses/>.
# include ./Defs.inc MAKE = make.exe all: Dissection.lib MM-Dissection Dissection.lib: @(cd Compiler; $(MAKE) ) @(cd Splitters; $(MAKE) ) @(cd Algebra; $(MAKE) ) @(cd Driver; $(MAKE) ) @(cd Interfaces; $(MAKE) ) @(cd qd; $(MAKE) ) $(LD) $(LDOPT) /out:Dissection.lib \ Driver/*.obj \ Algebra/*.obj \ Splitters/*.obj \ Compiler/*.obj \ Interfaces/*.obj \ qd/*.obj MM-Dissection: @(cd C-test; $(MAKE) ) $(CXX) -Qmkl:sequential -o MM-Dissection \ C-test/MM-Dissection-Cinterface.obj Dissection.lib \ $(LIB_METIS) $(LIB_SCOTCH) $(LIB_MKL) clean: @(cd Compiler; make.exe clean) @(cd Splitters; make clean) @(cd Algebra; make clean) @(cd Driver; make clean) @(cd Interfaces; make clean) @(cd qd; make clean) @rm -fr *~ *.obj distclean: -make clean @rm -rf */*.d @(cd C-test; make clean) FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/000775 000000 000000 00000000000 14037356732 023046 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/BisectionInterConnection.cpp000664 000000 000000 00000044227 14037356732 030524 0ustar00rootroot000000 000000 /*! \file BisectionInterConnetction.cpp \brief Inter connection between off-diag strips \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// // ============================================================== // == Implementation of the Interconnection methods == // ============================================================== # include # include # include # include # include "Splitters/BitManipulations.hpp" # include "Compiler/DebugUtils.hpp" # include "Splitters/BisectionInterConnection.hpp" using namespace Dissection; // ============================================================== // == Strip methods == // ============================================================== Strip::Strip(std::list& indices) { assert(!indices.empty()); /* Next contiguous index. */ unsigned nextContInd; /* First local index is the first element of the list */ begin_dst = indices.front(); /* Which one remove from the list */ indices.pop_front(); nextContInd = begin_dst + 1; while ((!indices.empty()) && (nextContInd == indices.front())) { nextContInd += 1; indices.pop_front(); } width = nextContInd - begin_dst; } // .............................................................. bool Strip::inside(const Strip& T, unsigned& decal) const { bool flg = (begin_dst>=T.begin_dst) && (nextIndice()<=T.nextIndice()); decal = unsigned(T.begin_dst - begin_dst); return flg; } // -------------------------------------------------------------- // ============================================================== // == SetOfStrips methods == // ============================================================== SetOfStrips::SetOfStrips(std::list& indices) : _lst_strips() { unsigned start = 0; while (!indices.empty()) { _lst_strips.push_back(Strip(indices)); Strip& strp = _lst_strips.back(); strp.begin_src = start; start += strp.width; } assert(indices.empty()); } // .............................................................. void SetOfStrips::push_back(const SetOfStrips& strps) { for (const_iterator it = strps.begin(); it != strps.end(); ++it) { _lst_strips.push_back(*it); } } // .............................................................. 
SetOfStrips& SetOfStrips::operator += (const SetOfStrips& strips) { std::list unionOfStrips; const std::list& lstS2= strips._lst_strips; std::list::iterator itS1 = _lst_strips.begin(); std::list::const_iterator itS2 = lstS2.begin(); // Easy cases when current or parameter set of strips // is empty // -------------------------------------------------- if (itS2 == lstS2.end()) return *this; if (itS1 == _lst_strips.end()) { _lst_strips = lstS2; return *this; } // General case, when neither set is empty : // ---------------------------------------- Strip curStrip; unsigned nextCurStripInd; if ((*itS1).begin_dst < (*itS2).begin_dst) { curStrip = *itS1; itS1++; } else { curStrip = *itS2; itS2++; } nextCurStripInd = curStrip.nextIndice(); while ((itS1 != _lst_strips.end()) && (itS2 != lstS2.end() )) { // Cases where we bind strips : Update current strip if ((*itS1).begin_dst<=nextCurStripInd) { unsigned nextS1StripInd = (*itS1).nextIndice(); if (nextS1StripInd>nextCurStripInd) { curStrip.width = nextS1StripInd-curStrip.begin_dst; nextCurStripInd = nextS1StripInd; } itS1++; } else if ((*itS2).begin_dst<=nextCurStripInd) { unsigned nextS2StripInd = (*itS2).nextIndice(); if (nextS2StripInd>nextCurStripInd) { curStrip.width = nextS2StripInd-curStrip.begin_dst; nextCurStripInd = nextS2StripInd; } itS2++; } else { // Case where we can't bind strips. // -------------------------------- // 1. Register current strip : unionOfStrips.push_back(curStrip); // Create new strip : if ((*itS1).begin_dst < (*itS2).begin_dst) { curStrip = *itS1; itS1++; } else { curStrip = *itS2; itS2++; } nextCurStripInd = curStrip.nextIndice(); } }// While (((itS1 != _lst_strips.end())&&(itS2 != lstS2.end())) // One of set of strips is cared in the union. 
// Finish the union computation with the remained strips in // one of set of strips : if (itS1 != _lst_strips.end()) { while ((itS1 != _lst_strips.end()) && ((*itS1).begin_dst<=nextCurStripInd)) { unsigned nextS1StripInd = (*itS1).nextIndice(); if (nextS1StripInd>nextCurStripInd) { curStrip.width = nextS1StripInd-curStrip.begin_dst; nextCurStripInd = nextS1StripInd; } itS1++; } unionOfStrips.push_back(curStrip); while (itS1 != _lst_strips.end()){ unionOfStrips.push_back(*itS1); itS1++; } } if (itS2 != lstS2.end()) { while ((itS2 != lstS2.end()) && ((*itS2).begin_dst<=nextCurStripInd)) { unsigned nextS2StripInd = (*itS2).begin_dst+(*itS2).width; if (nextS2StripInd>nextCurStripInd) { curStrip.width = nextS2StripInd-curStrip.begin_dst; nextCurStripInd = nextS2StripInd; } itS2++; } unionOfStrips.push_back(curStrip); while (itS2 != lstS2.end()){ unionOfStrips.push_back(*itS2); itS2++; } } // Update src indices in union of strips : unsigned start = 0; for (itS1 = unionOfStrips.begin(); itS1 != unionOfStrips.end(); itS1++) { (*itS1).begin_src = start; start += (*itS1).width; } // Copy new list of strips in current strip : _lst_strips = unionOfStrips; return *this; } // ............................................................ unsigned SetOfStrips::numberOfIndices() const { unsigned width = 0U; for (const_iterator it = begin(); it != end(); it++) { width += (*it).width; } return width; } // ............................................................ SetOfStrips::operator std::list() { std::list lst; for (SetOfStrips::iterator itS = begin(); itS != end(); itS++) { for (unsigned l = (*itS).begin_dst; l < (*itS).nextIndice();l++) lst.push_back(l); } return lst; } // ............................................................ 
void SetOfStrips::para_union(const SetOfStrips& strips, SetOfStrips& paraStrips1, SetOfStrips& paraStrips2, SetOfStrips& seqStrips) const { const std::list& lstS1= _lst_strips; const std::list& lstS2= strips._lst_strips; std::list::const_iterator itS1 = lstS1.begin(); std::list::const_iterator itS2 = lstS2.begin(); std::list& lstP1= paraStrips1._lst_strips; std::list& lstP2= paraStrips2._lst_strips; std::list& lstS = seqStrips._lst_strips; // Easy cases when current or parameter set of strips // is empty // -------------------------------------------------- if (itS2 == lstS2.end()) { paraStrips1 = *this; return; } if (itS1 == lstS1.end()) { paraStrips2 = strips; return; } // General case : neither set is empty // ----------------------------------- bool curStripSeq;// Parallel(false) or sequential(true) strip ? unsigned parFromStrip; // Number from which is build the strip unsigned begCurStrip, szCurStrip, endCurStrip; begCurStrip=std::min((*itS1).begin_dst,(*itS2).begin_dst); if ((*itS1).begin_dst == (*itS2).begin_dst) {// First strip is sequential in this case : curStripSeq = true; if ((*itS1).width < (*itS2).width) { szCurStrip = (*itS1).width; itS1++; } else if ((*itS2).width < (*itS1).width) { szCurStrip = (*itS2).width; itS2++; } else { szCurStrip = (*itS1).width; itS1++; itS2++; } lstS.push_back(Strip(begCurStrip,szCurStrip)); } else {// First strip is parallel curStripSeq = false; if ((*itS1).begin_dst < (*itS2).begin_dst) { unsigned endS1 = (*itS1).nextIndice()-1; parFromStrip = 1; if (endS1 < (*itS2).begin_dst) { szCurStrip = (*itS1).width; itS1++; } else szCurStrip = (*itS2).begin_dst - begCurStrip; } else { unsigned endS2 = (*itS2).nextIndice()-1; parFromStrip = 2; if (endS2 < (*itS1).begin_dst) { szCurStrip = (*itS2).width; itS2++; } else szCurStrip = (*itS1).begin_dst - begCurStrip; } if (1 == parFromStrip) lstP1.push_back(Strip(begCurStrip,szCurStrip)); else { assert(2 == parFromStrip); lstP2.push_back(Strip(begCurStrip,szCurStrip)); } } endCurStrip 
= begCurStrip + szCurStrip - 1; while ((itS1 != lstS1.end()) && (itS2 != lstS2.end())) { // Search first indice for the next strip and if it's // sequential or not if ( ((*itS1).begin_dst <= endCurStrip) || ((*itS2).begin_dst <= endCurStrip) ) { // We are in this case : // |---------| <---- itS1 or itS2 // |-----------| <---- itS2 or itS1 // |----| <---- The current union strip // or this case : // |---------| <---- itS1 or itS2 // |----| <---- itS2 or itS1 // |----| <---- The current union strip begCurStrip = endCurStrip+1; // In this case, we switch between parallel and sequential // strips curStripSeq = !curStripSeq; } else { // We are in this case : // |-----| <---- itS1 or itS2 // |---------| <---- itS2 or itS1 // |--| <---- The current union strip begCurStrip= std::min((*itS1).begin_dst,(*itS2).begin_dst); // In this case, the next strip is sequential only // if both strips to union begin with same indices curStripSeq=((*itS1).begin_dst==(*itS2).begin_dst); } // Search size of the next strip : if (curStripSeq) {// Strips S1 and S2 have common indices unsigned endS1 = (*itS1).nextIndice()-1; unsigned endS2 = (*itS2).nextIndice()-1; if (endS1 < endS2) { szCurStrip = endS1-begCurStrip+1; itS1++; } else if (endS2 < endS1) { szCurStrip = endS2-begCurStrip+1; itS2++; } else { szCurStrip = endS1-begCurStrip+1; itS1++; itS2++; } lstS.push_back(Strip(begCurStrip,szCurStrip)); } else {// We care about some part of S1 or S2 // having exclusive indices if ((*itS1).begin_dst <= begCurStrip) {// S1 has some exclusive indices unsigned endS1 = (*itS1).nextIndice()-1; parFromStrip = 1; if (endS1 < (*itS2).begin_dst) { szCurStrip = endS1 - begCurStrip + 1; itS1++; } else szCurStrip = (*itS2).begin_dst - begCurStrip; } else { // S2 has some exclusive indices unsigned endS2 = (*itS2).nextIndice()-1; parFromStrip = 2; if (endS2 < (*itS1).begin_dst) { szCurStrip = endS2 - begCurStrip + 1; itS2++; } else szCurStrip = (*itS1).begin_dst - begCurStrip; } if (1 == parFromStrip) 
lstP1.push_back(Strip(begCurStrip,szCurStrip)); else { assert(2 == parFromStrip); lstP2.push_back(Strip(begCurStrip,szCurStrip)); } } endCurStrip = begCurStrip + szCurStrip - 1; }// end while // Now, one of S1 or (and) S2 has all indices in union if (itS1==lstS1.end()) { if (itS2!=lstS2.end()) { if ((*itS2).begin_dst0) { _outOfDiags = new SetOfStrips[lvl]; _row_strips = new SetOfStrips[((lvl+1)*lvl)/2]; _col_strips = new SetOfStrips[((lvl+1)*lvl)/2]; assert(_row_strips!=NULL); assert(_col_strips!=NULL); } } // ------------------------------------------------------------ FathersStrips::FathersStrips(FathersStrips& inter) : _level(inter._level), _outOfDiags(inter._outOfDiags), _seq_strips(inter._seq_strips), _par_strips(inter._par_strips), _row_strips(inter._row_strips), _col_strips(inter._col_strips) { inter._level = 0; inter._outOfDiags = NULL; inter._seq_strips = NULL; inter._par_strips = NULL; inter._row_strips = NULL; inter._col_strips = NULL; } // ------------------------------------------------------------ FathersStrips::~FathersStrips() { if (_outOfDiags) delete [] _outOfDiags; if (_seq_strips) delete [] _seq_strips; if (_par_strips) delete [] _par_strips; if (_row_strips) delete [] _row_strips; if (_col_strips) delete [] _col_strips; } // ------------------------------------------------------------ void FathersStrips::alloc(unsigned lvl) { if (_outOfDiags) delete [] _outOfDiags; if (_seq_strips) delete [] _seq_strips; if (_par_strips) delete [] _par_strips; if (_row_strips) delete [] _row_strips; if (_col_strips) delete [] _col_strips; _level = lvl; if (lvl>0) { _outOfDiags = new SetOfStrips[lvl]; _row_strips = new SetOfStrips[((lvl+1)*lvl)/2]; _col_strips = new SetOfStrips[((lvl+1)*lvl)/2]; assert(_row_strips!=NULL); assert(_col_strips!=NULL); } } // ------------------------------------------------------------ SetOfStrips& FathersStrips::operator [] (unsigned lvl) { assert(lvl<_level); assert(_outOfDiags!=NULL); return _outOfDiags[lvl]; } // 
------------------------------------------------------------ const SetOfStrips& FathersStrips::operator [] (unsigned lvl) const { assert(lvl<_level); assert(_outOfDiags!=NULL); return _outOfDiags[lvl]; } // ____________________________________________________________ SetOfStrips& FathersStrips::getRowSetOfStrips(unsigned l1, unsigned l2) { unsigned ind; assert(l1<_level); assert(l2<_level); if (l1 > l2) { ind = l2+((l1+1)*l1)/2; return _col_strips[ind]; } ind = l1+((l2+1)*l2)/2; return _row_strips[ind]; } // ------------------------------------------------------------ const SetOfStrips& FathersStrips::getRowSetOfStrips(unsigned l1, unsigned l2) const { unsigned ind; assert(l1<_level); assert(l2<_level); if (l1 > l2) { ind = l2+((l1+1)*l1)/2; return _col_strips[ind]; } ind = l1+((l2+1)*l2)/2; return _row_strips[ind]; } // ------------------------------------------------------------ SetOfStrips& FathersStrips::getColSetOfStrips(unsigned l1, unsigned l2) { unsigned ind; assert(l1<_level); assert(l2<_level); if (l1 > l2) { ind = l2+((l1+1)*l1)/2; return _row_strips[ind]; } ind = l1+((l2+1)*l2)/2; return _col_strips[ind]; } // ------------------------------------------------------------ const SetOfStrips& FathersStrips::getColSetOfStrips(unsigned l1, unsigned l2) const { unsigned ind; assert(l1<_level); assert(l2<_level); if (l1 > l2) { ind = l2+((l1+1)*l1)/2; return _row_strips[ind]; } ind = l1+((l2+1)*l2)/2; return _col_strips[ind]; } // -------------------------------------------------------------- FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/BisectionInterConnection.hpp000664 000000 000000 00000026200 14037356732 030520 0ustar00rootroot000000 000000 /*! \file BisectionInterConnetction.hpp \brief Inter connection between off-diag strips \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Jul. 12th 2015 \date Nov. 
30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. 
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // // ============================================================== // == Bisection interconnection : store the indices of the== // == nodes of an interface connected with nodes of a child == // == interface (or domain). The indices are stored as local of== // == the interface and global. == // ============================================================== #ifndef _DISSECTION_SPLITTERS_BISECTIONINTERCONNECTION_HPP_ #define _DISSECTION_SPLITTERS_BISECTIONINTERCONNECTION_HPP_ # include namespace Dissection { struct Strip { unsigned begin_src;// Starting indice in source block unsigned begin_dst;// Starting indice in target block unsigned width; ///@name Constructors and destructor ///@{ /** Default constructor. Initialize with unvalid indices */ Strip() : begin_dst(unsigned(-1)), width(unsigned(-1)) {} /** Simple constructor initializing directly members of the instance */ Strip(unsigned first, unsigned sz) : begin_src(0), begin_dst(first), width(sz) {} /** Simple constructor which initialize members with starting indices for source block */ Strip(unsigned firstSnd, unsigned firstRcv, unsigned sz) : begin_src(firstSnd), begin_dst(firstRcv), width(sz) {} /** Copy constructor (default behaviour) */ Strip(const Strip& strp) : begin_src(strp.begin_src), begin_dst(strp.begin_dst), width(strp.width) {} /** From a list of indices, compute the biggest valid strip from the first indices of the list. @return The indices include in the strip are removed from the list. 
*/ Strip(std::list& indices); /** Return first indice contiguous to the last indice of the strip */ unsigned nextIndice() const { return begin_dst + width; } /** Check if a strip S is contained inside another strip T. If true, initialize decal with the difference between the first indice of T and S. If false, decal is undefined */ bool inside(const Strip& T, unsigned& decal) const; /** Check if a strip S contains another strip T. If true, initialize decal with the difference between the first indice of S and T. If false, decal is undefined */ bool contains(const Strip& T, unsigned& decal) const { return T.inside(*this,decal); } /** Check if an indice is contained by the strip */ bool contains(unsigned ind) const { return ((ind>=begin_dst)&&(ind::iterator iterator; typedef std::list::const_iterator const_iterator; ///@name Constructors and destructor //@{ /** @brief Default constructor Build a empty set of strips */ SetOfStrips() : _lst_strips() {} /** @brief Build a list of strips from a set of indices At the exit of the constructor, indices will be an empty list. 
*/ SetOfStrips(std::list& indices); //@} ///@name Getters/Setters //@{ /// @brief Iterator on the first strip iterator begin() { return _lst_strips.begin(); } /// @brief Const iterator on the first strip const_iterator begin() const { return _lst_strips.begin(); } /// @brief Iterator on the end of the set iterator end() { return _lst_strips.end(); } /// @brief Const iterator on the end of the set const_iterator end() const { return _lst_strips.end(); } /// @brief Number of indices included into the set of strips unsigned numberOfIndices() const; /// @brief Return the number of strips unsigned numberOfStrips() const { return unsigned(_lst_strips.size()); } //@} ///@name Operations on SetOfStrips //@{ /** Check if an indice is contained by the strip */ bool contains(unsigned ind) const { bool ok = false; for (const_iterator it = begin(); it != end(); it++) ok |= (*it).contains(ind); return ok; } /** Add a new strip in the SetOfStrips */ void push_back(const Strip& strp) { _lst_strips.push_back(strp); } /** Add a new SetOfStrips in the SetOfStrips */ void push_back(const SetOfStrips& strps); /** Convert set of strips as list of indices */ operator std::list(); /** Compute the union of current strips indices and strips coming from strips parameter. */ SetOfStrips& operator += (const SetOfStrips& strips); /** \brief Compute the union of current strips indices with indices of another strips as union of intersections and differences (for parallel managing). The first indice in the source is not computed here, the main reason is than we renumber after this computation. */ void para_union(const SetOfStrips& strips, SetOfStrips& paraStrips1, SetOfStrips& paraStrips2, SetOfStrips& seqStrips) const; //@} private: std::list _lst_strips; }; // -------------------------------------------------------------- /** @brief Interconnection between a block B and his ancestors in the bisection tree at the time when the block B must be eliminated by the factorization algorithm. 
During numerical factorization, Schur complements must be computed by the LDU factorization. Schur complements are computed for ancestors of a domain and added on a part of the condensed problem on these ancestors. This class provides some helpers to keep which nodes indices in each ancestor are changed by the Schur complement. NB : For the direct father, only one strip is stored --- for the reason that all nodes of the father are connected with the domain. */ class FathersStrips { public: /// @name Constructors and destructor //@{ /** @brief Initialize interconnection with empty connections */ FathersStrips(); /** @brief Initialize interconnection with his Schur complement for a node at level lvl (>=1). */ FathersStrips(unsigned lvl); /** @brief Copy constructor Steal pointer of the copied object which is destroyed ! */ FathersStrips(FathersStrips& connections); /// @brief Destructor ~FathersStrips(); //@} void alloc(unsigned lvl); /// @name Getters/Setters //@{ /** @brief Return the set of strips for the out of diagonal block coming from ancestor at level lvl. */ SetOfStrips& operator [] (unsigned lvl); /** @brief Return the set of strips for the out of diagonal block coming from ancestor at level lvl. */ const SetOfStrips& operator [] (unsigned lvl) const; /** @brief Return the set of strips for row indices for interaction block between the ancestor at level l1 and ancestor at level l2 (0 <= l1,l2. 
// // ============================================================== // == Bisection tree : Implementation of some methods == // ============================================================== # include # include # include # include # include # include # include "Splitters/BisectionTree.hpp" # include "Splitters/ScotchSplitter.hpp" # include "Splitters/MetisSplitter.hpp" # include "Splitters/BitManipulations.hpp" # include "Compiler/DebugUtils.hpp" # include "Compiler/arithmetic.hpp" # include "Compiler/DissectionIO.hpp" #include #include using namespace Dissection; using std::vector; using std::list; unsigned* Tree::compLoc2Dom(unsigned dim) const { /** The splitters sort the domains to be compatible with the domain numerotation cared in this class... */ unsigned nbDoms = (1<<_nbLevels) - 1; unsigned* loc2Dom = new unsigned[dim]; for (unsigned iDom = 0; iDom < nbDoms; iDom++) { for (unsigned iNode = _ptOnDomains[iDom]; iNode<_ptOnDomains[iDom]+_sizeOfDomains[iDom]; iNode++) { assert(iNode 0) { unsigned iperm = 0; while (long(iperm) <= endPerm) { if (paral.contains(perm[iperm])) iperm++; else { std::swap(perm[iperm],perm[endPerm]); endPerm -= 1; } } endParal = iperm; if (endParal > 1) std::sort(perm,perm+endParal); } else endParal = 0; // Sorting sequential indices given if (seq.numberOfStrips() > 0) { unsigned iperm = endParal; endPerm = nperm-1; while ((long)iperm <= endPerm) { if (seq.contains(perm[iperm])) ++iperm; else { std::swap(perm[iperm],perm[endPerm]); --endPerm; } } endSeq = iperm; if (endSeq-endParal > 1) std::sort(perm+endParal,perm+endSeq); } else endSeq = endParal; // Sorting parallel indices from right interface : if (parar.numberOfStrips() > 0) { unsigned iperm = endSeq; endPerm = nperm-1; while (long(iperm) <= endPerm) { if (parar.contains(perm[iperm])) iperm++; else { std::swap(perm[iperm],perm[endPerm]); endPerm -= 1; } } endParar = iperm; if (endParar-endSeq > 1) std::sort(perm+endSeq,perm+endParar); } else endParar = endSeq; if (endParar < 
nperm-1) std::sort(perm+endParar,perm+nperm); // Recursive calling : if (nodeLayer(iDom) < _nbLevels-2) { SetOfStrips paral2, parar2, seq2; iDom *= 2; unsigned leftChild = (iDom<<1); unsigned rghtChild = leftChild+1; SetOfStrips& lStrip = _interco[leftChild-1][layer]; SetOfStrips& rStrip = _interco[rghtChild-1][layer]; lStrip.para_union(rStrip, paral2, parar2,seq2); if (endParal > 0) { renumberingInterface(endParal, perm, paral2, parar2, seq2, layer, iDom ); } // For sequential part, given priority to the left // interface if right interface has parallel indices if (endParar > endSeq) { renumberingInterface(endSeq-endParal, perm+endParal, paral2, parar2, seq2, layer, iDom ); iDom += 1; unsigned leftChild = (iDom<<1); unsigned rghtChild = leftChild+1; SetOfStrips& lStrip = _interco[leftChild-1][layer]; SetOfStrips& rStrip = _interco[rghtChild-1][layer]; lStrip.para_union(rStrip, paral2, parar2,seq2); renumberingInterface(endParar-endSeq, perm+endSeq, paral2, parar2, seq2, layer, iDom ); } else { iDom += 1; unsigned leftChild = (iDom<<1); unsigned rghtChild = leftChild+1; SetOfStrips& lStrip = _interco[leftChild-1][layer]; SetOfStrips& rStrip = _interco[rghtChild-1][layer]; lStrip.para_union(rStrip, paral2, parar2,seq2); renumberingInterface(endSeq-endParal, perm+endParal, paral2, parar2, seq2, layer, iDom ); } } } // ============================================================== Tree::Tree(FILE *fp, bool &berr, unsigned dim, const CSR_indirect *csr, const bool isSym, //const bool isLower, const int *remap_eqn, //const int* map_indcols, unsigned nbMaxLevel, unsigned minSize, splitter spltFct, bool checkData, const bool verbose) : _verbose(verbose), _glob2dom(NULL), _isSym(isSym) { const int *ptRows = csr->ptRows; const int *indCols = csr->indCols; const int *indVals = csr->indVals; const int *unsym_upper2lower = csr->indVals_unsym; bool ok; if (!spltFct) spltFct = &ScotchSplitter; // 0. 
Remove loops for diagonal elements (Metis and Scotch // don't like loops into graph) int * ptRows2, * indCols2; int *indCols_sbdmn, *indCols_idxStrip, *indCols_idxSbdmn; unsigned *l2d; removeLoops(dim, ptRows, indCols, ptRows2, indCols2); _local2global = new int[dim]; _global2local = new int[dim]; int nbDoms; ok = (*spltFct)(dim, ptRows2,indCols2,nbMaxLevel,minSize, _global2local,_local2global, nbDoms,_ptOnDomains,_sizeOfDomains, checkData, verbose, fp); _nbLevels = nbMaxLevel; assert(nbDoms == (1<<_nbLevels)-1); l2d = compLoc2Dom(dim); # if defined(DISSECTION_DEBUG) for (unsigned ii = 0; ii < dim; ii++) assert(_global2local[_local2global[ii]] == ii); // Save profile of the skeleton of the matrix with new ordering // into a file. fich = fopen("matrixProfile.dat", "w"); fprintf(fich,"%d\n",dim); int nz = 0; fprintf(fich,"%d ",nz); for (int iLoc = 0; iLoc < dim; iLoc++) { int iGlob = _local2global[iLoc]; int nbCol = ptRows[iGlob+1]-ptRows[iGlob]; nz += nbCol; fprintf(fich,"%d ",nz); } CHECK(nz == ptRows[dim], "Wrong number of non zero coefficients?"); fprintf(fich,"\n"); for (int iLoc = 0; iLoc < dim; iLoc++) { int iGlob = _local2global[iLoc]; for (int jCol = ptRows[iGlob]; jCol=0; iLvl--) { unsigned begDom = (1<=0; iLvl--) { unsigned begDom = (1<** connections = new std::list*[nbDoms]; assert(connections != NULL); connections[0] = NULL; for (unsigned iDom = 2; iDom <= nbDoms; iDom++) { unsigned layer = nodeLayer(iDom); assert(layer < _nbLevels); assert(layer > 0); connections[iDom-1] = new std::list[layer]; // a. Filling interconnection between iDom and his ancestors // // i. For each node in iDom : for (unsigned idxVert = _ptOnDomains[iDom-1]; idxVert<_ptOnDomains[iDom-1]+_sizeOfDomains[iDom-1]; idxVert++) { unsigned indGlob = _local2global[idxVert]; // ii. 
For global indice node indGlob, search connected // nodes from the matrix skeleton : for (unsigned ptRow = ptRows[indGlob]; ptRow < ptRows[indGlob+1]; ptRow++) { unsigned connectGlobInd = indCols[ptRow]; unsigned connectLocInd = _global2local[connectGlobInd]; unsigned ancestor = loc2dom[connectLocInd]; connectLocInd -= _ptOnDomains[ancestor-1]; if (ancestor < iDom) // If ancestor is an ancestor (and not itself or a child) { unsigned lvlLocDom = nodeLayer(ancestor); connections[iDom-1][lvlLocDom].push_back(connectLocInd); } }// End for (unsigned ptRow }// End for (unsigned idxVert for (unsigned iLayer = 0; iLayer < layer; iLayer++) { connections[iDom-1][iLayer].sort(); connections[iDom-1][iLayer].unique(); } }// End for (iDom // 4. Convert the nodes connections lists into strips // connections lists assert(connections != NULL); SetOfStrips** stripConnections = new SetOfStrips*[nbDoms]; assert(stripConnections != NULL); stripConnections[0] = NULL; for (unsigned iDom = 2; iDom <= nbDoms; iDom++) { unsigned layer = nodeLayer(iDom); assert(layer < _nbLevels); stripConnections[iDom-1] = new SetOfStrips[layer]; for (unsigned iLayer = 0; iLayer < layer; iLayer++) { assert(connections != NULL); stripConnections[iDom-1][iLayer]= SetOfStrips(connections[iDom-1][iLayer]); } } // OK, initial connection is decribed as strip // with local indices per domains. // 5. Begin symbolic factorization : _interco = symbolicFactorization(stripConnections); // 6. Renumbering father--children to have one parallel strip // and one sequential strip for interconnection between // children and father. 
// Renumbering : // Parallel indices from left child | Sequential indices from // left and right children | Parallel indices from right children for (unsigned iDom = 1; iDom < (1<<(_nbLevels-1)); iDom++) { SetOfStrips paral, parar, seq; SetOfStrips::iterator itS; unsigned *perm = new unsigned[sizeOfDomain(iDom)]; for (unsigned i = 0; i < sizeOfDomain(iDom); i++) perm[i] = i; unsigned layer = nodeLayer(iDom); assert(layer < _nbLevels-1); unsigned leftChild = (iDom<<1); unsigned rghtChild = leftChild+1; SetOfStrips& lStrip = _interco[leftChild-1][layer]; SetOfStrips& rStrip = _interco[rghtChild-1][layer]; lStrip.para_union(rStrip, paral, parar,seq); renumberingInterface(sizeOfDomain(iDom), perm, paral, parar, seq, layer, iDom ); unsigned* invperm = new unsigned[sizeOfDomain(iDom)]; for (unsigned ii = 0; ii < sizeOfDomain(iDom); ii++) invperm[perm[ii]] = ii; unsigned curDom = iDom; unsigned nbDoms = 1; for (unsigned iLayer = layer+1; iLayer < _nbLevels; iLayer++) { curDom *= 2; nbDoms *= 2; for (unsigned jDom = curDom; jDom < curDom+nbDoms; jDom++) { // Retrieve connection between jDom and iDom // in old renumbering SetOfStrips& strips = _interco[jDom-1][layer]; // and convert into list std::list locConnect = strips; // Convert old renumbering into new renumbering in the list for (std::list::iterator itL = locConnect.begin(); itL != locConnect.end(); itL++) { (*itL) = invperm[(*itL)]; } locConnect.sort(); // Build new strips coming from new renumbering strips = SetOfStrips(locConnect); }// for (jDom }// for (iLayer // Renumbering the local indices for subdomain iDom : // ------------------------------------------------ int* tempInd = new int[sizeOfDomain(iDom)]; memcpy(tempInd, _local2global+ _ptOnDomains[iDom-1], sizeof(int)*sizeOfDomain(iDom)); for (unsigned i = 0; i < sizeOfDomain(iDom); i++) { _local2global[_ptOnDomains[iDom-1]+i] = tempInd[perm[i]]; } delete [] tempInd; // 01 Feb.2013 Atsushi delete [] invperm; delete [] perm; for (unsigned i = 0; i < 
sizeOfDomain(iDom); i++) { int l2g = _local2global[_ptOnDomains[iDom-1]+i]; _global2local[l2g] = _ptOnDomains[iDom-1]+i; } }// for (iDom // At last, create interconnections between a node set of strips // and his ancestors (out of diagonal and diagonals blocks). // Nb : For layer 0, no ancestors, so... for (unsigned iLayer = 1; iLayer < _nbLevels; iLayer++) { // For each domain contained in layer iLayer : for (unsigned iDom = (1< 0; jLayer--) { // Compute index of the ancestor in layer jLayer : jDom = jDom/2; FathersStrips& connection_j = _interco[jDom-1]; // Retrieve the set of strips describing the direct connection between the // strips for (unsigned kLayer = jLayer; kLayer > 0; kLayer --) { SetOfStrips& stripForRowBlock_jk = connection_i.getRowSetOfStrips(jLayer-1,kLayer-1); SetOfStrips& stripForColBlock_jk = connection_i.getColSetOfStrips(jLayer-1,kLayer-1); // Most of data are copy of out of diagonal strips : stripForRowBlock_jk = connection_i[jLayer-1]; stripForColBlock_jk = connection_i[kLayer-1]; if (kLayer != jLayer) {// In this case, must change the destination index : SetOfStrips& outOfDiag_strips_jk = connection_j[kLayer-1]; SetOfStrips::iterator it_DstBjk = stripForColBlock_jk.begin(); for (SetOfStrips::iterator it_Bjk = outOfDiag_strips_jk.begin(); it_Bjk != outOfDiag_strips_jk.end(); it_Bjk++) { unsigned decal; while ((it_DstBjk != stripForColBlock_jk.end()) && (*it_DstBjk).inside(*it_Bjk,decal)) { (*it_DstBjk).begin_dst = (*it_Bjk).begin_src + decal; it_DstBjk ++; } } } } } } } # if defined(DISSECTION_DEBUG) { unsigned iDom; // DEBUGGING : Save filling of the matrix fich = fopen("filledMatrix2.dat", "w"); fprintf(fich,"%d\n",_nbLevels); for (iDom = 1; iDom <= nbDoms; iDom++) fprintf(fich,"%d %d\n",_ptOnDomains[iDom - 1], _sizeOfDomains[iDom - 1]); // ancestors connection : // added by Atsush : // iDom ==1 does not have father, hence it is not shown here // strips are shown in order of Idom : {2, 3} {4, 5, 6, 7} // NB : index of _interco[] starts at 
0, (iDom - 1) is obligation!! for (unsigned iLevel = 1; iLevel < _nbLevels; iLevel++) { // First domain indice for this level: unsigned begDom = (1<* indCols_bylvl = new std::map[_nbLevels]; int *ptRow_diag = new int[dim]; int nnzh = (ptRows[dim] + dim) / 2; int *indVals_tmp = new int [nnzh]; indCols_sbdmn = new int[ptRows[dim]]; indCols_idxStrip = new int[ptRows[dim]]; indCols_idxSbdmn = new int[ptRows[dim]]; for (int n = (_nbLevels - 1); n >=0; n--) { unsigned begDom = (1 << n); unsigned endDom = 2 * begDom; // (1 << (n + 1)); unsigned dd; for (unsigned d = begDom; d < endDom; d++) { const unsigned d1 = d - 1; // to access C array for (unsigned i = _ptOnDomains[d1]; i < (_ptOnDomains[d1] + _sizeOfDomains[d1]); i++) { for (int m = 0; m <= n; m++) { indCols_bylvl[m].clear(); } const int ii = _local2global[i]; for (int k = ptRows[ii]; k < ptRows[ii + 1]; k++) { const int j = indCols[k]; const int jj = _global2local[j]; dd = l2d[jj]; if (dd <= d) { unsigned ndl = nodeLayer(dd); indCols_bylvl[ndl].insert(std::map::value_type(jj, k)); } } // loop : k // m == n : diagonal dd = d; for (std::map::iterator it = indCols_bylvl[n].begin(); it != indCols_bylvl[n].end(); ++it) { indCols_sbdmn[(*it).second] = dd; indCols_idxStrip[(*it).second] = // within one strip indCols_idxSbdmn[(*it).second] = ((*it).first - _ptOnDomains[dd - 1]); } for (int m = (n - 1); m >= 0; m--) { dd /= 2; const unsigned dd1 = dd - 1; SetOfStrips::iterator is = _interco[d1][m].begin(); int width_strips = 0; for (std::map::iterator it = indCols_bylvl[m].begin(); it != indCols_bylvl[m].end(); ++it) { int endStrip = ((*is).begin_dst + _ptOnDomains[dd1] + (*is).width); while (endStrip < (*it).first) { width_strips += (*is).width; ++is; if (is == _interco[d1][m].end()) { } else { endStrip = ((*is).begin_dst + _ptOnDomains[dd1] + (*is).width); } } const int begStrip = (*is).begin_dst + _ptOnDomains[dd1]; if ((begStrip <= (*it).first) && ((*it).first < endStrip)) { indCols_sbdmn[(*it).second] = dd; 
indCols_idxStrip[(*it).second] = (((*it).first - begStrip) + width_strips); indCols_idxSbdmn[(*it).second] = ((*it).first - _ptOnDomains[dd1]); } } // loop : it } // loop : m } // loop : i } } // // gnereating map from diagonal matrix / offdiagonal strips to matrix value // in original CSR format int itmp = 0; for (int d = 0; d < nbDoms; d++) { if (itmp < _sizeOfDomains[d]) { itmp = _sizeOfDomains[d]; } } diss_printf(verbose, fp, "max of size of subdomains = %d\n", itmp); std::map *sparse_row_diag = new std::map[itmp]; std::map *sparse_row_offdiag = new std::map[itmp]; _csr_diag = new CSR_indirect[nbDoms]; _csr_offdiag = new CSR_indirect[nbDoms]; _sizeOfFathersStrips = new int[nbDoms]; int *width_strips_sbdmn = new int[_nbLevels]; for (int n = (_nbLevels - 1); n >=0; n--) { unsigned begDom = (1 << n); unsigned endDom = 2 * begDom; // (1 << (n + 1)); for (unsigned d = begDom; d < endDom; d++) { const int d1 = d - 1; for (int m = nodeLayer(d) - 1; m >= 0; m--) { width_strips_sbdmn[m] = 0; } for (int m = (nodeLayer(d) - 1); m > 0; m--) { width_strips_sbdmn[m - 1] = width_strips_sbdmn[m]; for (SetOfStrips::iterator it = _interco[d1][m].begin(); it != _interco[d1][m].end(); ++it) { width_strips_sbdmn[m - 1] += (*it).width; } } for (int i = 0; i < _sizeOfDomains[d1]; i++) { sparse_row_diag[i].clear(); sparse_row_offdiag[i].clear(); } int ii0, ii; for (ii0 = 0, ii = _ptOnDomains[d1]; ii0 < _sizeOfDomains[d1]; ii++, ii0++) { const int i = _local2global[ii]; for (int k = ptRows[i]; k < ptRows[i + 1]; k++) { int jj = _global2local[indCols[k]]; unsigned dd = l2d[jj]; if (dd == d) { #ifdef DEBUG_MAPPING_CSR sparse_row_diag[ii0].insert(std::map::value_type(indCols_idxStrip[k], k)); #else sparse_row_diag[ii0].insert(std::map::value_type(indCols_idxStrip[k], k)); //(isLower ? 
toSym[k] : k))); #endif } else { if (dd < d) { #ifdef DEBUG_MAPPING_CSR sparse_row_offdiag[ii0].insert(std::map::value_type(indCols_idxStrip[k] + width_strips_sbdmn[nodeLayer(dd)], k)); #else sparse_row_offdiag[ii0].insert(std::map::value_type(indCols_idxStrip[k] + width_strips_sbdmn[nodeLayer(dd)], k)); //(isLower ? toSym[k] : k))); #endif } } } // loop : k } // loop : ii, ii0 _csr_diag[d1].n = _sizeOfDomains[d1]; _csr_offdiag[d1].n = _sizeOfDomains[d1]; _csr_diag[d1].ptRows = new int [_csr_diag[d1].n + 1]; _csr_offdiag[d1].ptRows = new int [_csr_offdiag[d1].n + 1]; _csr_diag[d1].ptRows[0] = 0; _csr_offdiag[d1].ptRows[0] = 0; for (int i = 0; i < _csr_diag[d1].n; i++) { _csr_diag[d1].ptRows[i + 1] = (_csr_diag[d1].ptRows[i] + sparse_row_diag[i].size()); _csr_offdiag[d1].ptRows[i + 1] = (_csr_offdiag[d1].ptRows[i] + sparse_row_offdiag[i].size()); } _csr_diag[d1].nnz = _csr_diag[d1].ptRows[_csr_diag[d1].n]; _csr_offdiag[d1].nnz = _csr_offdiag[d1].ptRows[_csr_offdiag[d1].n]; _csr_diag[d1].indCols = new int [_csr_diag[d1].nnz]; _csr_offdiag[d1].indCols = new int [_csr_offdiag[d1].nnz]; _csr_diag[d1].indVals = new int [_csr_diag[d1].nnz]; _csr_offdiag[d1].indVals = new int [_csr_offdiag[d1].nnz]; _csr_diag[d1].indVals0 = new int [_csr_diag[d1].nnz]; _csr_offdiag[d1].indVals0 = new int [_csr_offdiag[d1].nnz]; if(!isSym) { _csr_offdiag[d1].indVals_unsym = new int [_csr_offdiag[d1].nnz]; } #ifdef DEBUG_MAPPING_CSR _csr_diag[d1].indVals2 = new int [_csr_diag[d1].nnz]; _csr_offdiag[d1].indVals2 = new int [_csr_offdiag[d1].nnz]; #endif int k0, k1; k0 = 0; k1 = 0; for (int i = 0; i < _csr_diag[d1].n; i++){ for (std::map::iterator it = sparse_row_diag[i].begin(); it != sparse_row_diag[i].end(); ++it, k0++) { _csr_diag[d1].indCols[k0] = (*it).first; #ifdef DEBUG_MAPPING_CSR _csr_diag[d1].indVals2[k0] = (*it).second; _csr_diag[d1].indVals[k0] = toSym[(*it).second]; #else _csr_diag[d1].indVals0[k0] = (*it).second; //map_indcols[(*it).second]; _csr_diag[d1].indVals[k0] = 
indVals[(*it).second]; //remap_indcols[(*it).second]; #endif } for (std::map::iterator it = sparse_row_offdiag[i].begin(); it != sparse_row_offdiag[i].end(); ++it, k1++) { _csr_offdiag[d1].indCols[k1] = (*it).first; #ifdef DEBUG_MAPPING_CSR _csr_offdiag[d1].indVals2[k1] = (*it).second; _csr_offdiag[d1].indVals[k1] = toSym[(*it).second]; #else _csr_offdiag[d1].indVals0[k1] = (*it).second; //remap_indcols[(*it).second]; _csr_offdiag[d1].indVals[k1] = indVals[(*it).second]; //remap_indcols[(*it).second]; #endif } } if (!isSym) { int k = 0; for (int i = 0; i < _csr_diag[d1].n; i++){ for (std::map::iterator it = sparse_row_offdiag[i].begin(); it != sparse_row_offdiag[i].end(); ++it, k++) { _csr_offdiag[d1].indVals_unsym[k] = unsym_upper2lower[(*it).second]; // unsym_upper2lower[(*it).second]; } } } // if (!isSym) } // loop : d } // loop : n delete [] sparse_row_diag; delete [] sparse_row_offdiag; // delete [] l2d; #if 0 fich = fopen("sparseMatrix4.dat", "w"); fprintf(fich, "dim = %d\n", dim); fprintf(fich, "# of subdomais = %d\n", nbDoms); for (int d = 0; d < nbDoms; d++) { fprintf(fich, "subdomain = %d : %d diagonal n = %d nnz = %d\n", d, _sizeOfDomains[d], _csr_diag[d].n, _csr_diag[d].nnz); for (int i = 0; i < (_csr_diag[d].n + 1); i++) { fprintf(fich, "%d ", _csr_diag[d].ptRows[i]); } fprintf(fich, "\n"); for (int i = 0; i < _csr_diag[d].n; i++) { for (int k = _csr_diag[d].ptRows[i]; k < _csr_diag[d].ptRows[i + 1]; k++) { fprintf(fich, "(%d %d) ", _csr_diag[d].indCols[k], _csr_diag[d].indVals0[k]); } } fprintf(fich, "\n"); fprintf(fich, "subdomain = %d : %d offdiagonal n = %d nnz = %d\n", d, _sizeOfDomains[d], _csr_offdiag[d].n, _csr_offdiag[d].nnz); for (int i = 0; i < (_csr_offdiag[d].n + 1); i++) { fprintf(fich, "%d ", _csr_offdiag[d].ptRows[i]); } fprintf(fich, "\n"); for (int i = 0; i < _csr_offdiag[d].n; i++) { for (int k = _csr_offdiag[d].ptRows[i]; k < _csr_offdiag[d].ptRows[i + 1]; k++) { fprintf(fich, "(%d %d) ", _csr_offdiag[d].indCols[k], 
_csr_offdiag[d].indVals0[k]); } } fprintf(fich, "\n"); } // loop : d #endif _nbDoms = nbDoms; for (int d = 0; d < nbDoms; d++) { _sizeOfFathersStrips[d] = 0; int level = nodeLayer(d + 1); for (int m = level - 1; m >= 0; m--) { SetOfStrips &strps = _interco[d][level - 1 - m]; _sizeOfFathersStrips[d] += strps.numberOfIndices(); } } // Create glob 2 dom links : _glob2dom = new int[dim]; for (unsigned iDom = 0; iDom < (1<<_nbLevels)-1; iDom++) { for (unsigned iNode = _ptOnDomains[iDom]; iNode < _ptOnDomains[iDom+1]; iNode++) { _glob2dom[_local2global[iNode]] = iDom + 1; } } _loc2glob_diag = new int*[nbDoms]; _loc2glob_offdiag = new int *[nbDoms]; for (int d = 0; d < nbDoms; d++) { _loc2glob_diag[d] = new int[_sizeOfDomains[d]]; int ii = _ptOnDomains[d]; for (int i = 0; i < _sizeOfDomains[d]; i++, ii++) { // _loc2glob_diag[d][i] = _local2global[ii]; // 16 Jul.2013 Atsushi getDiagLoc2Glob(nd)[] = loc2glob_diag[nd][] _loc2glob_diag[d][i] = remap_eqn[_local2global[ii]]; // _local2global[ii]; // // 19 Dec. } int itmp = 0; FathersStrips &fstrps = _interco[d]; const int layer = nodeLayer(d + 1); for (int m = (layer - 1); m >= 0; m--) { itmp += fstrps[m].numberOfIndices(); } _loc2glob_offdiag[d] = new int[_sizeOfFathersStrips[d]]; int i0 = 0; // index inside of strips which are stored contiguously for (int m = (layer - 1); m >= 0; m--) { for (SetOfStrips::iterator it = fstrps[m].begin(); it != fstrps[m].end(); ++it) { const int n = nthfatherIndex((d + 1), (layer - m)); ii = (_ptOnDomains[n - 1] + (*it).begin_dst); for (int j = 0; j < (*it).width; j++, i0++, ii++) { // _loc2glob_offdiag[d][i0] = _local2global[ii]; // 16 Jul.2013 Atsushi getOffdiagLoc2Glob(nd)[] = loc2glob_offdiag[nd][] _loc2glob_offdiag[d][i0] = remap_eqn[_local2global[ii]]; // _local2global[ii]; // 19 Dec. 
} } } } // Destroy temporary data for (int d = 0; d < nbDoms; d++) { delete [] _csr_diag[d].indVals0; delete [] _csr_offdiag[d].indVals0; _csr_diag[d].indVals0 = NULL; _csr_offdiag[d].indVals0 = NULL; } for (unsigned iDom = 2; iDom <= nbDoms; iDom++) { delete [] stripConnections[iDom-1]; delete [] connections[iDom-1]; } delete [] stripConnections; delete [] connections; delete [] loc2dom; delete [] indCols2; delete [] ptRows2 ; delete [] l2d; // 01 Feb.2013 : Atsushi -- begin delete [] indVals_tmp; delete [] ptRow_diag; delete [] indCols_bylvl; delete [] indCols_sbdmn; delete [] indCols_idxStrip; delete [] indCols_idxSbdmn; delete [] width_strips_sbdmn; // 01 Feb.2013 : Atsushi -- end berr = true; } // -------------------------------------------------------------- Tree::~Tree() { delete [] _sizeOfDomains; delete [] _ptOnDomains; delete [] _local2global; delete [] _global2local; if (_interco) delete [] _interco; //unsigned nDoms = (1<<_nbLevels)-1; // 01 Feb.2013 : Atsushi -- begin delete [] _glob2dom; delete [] _sizeOfFathersStrips; for (int i = 0; i < _nbDoms; i++) { delete [] _loc2glob_diag[i]; delete [] _loc2glob_offdiag[i]; delete [] _csr_diag[i].ptRows; delete [] _csr_diag[i].indCols; delete [] _csr_diag[i].indVals; // delete [] _csr_diag[i].indVals0; delete [] _csr_offdiag[i].ptRows; delete [] _csr_offdiag[i].indCols; delete [] _csr_offdiag[i].indVals; // delete [] _csr_offdiag[i].indVals0; } if (!_isSym) { for (int i = 0; i < _nbDoms; i++) { delete [] _csr_offdiag[i].indVals_unsym; } } delete [] _loc2glob_diag; delete [] _loc2glob_offdiag; delete [] _csr_diag; delete [] _csr_offdiag; // 01 Feb.2013 : Atsushi -- end } // ============================================================== bool Tree::save(FILE* stream) const { // To do return true; } // -------------------------------------------------------------- bool Tree::load(FILE* stream) { // To do return true; } // -------------------------------------------------------------- void Tree::printInfo(FILE* 
stream, unsigned verboseLevel) const { unsigned l,n,p; fprintf(stream,"Number of bisection level : %u\n",_nbLevels); for (l=0; l < _nbLevels; l++) { double meanSize = 0., varSize = 0.; for (n = 1< 0; iLevel--) { // First domain indice for this level: unsigned begDom = (1< 0; iLayer--) { ancestDom = ancestDom/2;// Compute indice (starting to 1) // of next ancestor : diss_printf(verbose, stderr, "ancestDom = %d\niLayer = %d\n", ancestDom, iLayer); for (jLayer = iLayer; jLayer > 0; jLayer--) { diss_printf(verbose, stderr, "jLayer = %d\n", (jLayer - 1)); factConnect[ancestDom-1][jLayer-1] += factConnect[iDom-1][jLayer-1]; } } } } // end symbolic factorization // ............................................................ # if defined(DISSECTION_DEBUG) // DEBUGGING : Save filling of the matrix FILE* fich = fopen("filledMatrix.dat", "w"); fprintf(fich,"%d\n",_nbLevels); for (iDom = 0; iDom < nbDoms; iDom++) fprintf(fich,"%d %d\n",_ptOnDomains[iDom], _sizeOfDomains[iDom]); // ancestors connection : for (iLevel = 1; iLevel < _nbLevels; iLevel++) { // First domain indice for this level: unsigned begDom = (1<=0; n--) { unsigned begDom = (1 << n); unsigned endDom = 2 * begDom; // (1 << (n + 1)); for (unsigned d = begDom; d < endDom; d++) { const int d1 = d - 1; posDom[d1] = pos; pos += _sizeOfDomains[d1]; } } // drawing position of non-zero elemnet laying over domain decompositions //strcpy(color, "yellow"); for (int n = (_nbLevels - 1); n >=0; n--) { unsigned begDom = (1 << n); unsigned endDom = 2 * begDom; // (1 << (n + 1)); for (unsigned d = begDom; d < endDom; d++) { const int d1 = d - 1; int dd = d; if (n == (_nbLevels - 1)) { strcpy(color, "blue"); } else { strcpy(color, "red"); dense += _sizeOfDomains[d1] * _sizeOfDomains[d1]; } fprintf(fp, "n %g %g m %g 0 rl 0 %g rl %g 0 rl c gs %s f gr %s s\n", xpos + (double)posDom[d1] * xscale, ypos - (double)(posDom[d1] + _sizeOfDomains[d1]) * yscale, _sizeOfDomains[d1] * xscale, (double)_sizeOfDomains[d1] * yscale, (-1.0) * 
(double)_sizeOfDomains[d1] * xscale, color, color); dd = d; if (n == (_nbLevels - 1)) { strcpy(color, "darkgreen"); } else { strcpy(color, "graycyan"); } for (int m = (n - 1); m >= 0; m--) { dd /= 2; const unsigned dd1 = dd - 1; for (SetOfStrips::iterator it = _interco[d1][m].begin(); it != _interco[d1][m].end(); ++it) { fprintf(fp, "n %g %g m %g 0 rl 0 %g rl %g 0 rl c gs %s f gr %s s\n", xpos + (double)(posDom[dd1] + (*it).begin_dst) * xscale, ypos - (double)(posDom[d1] + _sizeOfDomains[d1]) * yscale, (double)(*it).width * xscale, (double)_sizeOfDomains[d1] * yscale, (-1.0) * (double)(*it).width * xscale, color, color); if (n < (_nbLevels - 1)) { dense += _sizeOfDomains[d1] * (*it).width; } } } } } // loop : n #if 1 int k = 0; for (int i = 0; i < dim; i++) { for (int kk = ptRow_diag[i]; kk < ptRows[i + 1]; kk++, k++) { switch (indVals[k]) { case 0 : strcpy(color, "red"); break; case 1: strcpy(color, "black"); break; case 2: strcpy(color, "cyan"); break; default: strcpy(color, "green"); break; } int j = indCols[kk]; #if 1 int ii = _global2local[i]; int jj = _global2local[j]; int di = l2d[ii]; int dj = l2d[jj]; #else unsigned ii, jj, di, dj; getGlob2Loc_dom(i, ii, di); getGlob2Loc_dom(j, jj, dj); #endif if (dj > di) { int itmp; itmp = ii; ii = jj; jj = itmp; itmp = di; di = dj; dj = itmp; if (indVals[k] == 0) { strcpy(color, "magenta"); } } fprintf(fp,"n %g %g rr 0 360 arc %s sfill\n", xpos + ((double)(jj - _ptOnDomains[dj - 1] + posDom[dj - 1]) + 0.5) * xscale, ypos - ((double)(ii - _ptOnDomains[di - 1] + posDom[di - 1]) + 0.5) * yscale, color); if (dj == di) { strcpy(color, "graycyan"); fprintf(fp,"n %g %g rr 0 360 arc %s sfill\n", xpos + ((double)(ii - _ptOnDomains[di - 1] + posDom[di - 1]) + 0.5) * xscale, ypos - ((double)(jj - _ptOnDomains[di - 1] + posDom[di - 1]) + 0.5) * yscale, color); } } } #endif fprintf(fp, "%%dense=%d\n", dense); fprintf(fp, "showpage\n"); fclose(fp); } FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/BisectionTree.hpp000664 000000 
000000 00000023621 14037356732 026322 0ustar00rootroot000000 000000 /*! \file BisectionTree.hpp \brief ordering by graph decomposer, SCOTCH or METIS \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Dissection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version.
// // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see . // // ============================================================== // == Bisection tree : stored by components in 1D arrays == // ============================================================== #ifndef _DISSECTION_SPLITTERS_BISECTIONTREE_HPP_ #define _DISSECTION_SPLITTERS_BISECTIONTREE_HPP_ #include #include "Splitters/BitManipulations.hpp" #include "Splitters/BisectionInterConnection.hpp" #include "Splitters/Splitter.hpp" #include "Algebra/CSR_matrix.hpp" namespace Dissection { /** @brief Nested Bisection tree The nested bisection tree is the core of the symbolic factorization. Nodes are numbered from root,starting at 1, to leaves by layer order. By example : (layer level) ----------- 1 0 / \ 2 3 1 /\ /\ 4 5 6 7 2 Data are stored components by components, ordered as above : [D1,D2,D3,D4,D5,D6,D7] Each layer l data begin at 2^l (0 <= l < number of layers) A node n is in layer highest_one_idx(n) The brother of a node n (node sharing same father) is n^1 (^ means xor operation as C convention). Ancestors are n>>p where 0 < p < number of layers - level of domain */ class Tree { public: /**@name Constructors and destructor */ //@{ /** @brief Build bisection tree From the sparse matrix CSR structure, build the bisection tree for symbolic factorization computing some block permutations to speed-up data transfert between blocks. @param dim Dimension of the sparse matrix @param ptRows Indices pointing on the beginning of each row of the sparse matrix. 
The last value ptRows[dim] provides the number of non zero coefficients of the sparse matrix. @param indCols Column indice of each non zero coefficients of the sparse matrix. Values must be stored and sorted (increasing) by row @param nbMaxLevel Maximal number of iteration for the bisection method. @param minSize Minimal number of nodes per subdomains in the leaves of the tree. @param spltFct The nested bisection library function to use By default, Scotch is used to do nested bisection @param checkData If true, verify some consistencies in the given sparse matrix structure. If false, assume than provided sparse matrix structure is consistency. */ Tree(FILE *fp, bool &berr, unsigned dim, const CSR_indirect *csr, const bool isSym, //const bool isLower, const int *remap_eqn, //const int *remap_indcols, unsigned nbMaxLevel=8, unsigned minSize = 120, splitter spltFct = NULL, bool checkData = false, bool verbose = true); /** @brief Destructor */ ~Tree(); //@} /** @name Getters and setters */ //@{ /// Return index of the brother (i.e having same father) node /// of a node nd. 
static int selfIndex(unsigned nd) { return (int)(nd - 1); } static int Index2Node(unsigned nd) { return (int)(nd + 1); } static unsigned brotherIndex(unsigned nd) { return (nd ^ 1); } /// Return index of the father node index of a node nd static unsigned fatherIndex(unsigned nd) { return (nd / 2); } /// Return index of nth forerunner of domain nd : static unsigned nthfatherIndex(unsigned nd, unsigned nth) { return (nd >> nth); } /// Return index of the first child index for node nd : static unsigned childIndex(unsigned nd) { return (nd * 2); } /// Return layer number where lies the domain nd : static unsigned nodeLayer(unsigned nd) { return highest_one_idx(nd); } /// Return unsigned NumberOfLevels() { return _nbLevels; } unsigned NumberOfSubdomains() { return _nbDoms; } // unsigned NumberOfSubdomains(unsigned level) { // (_sbLevels - 1) == (void) if (level < _nbLevels) { return ((1 << (level + 1)) - 1); // including own level } else { return 0U; } } /// Return number of internal nodes of a domain nd (>=1) unsigned sizeOfDomain(unsigned nd) const { return _sizeOfDomains[selfIndex(nd)]; } unsigned sizeOfFathersStrips(unsigned nd) const { return _sizeOfFathersStrips[selfIndex(nd)]; } /// Return connection between a domain nd (nd >= 1) and his ancestors const FathersStrips& getFathersStrips(unsigned nd) { return _interco[selfIndex(nd)]; } const CSR_indirect& getDiagCSR(unsigned nd) { return _csr_diag[selfIndex(nd)]; } const CSR_indirect& getOffdiagCSR(unsigned nd) { return _csr_offdiag[selfIndex(nd)]; } int *getDiagLoc2Glob(unsigned nd) { return _loc2glob_diag[selfIndex(nd)]; } int *getOffdiagLoc2Glob(unsigned nd) { return _loc2glob_offdiag[selfIndex(nd)]; } bool save(FILE* stream) const; /** @brief Load bisection tree contents */ bool load(FILE* stream) ; /** @brief Print information on bisection tree A verbose level can be provided. 
Depending of his value, informations printed are : >=1 : Some statistics data (number of domains, mean number of nodes per domains per layer, variance of number of nodes per domains,...) >=2 : Global indices of nodes contained per domains >=3 : Block permutation from a domain and his forerunnings @param verboseLevel Choose the level of verbose to print some information for dissection tree. */ void printInfo(FILE* stream, unsigned verboseLevel = 1) const; //@} private: bool _verbose; /* Allocate and fill an array with indices of domain containing each local index. The caller of this method must free the returned array. */ unsigned* compLoc2Dom(unsigned dim) const; /* Remove self references from the skeleton graph of the sparse matrix provided to the splitter */ void removeLoops(int dim, const int* ptRows, const int* indCols, int*& ptRows2, int*& indCols2); /* Do symbolic factorization */ FathersStrips* symbolicFactorization(SetOfStrips** connections); /* Renumbering interfaces to optimize number of strips */ void renumberingInterface(unsigned nperm, unsigned* perm, const SetOfStrips& paral, const SetOfStrips& parar, const SetOfStrips& seq, unsigned layer, unsigned iDom) const; void draw_csr(char *filename, int dim, const int *ptRows, const int *indCols, const int *ptRow_diag, const int *indVals, const unsigned *l2d); /// Number of levels done in nested bisection algorithm unsigned _nbDoms; unsigned _nbLevels; /// Number of nodes (internal only for each leaf) int *_sizeOfDomains; int *_sizeOfFathersStrips; /// Indice (starting at 0) of the beginning of each domain /// in local 2 global correspondance. 
int *_ptOnDomains; /// Local 2 global array for each subdomain (partition) int *_local2global; /// global 2 local array (partition) int *_global2local; /// FathersStrips* _interco; /// For global indices, store the domain containing it : int* _glob2dom; int **_loc2glob_diag; int **_loc2glob_offdiag; bool _isSym; CSR_indirect *_csr_diag; CSR_indirect *_csr_offdiag; }; } #endif FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/BitManipulations.hpp000664 000000 000000 00000007204 14037356732 027044 0ustar00rootroot000000 000000 /*! \file BitManupulations.hpp \brief routines for bit arithmetics \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <https://www.gnu.org/licenses/>.
//
// ==============================================================
// ==     Some function to manipulate easily bits on integers  ==
// ==============================================================
#ifndef _DISSECTION_BITTOOLS_BITMANIPULATIONS_HPP_
# define _DISSECTION_BITTOOLS_BITMANIPULATIONS_HPP_

/**
   @brief  Return the highest power of 2 which is less than or equal to x.
   @param  x value to inspect.
   @return the value of the most significant set bit of x, or 0 when x is 0.
*/
inline unsigned highestbit(unsigned x)
{
  if (0==x) return 0;
# if defined(USE_X86_ASM)
  // bsr stores the zero-based index of the most significant set bit in x,
  // so the matching power of two is 1U<<x. The former 1U<<(x-1) returned
  // half of the documented value and shifted by a wrapped-around count
  // (undefined behaviour) for x == 1.
  __asm__("bsr{l}\t%0, %0\n\t" : "=r" (x) : "0" (x));
  return 1U<<x;
# else
  // Smear the most significant set bit into every lower position ...
  x |= x>>1;
  x |= x>>2;
  x |= x>>4;
  x |= x>>8;
  x |= x>>16;
  // ... then clear everything below it.
  return x ^ (x>>1);
# endif
}

// --------------------------------------------------------------
/**
   @brief  Return the position of the highest bit of x.
   @param  x value to inspect.
   @return zero-based index of the most significant set bit of x;
           0 when x is 0 (same result as for x == 1).
*/
inline unsigned highest_one_idx(unsigned x)
{
# if defined(USE_X86_ASM)
  // bsr leaves its destination undefined for a zero source: return the
  // same value as the portable branch instead of executing it.
  if (0==x) return 0;
  __asm__("bsr{l}\t%0, %0\n\t" : "=r" (x) : "0" (x));
  return x;
# else
  // Binary search on the bit position: halve the inspected width each step.
  unsigned r = 0;
  if (x & 0xffff0000U) { x >>= 16; r += 16; }
  if (x & 0x0000ff00U) { x >>= 8 ; r += 8; }
  if (x & 0x000000f0U) { x >>= 4 ; r += 4; }
  if (x & 0x0000000cU) { x >>= 2 ; r += 2; }
  if (x & 0x00000002U) { x >>= 1 ; r += 1; }
  return r;
# endif
}
#endif FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/MetisSplitter.cpp000664 000000 000000 00000020045 14037356732 026363 0ustar00rootroot000000 000000
/*! \file   MetisSplitter.cpp
    \brief  to call graph decomposer : METIS
    \author Xavier Juvigny, ONERA
    \date   Jul.  2nd 2012
    \date   Nov. 30th 2016
    \date   Oct. 15th 2017
*/
// This file is part of Dissection
//
// Dissection is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Linking Dissection statically or dynamically with other modules is making
// a combined work based on Dissection.
Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. 
// // ============================================================ // == Implementation of splitter using Metis library == // ============================================================ #include #include #include #include #ifndef NO_METIS #include "metis.h" #endif #include "Compiler/DebugUtils.hpp" #include "Splitters/BitManipulations.hpp" #include "Splitters/MetisSplitter.hpp" static unsigned compBegOfDomains(unsigned invLevel, unsigned&begDom, unsigned indDom, const int* sizeOfDomains, int* ptOnDomains) { if (invLevel==1) { ptOnDomains[indDom-1] = begDom; return sizeOfDomains[indDom-1]; } else { begDom += compBegOfDomains(invLevel-1, begDom, 2*indDom, sizeOfDomains, ptOnDomains); begDom += compBegOfDomains(invLevel-1, begDom, 2*indDom+1, sizeOfDomains, ptOnDomains); ptOnDomains[indDom-1] = begDom; return sizeOfDomains[indDom-1]; } } bool MetisSplitter(unsigned dim, const int* ptRows, const int* indCols, unsigned& nbMaxLevels, unsigned minSize, int* loc2glob, int* glob2loc, int& nbDoms, int*& ptOnDomains, int*& sizeOfDomains, bool checkData, const bool verbose, FILE *fp) { #ifdef NO_METIS if (verbose) { fprintf(stderr, "%s %d : Metis is not linked\n", __FILE__, __LINE__); } return false; #else /** Check inputs */ CHECK(ptRows !=NULL, "Null pointer for ptRows !"); CHECK(indCols !=NULL, "Null pointer for indCols !"); CHECK(loc2glob!=NULL, "Null pointer for loc2glob!"); CHECK(glob2loc!=NULL, "Null pointer for glob2loc!"); int ierr; // Compute nbMaxLevels according to the minSize parameter int maxBlocks = dim/minSize; unsigned nbLevels = highestbit(maxBlocks)-1; nbMaxLevels = (nbMaxLevels 0; iLvl --) { for (unsigned iDom = (1<<(iLvl-1)); iDom < (1<. // // ============================================================== // == Definition of splitter function using Metis library. 
== // ============================================================== #ifndef _DISSECTION_SPLITTERS_METISSPLITTER_HPP_ #define _DISSECTION_SPLITTERS_METISSPLITTER_HPP_ bool MetisSplitter(unsigned dim, const int* ptRows, const int* indCols, unsigned& nbMaxLevels, unsigned minSize, int* permtab, int* peritab, int& nbDoms, int*& ptOnDomains, int*& sizeOfDomains, bool checkData, const bool verbose, FILE *fp); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/ScotchSplitter.cpp000664 000000 000000 00000026161 14037356732 026532 0ustar00rootroot000000 000000 /*! \file ScotchSplitter.cpp \brief to call grpah decomposer : SCOTCH \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
// // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. 
// // ============================================================== // == Implementation of splitter using Scotch library == // ============================================================== #include #include #include #include extern"C" { #include } #include "Compiler/DebugUtils.hpp" #include "Compiler/arithmetic.hpp" #include "Splitters/BitManipulations.hpp" #include "Splitters/ScotchSplitter.hpp" #include "Compiler/DissectionIO.hpp" #define TOO_SMALL 16 static unsigned compLevelOfDoms(int nbDoms, int maxLevels, const int* treetab, int* levels, int*& nbDomsPerLevel) { int iDebScan, iEndScan, iCurScan; // DBG_PRINT("Number of levels : %d\n",maxLevels); // DBG_PRINT("Number of doms : %d\n",nbDoms ); // Search the effective number of levels given by Scotch : int nbLvls = 0; for (unsigned i = 0; i < nbDoms; i++) { int locLvl = 1; int indDom = i; while (indDom != nbDoms-1) { locLvl ++; indDom = treetab[indDom]; } nbLvls = std::max(nbLvls, locLvl); } // DBG_PRINT("Effective level : %d\n",nbLvls); nbDomsPerLevel = new int[nbLvls]; for (int i = 0; i < nbDoms-1; i++) levels[i] = -1; assert(treetab[nbDoms-1]==-1); levels[nbDoms-1] = 0; iDebScan = 0; while (iDebScan < nbDoms) { // Search next domains without level of bissection computation : for ( ;(iDebScan=0); // Run child to father into the tree to set levels on some domains : // We assume than this domain is on a leaf of the tree (uppest layer) levels[iDebScan] = nbLvls-1;//maxLevels-1; // From this domain, we go through all ancestors until // we find a ancestor with defined level (we known here than // the root interface has his level defined at 0) iCurScan = iDebScan; iEndScan = treetab[iDebScan]; while (-1==levels[iEndScan]) { assert(iEndScan 0); iCurScan = iDebScan; while (iCurScan != iEndScan) { levels[iCurScan] -= diff; iCurScan = treetab[iCurScan]; } } }// if iDebScan= 0); 
CHECK((levels[i]%d))?m{type=h,rat=0.7,vert=100,low=h{pass=10},asc=b{width=3,bnd=f{bal=0.2},org=h{pass=10}f{bal=0.2}}}|m{type=h,rat=0.7,vert=100,low=h{pass=10},asc=b{width=3,bnd=f{bal=0.2},org=h{pass=10}f{bal=0.2}}};,ole=f{cmin=%d,cmax=%d,frat=0.05},ose=s},unc=n{sep=/(levl<%d)?(m{type=h,rat=0.7,vert=100,low=h{pass=10},asc=b{width=3,bnd=f{bal=0.2},org=h{pass=10}f{bal=0.2}}})|m{type=h,rat=0.7,vert=100,low=h{pass=10},asc=b{width=3,bnd=f{bal=0.2},org=h{pass=10}f{bal=0.2}}};,ole=f{cmin=%d,cmax=%d,frat=0.05},ose=s}}", nbLvls-1,2*minSize-1,minSize,dim,nbLvls-1,minSize,dim); // DBG_PRINT("Strategy string : %s\n", str_Strat); ierr = SCOTCH_stratGraphOrder(&ptStrat, str_Strat); delete [] str_Strat; CHECK(ierr==0, "Failed build graph ordering strategy for Scotch"); // Ordering with nested bisection : // TRACE("Split the graph\n"); int* rangtab = new int[dim+1]; int* treetab = new int[dim]; int nbSplitDoms; bool repeat = true; int lastCompleteLevel; int *levels, *nbDomsPerLevels; SCOTCH_randomReset(); while (repeat) { ierr = SCOTCH_graphOrder(&ptGraph, &ptStrat, (SCOTCH_Num*)loc2glob, (SCOTCH_Num*)glob2loc, (SCOTCH_Num*)&nbSplitDoms, (SCOTCH_Num*)rangtab, (SCOTCH_Num*)treetab); if (ierr) { diss_printf(verbose, fp, "Failed reordering sparse matrix graph !\n"); SCOTCH_stratExit(&ptStrat); SCOTCH_graphFree(&ptGraph); return false; } levels = new int[nbSplitDoms]; // int *nbDomsPerLevels;// = new int[nbLvls]; unsigned nbLvlsScotch= compLevelOfDoms(nbSplitDoms, nbLvls, treetab, levels, nbDomsPerLevels); /** Search last level where number of domains is a power of two */ lastCompleteLevel = 0; while ((lastCompleteLevel=lastCompleteLevel) { sz += rangtab[i+1]-rangtab[i]; i++; } if (sz+rangtab[i+1]-rangtab[i] <= TOO_SMALL) { flag_size_check = true; break; } //DBG_PRINT("Domain %d begin at %d\n",indDom+1,begDom); } // loop : i if (!flag_size_check) { repeat = false; break; } else { delete [] levels; delete [] nbDomsPerLevels; } } // while (repeat) ptOnDomains = new int[nbDoms+1]; 
sizeOfDomains = new int[nbDoms]; memset(sizeOfDomains, 0, nbDoms*sizeof(int)); int* indDomPerLevel = new int[lastCompleteLevel+1]; memset(indDomPerLevel,0,(lastCompleteLevel+1)*sizeof(int)); int begDom = 0; for (int i = 0; i < nbSplitDoms; i++) { int sz = 0; while (levels[i]>=lastCompleteLevel) { sz += rangtab[i+1]-rangtab[i]; i++; } int indDom = (1<. // // ============================================================== // == Definition of splitter function using Scotch library. == // ============================================================== #ifndef _DISSECTION_SPLITTERS_SCOTCHSPLITTER_HPP_ #define _DISSECTION_SPLITTERS_SCOTCHSPLITTER_HPP_ bool ScotchSplitter(unsigned dim, const int* ptRows, const int* indCols, unsigned& nbMaxLevels, unsigned minSize, int* permtab, int* peritab, int& nbDoms, int*& ptOnDomains, int*& sizeOfDomains, bool checkData, const bool verbose, FILE *fp); #endif FreeFem-sources-4.9/3rdparty/dissection/src/Splitters/Splitter.hpp000664 000000 000000 00000005737 14037356732 025401 0ustar00rootroot000000 000000 /*! \file Splitter.hpp \brief pointer for generic grpah decomposer \author Xavier Juvigny, ONERA \date Jul. 2nd 2012 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. 
// // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. // // Note that people who make modified versions of Dissection are not obligated // to grant this special exception for their modified versions; it is their // choice whether to do so. The GNU General Public License gives permission to // release a modified version without this exception; this exception also makes // it possible to release a modified version which carries forward this // exception. If you modify the Dissection-fortran interface, this exception // does not apply to your modified version of Dissection, and you must remove // this exception when you distribute your modified version. // // This exception is an additional permission under section 7 of the GNU // General Public License, version 3 ("GPLv3") // // Dissection is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Dissection. If not, see <http://www.gnu.org/licenses/>. 
// // ============================================================== // == Definition of pointer function for splitter tool == // ============================================================== #ifndef _DISSECTION_SPLITTERS_SPLITTER_HPP_ #define _DISSECTION_SPLITTERS_SPLITTER_HPP_ typedef bool (*splitter)(unsigned dim, const int* ptRows, const int* indCols, unsigned& nbMaxLevels, unsigned minSize, int* loc2glob, int* glob2loc, int& nbDoms, int*& ptOnDomains, int*& sizeOfDomains, bool checkData, const bool verbose, FILE *fp); #endif FreeFem-sources-4.9/3rdparty/dissection/src/freefem++-interface/000775 000000 000000 00000000000 14037356732 024552 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/freefem++-interface/examples++-load/000775 000000 000000 00000000000 14037356732 027433 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/freefem++-interface/examples++-load/BDD.edp000664 000000 000000 00000025351 14037356732 030524 0ustar00rootroot000000 000000 // Interative Substructuring metod with Neumann-Neuman / BDD // for tutorial by Japan SIAM, Tokyo, 4-5 Jun.2016, Atsushi Suzuki bool withmetis=true; if (withmetis) { load "metis"; } load "Dissection"; bool flagRAS=true; bool twolevel = true; int sizeoverlaps=1; // size off overlap int nn=10,mm=10; func bool SubdomainsPartitionUnity(mesh & Th, int nnpart, real[int] & partdof, int sizeoverlaps, mesh[int] & aTh, matrix[int] & Rih, matrix[int] & Dih) { fespace Vh(Th,P1); fespace Ph(Th,P0); mesh Thi=Th; fespace Vhi(Thi,P1); // FreeFEM trick, formal definition Vhi[int] pun(nnpart); // local fem functions Vh sun=0, unssd=0; Vh sun = 0, unssd = 0; Ph part; part[]=partdof; for(int i=0;i0,label=10,split=1); // mesh interfaces label 10 Rih[i]=interpolate(Vhi,Vh,inside=true); // Restriction operator : Vh to Vhi pun[i][] = 1.0; sun[] += Rih[i]'*pun[i][]; // ' } for(int i=0;i 1) } return true; } func int SkeltonIndex(mesh & Th, int nnpart, mesh[int] & Tha, matrix[int] & Rih, 
matrix[int] & Dih, int[int] &skelindex) { fespace Vh(Th, P1); int ndeg = 0; mesh Thi = Th; Vh sum = 0; int[int] lindex(Vh.ndof); for (int n = 0; n < nnpart; n++) { real[int] bi(Tha[n].nv); bi = 0.0; for (int k = 0; k < Tha[n].nbe; k++) { // over boundary elements if (Tha[n].be(k).label == 10) { bi[Tha[n].be(k)[0]] = 1.0; bi[Tha[n].be(k)[1]] = 1.0; } } real[int] di = Dih[n]*bi; Vh localone; localone[] = Rih[n]'*di; // ' sum[] +=localone[]; } for (int i = 0; i < Vh.ndof; i++) { if (abs(sum[][i] - 1.0) < 0.1) { lindex[ndeg++] = i; } } skelindex = lindex; return ndeg; } func int LocalIndexes(int nnpart, mesh[int] & Tha, int[int] &num0, int[int,int] &idx0, int[int] &num1, int[int,int] &idx1, int[int] &num2, int[int,int] &idx2) { for (int n = 0; n < nnpart; n++) { mesh Thi = Tha[n]; fespace Vhi(Thi, P1); Vhi u; u = 0.0; for (int k = 0; k < Tha[n].nbe; k++) { // over boundary elements if (Tha[n].be(k).label == 10) { u[][Tha[n].be(k)[0]] = 1.0; u[][Tha[n].be(k)[1]] = 1.0; } if (Tha[n].be(k).label == 1) { u[][Tha[n].be(k)[0]] = -1.0; u[][Tha[n].be(k)[1]] = -1.0; } } int itmp0 = 0; int itmp1 = 0; int itmp2 = 0; for (int i = 0; i < Vhi.ndof; i++) { if (u[][i] == 0.0) { idx0(n, itmp0++) = i; } else if (u[][i] == (-1.0)) { idx2(n, itmp2++) = i; } else { idx1(n, itmp1++) = i; } num0[n] = itmp0; num1[n] = itmp1; num2[n] = itmp2; } } } int withplot=3; mesh Thg=square(160,160); int[int] chlab=[1,1 ,2,1 ,3,1 ,4,1 ]; // Dirichlet b.c. 
segments w label 1 Thg=change(Thg,refe=chlab); int npart= nn*mm; fespace Ph(Thg,P0); fespace Vh(Thg,P1); Ph part; Vh sun=0,unssd=0; Ph xx=x,yy=y; part = int(xx*nn)*mm + int(yy*mm); //plot(part,wait=1); if (withmetis) { int[int] nupart(Thg.nt); metisdual(nupart,Thg,npart); for(int n=0;n1) { plot(part,fill=1,cmm="subdomains",wait=1); } mesh[int] aTh(npart); matrix[int] Rih(npart); matrix[int] Dih(npart); SubdomainsPartitionUnity(Thg, npart, part[], sizeoverlaps, aTh, Rih, Dih); int nskel; int[int] skelindexg(Vh.ndof); nskel = SkeltonIndex(Thg, npart, aTh, Rih, Dih, skelindexg); skelindexg.resize(nskel); int[int] interiorn(npart); int[int, int] interior(npart, Vh.ndof); // too large => max Vhi[n].ndof int[int] interfn(npart); int[int, int] interf(npart, Vh.ndof); // too large => max Vhi[n].nbe * 2 int[int] bcorign(npart); int[int, int] bcorig(npart, Vh.ndof); // too large => max Vhi[n].nbe * 2 LocalIndexes(npart, aTh, interiorn, interior, interfn, interf, bcorign, bcorig); Vh sum=0,fctone=1; for(int n=0; n < npart;n++) { Vh localone; real[int] bi = Rih[n]*fctone[]; // restriction to the local domain real[int] di = Dih[n]*bi; localone[] = Rih[n]'*di; // ' sum[] +=localone[]; cout << "# interior(" << n << ",) = " << interiorn[n] << " " << "# interf(" << n << ",) =" << interfn[n] << "# bcorig(" << n << ",) =" << bcorign[n] << endl; } sum = 0; for (int i = 0; i < nskel; i++) { sum[][skelindexg[i]] = 1.0; } cout << "DOF skelton " << nskel << endl; plot(sum, cmm="Skelton",wait=true); macro Grad(u) [dx(u),dy(u)] // EOM varf vBC(u,v)= on(1, u=1.0); // to treat original bc func f = 1.0; varf vexternal(u,v)=int2d(Thg)(f*v); Vh fh, uglobal; fh[] = vexternal(0, Vh); real tgv = 1e+30; real[int] bc = vBC(0, Vh, tgv=tgv); matrix[int] AAD(npart); matrix[int] AAN(npart); Vh[int] Z(npart); // coarse space : only used as set of arrays matrix E; for (int n = 0; n < npart; n++) { mesh Thi; fespace Vhi(Thi, P1); Thi = aTh[n]; Vhi u, v; if (bcorign[n] == 0) { // floating subdomain cout << n 
<< " : floating subdomain" << endl; varf aD(u,v)=int2d(Thi)(Grad(u)'*Grad(v)) + on(10, u=1.0); // ' varf aN(u,v)=int2d(Thi)(Grad(u)'*Grad(v)); // ' AAD[n] = aD(Vhi, Vhi, strategy=102,solver=sparsesolver,tolpivot=1.0e-16); AAN[n] = aN(Vhi, Vhi, strategy=102,solver=sparsesolver,tolpivot=1.0e-2); // Neumann matrix is singular } else { varf aD(u,v)=int2d(Thi)(Grad(u)'*Grad(v)) + on(1,10, u=1.0); // ' varf aN(u,v)=int2d(Thi)(Grad(u)'*Grad(v)) + on(1, u=1.0); // ' AAD[n] = aD(Vhi, Vhi, strategy=102,solver=sparsesolver,tolpivot=1.0e-16); AAN[n] = aN(Vhi, Vhi, strategy=102,solver=sparsesolver,tolpivot=1.0e-16); } } func real[int] opS(real[int] &v) { real[int] s(v.n); Vh vh, uh; vh = 0.0; for (int i = 0; i < nskel; i++) { vh[][skelindexg[i]] = v[i]; } uh = 0.0; for (int n = 0; n < npart; n++) { real[int] vi = Rih[n] * vh[]; for (int i = 0; i < interfn[n]; i++) { vi[interf(n, i)] *= tgv; // tgv to treat Dirichlet data } for (int i = 0; i < bcorign[n]; i++) { vi[bcorig(n, i)] = 0.0; // homogeneous Dirichlet data } real[int] ui = AAD[n]^-1 * vi; // solve local problem with Dirichlet data real[int] wi = AAN[n] * ui; uh[] += Rih[n]'*wi; //' } uh[] = bc ? 0.0 : uh[]; // set homogeneous Dirichlet on the original BC for (int i = 0; i < nskel; i++) { s[i] = uh[][skelindexg[i]]; } return s; } func real[int] opNN(real[int] &v) { real[int] s(v.n); Vh vh, uh; vh = 0.0; for (int i = 0; i < nskel; i++) { vh[][skelindexg[i]] = v[i]; } uh = 0.0; for (int n = 0; n < npart; n++) { real[int] vi = Rih[n] * vh[]; real[int] wi = Dih[n] * vi; for (int i = 0; i < interiorn[n]; i++) { wi[interior(n, i)] = 0.0; } for (int i = 0; i < bcorign[n]; i++) { wi[bcorig(n, i)] = 0.0; } vi = AAN[n]^-1 * wi; // solve local problem with Dirichlet data for (int i = 0; i < interiorn[n]; i++) { vi[interior(n, i)] = 0.0; } wi = Dih[n] * vi; uh[] += Rih[n]'*wi; //' } uh[] = bc ? 
0.0 : uh[]; // set homogeneous Dirichlet on the original BC for (int i = 0; i < nskel; i++) { s[i] = uh[][skelindexg[i]]; } return s; } func bool CoarseSpace(matrix &EE) { for (int n = 0; n < npart; n++) { Z[n] = 1.0; // Z is kept on the whole domain real[int] zit = Rih[n]*Z[n][]; real[int] zitemp = Dih[n]*zit; // with the partition of unity Z[n][] = Rih[n]'*zitemp; //' } real[int,int] Ef(npart,npart); // real[int] zz(nskel), vv(nskel); Vh vh; for(int m=0;m upper for(int n=0;n #include using namespace std; #include "rgraph.hpp" #include "error.hpp" #include "AFunction.hpp" #include "ff++.hpp" #include "MatriceCreuse_tpl.hpp" #include #include "Dissection.hpp" template int generate_CSR(list* ind_cols_tmp, list* val_tmp, int nrow, int nrow1, int *old2new, int *new2old, int *ptrow, int *ind_col, T* val) { int nnz = 0; // ind_cols_tmp = new list[nrow]; // val_tmp = new list[nrow]; for (int i = 0; i < nrow1; i++) { const int ii = new2old[i]; bool diag_flag = false; for (int k = ptrow[ii]; k < ptrow[ii + 1]; k++) { const int j = old2new[ind_col[k]]; if (i == j) { diag_flag = true; } // fprintf(stderr, "%d %d -> %d %d \n", i0, j0, ii, jj); if (ind_cols_tmp[i].empty()) { ind_cols_tmp[i].push_back(j); val_tmp[i].push_back(val[k]); nnz++; } else { if (ind_cols_tmp[i].back() < j) { ind_cols_tmp[i].push_back(j); val_tmp[i].push_back(val[k]); nnz++; } else { typename list::iterator iv = val_tmp[i].begin(); list::iterator it = ind_cols_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv) { if (*it == j) { fprintf(stderr, "already exits? 
(%d %d)\n", ii, j); break; } if (*it > j) { ind_cols_tmp[i].insert(it, j); val_tmp[i].insert(iv, val[k]); nnz++; break; } } } } } // loop : k if (!diag_flag) { fprintf(stderr, "%s %d : adding zero-entry %d\n", __FILE__, __LINE__, i); typename list::iterator iv = val_tmp[i].begin(); list::iterator it = ind_cols_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv) { if ((*it) > i) { ind_cols_tmp[i].insert(it, i); val_tmp[i].insert(iv, 0.0); nnz++; break; } } // loop : iv } // if (!diag_flag) } // loop : i return nnz; } template int generate_CSR(list* ind_cols_tmp, list* val_tmp, int nrow, int nrow1, int *old2new, int *new2old, int *ptrow, int *ind_col, double * val); template int generate_CSR(list* ind_cols_tmp, list* val_tmp, int nrow, int nrow1, int *old2new, int *new2old, int *ptrow, int *ind_col, Complex * val); template int copy_CSR(int *ptrows, int *indcols, T* coefs, int nrow, list* ind_cols_tmp, list* val_tmp, int lower, int upper) { ptrows[0] = 0; int k = 0; for (int i = 0; i < nrow; i++) { list::iterator it = ind_cols_tmp[i].begin(); typename list::iterator iv = val_tmp[i].begin(); for ( ; it != ind_cols_tmp[i].end(); ++it, ++iv) { if ((*it) >= lower && (*it) < upper) { indcols[k] = *it; coefs[k] = *iv; k++; } } ptrows[i + 1] = k; } // loop : i return k; } template int copy_CSR(int *ptrows, int *indcols, double* coefs, int nrow, list* ind_cols_tmp, list* val_tmp, int lower, int upper); template int copy_CSR(int *ptrows, int *indcols, Complex* coefs, int nrow, list* ind_cols_tmp, list* val_tmp, int lower, int upper); template class SolveDissection: public MatriceMorse::VirtualSolver { double _tgv; double _pivot; int _dim0; int *_new2old; int *_ptrows0; int *_indcols0; double *_coefs0; int *_ptrows1; int *_indcols1; double *_coefs1; double *_xtmp; uint64_t *_dslv; public: SolveDissection(const MatriceMorse &A, int strategy_, double ttgv, double pivot) : _tgv(ttgv), _pivot(pivot) { const int dim = A.n; int num_threads = 1; if 
(getenv("DISSECTION_NUM_THREADS")) { sscanf(getenv("DISSECTION_NUM_THREADS"), "%d", &num_threads); fprintf(stderr, "environmental variable DISSECTION_NUM_THREADS = %d\n", num_threads); } _dslv = new uint64_t; diss_init(*_dslv, 0, 1, num_threads, ((verbosity > 3) ? 1 : 0)); // real matrix, with double precision factorization, # of threads const int decomposer = (strategy_ % 100) / 10; const int scaling = strategy_ == 0 ? 2 : strategy_ % 10; // sym + lower + isWhole = 1 + 2 + 4 const int sym = (strategy_ / 100) ? 5 : 0; int *old2new = new int[dim]; _new2old = new int[dim]; { int m = 0; for (int i = 0; i < dim; i++) { for (int k = (A.lg)[i]; k < (A.lg)[i + 1]; k++) { const int j = (A.cl)[k]; if (i == j) { if ((A.a)[k] != ttgv) { _new2old[m] = i; m++; } break; } } } _dim0 = m; for (int i = 0; i < dim; i++) { for (int k = (A.lg)[i]; k < (A.lg)[i + 1]; k++) { const int j = (A.cl)[k]; if (i == j) { if ((A.a)[k] == ttgv) { _new2old[m] = i; m++; } break; } } } if (verbosity > 10) { cout << "m = " << m << " dim = " << dim << endl; } } for (int i = 0; i < dim; i++) { old2new[_new2old[i]] = i; } // _xtmp = new double[_dim0]; int nnz; list *indcols_tmp = new list[_dim0]; list *coefs_tmp = new list[_dim0]; nnz = generate_CSR(indcols_tmp, coefs_tmp, dim, _dim0, old2new, _new2old, (int *)A.lg, (int *)A.cl, (double *)A.a); delete [] old2new; if (verbosity > 10) { cout << "nnz " << nnz << endl; } _ptrows0 = new int[_dim0 + 1]; _ptrows1 = new int[_dim0 + 1]; int *indcol_tmp = new int[nnz]; double *coef_tmp = new double[nnz]; int nnz1; nnz1 = copy_CSR(_ptrows0, indcol_tmp, coef_tmp, _dim0, indcols_tmp, coefs_tmp, 0, _dim0); if (verbosity > 10) { cout << "nnz1 " << nnz1 << endl; } _indcols0 = new int[nnz1]; _coefs0 = new double[nnz1]; for (int i = 0; i < nnz1; i++) { _indcols0[i] = indcol_tmp[i]; _coefs0[i] = coef_tmp[i]; } nnz1 = copy_CSR(_ptrows1, indcol_tmp, coef_tmp, _dim0, indcols_tmp, coefs_tmp, _dim0, dim); if (verbosity > 10) { cout << "nnz1 " << nnz1 << endl; } _indcols1 = new 
int[nnz1]; _coefs1 = new double[nnz1]; for (int i = 0; i < nnz1; i++) { _indcols1[i] = indcol_tmp[i]; _coefs1[i] = coef_tmp[i]; } delete [] indcols_tmp; delete [] coefs_tmp; delete [] indcol_tmp; delete [] coef_tmp; diss_s_fact(*_dslv, _dim0, _ptrows0, _indcols0, sym, decomposer); const int indefinite_flag = 1; const double eps_pivot = _pivot == (-1.0) ? 1.0e-2 : _pivot; diss_n_fact(*_dslv, _coefs0, scaling, eps_pivot, indefinite_flag); int n0; diss_get_kern_dim(*_dslv, &n0); if (n0 > 0) { cout << "the matrix with size = " << _dim0 << " is singular with " << n0 << " dimenisonal kernel." << endl; } } void Solver(const MatriceMorse &A,KN_ &x,const KN_ &b) const { ffassert (&x[0] != &b[0]); int dim = A.n; // #x_1[] = _dim0; x_1 = b_1 - A_12 x_2 for (int i = 0; i < _dim0; i++) { _xtmp[i] = b[_new2old[i]]; } for (int i = _dim0; i < dim; i++) { const int ii = _new2old[i]; x[ii] = b[ii] / _tgv; } for (int i = 0; i < _dim0; i++) { for (int k = _ptrows1[i]; k < _ptrows1[i + 1]; k++) { _xtmp[i] -= _coefs1[k] * x[_new2old[_indcols1[k]]]; } } const int projection = 1; const int transpose = 0; diss_solve_1(*_dslv, _xtmp, projection, transpose); for (int i = 0; i < _dim0; i++) { x[_new2old[i]] = _xtmp[i]; } } ~SolveDissection() { diss_free(*_dslv); delete _dslv; delete [] _new2old; delete [] _ptrows0; delete [] _indcols0; delete [] _coefs0; delete [] _ptrows1; delete [] _indcols1; delete [] _coefs1; delete [] _xtmp; } void addMatMul(const KN_ & x, KN_ & Ax) const { ffassert(x.N()==Ax.N()); Ax += (const MatriceMorse &) (*this) * x; } uint64_t *dslv() { return _dslv; } int dim0() const { return _dim0; } int * new2old() const { return _new2old; } }; template<> class SolveDissection : public MatriceMorse::VirtualSolver { // double eps; // mutable double epsr; double _tgv; double _pivot; //, _pivot_sym; int _dim0; int *_new2old; int *_ptrows0; int *_indcols0; Complex *_coefs0; int *_ptrows1; int *_indcols1; Complex *_coefs1; Complex *_xtmp; uint64_t *_dslv; public: 
SolveDissection(const MatriceMorse &A, int strategy_, double ttgv, double pivot) : _tgv(ttgv), _pivot(pivot) { const int dim = A.n; int num_threads = 1; if (getenv("DISSECTION_NUM_THREADS")) { sscanf(getenv("DISSECTION_NUM_THREADS"), "%d", &num_threads); fprintf(stderr, "environmental variable DISSECTION_NUM_THREADS = %d\n", num_threads); } _dslv = new uint64_t; diss_init(*_dslv, 0, 2, num_threads, ((verbosity > 3) ? 1 : 0)); // // complex matrix, with double precision factorization, # of threads const int decomposer = (strategy_ % 100) / 10; const int scaling = strategy_ == 0 ? 2 : strategy_ % 10; // sym + lower + isWhole = 1 + 2 + 4 const int sym = (strategy_ / 100) ? 5 : 0; int *old2new = new int[dim]; _new2old = new int[dim]; { int m = 0; for (int i = 0; i < dim; i++) { for (int k = (A.lg)[i]; k < (A.lg)[i + 1]; k++) { const int j = (A.cl)[k]; if (i == j) { if ((A.a)[k] != ttgv) { _new2old[m] = i; m++; } break; } } } _dim0 = m; for (int i = 0; i < dim; i++) { for (int k = (A.lg)[i]; k < (A.lg)[i + 1]; k++) { const int j = (A.cl)[k]; if (i == j) { if ((A.a)[k] == ttgv) { _new2old[m] = i; m++; } break; } } } if (verbosity > 10) { cout << "m = " << m << " dim = " << dim << endl; } } for (int i = 0; i < dim; i++) { old2new[_new2old[i]] = i; } // _xtmp = new Complex[_dim0]; int nnz; list *indcols_tmp = new list[_dim0]; list *coefs_tmp = new list[_dim0]; nnz = generate_CSR(indcols_tmp, coefs_tmp, dim, _dim0, old2new, _new2old, (int *)A.lg, (int *)A.cl, (Complex *)A.a); delete [] old2new; if (verbosity > 10) { cout << "nnz " << nnz << endl; } _ptrows0 = new int[_dim0 + 1]; _ptrows1 = new int[_dim0 + 1]; int *indcol_tmp = new int[nnz]; Complex *coef_tmp = new Complex[nnz]; int nnz1; nnz1 = copy_CSR(_ptrows0, indcol_tmp, coef_tmp, _dim0, indcols_tmp, coefs_tmp, 0, _dim0); if (verbosity > 10) { cout << "nnz1 " << nnz1 << endl; } _indcols0 = new int[nnz1]; _coefs0 = new Complex[nnz1]; for (int i = 0; i < nnz1; i++) { _indcols0[i] = indcol_tmp[i]; _coefs0[i] = coef_tmp[i]; 
} nnz1 = copy_CSR(_ptrows1, indcol_tmp, coef_tmp, _dim0, indcols_tmp, coefs_tmp, _dim0, dim); if (verbosity > 10) { cout << "nnz1 " << nnz1 << endl; } _indcols1 = new int[nnz1]; _coefs1 = new Complex[nnz1]; for (int i = 0; i < nnz1; i++) { _indcols1[i] = indcol_tmp[i]; _coefs1[i] = coef_tmp[i]; } delete [] indcols_tmp; delete [] coefs_tmp; delete [] indcol_tmp; delete [] coef_tmp; diss_s_fact(*_dslv, _dim0, _ptrows0, _indcols0, sym, decomposer); const int indefinite_flag = 1; const double eps_pivot = _pivot == (-1.0) ? 1.0e-2 : _pivot; diss_n_fact(*_dslv, (double *)_coefs0, scaling, eps_pivot, indefinite_flag); int n0; diss_get_kern_dim(*_dslv, &n0); if (n0 > 0) { cout << "the matrix with size = " << _dim0 << " is singular with " << n0 << " dimenisonal kernel." << endl; } } void Solver(const MatriceMorse &A,KN_ &x,const KN_ &b) const { ffassert (&x[0] != &b[0]); int dim = A.n; // #x_1[] = _dim0; x_1 = b_1 - A_12 x_2 for (int i = 0; i < _dim0; i++) { _xtmp[i] = b[_new2old[i]]; } for (int i = _dim0; i < dim; i++) { const int ii = _new2old[i]; x[ii] = b[ii] / _tgv; } for (int i = 0; i < _dim0; i++) { for (int k = _ptrows1[i]; k < _ptrows1[i + 1]; k++) { _xtmp[i] -= _coefs1[k] * x[_new2old[_indcols1[k]]]; } } const int projection = 1; const int transpose = 0; diss_solve_1(*_dslv, (double *)_xtmp, projection, transpose); for (int i = 0; i < _dim0; i++) { x[_new2old[i]] = _xtmp[i]; } } ~SolveDissection() { diss_free(*_dslv); delete _dslv; } void addMatMul(const KN_ & x, KN_ & Ax) const { ffassert(x.N()==Ax.N()); Ax += (const MatriceMorse &) (*this) * x; } uint64_t *dslv() { return _dslv; } int dim0() const { return _dim0; } int * new2old() const { return _new2old; } }; inline MatriceMorse::VirtualSolver * BuildSolverIDissection(DCL_ARG_SPARSE_SOLVER(double,A)) { if( verbosity>9) cout << " BuildSolverDissection" << endl; return new SolveDissection(*A, ds.strategy,ds.tgv,ds.tol_pivot); } inline MatriceMorse::VirtualSolver * 
BuildSolverIDissection(DCL_ARG_SPARSE_SOLVER(Complex,A)) { if( verbosity>9) cout << " BuildSolverDissection" << endl; return new SolveDissection(*A, ds.strategy, ds.tgv,ds.tol_pivot); } // the 2 default sparse solver double and complex DefSparseSolver::SparseMatSolver SparseMatSolver_R ; DefSparseSolver::SparseMatSolver SparseMatSolver_C; DefSparseSolverSym::SparseMatSolver SparseMatSolverSym_R ; DefSparseSolverSym::SparseMatSolver SparseMatSolverSym_C; // the default probleme solver TypeSolveMat::TSolveMat TypeSolveMatdefaultvalue=TypeSolveMat::defaultvalue; template class dissectionkernel_Op : public E_F0mps { public: Expression mat; static const int n_name_param = 3; static basicAC_F0::name_and_type name_param[]; Expression nargs[n_name_param]; dissectionkernel_Op(const basicAC_F0& args, Expression param1) : mat(param1) { args.SetNameParam(n_name_param, name_param, nargs); } AnyType operator()(Stack stack) const; }; template basicAC_F0::name_and_type dissectionkernel_Op::name_param[] = { {"kerneldim", &typeid(long *)}, {"kerneln", &typeid(KNM*)}, {"kernelt", &typeid(KNM*)} }; template class dissectionkernel : public OneOperator { public: dissectionkernel() : OneOperator(atype(), atype*>()) {} E_F0* code(const basicAC_F0& args) const { return new dissectionkernel_Op(args, t[0]->CastTo(args[0])); } }; #if 0 template AnyType dissectionkernel_Op::operator()(Stack stack) const { MatriceMorse* mA = static_cast*>(&(*GetAny*>((*mat)(stack))->A)); // SolveDissection* mdissection = dynamic_cast *>(mA); const SolveDissection* mdissection; mA->GetSolver(mdissection); if (mdissection) { uint64_t *dslv = ((SolveDissection*)mdissection)->dslv(); int n0; long *kerndim = nargs[0] ? GetAny((*nargs[0])(stack)) : 0; if (kerndim) { diss_get_kern_dim(*dslv, &n0); *kerndim = n0; } KNM* ptKerN = nargs[1] ? 
GetAny*>((*nargs[1])(stack)) : 0; if(ptKerN) { diss_get_kern_dim(*dslv, &n0); if (n0 > 0) { const int dim = mA->n; const int dim0 = mdissection->dim0(); const int *new2old = mdissection->new2old(); ptKerN->resize(mA->n, n0); double *kernv = new double[n0 * dim]; diss_get_kern_vecs(*dslv, kernv); for (int k = 0; k < n0; k++) { for (int i = 0; i < dim0; i++) { (*ptKerN)(new2old[i], k) = kernv[i + k * dim0]; } for (int i = dim0; i < dim; i++) { (*ptKerN)(new2old[i], k) = 0.0; } } delete [] kernv; } // if (n0 > 0) } KNM* ptKerT = nargs[2] ? GetAny*>((*nargs[2])(stack)) : 0; if(ptKerT) { const int dim = mA->n; const int dim0 = mdissection->dim0(); const int *new2old = mdissection->new2old(); ptKerT->resize(mA->n, n0); double *kernv = new double[n0 * dim]; diss_get_kernt_vecs(*dslv, kernv); for (int k = 0; k < n0; k++) { for (int i = 0; i < dim0; i++) { (*ptKerT)(new2old[i], k) = kernv[i + k * dim0]; } for (int i = dim0; i < dim; i++) { (*ptKerT)(new2old[i], k) = 0.0; } } delete [] kernv; } return static_cast(n0); } // else { // error mddisection is not casted! return 0L; } } #endif // -- bool SetDissection() { if(verbosity>1) cout << " SetDefault sparse solver to Dissection" << endl; DefSparseSolver::solver =BuildSolverIDissection; DefSparseSolver::solver =BuildSolverIDissection; DefSparseSolverSym::solver =BuildSolverIDissection; DefSparseSolverSym::solver =BuildSolverIDissection; TypeSolveMat::defaultvalue =TypeSolveMatdefaultvalue; return true; } void init22() { SparseMatSolver_R= DefSparseSolver::solver; SparseMatSolver_C= DefSparseSolver::solver; SparseMatSolverSym_R= DefSparseSolverSym::solver; SparseMatSolverSym_C= DefSparseSolverSym::solver; if(verbosity>1) cout << "\n Add: Dissection: defaultsolver defaultsolverDissection" << endl; TypeSolveMat::defaultvalue=TypeSolveMat::SparseSolver; DefSparseSolver::solver =BuildSolverIDissection; DefSparseSolver::solver =BuildSolverIDissection; if(! 
Global.Find("defaulttoDissection").NotNull() ) Global.Add("defaulttoDissection","(",new OneOperator0(SetDissection)); #if 0 Global.Add("dissectionkernel", "(", new dissectionkernel); Global.Add("dissectionkernel", "(", new dissectionkernel); #endif } LOADFUNC(init22); FreeFem-sources-4.9/3rdparty/dissection/src/freefem++-interface/examples++-load/Dissection.hpp000664 000000 000000 00000013631 14037356732 032254 0ustar00rootroot000000 000000 /*! \file Dissection.hpp \brief Fortran style interface named as Dissectino-fortran interface \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions \date Mar. 30th 2012 \date Jul. 12th 2015 \date Nov. 30th 2016 */ // This file is part of Dissection // // Dissection is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Linking Dissection statically or dynamically with other modules is making // a combined work based on Disssection. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // // As a special exception, the copyright holders of Dissection give you // permission to combine Dissection program with free software programs or // libraries that are released under the GNU LGPL and with independent modules // that communicate with Dissection solely through the Dissection-fortran // interface. You may copy and distribute such a system following the terms of // the GNU GPL for Dissection and the licenses of the other code concerned, // provided that you include the source code of that other code when and as // the GNU GPL requires distribution of source code and provided that you do // not modify the Dissection-fortran interface. 
//
// Note that people who make modified versions of Dissection are not obligated
// to grant this special exception for their modified versions; it is their
// choice whether to do so. The GNU General Public License gives permission to
// release a modified version without this exception; this exception also makes
// it possible to release a modified version which carries forward this
// exception. If you modify the Dissection-fortran interface, this exception
// does not apply to your modified version of Dissection, and you must remove
// this exception when you distribute your modified version.
//
// This exception is an additional permission under section 7 of the GNU
// General Public License, version 3 ("GPLv3")
//
// Dissection is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Dissection. If not, see <http://www.gnu.org/licenses/>.
// #ifndef _INTERFACE_CPPAPI_HPP # define _INTERFACE_CPPAPI_HPP #define _COMPILER_OPTIONCOMPILER_H #define FORTRAN_DECL_WL(x_windows,x_linux) x_linux #define FORTRAN_DECL(x) x##_ #define DISSECTION_API #ifdef BLAS_MKL #include #endif #include # include #define DISSECTION_REAL_MATRIX 1 #define DISSECTION_COMPLEX_MATRIX 2 #define DISS_VERSION FORTRAN_DECL_WL(DISS_VERSION, diss_version) #define DISS_INIT FORTRAN_DECL_WL(DISS_INIT, diss_init) #define DISS_FREE FORTRAN_DECL_WL(DISS_FREE, diss_free) #define DISS_NUMERIC_FREE FORTRAN_DECL_WL(DISS_NUMERIC_FREE, diss_numeric_free) #define DISS_S_FACT FORTRAN_DECL_WL(DISS_S_FACT, diss_s_fact) #define DISS_N_FACT FORTRAN_DECL_WL(DISS_N_FACT, diss_n_fact) #define DISS_GET_COLORS FORTRAN_DECL_WL(DISS_GET_COLORS, diss_get_colors) #define DISS_GET_KERN_DIM FORTRAN_DECL_WL(DISS_GET_KERN_DIM, diss_get_kern_dim) #define DISS_GET_NULLPIVOTS FORTRAN_DECL_WL(DISS_GET_NULLPIVOTS, diss_get_nullpivots) #define DISS_GET_SMALLPIVOTS FORTRAN_DECL_WL(DISS_GET_SMALLPIVOTS, diss_get_smallpivots) #define DISS_GET_KERN_VECS FORTRAN_DECL_WL(DISS_GET_KERN_VECS, diss_get_kern_vecs) #define DISS_GET_KERNT_VECS FORTRAN_DECL_WL(DISS_GET_KERNT_VECS, diss_get_kernt_vecs) #define DISS_PROJECT FORTRAN_DECL_WL(DISS_PROJECT, diss_project) #define DISS_SOLVE_1 FORTRAN_DECL_WL(DISS_SOLVE_1, diss_solve_1) #define DISS_SOLVE_N FORTRAN_DECL_WL(DISS_SOLVE_N, diss_solve_n) #define DISS_MATRIX_PRODUCT FORTRAN_DECL_WL(DISS_MATRIX_PRODUCT, diss_matrix_product) #define COMPUTE_DIM_KERN FORTRAN_DECL_WL(COMPUTE_DIM_KERN, compute_dim_kern) extern "C" { DISSECTION_API void DISS_VERSION(int *versn, int *reles, int *patch); DISSECTION_API void DISS_INIT(uint64_t &dslv_, const int &called, const int &real_or_complex, const int &nthreads, const int &verbose); DISSECTION_API void DISS_FREE(uint64_t &dslv_); DISSECTION_API void DISS_NUMERIC_FREE(uint64_t &dslv_); DISSECTION_API void DISS_S_FACT(uint64_t &dslv_, const int &dim, const int *ptRows, const int *indCols, const int 
&sym, const int &decomposer); DISSECTION_API void DISS_N_FACT(uint64_t &dslv_, const double *coefs, const int &scaling, const double &eps_pivot, const int &indefinite_flag); DISSECTION_API void DISS_GET_COLORS(uint64_t &dslv_, int *n); DISSECTION_API void DISS_GET_KERN_DIM(uint64_t &dslv_, int *n0); DISSECTION_API void DISS_GET_NULLPIVOTS(uint64_t &dslv_, int *pivots); DISSECTION_API void DISS_GET_SMALLPIVOTS(uint64_t &dslv_, const int &n, int *pivots); DISSECTION_API void DISS_GET_KERN_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_GET_KERNT_VECS(uint64_t &dslv_, double *vec); DISSECTION_API void DISS_PROJECT(uint64_t &dslv_, double *x); DISSECTION_API void DISS_SOLVE_1(uint64_t &dslv_, double *x, const int &projection, const int &trans); DISSECTION_API void DISS_SOLVE_N(uint64_t &dslv_, double *x, const int &nrhs, const int &projection, const int &trans); DISSECTION_API void DISS_MATRIX_PRODUCT(uint64_t &dslv_, const double* x, double* y); DISSECTION_API void COMPUTE_DIM_KERN(int* flag, int* n0, double *a_ini, const int &n, const int &dim_ag, const double &eps, const double &machine_eps0, const int &flag_sym, const int *print_cntrl); } #endif FreeFem-sources-4.9/3rdparty/dissection/src/freefem++-interface/examples++-load/LaplaceRT.edp000664 000000 000000 00000002373 14037356732 031741 0ustar00rootroot000000 000000 /* Solving the following Poisson problem Find $p$, such that; $ - \Delta p = f $ on $\Omega$, $ dp / dn = (g1d,g2d). n $ on $\Gamma_{123}$ $ p = gd $ on $\Gamma_{1}$ with de Mixte finite element formulation Find $p\in L^2(\Omega) and $u\in H(div) $ such than u - Grad p = 0 - div u = f $ u. n = (g1d,g2d). 
n $ on $\Gamma_{123}$ $ p = gd $ on $\Gamma_{1}$ the variationnel form is: $\forall v\in H(div)$; $v.n = 0$ on $\Gamma_{4}\} $: $ \int_\Omega u v + p div v -\int_{\Gamma_{123}} gd* v.n = 0 $ $\forall q\in L^2$: $ -\int_\Omega q div u = \int_Omega f q $ and $ u.n = (g1n,g2n).n$ on $\Gamma_4$ */ load "Dissection" defaulttoDissection(); mesh Th=square(100,100); fespace Vh(Th,RT0); fespace Ph(Th,P0); func gd = 1.; func g1n = 1.; func g2n = 1.; func f = 1.; Vh [u1,u2],[v1,v2]; Ph p,q; problem laplaceMixte([u1,u2,p],[v1,v2,q],solver=sparsesolver,strategy=102,tolpivot=1.e-2) = int2d(Th)( p*q*0e-10+ u1*v1 + u2*v2 + p*(dx(v1)+dy(v2)) + (dx(u1)+dy(u2))*q ) + int2d(Th) ( f*q) - int1d(Th,1,2,3)( gd*(v1*N.x +v2*N.y)) // int on gamma + on(4,u1=g1n,u2=g2n); laplaceMixte; plot([u1,u2],coef=0.1,wait=1,ps="lapRTuv.eps",value=true); plot(p,fill=1,wait=1,ps="laRTp.eps",value=true); 3rdparty/dissection/src/freefem++-interface/examples++-load/cavityNewtow.Dissection.edp000664 000000 000000 00000011646 14037356732 034664 0ustar00rootroot000000 000000 FreeFem-sources-4.9/* Incompressible Navier Stokes with Taylor-Hood Finite element No linearity : Newton methode continuation on Reynols Number Mesh adaptation */ //load "UMFPACK64" //defaulttoUMFPACK64(); load "Dissection" defaulttoDissection(); verbosity=5; real reymax = 6400; // ok < 125000 mesh Th=square(60,60); fespace Xh(Th,P2); fespace Mh(Th,P1); fespace XXMh(Th,[P2,P2,P1]); XXMh [u1,u2,p]; XXMh [v1,v2,q]; macro div(u1,u2) (dx(u1)+dy(u2))// macro grad(u1,u2) [dx(u1),dy(u2)]// macro ugrad(u1,u2,v) (u1*dx(v)+u2*dy(v)) // macro Ugrad(u1,u2,v1,v2) [ugrad(u1,u2,v1),ugrad(u1,u2,v2)]// solve Stokes ([u1,u2,p],[v1,v2,q],solver=sparsesolver,strategy=102,tolpivot=1.0e-2) = int2d(Th)( ( dx(u1)*dx(v1) + dy(u1)*dy(v1) + dx(u2)*dx(v2) + dy(u2)*dy(v2) ) - p*div(v1,v2)-q*div(u1,u2) // - p*q*(0.000001) ) + on(3,u1=4*x*(1-x),u2=0) + on(1,2,4,u1=0,u2=0); Xh uu1=u1,uu2=u2; //plot(coef=0.2,cmm=" [u1,u2] et p ",p,[uu1,uu2]); Xh psi,phi; solve 
streamlines(psi,phi,solver=sparsesolver,strategy=101,tolpivot=1.0e-16) = int2d(Th)( dx(psi)*dx(phi) + dy(psi)*dy(phi)) + int2d(Th)( -phi*(dy(u1)-dx(u2))) + on(1,2,3,4,psi=0); // default tolpivot=1.0e-16 without kernel detection // strategy=101 <=> symmetric = 100 + diagonal scaling = 1 int i=0; real nu=1./100.; real dt=0.1; real alpha=1/dt; /* NL varf vNS ([u1,u2,p],[v1,v2,q],solver=Crout,init=i) = int2d(Th)( alpha*( u1*v1 + u2*v2) + nu * ( dx(u1)*dx(v1) + dy(u1)*dy(v1) + dx(u2)*dx(v2) + dy(u2)*dy(v2) ) + p*q*(0.000001) + p*dx(v1)+ p*dy(v2) + dx(u1)*q+ dy(u2)*q + Ugrad(u1,u2,u1,u2)'*[v1,v2] ) + on(3,u1=1,u2=0) + on(1,2,4,u1=0,u2=0) */ XXMh [up1,up2,pp]; varf vDNS ([u1,u2,p],[v1,v2,q]) = int2d(Th)( + nu * ( dx(u1)*dx(v1) + dy(u1)*dy(v1) + dx(u2)*dx(v2) + dy(u2)*dy(v2) ) // - p*q*(0.000001) + p*dx(v1)+ p*dy(v2) - dx(u1)*q- dy(u2)*q + Ugrad(u1,u2,up1,up2)'*[v1,v2] + Ugrad(up1,up2,u1,u2)'*[v1,v2] ) + on(1,2,3,4,u1=1,u2=1) // u1 = 0, u2 = 0 ; varf vNS ([u1,u2,p],[v1,v2,q]) = int2d(Th)( + nu * ( dx(up1)*dx(v1) + dy(up1)*dy(v1) + dx(up2)*dx(v2) + dy(up2)*dy(v2) ) // - pp*q*(0.000001) + pp*dx(v1)+ pp*dy(v2) - dx(up1)*q- dy(up2)*q + Ugrad(up1,up2,up1,up2)'*[v1,v2]//' ) + on(1,2,3,4,u1=0,u2=0) //homogeneous Dirichlet bc. 
for linear not affine ; for(real re=100;re<=reymax;re *=2) { real lerr=0.04; if(re>8000) lerr=0.01; if(re>10000) lerr=0.005; for(int step=0;step<2;step++) { Th=adaptmesh(Th,[u1,u2],p,err=lerr,nbvx=100000); //plot(Th,wait=0,ps="mesh."+re+".eps"); [u1,u2,p]=[u1,u2,p]; [up1,up2,pp]=[up1,up2,pp]; for (i=0;i<=20;i++) { nu =1./re; up1[]=u1[]; real walltime0=time(); real clocktime0=clock(); matrix Ans=vDNS(XXMh,XXMh,tgv=1e+30); real walltime1=time(); real clocktime1=clock(); real[int] b = vNS(0,XXMh,tgv=1e+30); real walltime2=time(); real clocktime2=clock(); real[int,int] kernn(1,1); real[int,int] kernt(1,1); int kerndim; set(Ans,solver=sparsesolver,strategy=2, tolpivot=1.0e-2); // default scaling startegy = KKT_SCALE dissectionkernel(Ans,kerneldim=kerndim,kerneln=kernn,kernelt=kernt); cout << "kern dim = " << kerndim << endl; real[int] kern(XXMh.ndof); kern = kernn(:,0); real[int] bBC = vDNS(0, XXMh, tgv=1.0); //Ans.diag; real[int] vv(XXMh.ndof); vv = Ans * kern; vv = bBC ? 0.0 : vv; cout << "orthogonality of kernel vector of A " << vv.max << endl; kern = kernt(:,0); vv = Ans' * kern; vv = bBC ? 
0.0 : vv; cout << "orthogonality of kernel vector of A^T " << vv.max << endl; XXMh [w1, w2, r]; w1[] = kern; plot([w1, w2], cmm="kernel vector",value=true,wait=1); plot(r, cmm="kernel pressure",value=true,wait=1); real walltime3=time(); real clocktime3=clock(); real[int] w = Ans^-1*b; real walltime4=time(); real clocktime4=clock(); u1[] -= w; cout << " iter = "<< i << " " << w.l2 << " rey = " << re << endl; /* cout << " time " << walltime1 - walltime0 << " / " << clocktime1 - clocktime0 << " : " << walltime2 - walltime1 << " / " << clocktime2 - clocktime1 << " : " << walltime3 - walltime2 << " / " << clocktime3 - clocktime2 << " : " << walltime4 - walltime3 << " / " << clocktime4 - clocktime3 << endl; */ if(w.l2<1e-6) break; uu1=u1;uu2=u2; } ; } uu1=u1;uu2=u2; // streamlines; plot(Th,wait=0); plot(coef=0.2,cmm="rey="+re+" [u1,u2]",[uu1,uu2],wait=0,nbiso=20,ps="cavity-"+re+".eps"); } FreeFem-sources-4.9/3rdparty/dissection/src/qd/000775 000000 000000 00000000000 14037356732 021461 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/dissection/src/qd/bits.cpp000664 000000 000000 00000003325 14037356732 023131 0ustar00rootroot000000 000000 /* * src/bits.cc * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2001 * * Defines various routines to get / set bits of a IEEE floating point * number. This used by the library for debugging purposes. 
*/ #include #include #include #include #include "config.h" #include #include #ifdef HAVE_IEEEFP_H #include #endif using std::setw; int get_double_expn(double x) { if (x == 0.0) return INT_MIN; if (QD_ISINF(x) || QD_ISNAN(x)) return INT_MAX; double y = std::abs(x); int i = 0; if (y < 1.0) { while (y < 1.0) { y *= 2.0; i++; } return -i; } else if (y >= 2.0) { while (y >= 2.0) { y *= 0.5; i++; } return i; } return 0; } void print_double_info(std::ostream &os, double x) { #ifndef SX_ACE std::streamsize old_prec = os.precision(19); std::ios_base::fmtflags old_flags = os.flags(); os << std::scientific; os << setw(27) << x << ' '; if (QD_ISNAN(x) || QD_ISINF(x) || (x == 0.0)) { os << " "; } else { x = std::abs(x); int expn = get_double_expn(x); double d = std::ldexp(1.0, expn); os << setw(5) << expn << " "; for (int i = 0; i < 53; i++) { if (x >= d) { x -= d; os << '1'; } else os << '0'; d *= 0.5; } if (x != 0.0) { // should not happen os << " +trailing stuff"; } } os.precision(old_prec); os.flags(old_flags); #endif } FreeFem-sources-4.9/3rdparty/dissection/src/qd/bits.h000664 000000 000000 00000001576 14037356732 022604 0ustar00rootroot000000 000000 /* * include/bits.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2001 * * This file defines various routines to get / set bits of a IEEE floating * point number. This is used by the library for debugging purposes. */ #ifndef _QD_BITS_H #define _QD_BITS_H #include #include /* Returns the exponent of the double precision number. Returns INT_MIN is x is zero, and INT_MAX if x is INF or NaN. */ int get_double_expn(double x); /* Prints SIGN EXPN MANTISSA of the given double. If x is NaN, INF, or Zero, this prints out the strings NaN, +/- INF, and 0. 
*/ void print_double_info(std::ostream &os, double x); #endif /* _QD_BITS_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/c_dd.h000664 000000 000000 00000006035 14037356732 022527 0ustar00rootroot000000 000000 /* * include/c_dd.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2001 * * Contains C wrapper function prototypes for double-double precision * arithmetic. This can also be used from fortran code. */ #ifndef _QD_C_DD_H #define _QD_C_DD_H #include #include #ifdef __cplusplus extern "C" { #endif /* add */ void c_dd_add(const double *a, const double *b, double *c); void c_dd_add_d_dd(double a, const double *b, double *c); void c_dd_add_dd_d(const double *a, double b, double *c); /* sub */ void c_dd_sub(const double *a, const double *b, double *c); void c_dd_sub_d_dd(double a, const double *b, double *c); void c_dd_sub_dd_d(const double *a, double b, double *c); /* mul */ void c_dd_mul(const double *a, const double *b, double *c); void c_dd_mul_d_dd(double a, const double *b, double *c); void c_dd_mul_dd_d(const double *a, double b, double *c); /* div */ void c_dd_div(const double *a, const double *b, double *c); void c_dd_div_d_dd(double a, const double *b, double *c); void c_dd_div_dd_d(const double *a, double b, double *c); /* copy */ void c_dd_copy(const double *a, double *b); void c_dd_copy_d(double a, double *b); void c_dd_sqrt(const double *a, double *b); void c_dd_sqr(const double *a, double *b); void c_dd_abs(const double *a, double *b); void c_dd_npwr(const double *a, int b, double *c); void c_dd_nroot(const double *a, int b, double *c); void c_dd_nint(const double *a, double *b); void c_dd_aint(const double *a, double *b); void c_dd_floor(const double *a, double *b); void c_dd_ceil(const double *a, double *b); void c_dd_exp(const double *a, double *b); void 
c_dd_log(const double *a, double *b); void c_dd_log10(const double *a, double *b); void c_dd_sin(const double *a, double *b); void c_dd_cos(const double *a, double *b); void c_dd_tan(const double *a, double *b); void c_dd_asin(const double *a, double *b); void c_dd_acos(const double *a, double *b); void c_dd_atan(const double *a, double *b); void c_dd_atan2(const double *a, const double *b, double *c); void c_dd_sinh(const double *a, double *b); void c_dd_cosh(const double *a, double *b); void c_dd_tanh(const double *a, double *b); void c_dd_asinh(const double *a, double *b); void c_dd_acosh(const double *a, double *b); void c_dd_atanh(const double *a, double *b); void c_dd_sincos(const double *a, double *s, double *c); void c_dd_sincosh(const double *a, double *s, double *c); void c_dd_read(const char *s, double *a); void c_dd_swrite(const double *a, int precision, char *s, int len); void c_dd_write(const double *a); void c_dd_neg(const double *a, double *b); void c_dd_rand(double *a); void c_dd_comp(const double *a, const double *b, int *result); void c_dd_comp_dd_d(const double *a, double b, int *result); void c_dd_comp_d_dd(double a, const double *b, int *result); void c_dd_pi(double *a); #ifdef __cplusplus } #endif #endif /* _QD_C_DD_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/c_qd.h000664 000000 000000 00000010210 14037356732 022532 0ustar00rootroot000000 000000 /* * include/c_qd.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2001 * * Contains C wrapper function prototypes for quad-double precision * arithmetic. This can also be used from fortran code. 
*/ #ifndef _QD_C_QD_H #define _QD_C_QD_H #include #include #ifdef __cplusplus extern "C" { #endif /* add */ void c_qd_add(const double *a, const double *b, double *c); void c_qd_add_dd_qd(const double *a, const double *b, double *c); void c_qd_add_qd_dd(const double *a, const double *b, double *c); void c_qd_add_d_qd(double a, const double *b, double *c); void c_qd_add_qd_d(const double *a, double b, double *c); void c_qd_selfadd(const double *a, double *b); void c_qd_selfadd_dd(const double *a, double *b); void c_qd_selfadd_d(double a, double *b); /* sub */ void c_qd_sub(const double *a, const double *b, double *c); void c_qd_sub_dd_qd(const double *a, const double *b, double *c); void c_qd_sub_qd_dd(const double *a, const double *b, double *c); void c_qd_sub_d_qd(double a, const double *b, double *c); void c_qd_sub_qd_d(const double *a, double b, double *c); void c_qd_selfsub(const double *a, double *b); void c_qd_selfsub_dd(const double *a, double *b); void c_qd_selfsub_d(double a, double *b); /* mul */ void c_qd_mul(const double *a, const double *b, double *c); void c_qd_mul_dd_qd(const double *a, const double *b, double *c); void c_qd_mul_qd_dd(const double *a, const double *b, double *c); void c_qd_mul_d_qd(double a, const double *b, double *c); void c_qd_mul_qd_d(const double *a, double b, double *c); void c_qd_selfmul(const double *a, double *b); void c_qd_selfmul_dd(const double *a, double *b); void c_qd_selfmul_d(double a, double *b); /* div */ void c_qd_div(const double *a, const double *b, double *c); void c_qd_div_dd_qd(const double *a, const double *b, double *c); void c_qd_div_qd_dd(const double *a, const double *b, double *c); void c_qd_div_d_qd(double a, const double *b, double *c); void c_qd_div_qd_d(const double *a, double b, double *c); void c_qd_selfdiv(const double *a, double *b); void c_qd_selfdiv_dd(const double *a, double *b); void c_qd_selfdiv_d(double a, double *b); /* copy */ void c_qd_copy(const double *a, double *b); void 
c_qd_copy_dd(const double *a, double *b); void c_qd_copy_d(double a, double *b); void c_qd_sqrt(const double *a, double *b); void c_qd_sqr(const double *a, double *b); void c_qd_abs(const double *a, double *b); void c_qd_npwr(const double *a, int b, double *c); void c_qd_nroot(const double *a, int b, double *c); void c_qd_nint(const double *a, double *b); void c_qd_aint(const double *a, double *b); void c_qd_floor(const double *a, double *b); void c_qd_ceil(const double *a, double *b); void c_qd_exp(const double *a, double *b); void c_qd_log(const double *a, double *b); void c_qd_log10(const double *a, double *b); void c_qd_sin(const double *a, double *b); void c_qd_cos(const double *a, double *b); void c_qd_tan(const double *a, double *b); void c_qd_asin(const double *a, double *b); void c_qd_acos(const double *a, double *b); void c_qd_atan(const double *a, double *b); void c_qd_atan2(const double *a, const double *b, double *c); void c_qd_sinh(const double *a, double *b); void c_qd_cosh(const double *a, double *b); void c_qd_tanh(const double *a, double *b); void c_qd_asinh(const double *a, double *b); void c_qd_acosh(const double *a, double *b); void c_qd_atanh(const double *a, double *b); void c_qd_sincos(const double *a, double *s, double *c); void c_qd_sincosh(const double *a, double *s, double *c); void c_qd_read(const char *s, double *a); void c_qd_swrite(const double *a, int precision, char *s, int len); void c_qd_write(const double *a); void c_qd_neg(const double *a, double *b); void c_qd_rand(double *a); void c_qd_comp(const double *a, const double *b, int *result); void c_qd_comp_qd_d(const double *a, double b, int *result); void c_qd_comp_d_qd(double a, const double *b, int *result); void c_qd_pi(double *a); #ifdef __cplusplus } #endif #endif /* _QD_C_QD_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/dd_const.cpp000664 000000 000000 00000003514 14037356732 023765 0ustar00rootroot000000 000000 /* * src/dd_const.cc * * This work was supported by the 
Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2007 */ #include "config.h" #include const dd_real dd_real::_2pi = dd_real(6.283185307179586232e+00, 2.449293598294706414e-16); const dd_real dd_real::_pi = dd_real(3.141592653589793116e+00, 1.224646799147353207e-16); const dd_real dd_real::_pi2 = dd_real(1.570796326794896558e+00, 6.123233995736766036e-17); const dd_real dd_real::_pi4 = dd_real(7.853981633974482790e-01, 3.061616997868383018e-17); const dd_real dd_real::_3pi4 = dd_real(2.356194490192344837e+00, 9.1848509936051484375e-17); const dd_real dd_real::_e = dd_real(2.718281828459045091e+00, 1.445646891729250158e-16); const dd_real dd_real::_log2 = dd_real(6.931471805599452862e-01, 2.319046813846299558e-17); const dd_real dd_real::_log10 = dd_real(2.302585092994045901e+00, -2.170756223382249351e-16); const dd_real dd_real::_nan = dd_real(qd::_d_nan, qd::_d_nan); const dd_real dd_real::_inf = dd_real(qd::_d_inf, qd::_d_inf); const double dd_real::_eps = 4.93038065763132e-32; // 2^-104 const double dd_real::_min_normalized = 2.0041683600089728e-292; // = 2^(-1022 + 53) const dd_real dd_real::_max = dd_real(1.79769313486231570815e+308, 9.97920154767359795037e+291); const dd_real dd_real::_safe_max = dd_real(1.7976931080746007281e+308, 9.97920154767359795037e+291); const int dd_real::_ndigits = 31; FreeFem-sources-4.9/3rdparty/dissection/src/qd/dd_inline.h000664 000000 000000 00000036752 14037356732 023574 0ustar00rootroot000000 000000 /* * include/dd_inline.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2001 * * Contains small functions (suitable for inlining) in the double-double * arithmetic package. 
*/ #ifndef _QD_DD_INLINE_H #define _QD_DD_INLINE_H #include #include #ifndef QD_INLINE #define inline #endif /*********** Additions ************/ /* double-double = double + double */ inline dd_real dd_real::add(double a, double b) { double s, e; s = qd::two_sum(a, b, e); return dd_real(s, e); } /* double-double + double */ inline dd_real operator+(const dd_real &a, double b) { double s1, s2; s1 = qd::two_sum(a.x[0], b, s2); s2 += a.x[1]; s1 = qd::quick_two_sum(s1, s2, s2); return dd_real(s1, s2); } /* double-double + double-double */ inline dd_real dd_real::ieee_add(const dd_real &a, const dd_real &b) { /* This one satisfies IEEE style error bound, due to K. Briggs and W. Kahan. */ double s1, s2, t1, t2; s1 = qd::two_sum(a.x[0], b.x[0], s2); t1 = qd::two_sum(a.x[1], b.x[1], t2); s2 += t1; s1 = qd::quick_two_sum(s1, s2, s2); s2 += t2; s1 = qd::quick_two_sum(s1, s2, s2); return dd_real(s1, s2); } inline dd_real dd_real::sloppy_add(const dd_real &a, const dd_real &b) { /* This is the less accurate version ... obeys Cray-style error bound. 
*/ double s, e; s = qd::two_sum(a.x[0], b.x[0], e); e += (a.x[1] + b.x[1]); s = qd::quick_two_sum(s, e, e); return dd_real(s, e); } inline dd_real operator+(const dd_real &a, const dd_real &b) { #ifndef QD_IEEE_ADD return dd_real::sloppy_add(a, b); #else return dd_real::ieee_add(a, b); #endif } /* double + double-double */ inline dd_real operator+(double a, const dd_real &b) { return (b + a); } /*********** Self-Additions ************/ /* double-double += double */ inline dd_real &dd_real::operator+=(double a) { double s1, s2; s1 = qd::two_sum(x[0], a, s2); s2 += x[1]; x[0] = qd::quick_two_sum(s1, s2, x[1]); return *this; } /* double-double += double-double */ inline dd_real &dd_real::operator+=(const dd_real &a) { #ifndef QD_IEEE_ADD double s, e; s = qd::two_sum(x[0], a.x[0], e); e += x[1]; e += a.x[1]; x[0] = qd::quick_two_sum(s, e, x[1]); return *this; #else double s1, s2, t1, t2; s1 = qd::two_sum(x[0], a.x[0], s2); t1 = qd::two_sum(x[1], a.x[1], t2); s2 += t1; s1 = qd::quick_two_sum(s1, s2, s2); s2 += t2; x[0] = qd::quick_two_sum(s1, s2, x[1]); return *this; #endif } /*********** Subtractions ************/ /* double-double = double - double */ inline dd_real dd_real::sub(double a, double b) { double s, e; s = qd::two_diff(a, b, e); return dd_real(s, e); } /* double-double - double */ inline dd_real operator-(const dd_real &a, double b) { double s1, s2; s1 = qd::two_diff(a.x[0], b, s2); s2 += a.x[1]; s1 = qd::quick_two_sum(s1, s2, s2); return dd_real(s1, s2); } /* double-double - double-double */ inline dd_real operator-(const dd_real &a, const dd_real &b) { #ifndef QD_IEEE_ADD double s, e; s = qd::two_diff(a.x[0], b.x[0], e); e += a.x[1]; e -= b.x[1]; s = qd::quick_two_sum(s, e, e); return dd_real(s, e); #else double s1, s2, t1, t2; s1 = qd::two_diff(a.x[0], b.x[0], s2); t1 = qd::two_diff(a.x[1], b.x[1], t2); s2 += t1; s1 = qd::quick_two_sum(s1, s2, s2); s2 += t2; s1 = qd::quick_two_sum(s1, s2, s2); return dd_real(s1, s2); #endif } /* double - double-double */ 
inline dd_real operator-(double a, const dd_real &b) { double s1, s2; s1 = qd::two_diff(a, b.x[0], s2); s2 -= b.x[1]; s1 = qd::quick_two_sum(s1, s2, s2); return dd_real(s1, s2); } /*********** Self-Subtractions ************/ /* double-double -= double */ inline dd_real &dd_real::operator-=(double a) { double s1, s2; s1 = qd::two_diff(x[0], a, s2); s2 += x[1]; x[0] = qd::quick_two_sum(s1, s2, x[1]); return *this; } /* double-double -= double-double */ inline dd_real &dd_real::operator-=(const dd_real &a) { #ifndef QD_IEEE_ADD double s, e; s = qd::two_diff(x[0], a.x[0], e); e += x[1]; e -= a.x[1]; x[0] = qd::quick_two_sum(s, e, x[1]); return *this; #else double s1, s2, t1, t2; s1 = qd::two_diff(x[0], a.x[0], s2); t1 = qd::two_diff(x[1], a.x[1], t2); s2 += t1; s1 = qd::quick_two_sum(s1, s2, s2); s2 += t2; x[0] = qd::quick_two_sum(s1, s2, x[1]); return *this; #endif } /*********** Unary Minus ***********/ inline dd_real dd_real::operator-() const { return dd_real(-x[0], -x[1]); } /*********** Unary Plus ***********/ inline dd_real dd_real::operator+() const { return *this; } /*********** Multiplications ************/ /* double-double = double * double */ inline dd_real dd_real::mul(double a, double b) { double p, e; p = qd::two_prod(a, b, e); return dd_real(p, e); } /* double-double * (2.0 ^ exp) */ inline dd_real ldexp(const dd_real &a, int exp) { return dd_real(std::ldexp(a.x[0], exp), std::ldexp(a.x[1], exp)); } /* double-double * double, where double is a power of 2. 
*/ inline dd_real mul_pwr2(const dd_real &a, double b) { return dd_real(a.x[0] * b, a.x[1] * b); } /* double-double * double */ inline dd_real operator*(const dd_real &a, double b) { double p1, p2; p1 = qd::two_prod(a.x[0], b, p2); p2 += (a.x[1] * b); p1 = qd::quick_two_sum(p1, p2, p2); return dd_real(p1, p2); } /* double-double * double-double */ inline dd_real operator*(const dd_real &a, const dd_real &b) { double p1, p2; p1 = qd::two_prod(a.x[0], b.x[0], p2); p2 += (a.x[0] * b.x[1] + a.x[1] * b.x[0]); p1 = qd::quick_two_sum(p1, p2, p2); return dd_real(p1, p2); } /* double * double-double */ inline dd_real operator*(double a, const dd_real &b) { return (b * a); } /*********** Self-Multiplications ************/ /* double-double *= double */ inline dd_real &dd_real::operator*=(double a) { double p1, p2; p1 = qd::two_prod(x[0], a, p2); p2 += x[1] * a; x[0] = qd::quick_two_sum(p1, p2, x[1]); return *this; } /* double-double *= double-double */ inline dd_real &dd_real::operator*=(const dd_real &a) { double p1, p2; p1 = qd::two_prod(x[0], a.x[0], p2); p2 += a.x[1] * x[0]; p2 += a.x[0] * x[1]; x[0] = qd::quick_two_sum(p1, p2, x[1]); return *this; } /*********** Divisions ************/ inline dd_real dd_real::div(double a, double b) { double q1, q2; double p1, p2; double s, e; q1 = a / b; /* Compute a - q1 * b */ p1 = qd::two_prod(q1, b, p2); s = qd::two_diff(a, p1, e); e -= p2; /* get next approximation */ q2 = (s + e) / b; s = qd::quick_two_sum(q1, q2, e); return dd_real(s, e); } /* double-double / double */ inline dd_real operator/(const dd_real &a, double b) { double q1, q2; double p1, p2; double s, e; dd_real r; q1 = a.x[0] / b; /* approximate quotient. */ /* Compute this - q1 * d */ p1 = qd::two_prod(q1, b, p2); s = qd::two_diff(a.x[0], p1, e); e += a.x[1]; e -= p2; /* get next approximation. 
*/ q2 = (s + e) / b; /* renormalize */ r.x[0] = qd::quick_two_sum(q1, q2, r.x[1]); return r; } inline dd_real dd_real::sloppy_div(const dd_real &a, const dd_real &b) { double s1, s2; double q1, q2; dd_real r; q1 = a.x[0] / b.x[0]; /* approximate quotient */ /* compute this - q1 * dd */ r = b * q1; s1 = qd::two_diff(a.x[0], r.x[0], s2); s2 -= r.x[1]; s2 += a.x[1]; /* get next approximation */ q2 = (s1 + s2) / b.x[0]; /* renormalize */ r.x[0] = qd::quick_two_sum(q1, q2, r.x[1]); return r; } inline dd_real dd_real::accurate_div(const dd_real &a, const dd_real &b) { double q1, q2, q3; dd_real r; q1 = a.x[0] / b.x[0]; /* approximate quotient */ r = a - q1 * b; q2 = r.x[0] / b.x[0]; r -= (q2 * b); q3 = r.x[0] / b.x[0]; q1 = qd::quick_two_sum(q1, q2, q2); r = dd_real(q1, q2) + q3; return r; } /* double-double / double-double */ inline dd_real operator/(const dd_real &a, const dd_real &b) { #ifdef QD_SLOPPY_DIV return dd_real::sloppy_div(a, b); #else return dd_real::accurate_div(a, b); #endif } /* double / double-double */ inline dd_real operator/(double a, const dd_real &b) { return dd_real(a) / b; } inline dd_real inv(const dd_real &a) { return 1.0 / a; } /*********** Self-Divisions ************/ /* double-double /= double */ inline dd_real &dd_real::operator/=(double a) { *this = *this / a; return *this; } /* double-double /= double-double */ inline dd_real &dd_real::operator/=(const dd_real &a) { *this = *this / a; return *this; } /********** Remainder **********/ inline dd_real drem(const dd_real &a, const dd_real &b) { dd_real n = nint(a / b); return (a - n * b); } inline dd_real divrem(const dd_real &a, const dd_real &b, dd_real &r) { dd_real n = nint(a / b); r = a - n * b; return n; } /*********** Squaring **********/ inline dd_real sqr(const dd_real &a) { double p1, p2; double s1, s2; p1 = qd::two_sqr(a.x[0], p2); p2 += 2.0 * a.x[0] * a.x[1]; p2 += a.x[1] * a.x[1]; s1 = qd::quick_two_sum(p1, p2, s2); return dd_real(s1, s2); } inline dd_real dd_real::sqr(double a) 
{ double p1, p2; p1 = qd::two_sqr(a, p2); return dd_real(p1, p2); } /********** Exponentiation **********/ inline dd_real dd_real::operator^(int n) { return npwr(*this, n); } /*********** Assignments ************/ /* double-double = double */ inline dd_real &dd_real::operator=(double a) { x[0] = a; x[1] = 0.0; return *this; } /*********** Equality Comparisons ************/ inline bool operator==(const dd_real &a, int b) { return (a.x[0] == double(b) && a.x[1] == 0.0); } inline bool operator==(int a, const dd_real &b) { return (double(a) == b.x[0] && b.x[1] == 0.0); } /* double-double == double */ inline bool operator==(const dd_real &a, double b) { return (a.x[0] == b && a.x[1] == 0.0); } /* double-double == double-double */ inline bool operator==(const dd_real &a, const dd_real &b) { return (a.x[0] == b.x[0] && a.x[1] == b.x[1]); } /* double == double-double */ inline bool operator==(double a, const dd_real &b) { return (a == b.x[0] && b.x[1] == 0.0); } /*********** Greater-Than Comparisons ************/ inline bool operator>(const dd_real &a, int b) { return (a.x[0] > double(b) || (a.x[0] == double(b) && a.x[1] > 0.0)); } inline bool operator>(int a, const dd_real &b) { return (double(a) > b.x[0] || (double(a) == b.x[0] && b.x[1] < 0.0)); } /* double-double > double */ inline bool operator>(const dd_real &a, double b) { return (a.x[0] > b || (a.x[0] == b && a.x[1] > 0.0)); } /* double-double > double-double */ inline bool operator>(const dd_real &a, const dd_real &b) { return (a.x[0] > b.x[0] || (a.x[0] == b.x[0] && a.x[1] > b.x[1])); } /* double > double-double */ inline bool operator>(double a, const dd_real &b) { return (a > b.x[0] || (a == b.x[0] && b.x[1] < 0.0)); } /*********** Less-Than Comparisons ************/ inline bool operator<(const dd_real &a, int b) { return (a.x[0] < double(b) || (a.x[0] == double(b) && a.x[1] < 0.0)); } inline bool operator<(int a, const dd_real &b) { return (double(a) < b.x[0] || (double(a) == b.x[0] && b.x[1] > 0.0)); } /* 
double-double < double */ inline bool operator<(const dd_real &a, double b) { return (a.x[0] < b || (a.x[0] == b && a.x[1] < 0.0)); } /* double-double < double-double */ inline bool operator<(const dd_real &a, const dd_real &b) { return (a.x[0] < b.x[0] || (a.x[0] == b.x[0] && a.x[1] < b.x[1])); } /* double < double-double */ inline bool operator<(double a, const dd_real &b) { return (a < b.x[0] || (a == b.x[0] && b.x[1] > 0.0)); } /*********** Greater-Than-Or-Equal-To Comparisons ************/ inline bool operator>=(const dd_real &a, int b) { return ((a > b) || (a == b)); } inline bool operator>=(int a, const dd_real &b) { return ((b < a) || (b == a)); } /* double-double >= double */ inline bool operator>=(const dd_real &a, double b) { return (a.x[0] > b || (a.x[0] == b && a.x[1] >= 0.0)); } /* double-double >= double-double */ inline bool operator>=(const dd_real &a, const dd_real &b) { return (a.x[0] > b.x[0] || (a.x[0] == b.x[0] && a.x[1] >= b.x[1])); } /* double >= double-double */ inline bool operator>=(double a, const dd_real &b) { return (b <= a); } /*********** Less-Than-Or-Equal-To Comparisons ************/ inline bool operator<=(const dd_real &a, int b) { return ((a < b) || (a == b)); } inline bool operator<=(int a, const dd_real &b) { return ((b > a) || (b == a)); } /* double-double <= double */ inline bool operator<=(const dd_real &a, double b) { return (a.x[0] < b || (a.x[0] == b && a.x[1] <= 0.0)); } /* double-double <= double-double */ inline bool operator<=(const dd_real &a, const dd_real &b) { return (a.x[0] < b.x[0] || (a.x[0] == b.x[0] && a.x[1] <= b.x[1])); } /* double <= double-double */ inline bool operator<=(double a, const dd_real &b) { return (b >= a); } /*********** Not-Equal-To Comparisons ************/ inline bool operator!=(const dd_real &a, int b) { return (a.x[0] != double(b) || a.x[1] != 0.0); } inline bool operator!=(int a, const dd_real &b) { return (double(a) != b.x[0] || b.x[1] != 0.0); } /* double-double != double */ inline 
bool operator!=(const dd_real &a, double b) { return (a.x[0] != b || a.x[1] != 0.0); } /* double-double != double-double */ inline bool operator!=(const dd_real &a, const dd_real &b) { return (a.x[0] != b.x[0] || a.x[1] != b.x[1]); } /* double != double-double */ inline bool operator!=(double a, const dd_real &b) { return (a != b.x[0] || b.x[1] != 0.0); } /*********** Micellaneous ************/ /* this == 0 */ inline bool dd_real::is_zero() const { return (x[0] == 0.0); } /* this == 1 */ inline bool dd_real::is_one() const { return (x[0] == 1.0 && x[1] == 0.0); } /* this > 0 */ inline bool dd_real::is_positive() const { return (x[0] > 0.0); } /* this < 0 */ inline bool dd_real::is_negative() const { return (x[0] < 0.0); } /* Absolute value */ inline dd_real abs(const dd_real &a) { return (a.x[0] < 0.0) ? -a : a; } inline dd_real fabs(const dd_real &a) { return abs(a); } /* Round to Nearest integer */ inline dd_real nint(const dd_real &a) { double hi = qd::nint(a.x[0]); double lo; if (hi == a.x[0]) { /* High word is an integer already. Round the low word.*/ lo = qd::nint(a.x[1]); /* Renormalize. This is needed if x[0] = some integer, x[1] = 1/2.*/ hi = qd::quick_two_sum(hi, lo, lo); } else { /* High word is not an integer. */ lo = 0.0; if (std::abs(hi-a.x[0]) == 0.5 && a.x[1] < 0.0) { /* There is a tie in the high word, consult the low word to break the tie. */ hi -= 1.0; /* NOTE: This does not cause INEXACT. */ } } return dd_real(hi, lo); } inline dd_real floor(const dd_real &a) { double hi = std::floor(a.x[0]); double lo = 0.0; if (hi == a.x[0]) { /* High word is integer already. Round the low word. */ lo = std::floor(a.x[1]); hi = qd::quick_two_sum(hi, lo, lo); } return dd_real(hi, lo); } inline dd_real ceil(const dd_real &a) { double hi = std::ceil(a.x[0]); double lo = 0.0; if (hi == a.x[0]) { /* High word is integer already. Round the low word. 
*/ lo = std::ceil(a.x[1]); hi = qd::quick_two_sum(hi, lo, lo); } return dd_real(hi, lo); } inline dd_real aint(const dd_real &a) { return (a.x[0] >= 0.0) ? floor(a) : ceil(a); } /* Cast to double. */ inline double to_double(const dd_real &a) { return a.x[0]; } /* Cast to int. */ inline int to_int(const dd_real &a) { return static_cast(a.x[0]); } /* Random number generator */ inline dd_real dd_real::rand() { return ddrand(); } #endif /* _QD_DD_INLINE_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/dd_real.cpp000664 000000 000000 00000073720 14037356732 023570 0ustar00rootroot000000 000000 /* * src/dd_real.cc * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2007 * * Contains implementation of non-inlined functions of double-double * package. Inlined functions are found in dd_inline.h (in include directory). */ #include #include #include #include #include #include #include #include "config.h" #include #include "util.h" #include #ifndef QD_INLINE #include #endif #include // 05 Apr.2018 using std::cout; using std::cerr; using std::endl; using std::ostream; using std::istream; using std::ios_base; using std::string; using std::setw; /* This routine is called whenever a fatal error occurs. */ void dd_real::error(const char *msg) { // if (msg) { cerr << "ERROR " << msg << endl; } } /* Computes the square root of the double-double number dd. NOTE: dd must be a non-negative number. */ QD_API dd_real sqrt(const dd_real &a) { /* Strategy: Use Karp's trick: if x is an approximation to sqrt(a), then sqrt(a) = a*x + [a - (a*x)^2] * x / 2 (approx) The approximation is accurate to twice the accuracy of x. Also, the multiplication (a*x) and [-]*x can be done with only half the precision. 
*/ if (a.is_zero()) return 0.0; if (a.is_negative()) { dd_real::error("(dd_real::sqrt): Negative argument."); return dd_real::_nan; } double x = 1.0 / std::sqrt(a.x[0]); double ax = a.x[0] * x; return dd_real::add(ax, (a - dd_real::sqr(ax)).x[0] * (x * 0.5)); } /* Computes the square root of a double in double-double precision. NOTE: d must not be negative. */ dd_real dd_real::sqrt(double d) { return ::sqrt(dd_real(d)); } /* Computes the n-th root of the double-double number a. NOTE: n must be a positive integer. NOTE: If n is even, then a must not be negative. */ dd_real nroot(const dd_real &a, int n) { /* Strategy: Use Newton iteration for the function f(x) = x^(-n) - a to find its root a^{-1/n}. The iteration is thus x' = x + x * (1 - a * x^n) / n which converges quadratically. We can then find a^{1/n} by taking the reciprocal. */ if (n <= 0) { dd_real::error("(dd_real::nroot): N must be positive."); return dd_real::_nan; } if (n%2 == 0 && a.is_negative()) { dd_real::error("(dd_real::nroot): Negative argument."); return dd_real::_nan; } if (n == 1) { return a; } if (n == 2) { return sqrt(a); } if (a.is_zero()) return 0.0; /* Note a^{-1/n} = exp(-log(a)/n) */ dd_real r = abs(a); dd_real x = std::exp(-std::log(r.x[0]) / n); /* Perform Newton's iteration. */ x += x * (1.0 - r * npwr(x, n)) / static_cast(n); if (a.x[0] < 0.0) x = -x; return 1.0/x; } /* Computes the n-th power of a double-double number. NOTE: 0^0 causes an error. */ dd_real npwr(const dd_real &a, int n) { if (n == 0) { if (a.is_zero()) { dd_real::error("(dd_real::npwr): Invalid argument."); return dd_real::_nan; } return 1.0; } dd_real r = a; dd_real s = 1.0; int N = std::abs(n); if (N > 1) { /* Use binary exponentiation */ while (N > 0) { if (N % 2 == 1) { s *= r; } N /= 2; if (N > 0) r = sqr(r); } } else { s = r; } /* Compute the reciprocal if n is negative. 
*/ if (n < 0) return (1.0 / s); return s; } dd_real pow(const dd_real &a, int n) { return npwr(a, n); } dd_real pow(const dd_real &a, const dd_real &b) { return dd_real(1.0); // 5 Apr.2018 // return exp(b * log(a)); } static const int n_inv_fact = 15; static const double inv_fact[n_inv_fact][2] = { { 1.66666666666666657e-01, 9.25185853854297066e-18}, { 4.16666666666666644e-02, 2.31296463463574266e-18}, { 8.33333333333333322e-03, 1.15648231731787138e-19}, { 1.38888888888888894e-03, -5.30054395437357706e-20}, { 1.98412698412698413e-04, 1.72095582934207053e-22}, { 2.48015873015873016e-05, 2.15119478667758816e-23}, { 2.75573192239858925e-06, -1.85839327404647208e-22}, { 2.75573192239858883e-07, 2.37677146222502973e-23}, { 2.50521083854417202e-08, -1.44881407093591197e-24}, { 2.08767569878681002e-09, -1.20734505911325997e-25}, { 1.60590438368216133e-10, 1.25852945887520981e-26}, { 1.14707455977297245e-11, 2.06555127528307454e-28}, { 7.64716373181981641e-13, 7.03872877733453001e-30}, { 4.77947733238738525e-14, 4.39920548583408126e-31}, { 2.81145725434552060e-15, 1.65088427308614326e-31} }; /* Exponential. Computes exp(x) in double-double precision. */ dd_real exp(const dd_real &a) { /* Strategy: We first reduce the size of x by noting that exp(kr + m * log(2)) = 2^m * exp(r)^k where m and k are integers. By choosing m appropriately we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is evaluated using the familiar Taylor series. Reducing the argument substantially speeds up the convergence. 
*/ const double k = 512.0; const double inv_k = 1.0 / k; if (a.x[0] <= -709.0) return 0.0; if (a.x[0] >= 709.0) return dd_real::_inf; if (a.is_zero()) return 1.0; if (a.is_one()) return dd_real::_e; double m = std::floor(a.x[0] / dd_real::_log2.x[0] + 0.5); dd_real r = mul_pwr2(a - dd_real::_log2 * m, inv_k); dd_real s, t, p; p = sqr(r); s = r + mul_pwr2(p, 0.5); p *= r; t = p * dd_real(inv_fact[0][0], inv_fact[0][1]); int i = 0; do { s += t; p *= r; ++i; t = p * dd_real(inv_fact[i][0], inv_fact[i][1]); } while (std::abs(to_double(t)) > inv_k * dd_real::_eps && i < 5); s += t; s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s += 1.0; return ldexp(s, static_cast(m)); } /* Logarithm. Computes log(x) in double-double precision. This is a natural logarithm (i.e., base e). */ dd_real log(const dd_real &a) { /* Strategy. The Taylor series for log converges much more slowly than that of exp, due to the lack of the factorial term in the denominator. Hence this routine instead tries to determine the root of the function f(x) = exp(x) - a using Newton iteration. The iteration is given by x' = x - f(x)/f'(x) = x - (1 - a * exp(-x)) = x + a * exp(-x) - 1. Only one iteration is needed, since Newton's iteration approximately doubles the number of digits per iteration. */ if (a.is_one()) { return 0.0; } if (a.x[0] <= 0.0) { dd_real::error("(dd_real::log): Non-positive argument."); return dd_real::_nan; } dd_real x = std::log(a.x[0]); /* Initial approximation */ x = x + a * exp(-x) - 1.0; return x; } dd_real log10(const dd_real &a) { return log(a) / dd_real::_log10; } static const dd_real _pi16 = dd_real(1.963495408493620697e-01, 7.654042494670957545e-18); /* Table of sin(k * pi/16) and cos(k * pi/16). 
*/ static const double sin_table [4][2] = { {1.950903220161282758e-01, -7.991079068461731263e-18}, {3.826834323650897818e-01, -1.005077269646158761e-17}, {5.555702330196021776e-01, 4.709410940561676821e-17}, {7.071067811865475727e-01, -4.833646656726456726e-17} }; static const double cos_table [4][2] = { {9.807852804032304306e-01, 1.854693999782500573e-17}, {9.238795325112867385e-01, 1.764504708433667706e-17}, {8.314696123025452357e-01, 1.407385698472802389e-18}, {7.071067811865475727e-01, -4.833646656726456726e-17} }; /* Computes sin(a) using Taylor series. Assumes |a| <= pi/32. */ static dd_real sin_taylor(const dd_real &a) { const double thresh = 0.5 * std::abs(to_double(a)) * dd_real::_eps; dd_real r, s, t, x; if (a.is_zero()) { return 0.0; } int i = 0; x = -sqr(a); s = a; r = a; do { r *= x; t = r * dd_real(inv_fact[i][0], inv_fact[i][1]); s += t; i += 2; } while (i < n_inv_fact && std::abs(to_double(t)) > thresh); return s; } static dd_real cos_taylor(const dd_real &a) { const double thresh = 0.5 * dd_real::_eps; dd_real r, s, t, x; if (a.is_zero()) { return 1.0; } x = -sqr(a); r = x; s = 1.0 + mul_pwr2(r, 0.5); int i = 1; do { r *= x; t = r * dd_real(inv_fact[i][0], inv_fact[i][1]); s += t; i += 2; } while (i < n_inv_fact && std::abs(to_double(t)) > thresh); return s; } static void sincos_taylor(const dd_real &a, dd_real &sin_a, dd_real &cos_a) { if (a.is_zero()) { sin_a = 0.0; cos_a = 1.0; return; } sin_a = sin_taylor(a); cos_a = sqrt(1.0 - sqr(sin_a)); } dd_real sin(const dd_real &a) { /* Strategy. To compute sin(x), we choose integers a, b so that x = s + a * (pi/2) + b * (pi/16) and |s| <= pi/32. Using the fact that sin(pi/16) = 0.5 * sqrt(2 - sqrt(2 + sqrt(2))) we can compute sin(x) from sin(s), cos(s). This greatly increases the convergence of the sine Taylor series. 
*/ if (a.is_zero()) { return 0.0; } // approximately reduce modulo 2*pi dd_real z = nint(a / dd_real::_2pi); dd_real r = a - dd_real::_2pi * z; // approximately reduce modulo pi/2 and then modulo pi/16. dd_real t; double q = std::floor(r.x[0] / dd_real::_pi2.x[0] + 0.5); t = r - dd_real::_pi2 * q; int j = static_cast(q); q = std::floor(t.x[0] / _pi16.x[0] + 0.5); t -= _pi16 * q; int k = static_cast(q); int abs_k = std::abs(k); if (j < -2 || j > 2) { dd_real::error("(dd_real::sin): Cannot reduce modulo pi/2."); return dd_real::_nan; } if (abs_k > 4) { dd_real::error("(dd_real::sin): Cannot reduce modulo pi/16."); return dd_real::_nan; } if (k == 0) { switch (j) { case 0: return sin_taylor(t); case 1: return cos_taylor(t); case -1: return -cos_taylor(t); default: return -sin_taylor(t); } } dd_real u(cos_table[abs_k-1][0], cos_table[abs_k-1][1]); dd_real v(sin_table[abs_k-1][0], sin_table[abs_k-1][1]); dd_real sin_t, cos_t; sincos_taylor(t, sin_t, cos_t); if (j == 0) { if (k > 0) { r = u * sin_t + v * cos_t; } else { r = u * sin_t - v * cos_t; } } else if (j == 1) { if (k > 0) { r = u * cos_t - v * sin_t; } else { r = u * cos_t + v * sin_t; } } else if (j == -1) { if (k > 0) { r = v * sin_t - u * cos_t; } else if (k < 0) { r = -u * cos_t - v * sin_t; } } else { if (k > 0) { r = -u * sin_t - v * cos_t; } else { r = v * cos_t - u * sin_t; } } return r; } dd_real cos(const dd_real &a) { if (a.is_zero()) { return 1.0; } // approximately reduce modulo 2*pi dd_real z = nint(a / dd_real::_2pi); dd_real r = a - z * dd_real::_2pi; // approximately reduce modulo pi/2 and then modulo pi/16 dd_real t; double q = std::floor(r.x[0] / dd_real::_pi2.x[0] + 0.5); t = r - dd_real::_pi2 * q; int j = static_cast(q); q = std::floor(t.x[0] / _pi16.x[0] + 0.5); t -= _pi16 * q; int k = static_cast(q); int abs_k = std::abs(k); if (j < -2 || j > 2) { dd_real::error("(dd_real::cos): Cannot reduce modulo pi/2."); return dd_real::_nan; } if (abs_k > 4) { dd_real::error("(dd_real::cos): Cannot 
reduce modulo pi/16."); return dd_real::_nan; } if (k == 0) { switch (j) { case 0: return cos_taylor(t); case 1: return -sin_taylor(t); case -1: return sin_taylor(t); default: return -cos_taylor(t); } } dd_real sin_t, cos_t; sincos_taylor(t, sin_t, cos_t); dd_real u(cos_table[abs_k-1][0], cos_table[abs_k-1][1]); dd_real v(sin_table[abs_k-1][0], sin_table[abs_k-1][1]); if (j == 0) { if (k > 0) { r = u * cos_t - v * sin_t; } else { r = u * cos_t + v * sin_t; } } else if (j == 1) { if (k > 0) { r = - u * sin_t - v * cos_t; } else { r = v * cos_t - u * sin_t; } } else if (j == -1) { if (k > 0) { r = u * sin_t + v * cos_t; } else { r = u * sin_t - v * cos_t; } } else { if (k > 0) { r = v * sin_t - u * cos_t; } else { r = - u * cos_t - v * sin_t; } } return r; } void sincos(const dd_real &a, dd_real &sin_a, dd_real &cos_a) { if (a.is_zero()) { sin_a = 0.0; cos_a = 1.0; return; } // approximately reduce modulo 2*pi dd_real z = nint(a / dd_real::_2pi); dd_real r = a - dd_real::_2pi * z; // approximately reduce module pi/2 and pi/16 dd_real t; double q = std::floor(r.x[0] / dd_real::_pi2.x[0] + 0.5); t = r - dd_real::_pi2 * q; int j = static_cast(q); int abs_j = std::abs(j); q = std::floor(t.x[0] / _pi16.x[0] + 0.5); t -= _pi16 * q; int k = static_cast(q); int abs_k = std::abs(k); if (abs_j > 2) { dd_real::error("(dd_real::sincos): Cannot reduce modulo pi/2."); cos_a = sin_a = dd_real::_nan; return; } if (abs_k > 4) { dd_real::error("(dd_real::sincos): Cannot reduce modulo pi/16."); cos_a = sin_a = dd_real::_nan; return; } dd_real sin_t, cos_t; dd_real s, c; sincos_taylor(t, sin_t, cos_t); if (abs_k == 0) { s = sin_t; c = cos_t; } else { dd_real u(cos_table[abs_k-1][0], cos_table[abs_k-1][1]); dd_real v(sin_table[abs_k-1][0], sin_table[abs_k-1][1]); if (k > 0) { s = u * sin_t + v * cos_t; c = u * cos_t - v * sin_t; } else { s = u * sin_t - v * cos_t; c = u * cos_t + v * sin_t; } } if (abs_j == 0) { sin_a = s; cos_a = c; } else if (j == 1) { sin_a = c; cos_a = -s; } else if 
(j == -1) { sin_a = -c; cos_a = s; } else { sin_a = -s; cos_a = -c; } } dd_real atan(const dd_real &a) { return atan2(a, dd_real(1.0)); } dd_real atan2(const dd_real &y, const dd_real &x) { /* Strategy: Instead of using Taylor series to compute arctan, we instead use Newton's iteration to solve the equation sin(z) = y/r or cos(z) = x/r where r = sqrt(x^2 + y^2). The iteration is given by z' = z + (y - sin(z)) / cos(z) (for equation 1) z' = z - (x - cos(z)) / sin(z) (for equation 2) Here, x and y are normalized so that x^2 + y^2 = 1. If |x| > |y|, then first iteration is used since the denominator is larger. Otherwise, the second is used. */ if (x.is_zero()) { if (y.is_zero()) { /* Both x and y is zero. */ dd_real::error("(dd_real::atan2): Both arguments zero."); return dd_real::_nan; } return (y.is_positive()) ? dd_real::_pi2 : -dd_real::_pi2; } else if (y.is_zero()) { return (x.is_positive()) ? dd_real(0.0) : dd_real::_pi; } if (x == y) { return (y.is_positive()) ? dd_real::_pi4 : -dd_real::_3pi4; } if (x == -y) { return (y.is_positive()) ? dd_real::_3pi4 : -dd_real::_pi4; } dd_real r = sqrt(sqr(x) + sqr(y)); dd_real xx = x / r; dd_real yy = y / r; /* Compute double precision approximation to atan. */ dd_real z = std::atan2(to_double(y), to_double(x)); dd_real sin_z, cos_z; if (std::abs(xx.x[0]) > std::abs(yy.x[0])) { /* Use Newton iteration 1. z' = z + (y - sin(z)) / cos(z) */ sincos(z, sin_z, cos_z); z += (yy - sin_z) / cos_z; } else { /* Use Newton iteration 2. z' = z - (x - cos(z)) / sin(z) */ sincos(z, sin_z, cos_z); z -= (xx - cos_z) / sin_z; } return z; } dd_real tan(const dd_real &a) { dd_real s, c; sincos(a, s, c); return s/c; } dd_real asin(const dd_real &a) { dd_real abs_a = abs(a); if (abs_a > 1.0) { dd_real::error("(dd_real::asin): Argument out of domain."); return dd_real::_nan; } if (abs_a.is_one()) { return (a.is_positive()) ? 
dd_real::_pi2 : -dd_real::_pi2; } return atan2(a, sqrt(1.0 - sqr(a))); } dd_real acos(const dd_real &a) { dd_real abs_a = abs(a); if (abs_a > 1.0) { dd_real::error("(dd_real::acos): Argument out of domain."); return dd_real::_nan; } if (abs_a.is_one()) { return (a.is_positive()) ? dd_real(0.0) : dd_real::_pi; } return atan2(sqrt(1.0 - sqr(a)), a); } dd_real sinh(const dd_real &a) { if (a.is_zero()) { return 0.0; } if (abs(a) > 0.05) { dd_real ea = exp(a); return mul_pwr2(ea - inv(ea), 0.5); } /* since a is small, using the above formula gives a lot of cancellation. So use Taylor series. */ dd_real s = a; dd_real t = a; dd_real r = sqr(t); double m = 1.0; double thresh = std::abs((to_double(a)) * dd_real::_eps); do { m += 2.0; t *= r; t /= (m-1) * m; s += t; } while (abs(t) > thresh); return s; } dd_real cosh(const dd_real &a) { if (a.is_zero()) { return 1.0; } dd_real ea = exp(a); return mul_pwr2(ea + inv(ea), 0.5); } dd_real tanh(const dd_real &a) { if (a.is_zero()) { return 0.0; } if (std::abs(to_double(a)) > 0.05) { dd_real ea = exp(a); dd_real inv_ea = inv(ea); return (ea - inv_ea) / (ea + inv_ea); } else { dd_real s, c; s = sinh(a); c = sqrt(1.0 + sqr(s)); return s / c; } } void sincosh(const dd_real &a, dd_real &s, dd_real &c) { if (std::abs(to_double(a)) <= 0.05) { s = sinh(a); c = sqrt(1.0 + sqr(s)); } else { dd_real ea = exp(a); dd_real inv_ea = inv(ea); s = mul_pwr2(ea - inv_ea, 0.5); c = mul_pwr2(ea + inv_ea, 0.5); } } dd_real asinh(const dd_real &a) { return log(a + sqrt(sqr(a) + 1.0)); } dd_real acosh(const dd_real &a) { if (a < 1.0) { dd_real::error("(dd_real::acosh): Argument out of domain."); return dd_real::_nan; } return log(a + sqrt(sqr(a) - 1.0)); } dd_real atanh(const dd_real &a) { if (abs(a) >= 1.0) { dd_real::error("(dd_real::atanh): Argument out of domain."); return dd_real::_nan; } return mul_pwr2(log((1.0 + a) / (1.0 - a)), 0.5); } QD_API dd_real fmod(const dd_real &a, const dd_real &b) { dd_real n = aint(a / b); return (a - b * n); } 
QD_API dd_real ddrand() { static const double m_const = 4.6566128730773926e-10; /* = 2^{-31} */ double m = m_const; dd_real r = 0.0; double d; /* Strategy: Generate 31 bits at a time, using lrand48 random number generator. Shift the bits, and reapeat 4 times. */ for (int i = 0; i < 4; i++, m *= m_const) { // d = lrand48() * m; d = std::rand() * m; r += d; } return r; } /* polyeval(c, n, x) Evaluates the given n-th degree polynomial at x. The polynomial is given by the array of (n+1) coefficients. */ dd_real polyeval(const dd_real *c, int n, const dd_real &x) { /* Just use Horner's method of polynomial evaluation. */ dd_real r = c[n]; for (int i = n-1; i >= 0; i--) { r *= x; r += c[i]; } return r; } /* polyroot(c, n, x0) Given an n-th degree polynomial, finds a root close to the given guess x0. Note that this uses simple Newton iteration scheme, and does not work for multiple roots. */ QD_API dd_real polyroot(const dd_real *c, int n, const dd_real &x0, int max_iter, double thresh) { dd_real x = x0; dd_real f; dd_real *d = new dd_real[n]; bool conv = false; int i; double max_c = std::abs(to_double(c[0])); double v; if (thresh == 0.0) thresh = dd_real::_eps; /* Compute the coefficients of the derivatives. */ for (i = 1; i <= n; i++) { v = std::abs(to_double(c[i])); if (v > max_c) max_c = v; d[i-1] = c[i] * static_cast(i); } thresh *= max_c; /* Newton iteration. */ for (i = 0; i < max_iter; i++) { f = polyeval(c, n, x); if (abs(f) < thresh) { conv = true; break; } x -= (f / polyeval(d, n-1, x)); } delete [] d; if (!conv) { dd_real::error("(dd_real::polyroot): Failed to converge."); return dd_real::_nan; } return x; } /* Constructor. Reads a double-double number from the string s and constructs a double-double number. 
*/ dd_real::dd_real(const char *s) { if (dd_real::read(s, *this)) { dd_real::error("(dd_real::dd_real): INPUT ERROR."); *this = dd_real::_nan; } } dd_real &dd_real::operator=(const char *s) { if (dd_real::read(s, *this)) { dd_real::error("(dd_real::operator=): INPUT ERROR."); *this = dd_real::_nan; } return *this; } /* Outputs the double-double number dd. */ ostream &operator<<(ostream &os, const dd_real &dd) { bool showpos = (os.flags() & ios_base::showpos) != 0; bool uppercase = (os.flags() & ios_base::uppercase) != 0; return os << dd.to_string(os.precision(), os.width(), os.flags(), showpos, uppercase, os.fill()); } /* Reads in the double-double number a. */ istream &operator>>(istream &s, dd_real &a) { char str[255]; s >> str; a = dd_real(str); return s; } void dd_real::to_digits(char *s, int &expn, int precision) const { int D = precision + 1; /* number of digits to compute */ dd_real r = abs(*this); int e; /* exponent */ int i, d; if (x[0] == 0.0) { /* this == 0.0 */ expn = 0; for (i = 0; i < precision; i++) s[i] = '0'; return; } /* First determine the (approximate) exponent. */ e = to_int(std::floor(std::log10(std::abs(x[0])))); if (e < -300) { r *= dd_real(10.0) ^ 300; r /= dd_real(10.0) ^ (e + 300); } else if (e > 300) { r = ldexp(r, -53); r /= dd_real(10.0) ^ e; r = ldexp(r, 53); } else { r /= dd_real(10.0) ^ e; } /* Fix exponent if we are off by one */ if (r >= 10.0) { r /= 10.0; e++; } else if (r < 1.0) { r *= 10.0; e--; } if (r >= 10.0 || r < 1.0) { dd_real::error("(dd_real::to_digits): can't compute exponent."); return; } /* Extract the digits */ for (i = 0; i < D; i++) { d = static_cast(r.x[0]); r -= d; r *= 10.0; s[i] = static_cast(d + '0'); } /* Fix out of range digits. 
*/ for (i = D-1; i > 0; i--) { if (s[i] < '0') { s[i-1]--; s[i] += 10; } else if (s[i] > '9') { s[i-1]++; s[i] -= 10; } } if (s[0] <= '0') { dd_real::error("(dd_real::to_digits): non-positive leading digit."); return; } /* Round, handle carry */ if (s[D-1] >= '5') { s[D-2]++; i = D-2; while (i > 0 && s[i] > '9') { s[i] -= 10; s[--i]++; } } /* If first digit is 10, shift everything. */ if (s[0] > '9') { e++; for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; } s[precision] = 0; expn = e; } /* Writes the double-double number into the character array s of length len. The integer d specifies how many significant digits to write. The string s must be able to hold at least (d+8) characters. showpos indicates whether to use the + sign, and uppercase indicates whether the E or e is to be used for the exponent. */ void dd_real::write(char *s, int len, int precision, bool showpos, bool uppercase) const { string str = to_string(precision, 0, ios_base::scientific, showpos, uppercase); std::strncpy(s, str.c_str(), len-1); s[len-1] = 0; } void round_string(char *s, int precision, int *offset){ /* Input string must be all digits or errors will occur. */ int i; int D = precision ; /* Round, handle carry */ if (s[D-1] >= '5') { s[D-2]++; i = D-2; while (i > 0 && s[i] > '9') { s[i] -= 10; s[--i]++; } } /* If first digit is 10, shift everything. */ if (s[0] > '9') { // e++; // don't modify exponent here for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; (*offset)++ ; // now offset needs to be increased by one precision++ ; } s[precision] = 0; // add terminator for array } string dd_real::to_string(int precision, int width, ios_base::fmtflags fmt, bool showpos, bool uppercase, char fill) const { string s; bool fixed = (fmt & ios_base::fixed) != 0; bool sgn = true; int i, e = 0; if (isnan()) { s = uppercase ? 
"NAN" : "nan"; sgn = false; } else { if (*this < 0.0) s += '-'; else if (showpos) s += '+'; else sgn = false; if (isinf()) { s += uppercase ? "INF" : "inf"; } else if (*this == 0.0) { /* Zero case */ s += '0'; if (precision > 0) { s += '.'; s.append(precision, '0'); } } else { /* Non-zero case */ int off = (fixed ? (1 + to_int(floor(log10(abs(*this))))) : 1); int d = precision + off; int d_with_extra = d; if(fixed) d_with_extra = std::max(60, d); // longer than the max accuracy for DD // highly special case - fixed mode, precision is zero, abs(*this) < 1.0 // without this trap a number like 0.9 printed fixed with 0 precision prints as 0 // should be rounded to 1. if(fixed && (precision == 0) && (abs(*this) < 1.0)){ if(abs(*this) >= 0.5) s += '1'; else s += '0'; return s; } // handle near zero to working precision (but not exactly zero) if (fixed && d <= 0) { s += '0'; if (precision > 0) { s += '.'; s.append(precision, '0'); } } else { // default char *t; // = new char[d+1]; int j; if(fixed){ t = new char[d_with_extra+1]; to_digits(t, e, d_with_extra); } else{ t = new char[d+1]; to_digits(t, e, d); } if (fixed) { // fix the string if it's been computed incorrectly // round here in the decimal string if required round_string(t, d + 1 , &off); if (off > 0) { for (i = 0; i < off; i++) s += t[i]; if (precision > 0) { s += '.'; for (j = 0; j < precision; j++, i++) s += t[i]; } } else { s += "0."; if (off < 0) s.append(-off, '0'); for (i = 0; i < d; i++) s += t[i]; } } else { s += t[0]; if (precision > 0) s += '.'; for (i = 1; i <= precision; i++) s += t[i]; } delete [] t; } } // trap for improper offset with large values // without this trap, output of values of the for 10^j - 1 fail for j > 28 // and are output with the point in the wrong place, leading to a dramatically off value if(fixed && (precision > 0)){ // make sure that the value isn't dramatically larger double from_string = atof(s.c_str()); // if this ratio is large, then we've got problems if( fabs( 
from_string / this->x[0] ) > 3.0 ){ int point_position; char temp; // loop on the string, find the point, move it up one // don't act on the first character for(i=1; i < s.length(); i++){ if(s[i] == '.'){ s[i] = s[i-1] ; s[i-1] = '.' ; break; } } from_string = atof(s.c_str()); // if this ratio is large, then the string has not been fixed if( fabs( from_string / this->x[0] ) > 3.0 ){ dd_real::error("Re-rounding unsuccessful in large number fixed point trap.") ; } } } if (!fixed && !isinf()) { /* Fill in exponent part */ s += uppercase ? 'E' : 'e'; append_expn(s, e); } } /* Fill in the blanks */ int len = s.length(); if (len < width) { int delta = width - len; if (fmt & ios_base::internal) { if (sgn) s.insert(static_cast(1), delta, fill); else s.insert(static_cast(0), delta, fill); } else if (fmt & ios_base::left) { s.append(delta, fill); } else { s.insert(static_cast(0), delta, fill); } } return s; } /* Reads in a double-double number from the string s. */ int dd_real::read(const char *s, dd_real &a) { const char *p = s; char ch; int sign = 0; int point = -1; int nd = 0; int e = 0; bool done = false; dd_real r = 0.0; int nread; /* Skip any leading spaces */ while (*p == ' ') p++; while (!done && (ch = *p) != '\0') { if (ch >= '0' && ch <= '9') { int d = ch - '0'; r *= 10.0; r += static_cast(d); nd++; } else { switch (ch) { case '.': if (point >= 0) return -1; point = nd; break; case '-': case '+': if (sign != 0 || nd > 0) return -1; sign = (ch == '-') ? -1 : 1; break; case 'E': case 'e': nread = std::sscanf(p+1, "%d", &e); done = true; if (nread != 1) return -1; break; default: return -1; } } p++; } if (point >= 0) { e -= (nd - point); } if (e != 0) { r *= (dd_real(10.0) ^ e); } a = (sign == -1) ? 
-r : r; return 0; } #if 0 /* Debugging routines */ void dd_real::dump(const string &name, std::ostream &os) const { std::ios_base::fmtflags old_flags = os.flags(); std::streamsize old_prec = os.precision(19); os << std::scientific; if (name.length() > 0) os << name << " = "; os << "[ " << setw(27) << x[0] << ", " << setw(27) << x[1] << " ]" << endl; os.precision(old_prec); os.flags(old_flags); } void dd_real::dump_bits(const string &name, std::ostream &os) const { string::size_type len = name.length(); if (len > 0) { os << name << " = "; len +=3; } os << "[ "; len += 2; print_double_info(os, x[0]); os << endl; for (string::size_type i = 0; i < len; i++) os << ' '; print_double_info(os, x[1]); os << " ]" << endl; } #endif dd_real dd_real::debug_rand() { if (std::rand() % 2 == 0) return ddrand(); int expn = 0; dd_real a = 0.0; double d; for (int i = 0; i < 2; i++) { d = std::ldexp(static_cast(std::rand()) / RAND_MAX, -expn); a += d; expn = expn + 54 + std::rand() % 200; } return a; } FreeFem-sources-4.9/3rdparty/dissection/src/qd/dd_real.h000664 000000 000000 00000024011 14037356732 023222 0ustar00rootroot000000 000000 /* * include/dd_real.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2007 * * Double-double precision (>= 106-bit significand) floating point * arithmetic package based on David Bailey's Fortran-90 double-double * package, with some changes. See * * http://www.nersc.gov/~dhbailey/mpdist/mpdist.html * * for the original Fortran-90 version. * * Overall structure is similar to that of Keith Brigg's C++ double-double * package. See * * http://www-epidem.plansci.cam.ac.uk/~kbriggs/doubledouble.html * * for more details. In particular, the fix for x86 computers is borrowed * from his code. 
*
 * Yozo Hida
 */
// operator int(), dd_real copysign(), fmax(), logb(), scalbn() are added
// for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki
#ifndef _QD_DD_REAL_H
#define _QD_DD_REAL_H

// NOTE(review): the include targets are not visible in this copy of the file.
#include
#include
#include
#include
#include
#include

// Some compilers define isnan, isfinite, and isinf as macros, even for
// C++ codes, which cause havoc when overloading these functions.  We undef
// them here.
#ifdef isnan
#undef isnan
#endif

#ifdef isfinite
#undef isfinite
#endif

#ifdef isinf
#undef isinf
#endif

#ifdef max
#undef max
#endif

#ifdef min
#undef min
#endif

/* Double-double number stored as a pair of doubles: x[0] is returned by
   _hi() and x[1] by _lo(). */
struct QD_API dd_real {
  double x[2];

  dd_real(double hi, double lo) { x[0] = hi; x[1] = lo; }
  dd_real() {x[0] = 0.0; x[1] = 0.0; }
  dd_real(double h) { x[0] = h; x[1] = 0.0; }
  dd_real(int h) {
    x[0] = (static_cast(h));
    x[1] = 0.0;
  }

  dd_real (const char *s);
  explicit dd_real (const double *d) {
    x[0] = d[0]; x[1] = d[1];
  }

  static void error(const char *msg);

  double _hi() const { return x[0]; }
  double _lo() const { return x[1]; }

  /* Named constants; their definitions live outside this header. */
  static const dd_real _2pi;
  static const dd_real _pi;
  static const dd_real _3pi4;
  static const dd_real _pi2;
  static const dd_real _pi4;
  static const dd_real _e;
  static const dd_real _log2;
  static const dd_real _log10;
  static const dd_real _nan;
  static const dd_real _inf;

  static const double _eps;
  static const double _min_normalized;
  static const dd_real _max;
  static const dd_real _safe_max;
  static const int _ndigits;

  /* isnan() tests both components; isfinite() and isinf() test x[0] only. */
  bool isnan() const { return QD_ISNAN(x[0]) || QD_ISNAN(x[1]); }
  bool isfinite() const { return QD_ISFINITE(x[0]); }
  bool isinf() const { return QD_ISINF(x[0]); }

  static dd_real add(double a, double b);
  static dd_real ieee_add(const dd_real &a, const dd_real &b);
  static dd_real sloppy_add(const dd_real &a, const dd_real &b);

  dd_real &operator+=(double a);
  dd_real &operator+=(const dd_real &a);

  static dd_real sub(double a, double b);

  dd_real &operator-=(double a);
  dd_real &operator-=(const dd_real &a);

  dd_real operator-() const;
  dd_real operator+() const;

  static dd_real mul(double a, double b);

  dd_real &operator*=(double a);
  dd_real &operator*=(const dd_real &a);

  static dd_real div(double a, double b);
  static dd_real sloppy_div(const dd_real &a, const dd_real &b);
  static dd_real accurate_div(const dd_real &a, const dd_real &b);

  dd_real &operator/=(double a);
  dd_real &operator/=(const dd_real &a);

  dd_real &operator=(double a);
  dd_real &operator=(const char *s);

  dd_real operator^(int n);
  static dd_real sqr(double d);

  static dd_real sqrt(double a);

  bool is_zero() const;
  bool is_one() const;
  bool is_positive() const;
  bool is_negative() const;

  static dd_real rand(void);

  /* String conversion; the digit count defaults to _ndigits. */
  void to_digits(char *s, int &expn, int precision = _ndigits) const;
  void write(char *s, int len, int precision = _ndigits,
      bool showpos = false, bool uppercase = false) const;
  std::string to_string(int precision = _ndigits, int width = 0,
      std::ios_base::fmtflags fmt = static_cast(0),
      bool showpos = false, bool uppercase = false, char fill = ' ') const;
  /* NOTE(review): declared as a non-static member although it receives the
     destination through the parameter a. */
  int read(const char *s, dd_real &a);

  /* Debugging Methods */
  void dump(const std::string &name = "", std::ostream &os = std::cerr) const;
  void dump_bits(const std::string &name = "",
                 std::ostream &os = std::cerr) const;

  static dd_real debug_rand();
  // added for complex class of LLVM Clang++: 23 Jul.2015 Atsushi Suzuki
  // both conversions return int(x[0]), i.e. only the first component is used
  operator int() { return int(x[0]); }
  operator int() const { return int(x[0]); }
};


namespace std {
  /* NOTE(review): the template arguments of this specialization and of its
     base class are not visible in this copy of the file. */
  template <>
  class numeric_limits : public numeric_limits {
  public:
    inline static double epsilon() { return dd_real::_eps; }
    inline static dd_real max() { return dd_real::_max; }
    inline static dd_real safe_max() { return dd_real::_safe_max; }
    inline static double min() { return dd_real::_min_normalized; }
    static const int digits = 104;
    static const int digits10 = 31;
  };
}

QD_API dd_real ddrand(void);
QD_API dd_real sqrt(const dd_real &a);

QD_API dd_real polyeval(const dd_real *c, int n, const dd_real &x);
QD_API dd_real polyroot(const dd_real *c, int n,
    const dd_real &x0, int max_iter = 32, double thresh = 0.0);

/* Free-function forms of the classification members. */
QD_API inline bool isnan(const dd_real &a) { return a.isnan(); }
QD_API inline bool isfinite(const dd_real &a) { return a.isfinite(); }
QD_API inline bool isinf(const dd_real &a) { return a.isinf(); }

/* Computes  dd * d  where d is known to be a power of 2. */
QD_API dd_real mul_pwr2(const dd_real &dd, double d);

QD_API dd_real operator+(const dd_real &a, double b);
QD_API dd_real operator+(double a, const dd_real &b);
QD_API dd_real operator+(const dd_real &a, const dd_real &b);

QD_API dd_real operator-(const dd_real &a, double b);
QD_API dd_real operator-(double a, const dd_real &b);
QD_API dd_real operator-(const dd_real &a, const dd_real &b);

QD_API dd_real operator*(const dd_real &a, double b);
QD_API dd_real operator*(double a, const dd_real &b);
QD_API dd_real operator*(const dd_real &a, const dd_real &b);

QD_API dd_real operator/(const dd_real &a, double b);
QD_API dd_real operator/(double a, const dd_real &b);
QD_API dd_real operator/(const dd_real &a, const dd_real &b);

QD_API dd_real inv(const dd_real &a);

QD_API dd_real rem(const dd_real &a, const dd_real &b);
QD_API dd_real drem(const dd_real &a, const dd_real &b);
QD_API dd_real divrem(const dd_real &a, const dd_real &b, dd_real &r);

QD_API dd_real pow(const dd_real &a, int n);
QD_API dd_real pow(const dd_real &a, const dd_real &b);
QD_API dd_real npwr(const dd_real &a, int n);
QD_API dd_real sqr(const dd_real &a);

/* NOTE(review): sqrt(const dd_real &) is already declared above. */
QD_API dd_real sqrt(const dd_real &a);
QD_API dd_real nroot(const dd_real &a, int n);

/* Comparisons: each operator is declared for int, double and dd_real
   operands on either side. */
QD_API bool operator==(const dd_real &a, int b);
QD_API bool operator==(int a, const dd_real &b);
QD_API bool operator==(const dd_real &a, double b);
QD_API bool operator==(double a, const dd_real &b);
QD_API bool operator==(const dd_real &a, const dd_real &b);

QD_API bool operator<=(const dd_real &a, int b);
QD_API bool operator<=(int a, const dd_real &b);
QD_API bool operator<=(const dd_real &a, double b);
QD_API bool operator<=(double a, const dd_real &b);
QD_API bool operator<=(const dd_real &a, const dd_real &b);

QD_API bool operator>=(const dd_real &a, int b);
QD_API bool operator>=(int a, const dd_real &b);
QD_API bool operator>=(const dd_real &a, double b);
QD_API bool operator>=(double a, const dd_real &b);
QD_API bool operator>=(const dd_real &a, const dd_real &b);

QD_API bool operator<(const dd_real &a, int b);
QD_API bool operator<(int a, const dd_real &b);
QD_API bool operator<(const dd_real &a, double b);
QD_API bool operator<(double a, const dd_real &b);
QD_API bool operator<(const dd_real &a, const dd_real &b);

QD_API bool operator>(const dd_real &a, int b);
QD_API bool operator>(int a, const dd_real &b);
QD_API bool operator>(const dd_real &a, double b);
QD_API bool operator>(double a, const dd_real &b);
QD_API bool operator>(const dd_real &a, const dd_real &b);

QD_API bool operator!=(const dd_real &a, int b);
QD_API bool operator!=(int a, const dd_real &b);
QD_API bool operator!=(const dd_real &a, double b);
QD_API bool operator!=(double a, const dd_real &b);
QD_API bool operator!=(const dd_real &a, const dd_real &b);

QD_API dd_real nint(const dd_real &a);
QD_API dd_real floor(const dd_real &a);
QD_API dd_real ceil(const dd_real &a);
QD_API dd_real aint(const dd_real &a);

/* NOTE(review): ddrand() is already declared above. */
QD_API dd_real ddrand(void);

double to_double(const dd_real &a);
int    to_int(const dd_real &a);

// QD_API dd_real exp(const dd_real &a);
QD_API dd_real ldexp(const dd_real &a, int exp);
QD_API dd_real log(const dd_real &a);
QD_API dd_real log10(const dd_real &a);

QD_API dd_real sin(const dd_real &a);
QD_API dd_real cos(const dd_real &a);
QD_API dd_real tan(const dd_real &a);
QD_API void sincos(const dd_real &a, dd_real &sin_a, dd_real &cos_a);

QD_API dd_real asin(const dd_real &a);
QD_API dd_real acos(const dd_real &a);
QD_API dd_real atan(const dd_real &a);
QD_API dd_real atan2(const dd_real &y, const dd_real &x);

QD_API dd_real sinh(const dd_real &a);
QD_API dd_real cosh(const dd_real &a);
QD_API dd_real tanh(const dd_real &a);
QD_API void sincosh(const dd_real &a,
                      dd_real &sinh_a, dd_real &cosh_a);

QD_API dd_real asinh(const dd_real &a);
QD_API dd_real acosh(const dd_real &a);
QD_API dd_real atanh(const dd_real &a);

QD_API dd_real fabs(const dd_real &a);
QD_API dd_real abs(const dd_real &a);   /* same as fabs */

QD_API dd_real fmod(const dd_real &a, const dd_real &b);

QD_API std::ostream& operator<<(std::ostream &s, const dd_real &a);
QD_API std::istream& operator>>(std::istream &s, dd_real &a);
#ifdef QD_INLINE
#include
#endif
// added for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki
// copysign: x with its sign chosen from y; only the x[0] components are
// inspected to decide the signs.
inline dd_real copysign(const dd_real &x, const dd_real &y)
{
  return (y.x[0] < 0.0) ? ((x.x[0] < 0.0) ? x : (-x)) : ((x.x[0] < 0.0) ? (-x) : x);
}
// fmax: compares the x[0] components only and returns x when they are equal.
inline dd_real fmax(const dd_real &x, const dd_real &y)
{
  return x.x[0] < y.x[0] ? y : x;
}
// logb: forwards to the double logb() on the x[0] component.
inline dd_real logb(const dd_real &y)
{
  return dd_real(logb(y.x[0]));
}
#if 0
inline dd_real scalbn(const dd_real &x, int n)
{
  return dd_real(scalb(x.x[0], n));
}
#endif
#endif /* _QD_DD_REAL_H */
FreeFem-sources-4.9/3rdparty/dissection/src/qd/fpu.h000664 000000 000000 00000001634 14037356732 022430 0ustar00rootroot000000 000000
/*
 * include/fpu.h
 *
 * This work was supported by the Director, Office of Science, Division
 * of Mathematical, Information, and Computational Sciences of the
 * U.S. Department of Energy under contract number DE-AC03-76SF00098.
 *
 * Copyright (c) 2001
 *
 * Contains functions to set and restore the round-to-double flag in the
 * control word of a x86 FPU.  The algorithms in the double-double and
 * quad-double package does not function with the extended mode found in
 * these FPU.
 */
#ifndef _QD_FPU_H
#define _QD_FPU_H

#include

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Set the round-to-double flag, and save the old control word in old_cw.
 * If old_cw is NULL, the old control word is not saved.
 */
QD_API void fpu_fix_start(unsigned int *old_cw);

/*
 * Restore the control word.
*/
QD_API void fpu_fix_end(unsigned int *old_cw);

#ifdef __cplusplus
}
#endif

#endif  /* _QD_FPU_H */
FreeFem-sources-4.9/3rdparty/dissection/src/qd/inline.h000664 000000 000000 00000007703 14037356732 023117 0ustar00rootroot000000 000000
/*
 * include/inline.h
 *
 * This work was supported by the Director, Office of Science, Division
 * of Mathematical, Information, and Computational Sciences of the
 * U.S. Department of Energy under contract number DE-AC03-76SF00098.
 *
 * Copyright (c) 2000-2001
 *
 * This file contains the basic functions used both by double-double
 * and quad-double package.  These are declared as inline functions as
 * they are the smallest building blocks of the double-double and
 * quad-double arithmetic.
 */
#ifndef _QD_INLINE_H
#define _QD_INLINE_H

#define _QD_SPLITTER 134217729.0               // = 2^27 + 1
#define _QD_SPLIT_THRESH 6.69692879491417e+299 // = 2^996

#ifdef QD_VACPP_BUILTINS_H
/* For VisualAge C++ __fmadd */
#include
#endif

// NOTE(review): the include targets are not visible in this copy of the file.
#include
#include

namespace qd {

/* NaN and infinity as plain doubles, taken from std::numeric_limits. */
static const double _d_nan = std::numeric_limits::quiet_NaN();
static const double _d_inf = std::numeric_limits::infinity();

/*********** Basic Functions ************/
/* Computes fl(a+b) and err(a+b).  Assumes |a| >= |b|. */
/* The sum is returned and the rounding error is stored in err.  The
   intermediates are volatile; do not reorder or merge the statements. */
inline double quick_two_sum(double a, double b, volatile double &err) {
  volatile double s = a + b;
  volatile double ss = s - a;
  err = b - ss; //  err = b - (s - a);
  return s;
}

/* Computes fl(a-b) and err(a-b).  Assumes |a| >= |b| */
inline double quick_two_diff(double a, double b, volatile double &err) {
  volatile double s = a - b;
  volatile double ss = a - s;
  err = ss - b; //  err = (a - s) - b;
  return s;
}

/* Computes fl(a+b) and err(a+b).  */
/* Unlike quick_two_sum, no ordering of |a| and |b| is stated here. */
inline double two_sum(double a, double b, volatile double &err) {
  volatile double s = a + b;
  volatile double bb = s - a;
  volatile double sbb = s - bb;
  volatile double bbb = b - bb;
  err = (a - sbb) + bbb; //  err = (a - (s - bb)) + (b - bb);
  return s;
}

/* Computes fl(a-b) and err(a-b).
*/
inline double two_diff(double a, double b, volatile double &err) {
  volatile double s = a - b;
  volatile double bb = s - a;
  volatile double sbb = s - bb;
  volatile double bbb = b + bb;
  err = (a - sbb) - bbb;
  return s;
}

#ifndef QD_FMS
/* Computes high word and lo word of a */
/* Only compiled when QD_FMS is not defined.  Inputs whose magnitude exceeds
   _QD_SPLIT_THRESH are scaled down by 2^-28 before the split and both halves
   are scaled back by 2^28 afterwards. */
inline void split(double a, double &hi, double &lo) {
  volatile double temp;
  if (a > _QD_SPLIT_THRESH || a < -_QD_SPLIT_THRESH) {
    a *= 3.7252902984619140625e-09;  // 2^-28
    temp = _QD_SPLITTER * a;
    volatile double tempa = temp - a;
    hi = temp - tempa;
    lo = a - hi;
    hi *= 268435456.0;          // 2^28
    lo *= 268435456.0;          // 2^28
  } else {
    temp = _QD_SPLITTER * a;
    volatile double tempa = temp - a;
    hi = temp - tempa;
    lo = a - hi;
  }
}
#endif

/* Computes fl(a*b) and err(a*b). */
/* Uses the QD_FMS macro when it is defined, otherwise the split()-based
   expansion below. */
inline double two_prod(double a, double b, volatile double &err) {
#ifdef QD_FMS
  volatile double p = a * b;
  err = QD_FMS(a, b, p);
  return p;
#else
  double a_hi, a_lo, b_hi, b_lo;
  volatile double p = a * b;
  split(a, a_hi, a_lo);
  split(b, b_hi, b_lo);
  err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
  return p;
#endif
}

/* Computes fl(a*a) and err(a*a).  Faster than the above method. */
inline double two_sqr(double a, volatile double &err) {
#ifdef QD_FMS
  volatile double p = a * a;
  err = QD_FMS(a, a, p);
  return p;
#else
  double hi, lo;
  volatile double q = a * a;
  split(a, hi, lo);
  err = ((hi * hi - q) + 2.0 * hi * lo) + lo * lo;
  return q;
#endif
}

/* Computes the nearest integer to d. */
/* Values that are already integral are returned unchanged; everything else
   goes through floor(d + 0.5). */
inline double nint(double d) {
  if (d == std::floor(d))
    return d;
  return std::floor(d + 0.5);
}

/* Computes the truncated integer. */
inline double aint(double d) {
  return (d >= 0.0) ? std::floor(d) : std::ceil(d);
}

/* These are provided to give consistent
   interface for double with double-double and quad-double.
*/
/* Plain-double versions: each one forwards to the std function or does the
   trivial operation shown. */
inline void sincosh(double t, double &sinh_t, double &cosh_t) {
  sinh_t = std::sinh(t);
  cosh_t = std::cosh(t);
}

inline double sqr(double t) {
  return t * t;
}

inline double to_double(double a) { return a; }
inline int    to_int(double a) { return static_cast(a); }

}

#endif /* _QD_INLINE_H */
FreeFem-sources-4.9/3rdparty/dissection/src/qd/qd_const.cpp000664 000000 000000 00000006250 14037356732 024002 0ustar00rootroot000000 000000
/*
 * src/qd_const.cc
 *
 * This work was supported by the Director, Office of Science, Division
 * of Mathematical, Information, and Computational Sciences of the
 * U.S. Department of Energy under contract number DE-AC03-76SF00098.
 *
 * Copyright (c) 2000-2001
 *
 * Defines constants used in quad-double package.
 */
#include "config.h"
// NOTE(review): the include target is not visible in this copy of the file.
#include

/* Some useful constants. */
/* Each constant is given as four double components passed to the
   four-argument qd_real constructor.  The literals must not be edited. */
const qd_real qd_real::_2pi = qd_real(6.283185307179586232e+00,
                                      2.449293598294706414e-16,
                                      -5.989539619436679332e-33,
                                      2.224908441726730563e-49);
const qd_real qd_real::_pi = qd_real(3.141592653589793116e+00,
                                     1.224646799147353207e-16,
                                     -2.994769809718339666e-33,
                                     1.112454220863365282e-49);
const qd_real qd_real::_pi2 = qd_real(1.570796326794896558e+00,
                                      6.123233995736766036e-17,
                                      -1.497384904859169833e-33,
                                      5.562271104316826408e-50);
const qd_real qd_real::_pi4 = qd_real(7.853981633974482790e-01,
                                      3.061616997868383018e-17,
                                      -7.486924524295849165e-34,
                                      2.781135552158413204e-50);
const qd_real qd_real::_3pi4 = qd_real(2.356194490192344837e+00,
                                       9.1848509936051484375e-17,
                                       3.9168984647504003225e-33,
                                      -2.5867981632704860386e-49);
const qd_real qd_real::_e = qd_real(2.718281828459045091e+00,
                                    1.445646891729250158e-16,
                                    -2.127717108038176765e-33,
                                    1.515630159841218954e-49);
const qd_real qd_real::_log2 = qd_real(6.931471805599452862e-01,
                                       2.319046813846299558e-17,
                                       5.707708438416212066e-34,
                                       -3.582432210601811423e-50);
const qd_real qd_real::_log10 = qd_real(2.302585092994045901e+00,
                                        -2.170756223382249351e-16,
                                        -9.984262454465776570e-33,
                                        -4.023357454450206379e-49);
/* NaN and infinity fill all four components with the same special value. */
const qd_real qd_real::_nan = qd_real(qd::_d_nan, qd::_d_nan,
                                      qd::_d_nan, qd::_d_nan);
const qd_real qd_real::_inf = qd_real(qd::_d_inf, qd::_d_inf,
                                      qd::_d_inf, qd::_d_inf);

const double qd_real::_eps = 1.21543267145725e-63; // = 2^-209
const double qd_real::_min_normalized = 1.6259745436952323e-260; // = 2^(-1022 + 3*53)
const qd_real qd_real::_max = qd_real(
    1.79769313486231570815e+308, 9.97920154767359795037e+291,
    5.53956966280111259858e+275, 3.07507889307840487279e+259);
/* _safe_max differs from _max in its first component only. */
const qd_real qd_real::_safe_max = qd_real(
    1.7976931080746007281e+308,  9.97920154767359795037e+291,
    5.53956966280111259858e+275, 3.07507889307840487279e+259);
const int qd_real::_ndigits = 62;

FreeFem-sources-4.9/3rdparty/dissection/src/qd/qd_inline.h000664 000000 000000 00000061313 14037356732 023600 0ustar00rootroot000000 000000
/*
 * include/qd_inline.h
 *
 * This work was supported by the Director, Office of Science, Division
 * of Mathematical, Information, and Computational Sciences of the
 * U.S. Department of Energy under contract number DE-AC03-76SF00098.
 *
 * Copyright (c) 2000-2001
 *
 * Contains small functions (suitable for inlining) in the quad-double
 * arithmetic package.
*/ #ifndef _QD_QD_INLINE_H #define _QD_QD_INLINE_H #include #include #ifndef QD_INLINE #define inline #endif /********** Constructors **********/ inline qd_real::qd_real(double x0, double x1, double x2, double x3) { x[0] = x0; x[1] = x1; x[2] = x2; x[3] = x3; } inline qd_real::qd_real(const double *xx) { x[0] = xx[0]; x[1] = xx[1]; x[2] = xx[2]; x[3] = xx[3]; } inline qd_real::qd_real(double x0) { x[0] = x0; x[1] = x[2] = x[3] = 0.0; } inline qd_real::qd_real() { x[0] = 0.0; x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } inline qd_real::qd_real(const dd_real &a) { x[0] = a._hi(); x[1] = a._lo(); x[2] = x[3] = 0.0; } inline qd_real::qd_real(int i) { x[0] = static_cast(i); x[1] = x[2] = x[3] = 0.0; } /********** Accessors **********/ inline double qd_real::operator[](int i) const { return x[i]; } inline double &qd_real::operator[](int i) { return x[i]; } inline bool qd_real::isnan() const { return QD_ISNAN(x[0]) || QD_ISNAN(x[1]) || QD_ISNAN(x[2]) || QD_ISNAN(x[3]); } /********** Renormalization **********/ namespace qd { inline void quick_renorm(double &c0, double &c1, double &c2, double &c3, double &c4) { double t0, t1, t2, t3; double s; s = qd::quick_two_sum(c3, c4, t3); s = qd::quick_two_sum(c2, s , t2); s = qd::quick_two_sum(c1, s , t1); c0 = qd::quick_two_sum(c0, s , t0); s = qd::quick_two_sum(t2, t3, t2); s = qd::quick_two_sum(t1, s , t1); c1 = qd::quick_two_sum(t0, s , t0); s = qd::quick_two_sum(t1, t2, t1); c2 = qd::quick_two_sum(t0, s , t0); c3 = t0 + t1; } inline void renorm(double &c0, double &c1, double &c2, double &c3) { double s0, s1, s2 = 0.0, s3 = 0.0; if (QD_ISINF(c0)) return; s0 = qd::quick_two_sum(c2, c3, c3); s0 = qd::quick_two_sum(c1, s0, c2); c0 = qd::quick_two_sum(c0, s0, c1); s0 = c0; s1 = c1; if (s1 != 0.0) { s1 = qd::quick_two_sum(s1, c2, s2); if (s2 != 0.0) s2 = qd::quick_two_sum(s2, c3, s3); else s1 = qd::quick_two_sum(s1, c3, s2); } else { s0 = qd::quick_two_sum(s0, c2, s1); if (s1 != 0.0) s1 = qd::quick_two_sum(s1, c3, s2); else s0 = 
qd::quick_two_sum(s0, c3, s1); } c0 = s0; c1 = s1; c2 = s2; c3 = s3; } inline void renorm(double &c0, double &c1, double &c2, double &c3, double &c4) { double s0, s1, s2 = 0.0, s3 = 0.0; if (QD_ISINF(c0)) return; s0 = qd::quick_two_sum(c3, c4, c4); s0 = qd::quick_two_sum(c2, s0, c3); s0 = qd::quick_two_sum(c1, s0, c2); c0 = qd::quick_two_sum(c0, s0, c1); s0 = c0; s1 = c1; s0 = qd::quick_two_sum(c0, c1, s1); if (s1 != 0.0) { s1 = qd::quick_two_sum(s1, c2, s2); if (s2 != 0.0) { s2 = qd::quick_two_sum(s2, c3, s3); if (s3 != 0.0) s3 += c4; else s2 += c4; } else { s1 = qd::quick_two_sum(s1, c3, s2); if (s2 != 0.0) s2 = qd::quick_two_sum(s2, c4, s3); else s1 = qd::quick_two_sum(s1, c4, s2); } } else { s0 = qd::quick_two_sum(s0, c2, s1); if (s1 != 0.0) { s1 = qd::quick_two_sum(s1, c3, s2); if (s2 != 0.0) s2 = qd::quick_two_sum(s2, c4, s3); else s1 = qd::quick_two_sum(s1, c4, s2); } else { s0 = qd::quick_two_sum(s0, c3, s1); if (s1 != 0.0) s1 = qd::quick_two_sum(s1, c4, s2); else s0 = qd::quick_two_sum(s0, c4, s1); } } c0 = s0; c1 = s1; c2 = s2; c3 = s3; } } inline void qd_real::renorm() { qd::renorm(x[0], x[1], x[2], x[3]); } inline void qd_real::renorm(double &e) { qd::renorm(x[0], x[1], x[2], x[3], e); } /********** Additions ************/ namespace qd { inline void three_sum(double &a, double &b, double &c) { double t1, t2, t3; t1 = qd::two_sum(a, b, t2); a = qd::two_sum(c, t1, t3); b = qd::two_sum(t2, t3, c); } inline void three_sum2(double &a, double &b, double &c) { double t1, t2, t3; t1 = qd::two_sum(a, b, t2); a = qd::two_sum(c, t1, t3); b = t2 + t3; } } /* quad-double + double */ inline qd_real operator+(const qd_real &a, double b) { double c0, c1, c2, c3; double e; c0 = qd::two_sum(a[0], b, e); c1 = qd::two_sum(a[1], e, e); c2 = qd::two_sum(a[2], e, e); c3 = qd::two_sum(a[3], e, e); qd::renorm(c0, c1, c2, c3, e); return qd_real(c0, c1, c2, c3); } /* quad-double + double-double */ inline qd_real operator+(const qd_real &a, const dd_real &b) { double s0, s1, s2, 
s3; double t0, t1; s0 = qd::two_sum(a[0], b._hi(), t0); s1 = qd::two_sum(a[1], b._lo(), t1); s1 = qd::two_sum(s1, t0, t0); s2 = a[2]; qd::three_sum(s2, t0, t1); s3 = qd::two_sum(t0, a[3], t0); t0 += t1; qd::renorm(s0, s1, s2, s3, t0); return qd_real(s0, s1, s2, s3); } /* double + quad-double */ inline qd_real operator+(double a, const qd_real &b) { return (b + a); } /* double-double + quad-double */ inline qd_real operator+(const dd_real &a, const qd_real &b) { return (b + a); } namespace qd { /* s = quick_three_accum(a, b, c) adds c to the dd-pair (a, b). * If the result does not fit in two doubles, then the sum is * output into s and (a,b) contains the remainder. Otherwise * s is zero and (a,b) contains the sum. */ inline double quick_three_accum(double &a, double &b, double c) { double s; bool za, zb; s = qd::two_sum(b, c, b); s = qd::two_sum(a, s, a); za = (a != 0.0); zb = (b != 0.0); if (za && zb) return s; if (!zb) { b = a; a = s; } else { a = s; } return 0.0; } } inline qd_real qd_real::ieee_add(const qd_real &a, const qd_real &b) { int i, j, k; double s, t; double u, v; /* double-length accumulator */ double x[4] = {0.0, 0.0, 0.0, 0.0}; i = j = k = 0; if (std::abs(a[i]) > std::abs(b[j])) u = a[i++]; else u = b[j++]; if (std::abs(a[i]) > std::abs(b[j])) v = a[i++]; else v = b[j++]; u = qd::quick_two_sum(u, v, v); while (k < 4) { if (i >= 4 && j >= 4) { x[k] = u; if (k < 3) x[++k] = v; break; } if (i >= 4) t = b[j++]; else if (j >= 4) t = a[i++]; else if (std::abs(a[i]) > std::abs(b[j])) { t = a[i++]; } else t = b[j++]; s = qd::quick_three_accum(u, v, t); if (s != 0.0) { x[k++] = s; } } /* add the rest. 
*/ for (k = i; k < 4; k++) x[3] += a[k]; for (k = j; k < 4; k++) x[3] += b[k]; qd::renorm(x[0], x[1], x[2], x[3]); return qd_real(x[0], x[1], x[2], x[3]); } inline qd_real qd_real::sloppy_add(const qd_real &a, const qd_real &b) { /* double s0, s1, s2, s3; double t0, t1, t2, t3; s0 = qd::two_sum(a[0], b[0], t0); s1 = qd::two_sum(a[1], b[1], t1); s2 = qd::two_sum(a[2], b[2], t2); s3 = qd::two_sum(a[3], b[3], t3); s1 = qd::two_sum(s1, t0, t0); qd::three_sum(s2, t0, t1); qd::three_sum2(s3, t0, t2); t0 = t0 + t1 + t3; qd::renorm(s0, s1, s2, s3, t0); return qd_real(s0, s1, s2, s3, t0); */ /* Same as above, but addition re-organized to minimize data dependency ... unfortunately some compilers are not very smart to do this automatically */ double s0, s1, s2, s3; double t0, t1, t2, t3; volatile double v0, v1, v2, v3; volatile double u0, u1, u2, u3; volatile double w0, w1, w2, w3; s0 = a[0] + b[0]; s1 = a[1] + b[1]; s2 = a[2] + b[2]; s3 = a[3] + b[3]; v0 = s0 - a[0]; v1 = s1 - a[1]; v2 = s2 - a[2]; v3 = s3 - a[3]; u0 = s0 - v0; u1 = s1 - v1; u2 = s2 - v2; u3 = s3 - v3; w0 = a[0] - u0; w1 = a[1] - u1; w2 = a[2] - u2; w3 = a[3] - u3; u0 = b[0] - v0; u1 = b[1] - v1; u2 = b[2] - v2; u3 = b[3] - v3; t0 = w0 + u0; t1 = w1 + u1; t2 = w2 + u2; t3 = w3 + u3; s1 = qd::two_sum(s1, t0, t0); qd::three_sum(s2, t0, t1); qd::three_sum2(s3, t0, t2); t0 = t0 + t1 + t3; /* renormalize */ qd::renorm(s0, s1, s2, s3, t0); return qd_real(s0, s1, s2, s3); } /* quad-double + quad-double */ inline qd_real operator+(const qd_real &a, const qd_real &b) { #ifndef QD_IEEE_ADD return qd_real::sloppy_add(a, b); #else return qd_real::ieee_add(a, b); #endif } /********** Self-Additions ************/ /* quad-double += double */ inline qd_real &qd_real::operator+=(double a) { *this = *this + a; return *this; } /* quad-double += double-double */ inline qd_real &qd_real::operator+=(const dd_real &a) { *this = *this + a; return *this; } /* quad-double += quad-double */ inline qd_real &qd_real::operator+=(const 
qd_real &a) { *this = *this + a; return *this; } /********** Unary Minus **********/ inline qd_real qd_real::operator-() const { return qd_real(-x[0], -x[1], -x[2], -x[3]); } /********** Unary Minus **********/ inline qd_real qd_real::operator+() const { return *this; } /********** Subtractions **********/ inline qd_real operator-(const qd_real &a, double b) { return (a + (-b)); } inline qd_real operator-(double a, const qd_real &b) { return (a + (-b)); } inline qd_real operator-(const qd_real &a, const dd_real &b) { return (a + (-b)); } inline qd_real operator-(const dd_real &a, const qd_real &b) { return (a + (-b)); } inline qd_real operator-(const qd_real &a, const qd_real &b) { return (a + (-b)); } /********** Self-Subtractions **********/ inline qd_real &qd_real::operator-=(double a) { return ((*this) += (-a)); } inline qd_real &qd_real::operator-=(const dd_real &a) { return ((*this) += (-a)); } inline qd_real &qd_real::operator-=(const qd_real &a) { return ((*this) += (-a)); } inline qd_real operator*(double a, const qd_real &b) { return (b * a); } inline qd_real operator*(const dd_real &a, const qd_real &b) { return (b * a); } inline qd_real mul_pwr2(const qd_real &a, double b) { return qd_real(a[0] * b, a[1] * b, a[2] * b, a[3] * b); } /********** Multiplications **********/ inline qd_real operator*(const qd_real &a, double b) { double p0, p1, p2, p3; double q0, q1, q2; double s0, s1, s2, s3, s4; p0 = qd::two_prod(a[0], b, q0); p1 = qd::two_prod(a[1], b, q1); p2 = qd::two_prod(a[2], b, q2); p3 = a[3] * b; s0 = p0; s1 = qd::two_sum(q0, p1, s2); qd::three_sum(s2, q1, p2); qd::three_sum2(q1, q2, p3); s3 = q1; s4 = q2 + p2; qd::renorm(s0, s1, s2, s3, s4); return qd_real(s0, s1, s2, s3); } /* quad-double * double-double */ /* a0 * b0 0 a0 * b1 1 a1 * b0 2 a1 * b1 3 a2 * b0 4 a2 * b1 5 a3 * b0 6 a3 * b1 7 */ inline qd_real operator*(const qd_real &a, const dd_real &b) { double p0, p1, p2, p3, p4; double q0, q1, q2, q3, q4; double s0, s1, s2; double t0, t1; p0 = 
qd::two_prod(a[0], b._hi(), q0); p1 = qd::two_prod(a[0], b._lo(), q1); p2 = qd::two_prod(a[1], b._hi(), q2); p3 = qd::two_prod(a[1], b._lo(), q3); p4 = qd::two_prod(a[2], b._hi(), q4); qd::three_sum(p1, p2, q0); /* Five-Three-Sum */ qd::three_sum(p2, p3, p4); q1 = qd::two_sum(q1, q2, q2); s0 = qd::two_sum(p2, q1, t0); s1 = qd::two_sum(p3, q2, t1); s1 = qd::two_sum(s1, t0, t0); s2 = t0 + t1 + p4; p2 = s0; p3 = a[2] * b._hi() + a[3] * b._lo() + q3 + q4; qd::three_sum2(p3, q0, s1); p4 = q0 + s2; qd::renorm(p0, p1, p2, p3, p4); return qd_real(p0, p1, p2, p3); } /* quad-double * quad-double */ /* a0 * b0 0 a0 * b1 1 a1 * b0 2 a0 * b2 3 a1 * b1 4 a2 * b0 5 a0 * b3 6 a1 * b2 7 a2 * b1 8 a3 * b0 9 */ inline qd_real qd_real::sloppy_mul(const qd_real &a, const qd_real &b) { double p0, p1, p2, p3, p4, p5; double q0, q1, q2, q3, q4, q5; double t0, t1; double s0, s1, s2; p0 = qd::two_prod(a[0], b[0], q0); p1 = qd::two_prod(a[0], b[1], q1); p2 = qd::two_prod(a[1], b[0], q2); p3 = qd::two_prod(a[0], b[2], q3); p4 = qd::two_prod(a[1], b[1], q4); p5 = qd::two_prod(a[2], b[0], q5); /* Start Accumulation */ qd::three_sum(p1, p2, q0); /* Six-Three Sum of p2, q1, q2, p3, p4, p5. */ qd::three_sum(p2, q1, q2); qd::three_sum(p3, p4, p5); /* compute (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5). 
*/
  s0 = qd::two_sum(p2, p3, t0);
  s1 = qd::two_sum(q1, p4, t1);
  s2 = q2 + p5;
  s1 = qd::two_sum(s1, t0, t0);
  s2 += (t0 + t1);

  /* O(eps^3) order terms */
  /* Plain double products and sums: no error terms are tracked here. */
  s1 += a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + q0 + q3 + q4 + q5;
  qd::renorm(p0, p1, s0, s1, s2);
  return qd_real(p0, p1, s0, s1);
}

/* accurate_mul: starts with the same six two_prod products and accumulation
   as sloppy_mul, but also forms the O(eps^3) products with qd::two_prod and
   sums them with qd::two_sum before the final renormalization. */
inline qd_real qd_real::accurate_mul(const qd_real &a, const qd_real &b) {
  double p0, p1, p2, p3, p4, p5;
  double q0, q1, q2, q3, q4, q5;
  double p6, p7, p8, p9;
  double q6, q7, q8, q9;
  double r0, r1;
  double t0, t1;
  double s0, s1, s2;

  p0 = qd::two_prod(a[0], b[0], q0);

  p1 = qd::two_prod(a[0], b[1], q1);
  p2 = qd::two_prod(a[1], b[0], q2);

  p3 = qd::two_prod(a[0], b[2], q3);
  p4 = qd::two_prod(a[1], b[1], q4);
  p5 = qd::two_prod(a[2], b[0], q5);

  /* Start Accumulation */
  qd::three_sum(p1, p2, q0);

  /* Six-Three Sum of p2, q1, q2, p3, p4, p5. */
  qd::three_sum(p2, q1, q2);
  qd::three_sum(p3, p4, p5);
  /* compute (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5). */
  s0 = qd::two_sum(p2, p3, t0);
  s1 = qd::two_sum(q1, p4, t1);
  s2 = q2 + p5;
  s1 = qd::two_sum(s1, t0, t0);
  s2 += (t0 + t1);

  /* O(eps^3) order terms */
  p6 = qd::two_prod(a[0], b[3], q6);
  p7 = qd::two_prod(a[1], b[2], q7);
  p8 = qd::two_prod(a[2], b[1], q8);
  p9 = qd::two_prod(a[3], b[0], q9);

  /* Nine-Two-Sum of q0, s1, q3, q4, q5, p6, p7, p8, p9. */
  q0 = qd::two_sum(q0, q3, q3);
  q4 = qd::two_sum(q4, q5, q5);
  p6 = qd::two_sum(p6, p7, p7);
  p8 = qd::two_sum(p8, p9, p9);
  /* Compute (t0, t1) = (q0, q3) + (q4, q5). */
  t0 = qd::two_sum(q0, q4, t1);
  t1 += (q3 + q5);
  /* Compute (r0, r1) = (p6, p7) + (p8, p9). */
  r0 = qd::two_sum(p6, p8, r1);
  r1 += (p7 + p9);
  /* Compute (q3, q4) = (t0, t1) + (r0, r1). */
  q3 = qd::two_sum(t0, r0, q4);
  q4 += (t1 + r1);
  /* Compute (t0, t1) = (q3, q4) + s1.
*/ t0 = qd::two_sum(q3, s1, t1); t1 += q4; /* O(eps^4) terms -- Nine-One-Sum */ t1 += a[1] * b[3] + a[2] * b[2] + a[3] * b[1] + q6 + q7 + q8 + q9 + s2; qd::renorm(p0, p1, s0, t0, t1); return qd_real(p0, p1, s0, t0); } inline qd_real operator*(const qd_real &a, const qd_real &b) { #ifdef QD_SLOPPY_MUL return qd_real::sloppy_mul(a, b); #else return qd_real::accurate_mul(a, b); #endif } /* quad-double ^ 2 = (x0 + x1 + x2 + x3) ^ 2 = x0 ^ 2 + 2 x0 * x1 + (2 x0 * x2 + x1 ^ 2) + (2 x0 * x3 + 2 x1 * x2) */ inline qd_real sqr(const qd_real &a) { double p0, p1, p2, p3, p4, p5; double q0, q1, q2, q3; double s0, s1; double t0, t1; p0 = qd::two_sqr(a[0], q0); p1 = qd::two_prod(2.0 * a[0], a[1], q1); p2 = qd::two_prod(2.0 * a[0], a[2], q2); p3 = qd::two_sqr(a[1], q3); p1 = qd::two_sum(q0, p1, q0); q0 = qd::two_sum(q0, q1, q1); p2 = qd::two_sum(p2, p3, p3); s0 = qd::two_sum(q0, p2, t0); s1 = qd::two_sum(q1, p3, t1); s1 = qd::two_sum(s1, t0, t0); t0 += t1; s1 = qd::quick_two_sum(s1, t0, t0); p2 = qd::quick_two_sum(s0, s1, t1); p3 = qd::quick_two_sum(t1, t0, q0); p4 = 2.0 * a[0] * a[3]; p5 = 2.0 * a[1] * a[2]; p4 = qd::two_sum(p4, p5, p5); q2 = qd::two_sum(q2, q3, q3); t0 = qd::two_sum(p4, q2, t1); t1 = t1 + p5 + q3; p3 = qd::two_sum(p3, t0, p4); p4 = p4 + q0 + t1; qd::renorm(p0, p1, p2, p3, p4); return qd_real(p0, p1, p2, p3); } /********** Self-Multiplication **********/ /* quad-double *= double */ inline qd_real &qd_real::operator*=(double a) { *this = (*this * a); return *this; } /* quad-double *= double-double */ inline qd_real &qd_real::operator*=(const dd_real &a) { *this = (*this * a); return *this; } /* quad-double *= quad-double */ inline qd_real &qd_real::operator*=(const qd_real &a) { *this = *this * a; return *this; } inline qd_real operator/ (const qd_real &a, const dd_real &b) { #ifdef QD_SLOPPY_DIV return qd_real::sloppy_div(a, b); #else return qd_real::accurate_div(a, b); #endif } inline qd_real operator/(const qd_real &a, const qd_real &b) { #ifdef QD_SLOPPY_DIV 
return qd_real::sloppy_div(a, b); #else return qd_real::accurate_div(a, b); #endif } /* double / quad-double */ inline qd_real operator/(double a, const qd_real &b) { return qd_real(a) / b; } /* double-double / quad-double */ inline qd_real operator/(const dd_real &a, const qd_real &b) { return qd_real(a) / b; } /********** Self-Divisions **********/ /* quad-double /= double */ inline qd_real &qd_real::operator/=(double a) { *this = (*this / a); return *this; } /* quad-double /= double-double */ inline qd_real &qd_real::operator/=(const dd_real &a) { *this = (*this / a); return *this; } /* quad-double /= quad-double */ inline qd_real &qd_real::operator/=(const qd_real &a) { *this = (*this / a); return *this; } /********** Exponentiation **********/ inline qd_real qd_real::operator^(int n) const { return pow(*this, n); } /********** Miscellaneous **********/ inline qd_real abs(const qd_real &a) { return (a[0] < 0.0) ? -a : a; } inline qd_real fabs(const qd_real &a) { return abs(a); } /* Quick version. May be off by one when qd is very close to the middle of two integers. 
*/
/* Applies qd::nint to each component independently, then renormalizes. */
inline qd_real quick_nint(const qd_real &a) {
  qd_real r = qd_real(qd::nint(a[0]), qd::nint(a[1]),
      qd::nint(a[2]), qd::nint(a[3]));
  r.renorm();
  return r;
}

/*********** Assignments ************/
/* quad-double = double */
/* Stores a in the leading component and zeroes the other three. */
inline qd_real &qd_real::operator=(double a) {
  x[0] = a;
  x[1] = x[2] = x[3] = 0.0;
  return *this;
}

/* quad-double = double-double */
/* Copies the hi/lo pair into the two leading components; the rest are zeroed. */
inline qd_real &qd_real::operator=(const dd_real &a) {
  x[0] = a._hi();
  x[1] = a._lo();
  x[2] = x[3] = 0.0;
  return *this;
}

/********** Equality Comparison **********/
/* Equality is a component-wise comparison; components that the narrower
   operand does not have must be exactly zero. */
inline bool operator==(const qd_real &a, int b) {
  return (a[0] == double(b) && a[1] == 0.0 && a[2] == 0.0 && a[3] == 0.0);
}

inline bool operator==(int a, const qd_real &b) {
  return (b == a);
}

inline bool operator==(const qd_real &a, double b) {
  return (a[0] == b && a[1] == 0.0 && a[2] == 0.0 && a[3] == 0.0);
}

inline bool operator==(double a, const qd_real &b) {
  return (b == a);
}

inline bool operator==(const qd_real &a, const dd_real &b) {
  return (a[0] == b._hi() && a[1] == b._lo() &&
          a[2] == 0.0 && a[3] == 0.0);
}

inline bool operator==(const dd_real &a, const qd_real &b) {
  return (b == a);
}

inline bool operator==(const qd_real &a, const qd_real &b) {
  return (a[0] == b[0] && a[1] == b[1] &&
          a[2] == b[2] && a[3] == b[3]);
}

/********** Less-Than Comparison ***********/
/* Lexicographic comparison from the leading component down.  Against an int
   or double only a[0] and a[1] are inspected; against a dd_real, a[0]..a[2]. */
inline bool operator<(const qd_real &a, int b) {
  return (a[0] < double(b) || (a[0] == double(b) && a[1] < 0.0));
}

/* Commuted forms delegate to the mirrored operator. */
inline bool operator<(int a, const qd_real &b) {
  return (b > a);
}

inline bool operator<(const qd_real &a, double b) {
  return (a[0] < b || (a[0] == b && a[1] < 0.0));
}

inline bool operator<(double a, const qd_real &b) {
  return (b > a);
}

inline bool operator<(const qd_real &a, const dd_real &b) {
  return (a[0] < b._hi() ||
          (a[0] == b._hi() && (a[1] < b._lo() ||
                            (a[1] == b._lo() && a[2] < 0.0))));
}

inline bool operator<(const dd_real &a, const qd_real &b) {
  return (b > a);
}

inline bool operator<(const qd_real &a, const qd_real &b) {
  return (a[0] < b[0] ||
          (a[0] == b[0]
&& (a[1] < b[1] ||
            (a[1] == b[1] && (a[2] < b[2] ||
                              (a[2] == b[2] && a[3] < b[3]))))));
}

/********** Greater-Than Comparison ***********/
/* Lexicographic comparison from the leading component down.  Against an int
   or double only a[0] and a[1] are inspected; against a dd_real, a[0]..a[2]. */
inline bool operator>(const qd_real &a, int b) {
  return (a[0] > double(b) || (a[0] == double(b) && a[1] > 0.0));
}

/* Commuted forms delegate to the mirrored operator. */
inline bool operator>(int a, const qd_real &b) {
  return (b < a);
}

inline bool operator>(const qd_real &a, double b) {
  return (a[0] > b || (a[0] == b && a[1] > 0.0));
}

inline bool operator>(double a, const qd_real &b) {
  return (b < a);
}

inline bool operator>(const qd_real &a, const dd_real &b) {
  return (a[0] > b._hi() ||
          (a[0] == b._hi() && (a[1] > b._lo() ||
                            (a[1] == b._lo() && a[2] > 0.0))));
}

inline bool operator>(const dd_real &a, const qd_real &b) {
  return (b < a);
}

inline bool operator>(const qd_real &a, const qd_real &b) {
  return (a[0] > b[0] ||
          (a[0] == b[0] && (a[1] > b[1] ||
                            (a[1] == b[1] && (a[2] > b[2] ||
                                              (a[2] == b[2] && a[3] > b[3]))))));
}

/********** Less-Than-Or-Equal-To Comparison **********/
/* Same lexicographic scheme; only the last inspected component uses <=. */
inline bool operator<=(const qd_real &a, int b) {
  return (a[0] < double(b) || (a[0] == double(b) && a[1] <= 0.0));
}

inline bool operator<=(int a, const qd_real &b) {
  return (b >= a);
}

inline bool operator<=(const qd_real &a, double b) {
  return (a[0] < b || (a[0] == b && a[1] <= 0.0));
}

inline bool operator<=(double a, const qd_real &b) {
  return (b >= a);
}

inline bool operator<=(const qd_real &a, const dd_real &b) {
  return (a[0] < b._hi() ||
          (a[0] == b._hi() && (a[1] < b._lo() ||
                            (a[1] == b._lo() && a[2] <= 0.0))));
}

inline bool operator<=(const dd_real &a, const qd_real &b) {
  return (b >= a);
}

inline bool operator<=(const qd_real &a, const qd_real &b) {
  return (a[0] < b[0] ||
          (a[0] == b[0] && (a[1] < b[1] ||
                            (a[1] == b[1] && (a[2] < b[2] ||
                                              (a[2] == b[2] && a[3] <= b[3]))))));
}

/********** Greater-Than-Or-Equal-To Comparison **********/
/* Same lexicographic scheme; only the last inspected component uses >=. */
inline bool operator>=(const qd_real &a, int b) {
  return (a[0] > double(b) || (a[0] == double(b) && a[1] >= 0.0));
}

inline bool operator>=(int a, const qd_real &b)
{ return (b <= a); } inline bool operator>=(const qd_real &a, double b) { return (a[0] > b || (a[0] == b && a[1] >= 0.0)); } inline bool operator>=(double a, const qd_real &b) { return (b <= a); } inline bool operator>=(const qd_real &a, const dd_real &b) { return (a[0] > b._hi() || (a[0] == b._hi() && (a[1] > b._lo() || (a[1] == b._lo() && a[2] >= 0.0)))); } inline bool operator>=(const dd_real &a, const qd_real &b) { return (b <= a); } inline bool operator>=(const qd_real &a, const qd_real &b) { return (a[0] > b[0] || (a[0] == b[0] && (a[1] > b[1] || (a[1] == b[1] && (a[2] > b[2] || (a[2] == b[2] && a[3] >= b[3])))))); } /********** Not-Equal-To Comparison **********/ inline bool operator!=(const qd_real &a, int b) { return !(a == b); } inline bool operator!=(int a, const qd_real &b) { return !(a == b); } inline bool operator!=(const qd_real &a, double b) { return !(a == b); } inline bool operator!=(double a, const qd_real &b) { return !(a == b); } inline bool operator!=(const qd_real &a, const dd_real &b) { return !(a == b); } inline bool operator!=(const dd_real &a, const qd_real &b) { return !(a == b); } inline bool operator!=(const qd_real &a, const qd_real &b) { return !(a == b); } inline qd_real aint(const qd_real &a) { return (a[0] >= 0) ? floor(a) : ceil(a); } inline bool qd_real::is_zero() const { return (x[0] == 0.0); } inline bool qd_real::is_one() const { return (x[0] == 1.0 && x[1] == 0.0 && x[2] == 0.0 && x[3] == 0.0); } inline bool qd_real::is_positive() const { return (x[0] > 0.0); } inline bool qd_real::is_negative() const { return (x[0] < 0.0); } inline dd_real to_dd_real(const qd_real &a) { return dd_real(a[0], a[1]); } inline double to_double(const qd_real &a) { return a[0]; } inline int to_int(const qd_real &a) { return static_cast(a[0]); } inline qd_real inv(const qd_real &qd) { return 1.0 / qd; } inline qd_real max(const qd_real &a, const qd_real &b) { return (a > b) ? 
a : b; } inline qd_real max(const qd_real &a, const qd_real &b, const qd_real &c) { return (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c); } inline qd_real min(const qd_real &a, const qd_real &b) { return (a < b) ? a : b; } inline qd_real min(const qd_real &a, const qd_real &b, const qd_real &c) { return (a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c); } /* Random number generator */ inline qd_real qd_real::rand() { return qdrand(); } inline qd_real ldexp(const qd_real &a, int n) { return qd_real(std::ldexp(a[0], n), std::ldexp(a[1], n), std::ldexp(a[2], n), std::ldexp(a[3], n)); } #endif /* _QD_QD_INLINE_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/qd_real.cpp000664 000000 000000 00000274233 14037356732 023607 0ustar00rootroot000000 000000 /* * src/qd_real.cc * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2007 * * Contains implementation of non-inlined functions of quad-double * package. Inlined functions are found in qd_inline.h (in include directory). */ #define NOMINMAX #include #include #include #include #include #include #include #include #include "config.h" #include #include "util.h" #include #ifndef QD_INLINE #include #endif using std::cout; using std::cerr; using std::endl; using std::istream; using std::ostream; using std::ios_base; using std::string; using std::setw; using namespace qd; void qd_real::error(const char *msg) { #ifndef SX_ACE if (msg) { cerr << "ERROR " << msg << endl; } #endif } /********** Multiplications **********/ qd_real nint(const qd_real &a) { double x0, x1, x2, x3; x0 = nint(a[0]); x1 = x2 = x3 = 0.0; if (x0 == a[0]) { /* First double is already an integer. */ x1 = nint(a[1]); if (x1 == a[1]) { /* Second double is already an integer. */ x2 = nint(a[2]); if (x2 == a[2]) { /* Third double is already an integer. 
*/ x3 = nint(a[3]); } else { if (std::abs(x2 - a[2]) == 0.5 && a[3] < 0.0) { x2 -= 1.0; } } } else { if (std::abs(x1 - a[1]) == 0.5 && a[2] < 0.0) { x1 -= 1.0; } } } else { /* First double is not an integer. */ if (std::abs(x0 - a[0]) == 0.5 && a[1] < 0.0) { x0 -= 1.0; } } renorm(x0, x1, x2, x3); return qd_real(x0, x1, x2, x3); } qd_real floor(const qd_real &a) { double x0, x1, x2, x3; x1 = x2 = x3 = 0.0; x0 = std::floor(a[0]); if (x0 == a[0]) { x1 = std::floor(a[1]); if (x1 == a[1]) { x2 = std::floor(a[2]); if (x2 == a[2]) { x3 = std::floor(a[3]); } } renorm(x0, x1, x2, x3); return qd_real(x0, x1, x2, x3); } return qd_real(x0, x1, x2, x3); } qd_real ceil(const qd_real &a) { double x0, x1, x2, x3; x1 = x2 = x3 = 0.0; x0 = std::ceil(a[0]); if (x0 == a[0]) { x1 = std::ceil(a[1]); if (x1 == a[1]) { x2 = std::ceil(a[2]); if (x2 == a[2]) { x3 = std::ceil(a[3]); } } renorm(x0, x1, x2, x3); return qd_real(x0, x1, x2, x3); } return qd_real(x0, x1, x2, x3); } /********** Divisions **********/ /* quad-double / double */ qd_real operator/(const qd_real &a, double b) { /* Strategy: compute approximate quotient using high order doubles, and then correct it 3 times using the remainder. (Analogous to long division.) */ double t0, t1; double q0, q1, q2, q3; qd_real r; q0 = a[0] / b; /* approximate quotient */ /* Compute the remainder a - q0 * b */ t0 = two_prod(q0, b, t1); r = a - dd_real(t0, t1); /* Compute the first correction */ q1 = r[0] / b; t0 = two_prod(q1, b, t1); r -= dd_real(t0, t1); /* Second correction to the quotient. */ q2 = r[0] / b; t0 = two_prod(q2, b, t1); r -= dd_real(t0, t1); /* Final correction to the quotient. 
*/ q3 = r[0] / b; renorm(q0, q1, q2, q3); return qd_real(q0, q1, q2, q3); } qd_real::qd_real(const char *s) { if (qd_real::read(s, *this)) { qd_real::error("(qd_real::qd_real): INPUT ERROR."); *this = qd_real::_nan; } } qd_real &qd_real::operator=(const char *s) { if (qd_real::read(s, *this)) { qd_real::error("(qd_real::operator=): INPUT ERROR."); *this = qd_real::_nan; } return *this; } istream &operator>>(istream &s, qd_real &qd) { char str[255]; s >> str; qd = qd_real(str); return s; } ostream &operator<<(ostream &os, const qd_real &qd) { bool showpos = (os.flags() & ios_base::showpos) != 0; bool uppercase = (os.flags() & ios_base::uppercase) != 0; return os << qd.to_string(os.precision(), os.width(), os.flags(), showpos, uppercase, os.fill()); } /* Read a quad-double from s. */ int qd_real::read(const char *s, qd_real &qd) { const char *p = s; char ch; int sign = 0; int point = -1; /* location of decimal point */ int nd = 0; /* number of digits read */ int e = 0; /* exponent. */ bool done = false; qd_real r = 0.0; /* number being read */ /* Skip any leading spaces */ while (*p == ' ') p++; while (!done && (ch = *p) != '\0') { if (ch >= '0' && ch <= '9') { /* It's a digit */ int d = ch - '0'; r *= 10.0; r += static_cast(d); nd++; } else { /* Non-digit */ switch (ch) { case '.': if (point >= 0) return -1; /* we've already encountered a decimal point. */ point = nd; break; case '-': case '+': if (sign != 0 || nd > 0) return -1; /* we've already encountered a sign, or if its not at first position. */ sign = (ch == '-') ? -1 : 1; break; case 'E': case 'e': int nread; nread = std::sscanf(p+1, "%d", &e); done = true; if (nread != 1) return -1; /* read of exponent failed. */ break; case ' ': done = true; break; default: return -1; } } p++; } /* Adjust exponent to account for decimal point */ if (point >= 0) { e -= (nd - point); } /* Multiply the the exponent */ if (e != 0) { r *= (qd_real(10.0) ^ e); } qd = (sign < 0) ? 
-r : r; return 0; } void qd_real::to_digits(char *s, int &expn, int precision) const { int D = precision + 1; /* number of digits to compute */ qd_real r = abs(*this); int e; /* exponent */ int i, d; if (x[0] == 0.0) { /* this == 0.0 */ expn = 0; for (i = 0; i < precision; i++) s[i] = '0'; return; } /* First determine the (approximate) exponent. */ e = static_cast(std::floor(std::log10(std::abs(x[0])))); if (e < -300) { r *= qd_real(10.0) ^ 300; r /= qd_real(10.0) ^ (e + 300); } else if (e > 300) { r = ldexp(r, -53); r /= qd_real(10.0) ^ e; r = ldexp(r, 53); } else { r /= qd_real(10.0) ^ e; } /* Fix exponent if we are off by one */ if (r >= 10.0) { r /= 10.0; e++; } else if (r < 1.0) { r *= 10.0; e--; } if (r >= 10.0 || r < 1.0) { qd_real::error("(qd_real::to_digits): can't compute exponent."); return; } /* Extract the digits */ for (i = 0; i < D; i++) { d = static_cast(r[0]); r -= d; r *= 10.0; s[i] = static_cast(d + '0'); } /* Fix out of range digits. */ for (i = D-1; i > 0; i--) { if (s[i] < '0') { s[i-1]--; s[i] += 10; } else if (s[i] > '9') { s[i-1]++; s[i] -= 10; } } if (s[0] <= '0') { qd_real::error("(qd_real::to_digits): non-positive leading digit."); return; } /* Round, handle carry */ if (s[D-1] >= '5') { s[D-2]++; i = D-2; while (i > 0 && s[i] > '9') { s[i] -= 10; s[--i]++; } } /* If first digit is 10, shift everything. */ if (s[0] > '9') { e++; for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; } s[precision] = 0; expn = e; } /* Writes the quad-double number into the character array s of length len. The integer d specifies how many significant digits to write. The string s must be able to hold at least (d+8) characters. showpos indicates whether to use the + sign, and uppercase indicates whether the E or e is to be used for the exponent. 
*/ void qd_real::write(char *s, int len, int precision, bool showpos, bool uppercase) const { string str = to_string(precision, 0, ios_base::scientific, showpos, uppercase); strncpy(s, str.c_str(), len-1); s[len-1] = 0; } void round_string_qd(char *s, int precision, int *offset){ /* Input string must be all digits or errors will occur. */ int i; int D = precision ; /* Round, handle carry */ if (s[D-1] >= '5') { s[D-2]++; i = D-2; while (i > 0 && s[i] > '9') { s[i] -= 10; s[--i]++; } } /* If first digit is 10, shift everything. */ if (s[0] > '9') { // e++; // don't modify exponent here for (i = precision; i >= 2; i--) s[i] = s[i-1]; s[0] = '1'; s[1] = '0'; (*offset)++ ; // now offset needs to be increased by one precision++ ; } s[precision] = 0; // add terminator for array } string qd_real::to_string(int precision, int width, ios_base::fmtflags fmt, bool showpos, bool uppercase, char fill) const { string s; bool fixed = (fmt & ios_base::fixed) != 0; bool sgn = true; int i, e = 0; if (isinf()) { if (*this < 0.0) s += '-'; else if (showpos) s += '+'; else sgn = false; s += uppercase ? "INF" : "inf"; } else if (isnan()) { s = uppercase ? "NAN" : "nan"; sgn = false; } else { if (*this < 0.0) s += '-'; else if (showpos) s += '+'; else sgn = false; if (*this == 0.0) { /* Zero case */ s += '0'; if (precision > 0) { s += '.'; s.append(precision, '0'); } } else { /* Non-zero case */ int off = (fixed ? (1 + to_int(floor(log10(abs(*this))))) : 1); int d = precision + off; int d_with_extra = d; if(fixed) d_with_extra = std::max(120, d); // longer than the max accuracy for DD // highly special case - fixed mode, precision is zero, abs(*this) < 1.0 // without this trap a number like 0.9 printed fixed with 0 precision prints as 0 // should be rounded to 1. 
if(fixed && (precision == 0) && (abs(*this) < 1.0)){ if(abs(*this) >= 0.5) s += '1'; else s += '0'; return s; } // handle near zero to working precision (but not exactly zero) if (fixed && d <= 0) { s += '0'; if (precision > 0) { s += '.'; s.append(precision, '0'); } } else { // default char *t ; // = new char[d+1]; int j; if(fixed){ t = new char[d_with_extra+1]; to_digits(t, e, d_with_extra); } else{ t = new char[d+1]; to_digits(t, e, d); } if (fixed) { // fix the string if it's been computed incorrectly // round here in the decimal string if required round_string_qd(t, d + 1 , &off); if (off > 0) { for (i = 0; i < off; i++) s += t[i]; if (precision > 0) { s += '.'; for (j = 0; j < precision; j++, i++) s += t[i]; } } else { s += "0."; if (off < 0) s.append(-off, '0'); for (i = 0; i < d; i++) s += t[i]; } } else { s += t[0]; if (precision > 0) s += '.'; for (i = 1; i <= precision; i++) s += t[i]; } delete [] t; } } // trap for improper offset with large values // without this trap, output of values of the for 10^j - 1 fail for j > 28 // and are output with the point in the wrong place, leading to a dramatically off value if(fixed && (precision > 0)){ // make sure that the value isn't dramatically larger double from_string = atof(s.c_str()); // if this ratio is large, then we've got problems if( fabs( from_string / this->x[0] ) > 3.0 ){ int point_position; char temp; // loop on the string, find the point, move it up one // don't act on the first character for(i=1; i < s.length(); i++){ if(s[i] == '.'){ s[i] = s[i-1] ; s[i-1] = '.' ; break; } } from_string = atof(s.c_str()); // if this ratio is large, then the string has not been fixed if( fabs( from_string / this->x[0] ) > 3.0 ){ dd_real::error("Re-rounding unsuccessful in large number fixed point trap.") ; } } } if (!fixed) { /* Fill in exponent part */ s += uppercase ? 
'E' : 'e'; append_expn(s, e); } } /* Fill in the blanks */ int len = s.length(); if (len < width) { int delta = width - len; if (fmt & ios_base::internal) { if (sgn) s.insert(static_cast(1), delta, fill); else s.insert(static_cast(0), delta, fill); } else if (fmt & ios_base::left) { s.append(delta, fill); } else { s.insert(static_cast(0), delta, fill); } } return s; } /* Computes qd^n, where n is an integer. */ qd_real pow(const qd_real &a, int n) { if (n == 0) return 1.0; qd_real r = a; /* odd-case multiplier */ qd_real s = 1.0; /* current answer */ int N = std::abs(n); if (N > 1) { /* Use binary exponentiation. */ while (N > 0) { if (N % 2 == 1) { /* If odd, multiply by r. Note eventually N = 1, so this eventually executes. */ s *= r; } N /= 2; if (N > 0) r = sqr(r); } } else { s = r; } if (n < 0) return (1.0 / s); return s; } qd_real pow(const qd_real &a, const qd_real &b) { return exp(b * log(a)); } qd_real npwr(const qd_real &a, int n) { return pow(a, n); } /* Debugging routines */ void qd_real::dump_bits(const string &name, std::ostream &os) const { string::size_type len = name.length(); if (len > 0) { os << name << " = "; len += 3; } os << "[ "; len += 2; for (int j = 0; j < 4; j++) { if (j > 0) for (string::size_type i = 0; i < len; i++) os << ' '; print_double_info(os, x[j]); if (j < 3) os << endl; else os << " ]" << endl; } } void qd_real::dump(const string &name, std::ostream &os) const { std::ios_base::fmtflags old_flags = os.flags(); std::streamsize old_prec = os.precision(19); os << std::scientific; string::size_type len = name.length(); if (len > 0) { os << name << " = "; len += 3; } os << "[ "; len += 2; os << setw(27) << x[0] << ", " << setw(26) << x[1] << "," << endl; for (string::size_type i = 0; i < len; i++) os << ' '; os << setw(27) << x[2] << ", " << setw(26) << x[3] << " ]" << endl; os.precision(old_prec); os.flags(old_flags); } /* Divisions */ /* quad-double / double-double */ qd_real qd_real::sloppy_div(const qd_real &a, const dd_real &b) 
{
  /* Long division: each quotient digit is the leading component of the
     running remainder divided by the leading component of b. */
  double q0, q1, q2, q3;
  qd_real r;
  qd_real qd_b(b);

  q0 = a[0] / b._hi();
  r = a - q0 * qd_b;

  q1 = r[0] / b._hi();
  r -= (q1 * qd_b);

  q2 = r[0] / b._hi();
  r -= (q2 * qd_b);

  q3 = r[0] / b._hi();

  ::renorm(q0, q1, q2, q3);

  return qd_real(q0, q1, q2, q3);
}

/* Same scheme as sloppy_div above with one extra correction term (q4) that
   is folded in by the five-argument renorm. */
qd_real qd_real::accurate_div(const qd_real &a, const dd_real &b) {
  double q0, q1, q2, q3, q4;
  qd_real r;
  qd_real qd_b(b);

  q0 = a[0] / b._hi();
  r = a - q0 * qd_b;

  q1 = r[0] / b._hi();
  r -= (q1 * qd_b);

  q2 = r[0] / b._hi();
  r -= (q2 * qd_b);

  q3 = r[0] / b._hi();
  r -= (q3 * qd_b);

  q4 = r[0] / b._hi();

  ::renorm(q0, q1, q2, q3, q4);

  return qd_real(q0, q1, q2, q3);
}

/* quad-double / quad-double */
/* Four quotient terms, each taken from the leading component of the running
   remainder divided by b[0]. */
qd_real qd_real::sloppy_div(const qd_real &a, const qd_real &b) {
  double q0, q1, q2, q3;

  qd_real r;

  q0 = a[0] / b[0];
  r = a - (b * q0);

  q1 = r[0] / b[0];
  r -= (b * q1);

  q2 = r[0] / b[0];
  r -= (b * q2);

  q3 = r[0] / b[0];

  ::renorm(q0, q1, q2, q3);

  return qd_real(q0, q1, q2, q3);
}

/* As sloppy_div, plus a fifth correction term q4 before renormalizing. */
qd_real qd_real::accurate_div(const qd_real &a, const qd_real &b) {
  double q0, q1, q2, q3;

  qd_real r;

  q0 = a[0] / b[0];
  r = a - (b * q0);

  q1 = r[0] / b[0];
  r -= (b * q1);

  q2 = r[0] / b[0];
  r -= (b * q2);

  q3 = r[0] / b[0];

  r -= (b * q3);
  double q4 = r[0] / b[0];

  ::renorm(q0, q1, q2, q3, q4);

  return qd_real(q0, q1, q2, q3);
}

QD_API qd_real sqrt(const qd_real &a) {
  /* Strategy:

     Perform the following Newton iteration:

       x' = x + (1 - a * x^2) * x / 2;

     which converges to 1/sqrt(a), starting with the
     double precision approximation to 1/sqrt(a).
     Since Newton's iteration more or less doubles the
     number of correct digits, only a few iterations are
     needed; three are performed below.
*/

  if (a.is_zero())
    return 0.0;

  if (a.is_negative()) {
    qd_real::error("(qd_real::sqrt): Negative argument.");
    return qd_real::_nan;
  }

  /* r approximates 1/sqrt(a); h = a/2 so each step is r += (1/2 - h r^2) r. */
  qd_real r = (1.0 / std::sqrt(a[0]));
  qd_real h = mul_pwr2(a, 0.5);

  r += ((0.5 - h * sqr(r)) * r);
  r += ((0.5 - h * sqr(r)) * r);
  r += ((0.5 - h * sqr(r)) * r);

  /* sqrt(a) = a * (1/sqrt(a)) */
  r *= a;
  return r;
}

/* Computes the n-th root of a */
/* Reports an error and returns qd_real::_nan when n <= 0, or when n is even
   and a is negative.  n == 1 returns a, n == 2 delegates to sqrt, and a zero
   argument returns 0. */
qd_real nroot(const qd_real &a, int n) {
  /* Strategy:  Use Newton's iteration to solve

        1/(x^n) - a = 0

     Newton iteration becomes

        x' = x + x * (1 - a * x^n) / n

     Since Newton's iteration converges quadratically,
     only a few iterations are needed; three are performed below.
   */
  if (n <= 0) {
    qd_real::error("(qd_real::nroot): N must be positive.");
    return qd_real::_nan;
  }

  if (n % 2 == 0 && a.is_negative()) {
    qd_real::error("(qd_real::nroot): Negative argument.");
    return qd_real::_nan;
  }

  if (n == 1) {
    return a;
  }
  if (n == 2) {
    return sqrt(a);
  }
  if (a.is_zero()) {
    return qd_real(0.0);
  }

  /* Note  a^{-1/n} = exp(-log(a)/n) */
  /* The iteration works on |a|; the initial guess is computed in double
     precision from the leading component. */
  qd_real r = abs(a);
  qd_real x = std::exp(-std::log(r.x[0]) / n);

  /* Perform Newton's iteration.
*/ double dbl_n = static_cast(n); x += x * (1.0 - r * npwr(x, n)) / dbl_n; x += x * (1.0 - r * npwr(x, n)) / dbl_n; x += x * (1.0 - r * npwr(x, n)) / dbl_n; if (a[0] < 0.0){ x = -x; } return 1.0 / x; } static const int n_inv_fact = 15; static const qd_real inv_fact[n_inv_fact] = { qd_real( 1.66666666666666657e-01, 9.25185853854297066e-18, 5.13581318503262866e-34, 2.85094902409834186e-50), qd_real( 4.16666666666666644e-02, 2.31296463463574266e-18, 1.28395329625815716e-34, 7.12737256024585466e-51), qd_real( 8.33333333333333322e-03, 1.15648231731787138e-19, 1.60494162032269652e-36, 2.22730392507682967e-53), qd_real( 1.38888888888888894e-03, -5.30054395437357706e-20, -1.73868675534958776e-36, -1.63335621172300840e-52), qd_real( 1.98412698412698413e-04, 1.72095582934207053e-22, 1.49269123913941271e-40, 1.29470326746002471e-58), qd_real( 2.48015873015873016e-05, 2.15119478667758816e-23, 1.86586404892426588e-41, 1.61837908432503088e-59), qd_real( 2.75573192239858925e-06, -1.85839327404647208e-22, 8.49175460488199287e-39, -5.72661640789429621e-55), qd_real( 2.75573192239858883e-07, 2.37677146222502973e-23, -3.26318890334088294e-40, 1.61435111860404415e-56), qd_real( 2.50521083854417202e-08, -1.44881407093591197e-24, 2.04267351467144546e-41, -8.49632672007163175e-58), qd_real( 2.08767569878681002e-09, -1.20734505911325997e-25, 1.70222792889287100e-42, 1.41609532150396700e-58), qd_real( 1.60590438368216133e-10, 1.25852945887520981e-26, -5.31334602762985031e-43, 3.54021472597605528e-59), qd_real( 1.14707455977297245e-11, 2.06555127528307454e-28, 6.88907923246664603e-45, 5.72920002655109095e-61), qd_real( 7.64716373181981641e-13, 7.03872877733453001e-30, -7.82753927716258345e-48, 1.92138649443790242e-64), qd_real( 4.77947733238738525e-14, 4.39920548583408126e-31, -4.89221204822661465e-49, 1.20086655902368901e-65), qd_real( 2.81145725434552060e-15, 1.65088427308614326e-31, -2.87777179307447918e-50, 4.27110689256293549e-67) }; qd_real exp(const qd_real &a) { /* Strategy: We 
first reduce the size of x by noting that exp(kr + m * log(2)) = 2^m * exp(r)^k where m and k are integers. By choosing m appropriately we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is evaluated using the familiar Taylor series. Reducing the argument substantially speeds up the convergence. */ const double k = ldexp(1.0, 16); const double inv_k = 1.0 / k; if (a[0] <= -709.0) return 0.0; if (a[0] >= 709.0) return qd_real::_inf; if (a.is_zero()) return 1.0; if (a.is_one()) return qd_real::_e; double m = std::floor(a.x[0] / qd_real::_log2.x[0] + 0.5); qd_real r = mul_pwr2(a - qd_real::_log2 * m, inv_k); qd_real s, p, t; double thresh = inv_k * qd_real::_eps; p = sqr(r); s = r + mul_pwr2(p, 0.5); int i = 0; do { p *= r; t = p * inv_fact[i++]; s += t; } while (std::abs(to_double(t)) > thresh && i < 9); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s = mul_pwr2(s, 2.0) + sqr(s); s += 1.0; return ldexp(s, static_cast(m)); } /* Logarithm. Computes log(x) in quad-double precision. This is a natural logarithm (i.e., base e). */ qd_real log(const qd_real &a) { /* Strategy. The Taylor series for log converges much more slowly than that of exp, due to the lack of the factorial term in the denominator. Hence this routine instead tries to determine the root of the function f(x) = exp(x) - a using Newton iteration. The iteration is given by x' = x - f(x)/f'(x) = x - (1 - a * exp(-x)) = x + a * exp(-x) - 1. Two iteration is needed, since Newton's iteration approximately doubles the number of digits per iteration. 
*/ if (a.is_one()) { return 0.0; } if (a[0] <= 0.0) { qd_real::error("(qd_real::log): Non-positive argument."); return qd_real::_nan; } if (a[0] == 0.0) { return -qd_real::_inf; } qd_real x = std::log(a[0]); /* Initial approximation */ x = x + a * exp(-x) - 1.0; x = x + a * exp(-x) - 1.0; x = x + a * exp(-x) - 1.0; return x; } qd_real log10(const qd_real &a) { return log(a) / qd_real::_log10; } static const qd_real _pi1024 = qd_real( 3.067961575771282340e-03, 1.195944139792337116e-19, -2.924579892303066080e-36, 1.086381075061880158e-52); /* Table of sin(k * pi/1024) and cos(k * pi/1024). */ static const qd_real sin_table [] = { qd_real( 3.0679567629659761e-03, 1.2690279085455925e-19, 5.2879464245328389e-36, -1.7820334081955298e-52), qd_real( 6.1358846491544753e-03, 9.0545257482474933e-20, 1.6260113133745320e-37, -9.7492001208767410e-55), qd_real( 9.2037547820598194e-03, -1.2136591693535934e-19, 5.5696903949425567e-36, 1.2505635791936951e-52), qd_real( 1.2271538285719925e-02, 6.9197907640283170e-19, -4.0203726713435555e-36, -2.0688703606952816e-52), qd_real( 1.5339206284988102e-02, -8.4462578865401696e-19, 4.6535897505058629e-35, -1.3923682978570467e-51), qd_real( 1.8406729905804820e-02, 7.4195533812833160e-19, 3.9068476486787607e-35, 3.6393321292898614e-52), qd_real( 2.1474080275469508e-02, -4.5407960207688566e-19, -2.2031770119723005e-35, 1.2709814654833741e-51), qd_real( 2.4541228522912288e-02, -9.1868490125778782e-20, 4.8706148704467061e-36, -2.8153947855469224e-52), qd_real( 2.7608145778965743e-02, -1.5932358831389269e-18, -7.0475416242776030e-35, -2.7518494176602744e-51), qd_real( 3.0674803176636626e-02, -1.6936054844107918e-20, -2.0039543064442544e-36, -1.6267505108658196e-52), qd_real( 3.3741171851377587e-02, -2.0096074292368340e-18, -1.3548237016537134e-34, 6.5554881875899973e-51), qd_real( 3.6807222941358832e-02, 6.1060088803529842e-19, -4.0448721259852727e-35, -2.1111056765671495e-51), qd_real( 3.9872927587739811e-02, 4.6657453481183289e-19, 
3.4119333562288684e-35, 2.4007534726187511e-51), qd_real( 4.2938256934940820e-02, 2.8351940588660907e-18, 1.6991309601186475e-34, 6.8026536098672629e-51), qd_real( 4.6003182130914630e-02, -1.1182813940157788e-18, 7.5235020270378946e-35, 4.1187304955493722e-52), qd_real( 4.9067674327418015e-02, -6.7961037205182801e-19, -4.4318868124718325e-35, -9.9376628132525316e-52), qd_real( 5.2131704680283324e-02, -2.4243695291953779e-18, -1.3675405320092298e-34, -8.3938137621145070e-51), qd_real( 5.5195244349689941e-02, -1.3340299860891103e-18, -3.4359574125665608e-35, 1.1911462755409369e-51), qd_real( 5.8258264500435759e-02, 2.3299905496077492e-19, 1.9376108990628660e-36, -5.1273775710095301e-53), qd_real( 6.1320736302208578e-02, -5.1181134064638108e-19, -4.2726335866706313e-35, 2.6368495557440691e-51), qd_real( 6.4382630929857465e-02, -4.2325997000052705e-18, 3.3260117711855937e-35, 1.4736267706718352e-51), qd_real( 6.7443919563664065e-02, -6.9221796556983636e-18, 1.5909286358911040e-34, -7.8828946891835218e-51), qd_real( 7.0504573389613870e-02, -6.8552791107342883e-18, -1.9961177630841580e-34, 2.0127129580485300e-50), qd_real( 7.3564563599667426e-02, -2.7784941506273593e-18, -9.1240375489852821e-35, -1.9589752023546795e-51), qd_real( 7.6623861392031492e-02, 2.3253700287958801e-19, -1.3186083921213440e-36, -4.9927872608099673e-53), qd_real( 7.9682437971430126e-02, -4.4867664311373041e-18, 2.8540789143650264e-34, 2.8491348583262741e-51), qd_real( 8.2740264549375692e-02, 1.4735983530877760e-18, 3.7284093452233713e-35, 2.9024430036724088e-52), qd_real( 8.5797312344439894e-02, -3.3881893830684029e-18, -1.6135529531508258e-34, 7.7294651620588049e-51), qd_real( 8.8853552582524600e-02, -3.7501775830290691e-18, 3.7543606373911573e-34, 2.2233701854451859e-50), qd_real( 9.1908956497132724e-02, 4.7631594854274564e-18, 1.5722874642939344e-34, -4.8464145447831456e-51), qd_real( 9.4963495329639006e-02, -6.5885886400417564e-18, -2.1371116991641965e-34, 1.3819370559249300e-50), qd_real( 
9.8017140329560604e-02, -1.6345823622442560e-18, -1.3209238810006454e-35, -3.5691060049117942e-52), qd_real( 1.0106986275482782e-01, 3.3164325719308656e-18, -1.2004224885132282e-34, 7.2028828495418631e-51), qd_real( 1.0412163387205457e-01, 6.5760254085385100e-18, 1.7066246171219214e-34, -4.9499340996893514e-51), qd_real( 1.0717242495680884e-01, 6.4424044279026198e-18, -8.3956976499698139e-35, -4.0667730213318321e-51), qd_real( 1.1022220729388306e-01, -5.6789503537823233e-19, 1.0380274792383233e-35, 1.5213997918456695e-52), qd_real( 1.1327095217756435e-01, 2.7100481012132900e-18, 1.5323292999491619e-35, 4.9564432810360879e-52), qd_real( 1.1631863091190477e-01, 1.0294914877509705e-18, -9.3975734948993038e-35, 1.3534827323719708e-52), qd_real( 1.1936521481099137e-01, -3.9500089391898506e-18, 3.5317349978227311e-34, 1.8856046807012275e-51), qd_real( 1.2241067519921620e-01, 2.8354501489965335e-18, 1.8151655751493305e-34, -2.8716592177915192e-51), qd_real( 1.2545498341154623e-01, 4.8686751763148235e-18, 5.9878105258097936e-35, -3.3534629098722107e-51), qd_real( 1.2849811079379317e-01, 3.8198603954988802e-18, -1.8627501455947798e-34, -2.4308161133527791e-51), qd_real( 1.3154002870288312e-01, -5.0039708262213813e-18, -1.2983004159245552e-34, -4.6872034915794122e-51), qd_real( 1.3458070850712620e-01, -9.1670359171480699e-18, 1.5916493007073973e-34, 4.0237002484366833e-51), qd_real( 1.3762012158648604e-01, 6.6253255866774482e-18, -2.3746583031401459e-34, -9.3703876173093250e-52), qd_real( 1.4065823933284924e-01, -7.9193932965524741e-18, 6.0972464202108397e-34, 2.4566623241035797e-50), qd_real( 1.4369503315029444e-01, 1.1472723016618666e-17, -5.1884954557576435e-35, -4.2220684832186607e-51), qd_real( 1.4673047445536175e-01, 3.7269471470465677e-18, 3.7352398151250827e-34, -4.0881822289508634e-51), qd_real( 1.4976453467732151e-01, 8.0812114131285151e-18, 1.2979142554917325e-34, 9.9380667487736254e-51), qd_real( 1.5279718525844344e-01, -7.6313573938416838e-18, 
5.7714690450284125e-34, -3.7731132582986687e-50), qd_real( 1.5582839765426523e-01, 3.0351307187678221e-18, -1.0976942315176184e-34, 7.8734647685257867e-51), qd_real( 1.5885814333386145e-01, -4.0163200573859079e-18, -9.2840580257628812e-35, -2.8567420029274875e-51), qd_real( 1.6188639378011183e-01, 1.1850519643573528e-17, -5.0440990519162957e-34, 3.0510028707928009e-50), qd_real( 1.6491312048996992e-01, -7.0405288319166738e-19, 3.3211107491245527e-35, 8.6663299254686031e-52), qd_real( 1.6793829497473117e-01, 5.4284533721558139e-18, -3.3263339336181369e-34, -1.8536367335123848e-50), qd_real( 1.7096188876030122e-01, 9.1919980181759094e-18, -6.7688743940982606e-34, -1.0377711384318389e-50), qd_real( 1.7398387338746382e-01, 5.8151994618107928e-18, -1.6751014298301606e-34, -6.6982259797164963e-51), qd_real( 1.7700422041214875e-01, 6.7329300635408167e-18, 2.8042736644246623e-34, 3.6786888232793599e-51), qd_real( 1.8002290140569951e-01, 7.9701826047392143e-18, -7.0765920110524977e-34, 1.9622512608461784e-50), qd_real( 1.8303988795514095e-01, 7.7349918688637383e-18, -4.4803769968145083e-34, 1.1201148793328890e-50), qd_real( 1.8605515166344666e-01, -1.2564893007679552e-17, 7.5953844248530810e-34, -3.8471695132415039e-51), qd_real( 1.8906866414980622e-01, -7.6208955803527778e-18, -4.4792298656662981e-34, -4.4136824096645007e-50), qd_real( 1.9208039704989244e-01, 4.3348343941174903e-18, -2.3404121848139937e-34, 1.5789970962611856e-50), qd_real( 1.9509032201612828e-01, -7.9910790684617313e-18, 6.1846270024220713e-34, -3.5840270918032937e-50), qd_real( 1.9809841071795359e-01, -1.8434411800689445e-18, 1.4139031318237285e-34, 1.0542811125343809e-50), qd_real( 2.0110463484209190e-01, 1.1010032669300739e-17, -3.9123576757413791e-34, 2.4084852500063531e-51), qd_real( 2.0410896609281687e-01, 6.0941297773957752e-18, -2.8275409970449641e-34, 4.6101008563532989e-51), qd_real( 2.0711137619221856e-01, -1.0613362528971356e-17, 2.2456805112690884e-34, 1.3483736125280904e-50), qd_real( 
2.1011183688046961e-01, 1.1561548476512844e-17, 6.0355905610401254e-34, 3.3329909618405675e-50), qd_real( 2.1311031991609136e-01, 1.2031873821063860e-17, -3.4142699719695635e-34, -1.2436262780241778e-50), qd_real( 2.1610679707621952e-01, -1.0111196082609117e-17, 7.2789545335189643e-34, -2.9347540365258610e-50), qd_real( 2.1910124015686980e-01, -3.6513812299150776e-19, -2.3359499418606442e-35, 3.1785298198458653e-52), qd_real( 2.2209362097320354e-01, -3.0337210995812162e-18, 6.6654668033632998e-35, 2.0110862322656942e-51), qd_real( 2.2508391135979283e-01, 3.9507040822556510e-18, 2.4287993958305375e-35, 5.6662797513020322e-52), qd_real( 2.2807208317088573e-01, 8.2361837339258012e-18, 6.9786781316397937e-34, -6.4122962482639504e-51), qd_real( 2.3105810828067111e-01, 1.0129787149761869e-17, -6.9359234615816044e-34, -2.8877355604883782e-50), qd_real( 2.3404195858354343e-01, -6.9922402696101173e-18, -5.7323031922750280e-34, 5.3092579966872727e-51), qd_real( 2.3702360599436720e-01, 8.8544852285039918e-18, 1.3588480826354134e-34, 1.0381022520213867e-50), qd_real( 2.4000302244874150e-01, -1.2137758975632164e-17, -2.6448807731703891e-34, -1.9929733800670473e-51), qd_real( 2.4298017990326390e-01, -8.7514315297196632e-18, -6.5723260373079431e-34, -1.0333158083172177e-50), qd_real( 2.4595505033579462e-01, -1.1129044052741832e-17, 4.3805998202883397e-34, 1.2219399554686291e-50), qd_real( 2.4892760574572018e-01, -8.1783436100020990e-18, 5.5666875261111840e-34, 3.8080473058748167e-50), qd_real( 2.5189781815421697e-01, -1.7591436032517039e-17, -1.0959681232525285e-33, 5.6209426020232456e-50), qd_real( 2.5486565960451457e-01, -1.3602299806901461e-19, -6.0073844642762535e-36, -3.0072751311893878e-52), qd_real( 2.5783110216215899e-01, 1.8480038630879957e-17, 3.3201664714047599e-34, -5.5547819290576764e-51), qd_real( 2.6079411791527551e-01, 4.2721420983550075e-18, 5.6782126934777920e-35, 3.1428338084365397e-51), qd_real( 2.6375467897483140e-01, -1.8837947680038700e-17, 
1.3720129045754794e-33, -8.2763406665966033e-50), qd_real( 2.6671275747489837e-01, 2.0941222578826688e-17, -1.1303466524727989e-33, 1.9954224050508963e-50), qd_real( 2.6966832557291509e-01, 1.5765657618133259e-17, -6.9696142173370086e-34, -4.0455346879146776e-50), qd_real( 2.7262135544994898e-01, 7.8697166076387850e-18, 6.6179388602933372e-35, -2.7642903696386267e-51), qd_real( 2.7557181931095814e-01, 1.9320328962556582e-17, 1.3932094180100280e-33, 1.3617253920018116e-50), qd_real( 2.7851968938505312e-01, -1.0030273719543544e-17, 7.2592115325689254e-34, -1.0068516296655851e-50), qd_real( 2.8146493792575800e-01, -1.2322299641274009e-17, -1.0564788706386435e-34, 7.5137424251265885e-51), qd_real( 2.8440753721127182e-01, 2.2209268510661475e-17, -9.1823095629523708e-34, -5.2192875308892218e-50), qd_real( 2.8734745954472951e-01, 1.5461117367645717e-17, -6.3263973663444076e-34, -2.2982538416476214e-50), qd_real( 2.9028467725446239e-01, -1.8927978707774251e-17, 1.1522953157142315e-33, 7.4738655654716596e-50), qd_real( 2.9321916269425863e-01, 2.2385430811901833e-17, 1.3662484646539680e-33, -4.2451325253996938e-50), qd_real( 2.9615088824362384e-01, -2.0220736360876938e-17, -7.9252212533920413e-35, -2.8990577729572470e-51), qd_real( 2.9907982630804048e-01, 1.6701181609219447e-18, 8.6091151117316292e-35, 3.9931286230012102e-52), qd_real( 3.0200594931922808e-01, -1.7167666235262474e-17, 2.3336182149008069e-34, 8.3025334555220004e-51), qd_real( 3.0492922973540243e-01, -2.2989033898191262e-17, -1.4598901099661133e-34, 3.7760487693121827e-51), qd_real( 3.0784964004153487e-01, 2.7074088527245185e-17, 1.2568858206899284e-33, 7.2931815105901645e-50), qd_real( 3.1076715274961147e-01, 2.0887076364048513e-17, -3.0130590791065942e-34, 1.3876739009935179e-51), qd_real( 3.1368174039889146e-01, 1.4560447299968912e-17, 3.6564186898011595e-34, 1.1654264734999375e-50), qd_real( 3.1659337555616585e-01, 2.1435292512726283e-17, 1.2338169231377316e-33, 3.3963542100989293e-50), qd_real( 
3.1950203081601569e-01, -1.3981562491096626e-17, 8.1730000697411350e-34, -7.7671096270210952e-50), qd_real( 3.2240767880106985e-01, -4.0519039937959398e-18, 3.7438302780296796e-34, 8.7936731046639195e-51), qd_real( 3.2531029216226293e-01, 7.9171249463765892e-18, -6.7576622068146391e-35, 2.3021655066929538e-51), qd_real( 3.2820984357909255e-01, -2.6693140719641896e-17, 7.8928851447534788e-34, 2.5525163821987809e-51), qd_real( 3.3110630575987643e-01, -2.7469465474778694e-17, -1.3401245916610206e-33, 6.5531762489976163e-50), qd_real( 3.3399965144200938e-01, 2.2598986806288142e-17, 7.8063057192586115e-34, 2.0427600895486683e-50), qd_real( 3.3688985339222005e-01, -4.2000940033475092e-19, -2.9178652969985438e-36, -1.1597376437036749e-52), qd_real( 3.3977688440682685e-01, 6.6028679499418282e-18, 1.2575009988669683e-34, 2.5569067699008304e-51), qd_real( 3.4266071731199438e-01, 1.9261518449306319e-17, -9.2754189135990867e-34, 8.5439996687390166e-50), qd_real( 3.4554132496398904e-01, 2.7251143672916123e-17, 7.0138163601941737e-34, -1.4176292197454015e-50), qd_real( 3.4841868024943456e-01, 3.6974420514204918e-18, 3.5532146878499996e-34, 1.9565462544501322e-50), qd_real( 3.5129275608556715e-01, -2.2670712098795844e-17, -1.6994216673139631e-34, -1.2271556077284517e-50), qd_real( 3.5416352542049040e-01, -1.6951763305764860e-17, 1.2772331777814617e-33, -3.3703785435843310e-50), qd_real( 3.5703096123343003e-01, -4.8218191137919166e-19, -4.1672436994492361e-35, -7.1531167149364352e-52), qd_real( 3.5989503653498817e-01, -1.7601687123839282e-17, 1.3375125473046791e-33, 7.9467815593584340e-50), qd_real( 3.6275572436739723e-01, -9.1668352663749849e-18, -7.4317843956936735e-34, -2.0199582511804564e-50), qd_real( 3.6561299780477385e-01, 1.6217898770457546e-17, 1.1286970151961055e-33, -7.1825287318139010e-50), qd_real( 3.6846682995337232e-01, 1.0463640796159268e-17, 2.0554984738517304e-35, 1.0441861305618769e-51), qd_real( 3.7131719395183754e-01, 3.4749239648238266e-19, 
-7.5151053042866671e-37, -2.8153468438650851e-53), qd_real( 3.7416406297145799e-01, 8.0114103761962118e-18, 5.3429599813406052e-34, 1.0351378796539210e-50), qd_real( 3.7700741021641826e-01, -2.7255302041956930e-18, 6.3646586445018137e-35, 8.3048657176503559e-52), qd_real( 3.7984720892405116e-01, 9.9151305855172370e-18, 4.8761409697224886e-34, 1.4025084000776705e-50), qd_real( 3.8268343236508978e-01, -1.0050772696461588e-17, -2.0605316302806695e-34, -1.2717724698085205e-50), qd_real( 3.8551605384391885e-01, 1.5177665396472313e-17, 1.4198230518016535e-33, 5.8955167159904235e-50), qd_real( 3.8834504669882630e-01, -1.0053770598398717e-17, 7.5942999255057131e-34, -3.1967974046654219e-50), qd_real( 3.9117038430225387e-01, 1.7997787858243995e-17, -1.0613482402609856e-33, -5.4582148817791032e-50), qd_real( 3.9399204006104810e-01, 9.7649241641239336e-18, -2.1233599441284617e-34, -5.5529836795340819e-51), qd_real( 3.9680998741671031e-01, 2.0545063670840126e-17, 6.1347058801922842e-34, 1.0733788150636430e-50), qd_real( 3.9962419984564684e-01, -1.5065497476189372e-17, -9.9653258881867298e-34, -5.7524323712725355e-50), qd_real( 4.0243465085941843e-01, 1.0902619339328270e-17, 7.3998528125989765e-34, 2.2745784806823499e-50), qd_real( 4.0524131400498986e-01, 9.9111401942899884e-18, -2.5169070895434648e-34, 9.2772984818436573e-53), qd_real( 4.0804416286497869e-01, -7.0006015137351311e-18, -1.4108207334268228e-34, 1.5175546997577136e-52), qd_real( 4.1084317105790397e-01, -2.4219835190355499e-17, -1.1418902925313314e-33, -2.0996843165093468e-50), qd_real( 4.1363831223843456e-01, -1.0393984940597871e-17, -1.1481681174503880e-34, -2.0281052851028680e-51), qd_real( 4.1642956009763721e-01, -2.5475580413131732e-17, -3.4482678506112824e-34, 7.1788619351865480e-51), qd_real( 4.1921688836322396e-01, -4.2232463750110590e-18, -3.6053023045255790e-34, -2.2209673210025631e-50), qd_real( 4.2200027079979968e-01, 4.3543266994128527e-18, 3.1734310272251190e-34, -1.3573247980738668e-50), qd_real( 
4.2477968120910881e-01, 2.7462312204277281e-17, -4.6552847802111948e-34, 6.5961781099193122e-51), qd_real( 4.2755509343028208e-01, 9.4111898162954726e-18, -1.7446682426598801e-34, -2.2054492626480169e-51), qd_real( 4.3032648134008261e-01, 2.2259686974092690e-17, 8.5972591314085075e-34, -2.9420897889003020e-50), qd_real( 4.3309381885315196e-01, 1.1224283329847517e-17, 5.3223748041075651e-35, 5.3926192627014212e-51), qd_real( 4.3585707992225547e-01, 1.6230515450644527e-17, -6.4371449063579431e-35, -6.9102436481386757e-51), qd_real( 4.3861623853852766e-01, -2.0883315831075090e-17, -1.4259583540891877e-34, 6.3864763590657077e-52), qd_real( 4.4137126873171667e-01, 2.2360783886964969e-17, 1.1864769603515770e-34, -3.8087003266189232e-51), qd_real( 4.4412214457042926e-01, -2.4218874422178315e-17, 2.2205230838703907e-34, 9.2133035911356258e-51), qd_real( 4.4686884016237421e-01, -1.9222136142309382e-17, -4.4425678589732049e-35, -1.3673609292149535e-51), qd_real( 4.4961132965460660e-01, 4.8831924232035243e-18, 2.7151084498191381e-34, -1.5653993171613154e-50), qd_real( 4.5234958723377089e-01, -1.4827977472196122e-17, -7.6947501088972324e-34, 1.7656856882031319e-50), qd_real( 4.5508358712634384e-01, -1.2379906758116472e-17, 5.5289688955542643e-34, -8.5382312840209386e-51), qd_real( 4.5781330359887723e-01, -8.4554254922295949e-18, -6.3770394246764263e-34, 3.1778253575564249e-50), qd_real( 4.6053871095824001e-01, 1.8488777492177872e-17, -1.0527732154209725e-33, 3.3235593490947102e-50), qd_real( 4.6325978355186020e-01, -7.3514924533231707e-18, 6.7175396881707035e-34, 3.9594127612123379e-50), qd_real( 4.6597649576796618e-01, -3.3023547778235135e-18, 3.4904677050476886e-35, 3.4483855263874246e-51), qd_real( 4.6868882203582796e-01, -2.2949251681845054e-17, -1.1364757641823658e-33, 6.8840522501918612e-50), qd_real( 4.7139673682599764e-01, 6.5166781360690130e-18, 2.9457546966235984e-34, -6.2159717738836630e-51), qd_real( 4.7410021465055002e-01, -8.1451601548978075e-18, 
-3.4789448555614422e-34, -1.1681943974658508e-50), qd_real( 4.7679923006332214e-01, -1.0293515338305794e-17, -3.6582045008369952e-34, 1.7424131479176475e-50), qd_real( 4.7949375766015301e-01, 1.8419999662684771e-17, -1.3040838621273312e-33, 1.0977131822246471e-50), qd_real( 4.8218377207912277e-01, -2.5861500925520442e-17, -6.2913197606500007e-36, 4.0802359808684726e-52), qd_real( 4.8486924800079112e-01, -1.8034004203262245e-17, -3.5244276906958044e-34, -1.7138318654749246e-50), qd_real( 4.8755016014843594e-01, 1.4231090931273653e-17, -1.8277733073262697e-34, -1.5208291790429557e-51), qd_real( 4.9022648328829116e-01, -5.1496145643440404e-18, -3.6903027405284104e-34, 1.5172940095151304e-50), qd_real( 4.9289819222978404e-01, -1.0257831676562186e-18, 6.9520817760885069e-35, -2.4260961214090389e-51), qd_real( 4.9556526182577254e-01, -9.4323241942365362e-18, 3.1212918657699143e-35, 4.2009072375242736e-52), qd_real( 4.9822766697278187e-01, -1.6126383830540798e-17, -1.5092897319298871e-33, 1.1049298890895917e-50), qd_real( 5.0088538261124083e-01, -3.9604015147074639e-17, -2.2208395201898007e-33, 1.3648202735839417e-49), qd_real( 5.0353838372571758e-01, -1.6731308204967497e-17, -1.0140233644074786e-33, 4.0953071937671477e-50), qd_real( 5.0618664534515534e-01, -4.8321592986493711e-17, 9.2858107226642252e-34, 4.2699802401037005e-50), qd_real( 5.0883014254310699e-01, 4.7836968268014130e-17, -1.0727022928806035e-33, 2.7309374513672757e-50), qd_real( 5.1146885043797041e-01, -1.3088001221007579e-17, 4.0929033363366899e-34, -3.7952190153477926e-50), qd_real( 5.1410274419322177e-01, -4.5712707523615624e-17, 1.5488279442238283e-33, -2.5853959305521130e-50), qd_real( 5.1673179901764987e-01, 8.3018617233836515e-18, 5.8251027467695202e-34, -2.2812397190535076e-50), qd_real( 5.1935599016558964e-01, -5.5331248144171145e-17, -3.1628375609769026e-35, -2.4091972051188571e-51), qd_real( 5.2197529293715439e-01, -4.6555795692088883e-17, 4.6378980936850430e-34, -3.3470542934689532e-51), 
qd_real( 5.2458968267846895e-01, -4.3068869040082345e-17, -4.2013155291932055e-34, -1.5096069926700274e-50), qd_real( 5.2719913478190139e-01, -4.2202983480560619e-17, 8.5585916184867295e-34, 7.9974339336732307e-50), qd_real( 5.2980362468629472e-01, -4.8067841706482342e-17, 5.8309721046630296e-34, -8.9740761521756660e-51), qd_real( 5.3240312787719801e-01, -4.1020306135800895e-17, -1.9239996374230821e-33, -1.5326987913812184e-49), qd_real( 5.3499761988709726e-01, -5.3683132708358134e-17, -1.3900569918838112e-33, 2.7154084726474092e-50), qd_real( 5.3758707629564551e-01, -2.2617365388403054e-17, -5.9787279033447075e-34, 3.1204419729043625e-51), qd_real( 5.4017147272989285e-01, 2.7072447965935839e-17, 1.1698799709213829e-33, -5.9094668515881500e-50), qd_real( 5.4275078486451589e-01, 1.7148261004757101e-17, -1.3525905925200870e-33, 4.9724411290727323e-50), qd_real( 5.4532498842204646e-01, -4.1517817538384258e-17, -1.5318930219385941e-33, 6.3629921101413974e-50), qd_real( 5.4789405917310019e-01, -2.4065878297113363e-17, -3.5639213669362606e-36, -2.6013270854271645e-52), qd_real( 5.5045797293660481e-01, -8.3319903015807663e-18, -2.3058454035767633e-34, -2.1611290432369010e-50), qd_real( 5.5301670558002758e-01, -4.7061536623798204e-17, -1.0617111545918056e-33, -1.6196316144407379e-50), qd_real( 5.5557023301960218e-01, 4.7094109405616768e-17, -2.0640520383682921e-33, 1.2290163188567138e-49), qd_real( 5.5811853122055610e-01, 1.3481176324765226e-17, -5.5016743873011438e-34, -2.3484822739335416e-50), qd_real( 5.6066157619733603e-01, -7.3956418153476152e-18, 3.9680620611731193e-34, 3.1995952200836223e-50), qd_real( 5.6319934401383409e-01, 2.3835775146854829e-17, 1.3511793173769814e-34, 9.3201311581248143e-51), qd_real( 5.6573181078361323e-01, -3.4096079596590466e-17, -1.7073289744303546e-33, 8.9147089975404507e-50), qd_real( 5.6825895267013160e-01, -5.0935673642769248e-17, -1.6274356351028249e-33, 9.8183151561702966e-51), qd_real( 5.7078074588696726e-01, 2.4568151455566208e-17, 
-1.2844481247560350e-33, -1.8037634376936261e-50), qd_real( 5.7329716669804220e-01, 8.5176611669306400e-18, -6.4443208788026766e-34, 2.2546105543273003e-50), qd_real( 5.7580819141784534e-01, -3.7909495458942734e-17, -2.7433738046854309e-33, 1.1130841524216795e-49), qd_real( 5.7831379641165559e-01, -2.6237691512372831e-17, 1.3679051680738167e-33, -3.1409808935335900e-50), qd_real( 5.8081395809576453e-01, 1.8585338586613408e-17, 2.7673843114549181e-34, 1.9605349619836937e-50), qd_real( 5.8330865293769829e-01, 3.4516601079044858e-18, 1.8065977478946306e-34, -6.3953958038544646e-51), qd_real( 5.8579785745643886e-01, -3.7485501964311294e-18, 2.7965403775536614e-34, -7.1816936024157202e-51), qd_real( 5.8828154822264533e-01, -2.9292166725006846e-17, -2.3744954603693934e-33, -1.1571631191512480e-50), qd_real( 5.9075970185887428e-01, -4.7013584170659542e-17, 2.4808417611768356e-33, 1.2598907673643198e-50), qd_real( 5.9323229503979980e-01, 1.2892320944189053e-17, 5.3058364776359583e-34, 4.1141674699390052e-50), qd_real( 5.9569930449243336e-01, -1.3438641936579467e-17, -6.7877687907721049e-35, -5.6046937531684890e-51), qd_real( 5.9816070699634227e-01, 3.8801885783000657e-17, -1.2084165858094663e-33, -4.0456610843430061e-50), qd_real( 6.0061647938386897e-01, -4.6398198229461932e-17, -1.6673493003710801e-33, 5.1982824378491445e-50), qd_real( 6.0306659854034816e-01, 3.7323357680559650e-17, 2.7771920866974305e-33, -1.6194229649742458e-49), qd_real( 6.0551104140432555e-01, -3.1202672493305677e-17, 1.2761267338680916e-33, -4.0859368598379647e-50), qd_real( 6.0794978496777363e-01, 3.5160832362096660e-17, -2.5546242776778394e-34, -1.4085313551220694e-50), qd_real( 6.1038280627630948e-01, -2.2563265648229169e-17, 1.3185575011226730e-33, 8.2316691420063460e-50), qd_real( 6.1281008242940971e-01, -4.2693476568409685e-18, 2.5839965886650320e-34, 1.6884412005622537e-50), qd_real( 6.1523159058062682e-01, 2.6231417767266950e-17, -1.4095366621106716e-33, 7.2058690491304558e-50), qd_real( 
6.1764730793780398e-01, -4.7478594510902452e-17, -7.2986558263123996e-34, -3.0152327517439154e-50), qd_real( 6.2005721176328921e-01, -2.7983410837681118e-17, 1.1649951056138923e-33, -5.4539089117135207e-50), qd_real( 6.2246127937414997e-01, 5.2940728606573002e-18, -4.8486411215945827e-35, 1.2696527641980109e-52), qd_real( 6.2485948814238634e-01, 3.3671846037243900e-17, -2.7846053391012096e-33, 5.6102718120012104e-50), qd_real( 6.2725181549514408e-01, 3.0763585181253225e-17, 2.7068930273498138e-34, -1.1172240309286484e-50), qd_real( 6.2963823891492698e-01, 4.1115334049626806e-17, -1.9167473580230747e-33, 1.1118424028161730e-49), qd_real( 6.3201873593980906e-01, -4.0164942296463612e-17, -7.2208643641736723e-34, 3.7828920470544344e-50), qd_real( 6.3439328416364549e-01, 1.0420901929280035e-17, 4.1174558929280492e-34, -1.4464152986630705e-51), qd_real( 6.3676186123628420e-01, 3.1419048711901611e-17, -2.2693738415126449e-33, -1.6023584204297388e-49), qd_real( 6.3912444486377573e-01, 1.2416796312271043e-17, -6.2095419626356605e-34, 2.7762065999506603e-50), qd_real( 6.4148101280858316e-01, -9.9883430115943310e-18, 4.1969230376730128e-34, 5.6980543799257597e-51), qd_real( 6.4383154288979150e-01, -3.2084798795046886e-17, -1.2595311907053305e-33, -4.0205885230841536e-50), qd_real( 6.4617601298331639e-01, -2.9756137382280815e-17, -1.0275370077518259e-33, 8.0852478665893014e-51), qd_real( 6.4851440102211244e-01, 3.9870270313386831e-18, 1.9408388509540788e-34, -5.1798420636193190e-51), qd_real( 6.5084668499638088e-01, 3.9714670710500257e-17, 2.9178546787002963e-34, 3.8140635508293278e-51), qd_real( 6.5317284295377676e-01, 8.5695642060026238e-18, -6.9165322305070633e-34, 2.3873751224185395e-50), qd_real( 6.5549285299961535e-01, 3.5638734426385005e-17, 1.2695365790889811e-33, 4.3984952865412050e-50), qd_real( 6.5780669329707864e-01, 1.9580943058468545e-17, -1.1944272256627192e-33, 2.8556402616436858e-50), qd_real( 6.6011434206742048e-01, -1.3960054386823638e-19, 
6.1515777931494047e-36, 5.3510498875622660e-52), qd_real( 6.6241577759017178e-01, -2.2615508885764591e-17, 5.0177050318126862e-34, 2.9162532399530762e-50), qd_real( 6.6471097820334490e-01, -3.6227793598034367e-17, -9.0607934765540427e-34, 3.0917036342380213e-50), qd_real( 6.6699992230363747e-01, 3.5284364997428166e-17, -1.0382057232458238e-33, 7.3812756550167626e-50), qd_real( 6.6928258834663612e-01, -5.4592652417447913e-17, -2.5181014709695152e-33, -1.6867875999437174e-49), qd_real( 6.7155895484701844e-01, -4.0489037749296692e-17, 3.1995835625355681e-34, -1.4044414655670960e-50), qd_real( 6.7382900037875604e-01, 2.3091901236161086e-17, 5.7428037192881319e-34, 1.1240668354625977e-50), qd_real( 6.7609270357531592e-01, 3.7256902248049466e-17, 1.7059417895764375e-33, 9.7326347795300652e-50), qd_real( 6.7835004312986147e-01, 1.8302093041863122e-17, 9.5241675746813072e-34, 5.0328101116133503e-50), qd_real( 6.8060099779545302e-01, 2.8473293354522047e-17, 4.1331805977270903e-34, 4.2579030510748576e-50), qd_real( 6.8284554638524808e-01, -1.2958058061524531e-17, 1.8292386959330698e-34, 3.4536209116044487e-51), qd_real( 6.8508366777270036e-01, 2.5948135194645137e-17, -8.5030743129500702e-34, -6.9572086141009930e-50), qd_real( 6.8731534089175916e-01, -5.5156158714917168e-17, 1.1896489854266829e-33, -7.8505896218220662e-51), qd_real( 6.8954054473706694e-01, -1.5889323294806790e-17, 9.1242356240205712e-34, 3.8315454152267638e-50), qd_real( 6.9175925836415775e-01, 2.7406078472410668e-17, 1.3286508943202092e-33, 1.0651869129580079e-51), qd_real( 6.9397146088965400e-01, 7.4345076956280137e-18, 7.5061528388197460e-34, -1.5928000240686583e-50), qd_real( 6.9617713149146299e-01, -4.1224081213582889e-17, -3.1838716762083291e-35, -3.9625587412119131e-51), qd_real( 6.9837624940897280e-01, 4.8988282435667768e-17, 1.9134010413244152e-33, 2.6161153243793989e-50), qd_real( 7.0056879394324834e-01, 3.1027960192992922e-17, 9.5638250509179997e-34, 4.5896916138107048e-51), qd_real( 
7.0275474445722530e-01, 2.5278294383629822e-18, -8.6985561210674942e-35, -5.6899862307812990e-51), qd_real( 7.0493408037590488e-01, 2.7608725585748502e-17, 2.9816599471629137e-34, 1.1533044185111206e-50), qd_real( 7.0710678118654757e-01, -4.8336466567264567e-17, 2.0693376543497068e-33, 2.4677734957341755e-50) }; static const qd_real cos_table [] = { qd_real( 9.9999529380957619e-01, -1.9668064285322189e-17, -6.3053955095883481e-34, 5.3266110855726731e-52), qd_real( 9.9998117528260111e-01, 3.3568103522895585e-17, -1.4740132559368063e-35, 9.8603097594755596e-52), qd_real( 9.9995764455196390e-01, -3.1527836866647287e-17, 2.6363251186638437e-33, 1.0007504815488399e-49), qd_real( 9.9992470183914450e-01, 3.7931082512668012e-17, -8.5099918660501484e-35, -4.9956973223295153e-51), qd_real( 9.9988234745421256e-01, -3.5477814872408538e-17, 1.7102001035303974e-33, -1.0725388519026542e-49), qd_real( 9.9983058179582340e-01, 1.8825140517551119e-17, -5.1383513457616937e-34, -3.8378827995403787e-50), qd_real( 9.9976940535121528e-01, 4.2681177032289012e-17, 1.9062302359737099e-33, -6.0221153262881160e-50), qd_real( 9.9969881869620425e-01, -2.9851486403799753e-17, -1.9084787370733737e-33, 5.5980260344029202e-51), qd_real( 9.9961882249517864e-01, -4.1181965521424734e-17, 2.0915365593699916e-33, 8.1403390920903734e-50), qd_real( 9.9952941750109314e-01, 2.0517917823755591e-17, -4.7673802585706520e-34, -2.9443604198656772e-50), qd_real( 9.9943060455546173e-01, 3.9644497752257798e-17, -2.3757223716722428e-34, -1.2856759011361726e-51), qd_real( 9.9932238458834954e-01, -4.2858538440845682e-17, 3.3235101605146565e-34, -8.3554272377057543e-51), qd_real( 9.9920475861836389e-01, 9.1796317110385693e-18, 5.5416208185868570e-34, 8.0267046717615311e-52), qd_real( 9.9907772775264536e-01, 2.1419007653587032e-17, -7.9048203318529618e-34, -5.3166296181112712e-50), qd_real( 9.9894129318685687e-01, -2.0610641910058638e-17, -1.2546525485913485e-33, -7.5175888806157064e-50), qd_real( 9.9879545620517241e-01, 
-1.2291693337075465e-17, 2.4468446786491271e-34, 1.0723891085210268e-50), qd_real( 9.9864021818026527e-01, -4.8690254312923302e-17, -2.9470881967909147e-34, -1.3000650761346907e-50), qd_real( 9.9847558057329477e-01, -2.2002931182778795e-17, -1.2371509454944992e-33, -2.4911225131232065e-50), qd_real( 9.9830154493389289e-01, -5.1869402702792278e-17, 1.0480195493633452e-33, -2.8995649143155511e-50), qd_real( 9.9811811290014918e-01, 2.7935487558113833e-17, 2.4423341255830345e-33, -6.7646699175334417e-50), qd_real( 9.9792528619859600e-01, 1.7143659778886362e-17, 5.7885840902887460e-34, -9.2601432603894597e-51), qd_real( 9.9772306664419164e-01, -2.6394475274898721e-17, -1.6176223087661783e-34, -9.9924942889362281e-51), qd_real( 9.9751145614030345e-01, 5.6007205919806937e-18, -5.9477673514685690e-35, -1.4166807162743627e-54), qd_real( 9.9729045667869021e-01, 9.1647695371101735e-18, 6.7824134309739296e-34, -8.6191392795543357e-52), qd_real( 9.9706007033948296e-01, 1.6734093546241963e-17, -1.3169951440780028e-33, 1.0311048767952477e-50), qd_real( 9.9682029929116567e-01, 4.7062820708615655e-17, 2.8412041076474937e-33, -8.0006155670263622e-50), qd_real( 9.9657114579055484e-01, 1.1707179088390986e-17, -7.5934413263024663e-34, 2.8474848436926008e-50), qd_real( 9.9631261218277800e-01, 1.1336497891624735e-17, 3.4002458674414360e-34, 7.7419075921544901e-52), qd_real( 9.9604470090125197e-01, 2.2870031707670695e-17, -3.9184839405013148e-34, -3.7081260416246375e-50), qd_real( 9.9576741446765982e-01, -2.3151908323094359e-17, -1.6306512931944591e-34, -1.5925420783863192e-51), qd_real( 9.9548075549192694e-01, 3.2084621412226554e-18, -4.9501292146013023e-36, -2.7811428850878516e-52), qd_real( 9.9518472667219693e-01, -4.2486913678304410e-17, 1.3315510772504614e-33, 6.7927987417051888e-50), qd_real( 9.9487933079480562e-01, 4.2130813284943662e-18, -4.2062597488288452e-35, 2.5157064556087620e-51), qd_real( 9.9456457073425542e-01, 3.6745069641528058e-17, -3.0603304105471010e-33, 
1.0397872280487526e-49), qd_real( 9.9424044945318790e-01, 4.4129423472462673e-17, -3.0107231708238066e-33, 7.4201582906861892e-50), qd_real( 9.9390697000235606e-01, -1.8964849471123746e-17, -1.5980853777937752e-35, -8.5374807150597082e-52), qd_real( 9.9356413552059530e-01, 2.9752309927797428e-17, -4.5066707331134233e-34, -3.3548191633805036e-50), qd_real( 9.9321194923479450e-01, 3.3096906261272262e-17, 1.5592811973249567e-33, 1.4373977733253592e-50), qd_real( 9.9285041445986510e-01, -1.4094517733693302e-17, -1.1954558131616916e-33, 1.8761873742867983e-50), qd_real( 9.9247953459870997e-01, 3.1093055095428906e-17, -1.8379594757818019e-33, -3.9885758559381314e-51), qd_real( 9.9209931314219180e-01, -3.9431926149588778e-17, -6.2758062911047230e-34, -1.2960929559212390e-50), qd_real( 9.9170975366909953e-01, -2.3372891311883661e-18, 2.7073298824968591e-35, -1.2569459441802872e-51), qd_real( 9.9131085984611544e-01, -2.5192111583372105e-17, -1.2852471567380887e-33, 5.2385212584310483e-50), qd_real( 9.9090263542778001e-01, 1.5394565094566704e-17, -1.0799984133184567e-33, 2.7451115960133595e-51), qd_real( 9.9048508425645709e-01, -5.5411437553780867e-17, -1.4614017210753585e-33, -3.8339374397387620e-50), qd_real( 9.9005821026229712e-01, -1.7055485906233963e-17, 1.3454939685758777e-33, 7.3117589137300036e-50), qd_real( 9.8962201746320089e-01, -5.2398217968132530e-17, 1.3463189211456219e-33, 5.8021640554894872e-50), qd_real( 9.8917650996478101e-01, -4.0987309937047111e-17, -4.4857560552048437e-34, -3.9414504502871125e-50), qd_real( 9.8872169196032378e-01, -1.0976227206656125e-17, 3.2311342577653764e-34, 9.6367946583575041e-51), qd_real( 9.8825756773074946e-01, 2.7030607784372632e-17, 7.7514866488601377e-35, 2.1019644956864938e-51), qd_real( 9.8778414164457218e-01, -2.3600693397159021e-17, -1.2323283769707861e-33, 3.0130900716803339e-50), qd_real( 9.8730141815785843e-01, -5.2332261255715652e-17, -2.7937644333152473e-33, 1.2074160567958408e-49), qd_real( 9.8680940181418553e-01, 
-5.0287214351061075e-17, -2.2681526238144461e-33, 4.4003694320169133e-50), qd_real( 9.8630809724459867e-01, -2.1520877103013341e-17, 1.1866528054187716e-33, -7.8532199199813836e-50), qd_real( 9.8579750916756748e-01, -5.1439452979953012e-17, 2.6276439309996725e-33, 7.5423552783286347e-50), qd_real( 9.8527764238894122e-01, 2.3155637027900207e-17, -7.5275971545764833e-34, 1.0582231660456094e-50), qd_real( 9.8474850180190421e-01, 1.0548144061829957e-17, 2.8786145266267306e-34, -3.6782210081466112e-51), qd_real( 9.8421009238692903e-01, 4.7983922627050691e-17, 2.2597419645070588e-34, 1.7573875814863400e-50), qd_real( 9.8366241921173025e-01, 1.9864948201635255e-17, -1.0743046281211033e-35, 1.7975662796558100e-52), qd_real( 9.8310548743121629e-01, 4.2170007522888628e-17, 8.2396265656440904e-34, -8.0803700139096561e-50), qd_real( 9.8253930228744124e-01, 1.5149580813777224e-17, -4.1802771422186237e-34, -2.2150174326226160e-50), qd_real( 9.8196386910955524e-01, 2.1108443711513084e-17, -1.5253013442896054e-33, -6.8388082079337969e-50), qd_real( 9.8137919331375456e-01, 1.3428163260355633e-17, -6.5294290469962986e-34, 2.7965412287456268e-51), qd_real( 9.8078528040323043e-01, 1.8546939997825006e-17, -1.0696564445530757e-33, 6.6668174475264961e-50), qd_real( 9.8018213596811743e-01, -3.6801786963856159e-17, 6.3245171387992842e-34, 1.8600176137175971e-50), qd_real( 9.7956976568544052e-01, 1.5573991584990420e-17, -1.3401066029782990e-33, -1.7263702199862149e-50), qd_real( 9.7894817531906220e-01, -2.3817727961148053e-18, -1.0694750370381661e-34, -8.2293047196087462e-51), qd_real( 9.7831737071962765e-01, -2.1623082233344895e-17, 1.0970403012028032e-33, 7.7091923099369339e-50), qd_real( 9.7767735782450993e-01, 5.0514136167059628e-17, -1.3254751701428788e-33, 7.0161254312124538e-50), qd_real( 9.7702814265775439e-01, -4.3353875751555997e-17, 5.4948839831535478e-34, -9.2755263105377306e-51), qd_real( 9.7636973133002114e-01, 9.3093931526213780e-18, -4.1184949155685665e-34, 
-3.1913926031393690e-50), qd_real( 9.7570213003852857e-01, -2.5572556081259686e-17, -9.3174244508942223e-34, -8.3675863211646863e-51), qd_real( 9.7502534506699412e-01, 2.6642660651899135e-17, 1.7819392739353853e-34, -3.3159625385648947e-51), qd_real( 9.7433938278557586e-01, 2.3041221476151512e-18, 1.0758686005031430e-34, 5.1074116432809478e-51), qd_real( 9.7364424965081198e-01, -5.1729808691005871e-17, -1.5508473005989887e-33, -1.6505125917675401e-49), qd_real( 9.7293995220556018e-01, -3.1311211122281800e-17, -2.6874087789006141e-33, -2.1652434818822145e-51), qd_real( 9.7222649707893627e-01, 3.6461169785938221e-17, 3.0309636883883133e-33, -1.2702716907967306e-51), qd_real( 9.7150389098625178e-01, -7.9865421122289046e-18, -4.3628417211263380e-34, 3.4307517798759352e-51), qd_real( 9.7077214072895035e-01, -4.7992163325114922e-17, 3.0347528910975783e-33, 8.5989199506479701e-50), qd_real( 9.7003125319454397e-01, 1.8365300348428844e-17, -1.4311097571944918e-33, 8.5846781998740697e-51), qd_real( 9.6928123535654853e-01, -4.5663660261927896e-17, 9.6147526917239387e-34, 8.1267605207871330e-51), qd_real( 9.6852209427441727e-01, 4.9475074918244771e-17, 2.8558738351911241e-33, 6.2948422316507461e-50), qd_real( 9.6775383709347551e-01, -4.5512132825515820e-17, -1.4127617988719093e-33, -8.4620609089704578e-50), qd_real( 9.6697647104485207e-01, 3.8496228837337864e-17, -5.3881631542745647e-34, -3.5221863171458959e-50), qd_real( 9.6619000344541250e-01, 5.1298840401665493e-17, 1.4564075904769808e-34, 1.0095973971377432e-50), qd_real( 9.6539444169768940e-01, -2.3745389918392156e-17, 5.9221515590053862e-34, -3.8811192556231094e-50), qd_real( 9.6458979328981276e-01, -3.4189470735959786e-17, 2.2982074155463522e-33, -4.5128791045607634e-50), qd_real( 9.6377606579543984e-01, 2.6463950561220029e-17, -2.9073234590199323e-36, -1.2938328629395601e-52), qd_real( 9.6295326687368388e-01, 8.9341960404313634e-18, -3.9071244661020126e-34, 1.6212091116847394e-50), qd_real( 9.6212140426904158e-01, 
1.5236770453846305e-17, -1.3050173525597142e-33, 7.9016122394092666e-50), qd_real( 9.6128048581132064e-01, 2.0933955216674039e-18, 1.0768607469015692e-34, -5.9453639304361774e-51), qd_real( 9.6043051941556579e-01, 2.4653904815317185e-17, -1.3792169410906322e-33, -4.7726598378506903e-51), qd_real( 9.5957151308198452e-01, 1.1000640085000957e-17, -4.2036030828223975e-34, 4.0023704842606573e-51), qd_real( 9.5870347489587160e-01, -4.3685014392372053e-17, 2.2001800662729131e-33, -1.0553721324358075e-49), qd_real( 9.5782641302753291e-01, -1.7696710075371263e-17, 1.9164034110382190e-34, 8.1489235071754813e-51), qd_real( 9.5694033573220882e-01, 4.0553869861875701e-17, -1.7147013364302149e-33, 2.5736745295329455e-50), qd_real( 9.5604525134999641e-01, 3.7705045279589067e-17, 1.9678699997347571e-33, 8.5093177731230180e-50), qd_real( 9.5514116830577067e-01, 5.0088652955014668e-17, -2.6983181838059211e-33, 1.0102323575596493e-49), qd_real( 9.5422809510910567e-01, -3.7545901690626874e-17, 1.4951619241257764e-33, -8.2717333151394973e-50), qd_real( 9.5330604035419386e-01, -2.5190738779919934e-17, -1.4272239821134379e-33, -4.6717286809283155e-50), qd_real( 9.5237501271976588e-01, -2.0269300462299272e-17, -1.0635956887246246e-33, -3.5514537666487619e-50), qd_real( 9.5143502096900834e-01, 3.1350584123266695e-17, -2.4824833452737813e-33, 9.5450335525380613e-51), qd_real( 9.5048607394948170e-01, 1.9410097562630436e-17, -8.1559393949816789e-34, -1.0501209720164562e-50), qd_real( 9.4952818059303667e-01, -7.5544151928043298e-18, -5.1260245024046686e-34, 1.8093643389040406e-50), qd_real( 9.4856134991573027e-01, 2.0668262262333232e-17, -5.9440730243667306e-34, 1.4268853111554300e-50), qd_real( 9.4758559101774109e-01, 4.3417993852125991e-17, -2.7728667889840373e-34, 5.5709160196519968e-51), qd_real( 9.4660091308328353e-01, 3.5056800210680730e-17, 9.8578536940318117e-34, 6.6035911064585197e-50), qd_real( 9.4560732538052128e-01, 4.6019102478523738e-17, -6.2534384769452059e-34, 
1.5758941215779961e-50), qd_real( 9.4460483726148026e-01, 8.8100545476641165e-18, 5.2291695602757842e-34, -3.3487256018407123e-50), qd_real( 9.4359345816196039e-01, -2.4093127844404214e-17, 1.0283279856803939e-34, -2.3398232614531355e-51), qd_real( 9.4257319760144687e-01, 1.3235564806436886e-17, -5.7048262885386911e-35, 3.9947050442753744e-51), qd_real( 9.4154406518302081e-01, -2.7896379547698341e-17, 1.6273236356733898e-33, -5.3075944708471203e-51), qd_real( 9.4050607059326830e-01, 2.8610421567116268e-17, 2.9261501147538827e-33, -2.6849867690896925e-50), qd_real( 9.3945922360218992e-01, -7.0152867943098655e-18, -5.6395693818011210e-34, 3.5568142678987651e-50), qd_real( 9.3840353406310806e-01, 5.4242545044795490e-17, -1.9039966607859759e-33, -1.5627792988341215e-49), qd_real( 9.3733901191257496e-01, -3.6570926284362776e-17, -1.1902940071273247e-33, -1.1215082331583223e-50), qd_real( 9.3626566717027826e-01, -1.3013766145497654e-17, 5.2229870061990595e-34, -3.3972777075634108e-51), qd_real( 9.3518350993894761e-01, -3.2609395302485065e-17, -8.1813015218875245e-34, 5.5642140024928139e-50), qd_real( 9.3409255040425887e-01, 4.4662824360767511e-17, -2.5903243047396916e-33, 8.1505209004343043e-50), qd_real( 9.3299279883473885e-01, 4.2041415555384355e-17, 9.0285896495521276e-34, 5.3019984977661259e-50), qd_real( 9.3188426558166815e-01, -4.0785944377318095e-17, 1.7631450298754169e-33, 2.5776403305507453e-50), qd_real( 9.3076696107898371e-01, 1.9703775102838329e-17, 6.5657908718278205e-34, -1.9480347966259524e-51), qd_real( 9.2964089584318121e-01, 5.1282530016864107e-17, 2.3719739891916261e-34, -1.7230065426917127e-50), qd_real( 9.2850608047321559e-01, -2.3306639848485943e-17, -7.7799084333208503e-34, -5.8597558009300305e-50), qd_real( 9.2736252565040111e-01, -2.7677111692155437e-17, 2.2110293450199576e-34, 2.0349190819680613e-50), qd_real( 9.2621024213831138e-01, -3.7303754586099054e-17, 2.0464457809993405e-33, 1.3831799631231817e-49), qd_real( 9.2504924078267758e-01, 
6.0529447412576159e-18, -8.8256517760278541e-35, 1.8285462122388328e-51), qd_real( 9.2387953251128674e-01, 1.7645047084336677e-17, -5.0442537321586818e-34, -4.0478677716823890e-50), qd_real( 9.2270112833387852e-01, 5.2963798918539814e-17, -5.7135699628876685e-34, 3.0163671797219087e-50), qd_real( 9.2151403934204190e-01, 4.1639843390684644e-17, 1.1891485604702356e-33, 2.0862437594380324e-50), qd_real( 9.2031827670911059e-01, -2.7806888779036837e-17, 2.7011013677071274e-33, 1.1998578792455499e-49), qd_real( 9.1911385169005777e-01, -2.6496484622344718e-17, 6.5403604763461920e-34, -2.8997180201186078e-50), qd_real( 9.1790077562139050e-01, -3.9074579680849515e-17, 2.3004636541490264e-33, 3.9851762744443107e-50), qd_real( 9.1667905992104270e-01, -4.1733978698287568e-17, 1.2094444804381172e-33, 4.9356916826097816e-50), qd_real( 9.1544871608826783e-01, -1.3591056692900894e-17, 5.9923027475594735e-34, 2.1403295925962879e-50), qd_real( 9.1420975570353069e-01, -3.6316182527814423e-17, -1.9438819777122554e-33, 2.8340679287728316e-50), qd_real( 9.1296219042839821e-01, -4.7932505228039469e-17, -1.7753551889428638e-33, 4.0607782903868160e-51), qd_real( 9.1170603200542988e-01, -2.6913273175034130e-17, -5.1928101916162528e-35, 1.1338175936090630e-51), qd_real( 9.1044129225806725e-01, -5.0433041673313820e-17, 1.0938746257404305e-33, 9.5378272084170731e-51), qd_real( 9.0916798309052238e-01, -3.6878564091359894e-18, 2.9951330310507693e-34, -1.2225666136919926e-50), qd_real( 9.0788611648766626e-01, -4.9459964301225840e-17, -1.6599682707075313e-33, -5.1925202712634716e-50), qd_real( 9.0659570451491533e-01, 3.0506718955442023e-17, -1.4478836557141204e-33, 1.8906373784448725e-50), qd_real( 9.0529675931811882e-01, -4.1153099826889901e-17, 2.9859368705184223e-33, 5.1145293917439211e-50), qd_real( 9.0398929312344334e-01, -6.6097544687484308e-18, 1.2728013034680357e-34, -4.3026097234014823e-51), qd_real( 9.0267331823725883e-01, -1.9250787033961483e-17, 1.3242128993244527e-33, 
-5.2971030688703665e-50), qd_real( 9.0134884704602203e-01, -1.3524789367698682e-17, 6.3605353115880091e-34, 3.6227400654573828e-50), qd_real( 9.0001589201616028e-01, -5.0639618050802273e-17, 1.0783525384031576e-33, 2.8130016326515111e-50), qd_real( 8.9867446569395382e-01, 2.6316906461033013e-17, 3.7003137047796840e-35, -2.3447719900465938e-51), qd_real( 8.9732458070541832e-01, -3.6396283314867290e-17, -2.3611649895474815e-33, 1.1837247047900082e-49), qd_real( 8.9596624975618511e-01, 4.9025099114811813e-17, -1.9440489814795326e-33, -1.7070486667767033e-49), qd_real( 8.9459948563138270e-01, -1.7516226396814919e-17, -1.3200670047246923e-33, -1.5953009884324695e-50), qd_real( 8.9322430119551532e-01, -4.1161239151908913e-18, 2.5380253805715999e-34, 4.2849455510516192e-51), qd_real( 8.9184070939234272e-01, 4.6690228137124547e-18, 1.6150254286841982e-34, -3.9617448820725012e-51), qd_real( 8.9044872324475788e-01, 1.1781931459051803e-17, -1.3346142209571930e-34, -9.4982373530733431e-51), qd_real( 8.8904835585466457e-01, -1.1164514966766675e-17, -3.4797636107798736e-34, -1.5605079997040631e-50), qd_real( 8.8763962040285393e-01, 1.2805091918587960e-17, 3.9948742059584459e-35, 3.8940716325338136e-51), qd_real( 8.8622253014888064e-01, -6.7307369600274315e-18, 1.2385593432917413e-34, 2.0364014759133320e-51), qd_real( 8.8479709843093779e-01, -9.4331469628972690e-18, -5.7106541478701439e-34, 1.8260134111907397e-50), qd_real( 8.8336333866573158e-01, 1.5822643380255127e-17, -7.8921320007588250e-34, -1.4782321016179836e-50), qd_real( 8.8192126434835505e-01, -1.9843248405890562e-17, -7.0412114007673834e-34, -1.0636770169389104e-50), qd_real( 8.8047088905216075e-01, 1.6311096602996350e-17, -5.7541360594724172e-34, -4.0128611862170021e-50), qd_real( 8.7901222642863353e-01, -4.7356837291118011e-17, 1.4388771297975192e-33, -2.9085554304479134e-50), qd_real( 8.7754529020726124e-01, 5.0113311846499550e-17, 2.8382769008739543e-34, 1.5550640393164140e-50), qd_real( 8.7607009419540660e-01, 
5.8729024235147677e-18, 2.7941144391738458e-34, -1.8536073846509828e-50), qd_real( 8.7458665227817611e-01, -5.7216617730397065e-19, -2.9705811503689596e-35, 8.7389593969796752e-52), qd_real( 8.7309497841829009e-01, 7.8424672990129903e-18, -4.8685015839797165e-34, -2.2815570587477527e-50), qd_real( 8.7159508665595109e-01, -5.5272998038551050e-17, -2.2104090204984907e-33, -9.7749763187643172e-50), qd_real( 8.7008699110871146e-01, -4.1888510868549968e-17, 7.0900185861878415e-34, 3.7600251115157260e-50), qd_real( 8.6857070597134090e-01, 2.7192781689782903e-19, -1.6710140396932428e-35, -1.2625514734637969e-51), qd_real( 8.6704624551569265e-01, 3.0267859550930567e-18, -1.1559438782171572e-34, -5.3580556397808012e-52), qd_real( 8.6551362409056909e-01, -6.3723113549628899e-18, 2.3725520321746832e-34, 1.5911880348395175e-50), qd_real( 8.6397285612158670e-01, 4.1486355957361607e-17, 2.2709976932210266e-33, -8.1228385659479984e-50), qd_real( 8.6242395611104050e-01, 3.7008992527383130e-17, 5.2128411542701573e-34, 2.6945600081026861e-50), qd_real( 8.6086693863776731e-01, -3.0050048898573656e-17, -8.8706183090892111e-34, 1.5005320558097301e-50), qd_real( 8.5930181835700836e-01, 4.2435655816850687e-17, 7.6181814059912025e-34, -3.9592127850658708e-50), qd_real( 8.5772861000027212e-01, -4.8183447936336620e-17, -1.1044130517687532e-33, -8.7400233444645562e-50), qd_real( 8.5614732837519447e-01, 9.1806925616606261e-18, 5.6328649785951470e-34, 2.3326646113217378e-51), qd_real( 8.5455798836540053e-01, -1.2991124236396092e-17, 1.2893407722948080e-34, -3.6506925747583053e-52), qd_real( 8.5296060493036363e-01, 2.7152984251981370e-17, 7.4336483283120719e-34, 4.2162417622350668e-50), qd_real( 8.5135519310526520e-01, -5.3279874446016209e-17, 2.2281156380919942e-33, -4.0281886404138477e-50), qd_real( 8.4974176800085244e-01, 5.1812347659974015e-17, 3.0810626087331275e-33, -2.5931308201994965e-50), qd_real( 8.4812034480329723e-01, 1.8762563415239981e-17, 1.4048773307919617e-33, 
-2.4915221509958691e-50), qd_real( 8.4649093877405213e-01, -4.7969419958569345e-17, -2.7518267097886703e-33, -7.3518959727313350e-50), qd_real( 8.4485356524970712e-01, -4.3631360296879637e-17, -2.0307726853367547e-33, 4.3097229819851761e-50), qd_real( 8.4320823964184544e-01, 9.6536707005959077e-19, 2.8995142431556364e-36, 9.6715076811480284e-53), qd_real( 8.4155497743689844e-01, -3.4095465391321557e-17, -8.4130208607579595e-34, -4.9447283960568686e-50), qd_real( 8.3989379419599952e-01, -1.6673694881511411e-17, -1.4759184141750289e-33, -7.5795098161914058e-50), qd_real( 8.3822470555483808e-01, -3.5560085052855026e-17, 1.1689791577022643e-33, -5.8627347359723411e-50), qd_real( 8.3654772722351201e-01, -2.0899059027066533e-17, -9.8104097821002585e-35, -3.1609177868229853e-51), qd_real( 8.3486287498638001e-01, 4.6048430609159657e-17, -5.1827423265239912e-34, -7.0505343435504109e-51), qd_real( 8.3317016470191319e-01, 1.3275129507229764e-18, 4.8589164115370863e-35, 4.5422281300506859e-51), qd_real( 8.3146961230254524e-01, 1.4073856984728024e-18, 4.6951315383980830e-35, 5.1431906049905658e-51), qd_real( 8.2976123379452305e-01, -2.9349109376485597e-18, 1.1496917934149818e-34, 3.5186665544980233e-51), qd_real( 8.2804504525775580e-01, -4.4196593225871532e-17, 2.7967864855211251e-33, 1.0030777287393502e-49), qd_real( 8.2632106284566353e-01, -5.3957485453612902e-17, 6.8976896130138550e-34, 3.8106164274199196e-50), qd_real( 8.2458930278502529e-01, -2.6512360488868275e-17, 1.6916964350914386e-34, 6.7693974813562649e-51), qd_real( 8.2284978137582632e-01, 1.5193019034505495e-17, 9.6890547246521685e-34, 5.6994562923653264e-50), qd_real( 8.2110251499110465e-01, 3.0715131609697682e-17, -1.7037168325855879e-33, -1.1149862443283853e-49), qd_real( 8.1934752007679701e-01, -4.8200736995191133e-17, -1.5574489646672781e-35, -9.5647853614522216e-53), qd_real( 8.1758481315158371e-01, -1.4883149812426772e-17, -7.8273262771298917e-34, 4.1332149161031594e-50), qd_real( 8.1581441080673378e-01, 
8.2652693782130871e-18, -2.3028778135179471e-34, 1.5102071387249843e-50), qd_real( 8.1403632970594841e-01, -5.2127351877042624e-17, -1.9047670611316360e-33, -1.6937269585941507e-49), qd_real( 8.1225058658520388e-01, 3.1054545609214803e-17, 2.2649541922707251e-34, -7.4221684154649405e-51), qd_real( 8.1045719825259477e-01, 2.3520367349840499e-17, -7.7530070904846341e-34, -7.2792616357197140e-50), qd_real( 8.0865618158817498e-01, 9.3251597879721674e-18, -7.1823301933068394e-34, 2.3925440846132106e-50), qd_real( 8.0684755354379922e-01, 4.9220603766095546e-17, 2.9796016899903487e-33, 1.5220754223615788e-49), qd_real( 8.0503133114296355e-01, 5.1368289568212149e-17, 6.3082807402256524e-34, 7.3277646085129827e-51), qd_real( 8.0320753148064494e-01, -3.3060609804814910e-17, -1.2242726252420433e-33, 2.8413673268630117e-50), qd_real( 8.0137617172314024e-01, -2.0958013413495834e-17, -4.3798162198006931e-34, 2.0235690497752515e-50), qd_real( 7.9953726910790501e-01, 2.0356723822005431e-17, -9.7448513696896360e-34, 5.3608109599696008e-52), qd_real( 7.9769084094339116e-01, -4.6730759884788944e-17, 2.3075897077191757e-33, 3.1605567774640253e-51), qd_real( 7.9583690460888357e-01, -3.0062724851910721e-17, -2.2496210832042235e-33, -6.5881774117183040e-50), qd_real( 7.9397547755433717e-01, -7.4194631759921416e-18, 2.4124341304631069e-34, -4.9956808616244972e-51), qd_real( 7.9210657730021239e-01, -3.7087850202326467e-17, -1.4874457267228264e-33, 2.9323097289153505e-50), qd_real( 7.9023022143731003e-01, 2.3056905954954492e-17, 1.4481080533260193e-33, -7.6725237057203488e-50), qd_real( 7.8834642762660623e-01, 3.4396993154059708e-17, 1.7710623746737170e-33, 1.7084159098417402e-49), qd_real( 7.8645521359908577e-01, -9.7841429939305265e-18, 3.3906063272445472e-34, 5.7269505320382577e-51), qd_real( 7.8455659715557524e-01, -8.5627965423173476e-18, -2.1106834459001849e-34, -1.6890322182469603e-50), qd_real( 7.8265059616657573e-01, 9.0745866975808825e-18, 6.7623847404278666e-34, 
-1.7173237731987271e-50), qd_real( 7.8073722857209449e-01, -9.9198782066678806e-18, -2.1265794012162715e-36, 3.0772165598957647e-54), qd_real( 7.7881651238147598e-01, -2.4891385579973807e-17, 6.7665497024807980e-35, -6.5218594281701332e-52), qd_real( 7.7688846567323244e-01, 7.7418602570672864e-18, -5.9986517872157897e-34, 3.0566548232958972e-50), qd_real( 7.7495310659487393e-01, -5.2209083189826433e-17, -9.6653593393686612e-34, 3.7027750076562569e-50), qd_real( 7.7301045336273699e-01, -3.2565907033649772e-17, 1.3860807251523929e-33, -3.9971329917586022e-50), qd_real( 7.7106052426181382e-01, -4.4558442347769265e-17, -2.9863565614083783e-33, -6.8795262083596236e-50), qd_real( 7.6910333764557959e-01, 5.1546455184564817e-17, 2.6142829553524292e-33, -1.6199023632773298e-49), qd_real( 7.6713891193582040e-01, -1.8885903683750782e-17, -1.3659359331495433e-33, -2.2538834962921934e-50), qd_real( 7.6516726562245896e-01, -3.2707225612534598e-17, 1.1177117747079528e-33, -3.7005182280175715e-50), qd_real( 7.6318841726338127e-01, 2.6314748416750748e-18, 1.4048039063095910e-34, 8.9601886626630321e-52), qd_real( 7.6120238548426178e-01, 3.5315510881690551e-17, 1.2833566381864357e-33, 8.6221435180890613e-50), qd_real( 7.5920918897838807e-01, -3.8558842175523123e-17, 2.9720241208332759e-34, -1.2521388928220163e-50), qd_real( 7.5720884650648457e-01, -1.9909098777335502e-17, 3.9409283266158482e-34, 2.0744254207802976e-50), qd_real( 7.5520137689653655e-01, -1.9402238001823017e-17, -3.7756206444727573e-34, -2.1212242308178287e-50), qd_real( 7.5318679904361252e-01, -3.7937789838736540e-17, -6.7009539920231559e-34, -6.7128562115050214e-51), qd_real( 7.5116513190968637e-01, 4.3499761158645868e-17, 2.5227718971102212e-33, -6.5969709212757102e-50), qd_real( 7.4913639452345937e-01, -4.4729078447011889e-17, -2.4206025249983768e-33, 1.1336681351116422e-49), qd_real( 7.4710060598018013e-01, 1.1874824875965430e-17, 2.1992523849833518e-34, 1.1025018564644483e-50), qd_real( 7.4505778544146595e-01, 
1.5078686911877863e-17, 8.0898987212942471e-34, 8.2677958765323532e-50), qd_real( 7.4300795213512172e-01, -2.5144629669719265e-17, 7.1128989512526157e-34, 3.0181629077821220e-50), qd_real( 7.4095112535495911e-01, -1.4708616952297345e-17, -4.9550433827142032e-34, 3.1434132533735671e-50), qd_real( 7.3888732446061511e-01, 3.4324874808225091e-17, -1.3706639444717610e-33, -3.3520827530718938e-51), qd_real( 7.3681656887736990e-01, -2.8932468101656295e-17, -3.4649887126202378e-34, -1.8484474476291476e-50), qd_real( 7.3473887809596350e-01, -3.4507595976263941e-17, -2.3718000676666409e-33, -3.9696090387165402e-50), qd_real( 7.3265427167241282e-01, 1.8918673481573520e-17, -1.5123719544119886e-33, -9.7922152011625728e-51), qd_real( 7.3056276922782759e-01, -2.9689959904476928e-17, -1.1276871244239744e-33, -3.0531520961539007e-50), qd_real( 7.2846439044822520e-01, 1.1924642323370718e-19, 5.9001892316611011e-36, 1.2178089069502704e-52), qd_real( 7.2635915508434601e-01, -3.1917502443460542e-17, 7.7047912412039396e-34, 4.1455880160182123e-50), qd_real( 7.2424708295146689e-01, 2.9198471334403004e-17, 2.3027324968739464e-33, -1.2928820533892183e-51), qd_real( 7.2212819392921535e-01, -2.3871262053452047e-17, 1.0636125432862273e-33, -4.4598638837802517e-50), qd_real( 7.2000250796138165e-01, -2.5689658854462333e-17, -9.1492566948567925e-34, 4.4403780801267786e-50), qd_real( 7.1787004505573171e-01, 2.7006476062511453e-17, -2.2854956580215348e-34, 9.1726903890287867e-51), qd_real( 7.1573082528381871e-01, -5.1581018476410262e-17, -1.3736271349300259e-34, -1.2734611344111297e-50), qd_real( 7.1358486878079364e-01, -4.2342504403133584e-17, -4.2690366101617268e-34, -2.6352370883066522e-50), qd_real( 7.1143219574521643e-01, 7.9643298613856813e-18, 2.9488239510721469e-34, 1.6985236437666356e-50), qd_real( 7.0927282643886569e-01, -3.7597359110245730e-17, 1.0613125954645119e-34, 8.9465480185486032e-51), qd_real( 7.0710678118654757e-01, -4.8336466567264567e-17, 2.0693376543497068e-33, 
2.4677734957341755e-50) }; /* Computes sin(a) and cos(a) using Taylor series. Assumes |a| <= pi/2048. */ static void sincos_taylor(const qd_real &a, qd_real &sin_a, qd_real &cos_a) { const double thresh = 0.5 * qd_real::_eps * std::abs(to_double(a)); qd_real p, s, t, x; if (a.is_zero()) { sin_a = 0.0; cos_a = 1.0; return; } x = -sqr(a); s = a; p = a; int i = 0; do { p *= x; t = p * inv_fact[i]; s += t; i += 2; } while (i < n_inv_fact && std::abs(to_double(t)) > thresh); sin_a = s; cos_a = sqrt(1.0 - sqr(s)); } static qd_real sin_taylor(const qd_real &a) { const double thresh = 0.5 * qd_real::_eps * std::abs(to_double(a)); qd_real p, s, t, x; if (a.is_zero()) { return 0.0; } x = -sqr(a); s = a; p = a; int i = 0; do { p *= x; t = p * inv_fact[i]; s += t; i += 2; } while (i < n_inv_fact && std::abs(to_double(t)) > thresh); return s; } static qd_real cos_taylor(const qd_real &a) { const double thresh = 0.5 * qd_real::_eps; qd_real p, s, t, x; if (a.is_zero()) { return 1.0; } x = -sqr(a); s = 1.0 + mul_pwr2(x, 0.5); p = x; int i = 1; do { p *= x; t = p * inv_fact[i]; s += t; i += 2; } while (i < n_inv_fact && std::abs(to_double(t)) > thresh); return s; } qd_real sin(const qd_real &a) { /* Strategy. To compute sin(x), we choose integers a, b so that x = s + a * (pi/2) + b * (pi/1024) and |s| <= pi/2048. Using a precomputed table of sin(k pi / 1024) and cos(k pi / 1024), we can compute sin(x) from sin(s) and cos(s). This greatly increases the convergence of the sine Taylor series. 
*/ if (a.is_zero()) { return 0.0; } // approximately reduce modulo 2*pi qd_real z = nint(a / qd_real::_2pi); qd_real r = a - qd_real::_2pi * z; // approximately reduce modulo pi/2 and then modulo pi/1024 double q = std::floor(r.x[0] / qd_real::_pi2[0] + 0.5); qd_real t = r - qd_real::_pi2 * q; int j = static_cast(q); q = std::floor(t.x[0] / _pi1024[0] + 0.5); t -= _pi1024 * q; int k = static_cast(q); int abs_k = std::abs(k); if (j < -2 || j > 2) { qd_real::error("(qd_real::sin): Cannot reduce modulo pi/2."); return qd_real::_nan; } if (abs_k > 256) { qd_real::error("(qd_real::sin): Cannot reduce modulo pi/1024."); return qd_real::_nan; } if (k == 0) { switch (j) { case 0: return sin_taylor(t); case 1: return cos_taylor(t); case -1: return -cos_taylor(t); default: return -sin_taylor(t); } } qd_real sin_t, cos_t; qd_real u = cos_table[abs_k-1]; qd_real v = sin_table[abs_k-1]; sincos_taylor(t, sin_t, cos_t); if (j == 0) { if (k > 0) { r = u * sin_t + v * cos_t; } else { r = u * sin_t - v * cos_t; } } else if (j == 1) { if (k > 0) { r = u * cos_t - v * sin_t; } else { r = u * cos_t + v * sin_t; } } else if (j == -1) { if (k > 0) { r = v * sin_t - u * cos_t; } else { r = - u * cos_t - v * sin_t; } } else { if (k > 0) { r = - u * sin_t - v * cos_t; } else { r = v * cos_t - u * sin_t; } } return r; } qd_real cos(const qd_real &a) { if (a.is_zero()) { return 1.0; } // approximately reduce modulo 2*pi qd_real z = nint(a / qd_real::_2pi); qd_real r = a - qd_real::_2pi * z; // approximately reduce modulo pi/2 and then modulo pi/1024 double q = std::floor(r.x[0] / qd_real::_pi2.x[0] + 0.5); qd_real t = r - qd_real::_pi2 * q; int j = static_cast(q); q = std::floor(t.x[0] / _pi1024.x[0] + 0.5); t -= _pi1024 * q; int k = static_cast(q); int abs_k = std::abs(k); if (j < -2 || j > 2) { qd_real::error("(qd_real::cos): Cannot reduce modulo pi/2."); return qd_real::_nan; } if (abs_k > 256) { qd_real::error("(qd_real::cos): Cannot reduce modulo pi/1024."); return qd_real::_nan; } if (k 
== 0) { switch (j) { case 0: return cos_taylor(t); case 1: return -sin_taylor(t); case -1: return sin_taylor(t); default: return -cos_taylor(t); } } qd_real sin_t, cos_t; sincos_taylor(t, sin_t, cos_t); qd_real u = cos_table[abs_k-1]; qd_real v = sin_table[abs_k-1]; if (j == 0) { if (k > 0) { r = u * cos_t - v * sin_t; } else { r = u * cos_t + v * sin_t; } } else if (j == 1) { if (k > 0) { r = - u * sin_t - v * cos_t; } else { r = v * cos_t - u * sin_t; } } else if (j == -1) { if (k > 0) { r = u * sin_t + v * cos_t; } else { r = u * sin_t - v * cos_t; } } else { if (k > 0) { r = v * sin_t - u * cos_t; } else { r = - u * cos_t - v * sin_t; } } return r; } void sincos(const qd_real &a, qd_real &sin_a, qd_real &cos_a) { if (a.is_zero()) { sin_a = 0.0; cos_a = 1.0; return; } // approximately reduce by 2*pi qd_real z = nint(a / qd_real::_2pi); qd_real t = a - qd_real::_2pi * z; // approximately reduce by pi/2 and then by pi/1024. double q = std::floor(t.x[0] / qd_real::_pi2.x[0] + 0.5); t -= qd_real::_pi2 * q; int j = static_cast(q); q = std::floor(t.x[0] / _pi1024.x[0] + 0.5); t -= _pi1024 * q; int k = static_cast(q); int abs_k = std::abs(k); if (j < -2 || j > 2) { qd_real::error("(qd_real::sincos): Cannot reduce modulo pi/2."); cos_a = sin_a = qd_real::_nan; return; } if (abs_k > 256) { qd_real::error("(qd_real::sincos): Cannot reduce modulo pi/1024."); cos_a = sin_a = qd_real::_nan; return; } qd_real sin_t, cos_t; sincos_taylor(t, sin_t, cos_t); if (k == 0) { if (j == 0) { sin_a = sin_t; cos_a = cos_t; } else if (j == 1) { sin_a = cos_t; cos_a = -sin_t; } else if (j == -1) { sin_a = -cos_t; cos_a = sin_t; } else { sin_a = -sin_t; cos_a = -cos_t; } return; } qd_real u = cos_table[abs_k-1]; qd_real v = sin_table[abs_k-1]; if (j == 0) { if (k > 0) { sin_a = u * sin_t + v * cos_t; cos_a = u * cos_t - v * sin_t; } else { sin_a = u * sin_t - v * cos_t; cos_a = u * cos_t + v * sin_t; } } else if (j == 1) { if (k > 0) { cos_a = - u * sin_t - v * cos_t; sin_a = u * cos_t - v 
* sin_t; } else { cos_a = v * cos_t - u * sin_t; sin_a = u * cos_t + v * sin_t; } } else if (j == -1) { if (k > 0) { cos_a = u * sin_t + v * cos_t; sin_a = v * sin_t - u * cos_t; } else { cos_a = u * sin_t - v * cos_t; sin_a = - u * cos_t - v * sin_t; } } else { if (k > 0) { sin_a = - u * sin_t - v * cos_t; cos_a = v * sin_t - u * cos_t; } else { sin_a = v * cos_t - u * sin_t; cos_a = - u * cos_t - v * sin_t; } } } qd_real atan(const qd_real &a) { return atan2(a, qd_real(1.0)); } qd_real atan2(const qd_real &y, const qd_real &x) { /* Strategy: Instead of using Taylor series to compute arctan, we instead use Newton's iteration to solve the equation sin(z) = y/r or cos(z) = x/r where r = sqrt(x^2 + y^2). The iteration is given by z' = z + (y - sin(z)) / cos(z) (for equation 1) z' = z - (x - cos(z)) / sin(z) (for equation 2) Here, x and y are normalized so that x^2 + y^2 = 1. If |x| > |y|, then first iteration is used since the denominator is larger. Otherwise, the second is used. */ if (x.is_zero()) { if (y.is_zero()) { /* Both x and y is zero. */ qd_real::error("(qd_real::atan2): Both arguments zero."); return qd_real::_nan; } return (y.is_positive()) ? qd_real::_pi2 : -qd_real::_pi2; } else if (y.is_zero()) { return (x.is_positive()) ? qd_real(0.0) : qd_real::_pi; } if (x == y) { return (y.is_positive()) ? qd_real::_pi4 : -qd_real::_3pi4; } if (x == -y) { return (y.is_positive()) ? qd_real::_3pi4 : -qd_real::_pi4; } qd_real r = sqrt(sqr(x) + sqr(y)); qd_real xx = x / r; qd_real yy = y / r; /* Compute double precision approximation to atan. */ qd_real z = std::atan2(to_double(y), to_double(x)); qd_real sin_z, cos_z; if (std::abs(xx.x[0]) > std::abs(yy.x[0])) { /* Use Newton iteration 1. z' = z + (y - sin(z)) / cos(z) */ sincos(z, sin_z, cos_z); z += (yy - sin_z) / cos_z; sincos(z, sin_z, cos_z); z += (yy - sin_z) / cos_z; sincos(z, sin_z, cos_z); z += (yy - sin_z) / cos_z; } else { /* Use Newton iteration 2. 
z' = z - (x - cos(z)) / sin(z) */ sincos(z, sin_z, cos_z); z -= (xx - cos_z) / sin_z; sincos(z, sin_z, cos_z); z -= (xx - cos_z) / sin_z; sincos(z, sin_z, cos_z); z -= (xx - cos_z) / sin_z; } return z; } qd_real drem(const qd_real &a, const qd_real &b) { qd_real n = nint(a/b); return (a - n * b); } qd_real divrem(const qd_real &a, const qd_real &b, qd_real &r) { qd_real n = nint(a/b); r = a - n * b; return n; } qd_real tan(const qd_real &a) { qd_real s, c; sincos(a, s, c); return s/c; } qd_real asin(const qd_real &a) { qd_real abs_a = abs(a); if (abs_a > 1.0) { qd_real::error("(qd_real::asin): Argument out of domain."); return qd_real::_nan; } if (abs_a.is_one()) { return (a.is_positive()) ? qd_real::_pi2 : -qd_real::_pi2; } return atan2(a, sqrt(1.0 - sqr(a))); } qd_real acos(const qd_real &a) { qd_real abs_a = abs(a); if (abs_a > 1.0) { qd_real::error("(qd_real::acos): Argument out of domain."); return qd_real::_nan; } if (abs_a.is_one()) { return (a.is_positive()) ? qd_real(0.0) : qd_real::_pi; } return atan2(sqrt(1.0 - sqr(a)), a); } qd_real sinh(const qd_real &a) { if (a.is_zero()) { return 0.0; } if (abs(a) > 0.05) { qd_real ea = exp(a); return mul_pwr2(ea - inv(ea), 0.5); } /* Since a is small, using the above formula gives a lot of cancellation. So use Taylor series. 
*/ qd_real s = a; qd_real t = a; qd_real r = sqr(t); double m = 1.0; double thresh = std::abs(to_double(a) * qd_real::_eps); do { m += 2.0; t *= r; t /= (m-1) * m; s += t; } while (abs(t) > thresh); return s; } qd_real cosh(const qd_real &a) { if (a.is_zero()) { return 1.0; } qd_real ea = exp(a); return mul_pwr2(ea + inv(ea), 0.5); } qd_real tanh(const qd_real &a) { if (a.is_zero()) { return 0.0; } if (std::abs(to_double(a)) > 0.05) { qd_real ea = exp(a); qd_real inv_ea = inv(ea); return (ea - inv_ea) / (ea + inv_ea); } else { qd_real s, c; s = sinh(a); c = sqrt(1.0 + sqr(s)); return s / c; } } void sincosh(const qd_real &a, qd_real &s, qd_real &c) { if (std::abs(to_double(a)) <= 0.05) { s = sinh(a); c = sqrt(1.0 + sqr(s)); } else { qd_real ea = exp(a); qd_real inv_ea = inv(ea); s = mul_pwr2(ea - inv_ea, 0.5); c = mul_pwr2(ea + inv_ea, 0.5); } } qd_real asinh(const qd_real &a) { return log(a + sqrt(sqr(a) + 1.0)); } qd_real acosh(const qd_real &a) { if (a < 1.0) { qd_real::error("(qd_real::acosh): Argument out of domain."); return qd_real::_nan; } return log(a + sqrt(sqr(a) - 1.0)); } qd_real atanh(const qd_real &a) { if (abs(a) >= 1.0) { qd_real::error("(qd_real::atanh): Argument out of domain."); return qd_real::_nan; } return mul_pwr2(log((1.0 + a) / (1.0 - a)), 0.5); } QD_API qd_real fmod(const qd_real &a, const qd_real &b) { qd_real n = aint(a / b); return (a - b * n); } QD_API qd_real qdrand() { static const double m_const = 4.6566128730773926e-10; /* = 2^{-31} */ double m = m_const; qd_real r = 0.0; double d; /* Strategy: Generate 31 bits at a time, using lrand48 random number generator. Shift the bits, and repeat 7 times. */ for (int i = 0; i < 7; i++, m *= m_const) { d = std::rand() * m; r += d; } return r; } /* polyeval(c, n, x) Evaluates the given n-th degree polynomial at x. The polynomial is given by the array of (n+1) coefficients. */ qd_real polyeval(const qd_real *c, int n, const qd_real &x) { /* Just use Horner's method of polynomial evaluation. 
*/ qd_real r = c[n]; for (int i = n-1; i >= 0; i--) { r *= x; r += c[i]; } return r; } /* polyroot(c, n, x0) Given an n-th degree polynomial, finds a root close to the given guess x0. Note that this uses simple Newton iteration scheme, and does not work for multiple roots. */ QD_API qd_real polyroot(const qd_real *c, int n, const qd_real &x0, int max_iter, double thresh) { qd_real x = x0; qd_real f; qd_real *d = new qd_real[n]; bool conv = false; int i; double max_c = std::abs(to_double(c[0])); double v; if (thresh == 0.0) thresh = qd_real::_eps; /* Compute the coefficients of the derivatives. */ for (i = 1; i <= n; i++) { v = std::abs(to_double(c[i])); if (v > max_c) max_c = v; d[i-1] = c[i] * static_cast(i); } thresh *= max_c; /* Newton iteration. */ for (i = 0; i < max_iter; i++) { f = polyeval(c, n, x); if (abs(f) < thresh) { conv = true; break; } x -= (f / polyeval(d, n-1, x)); } delete [] d; if (!conv) { qd_real::error("(qd_real::polyroot): Failed to converge."); return qd_real::_nan; } return x; } qd_real qd_real::debug_rand() { if (std::rand() % 2 == 0) return qdrand(); int expn = 0; qd_real a = 0.0; double d; for (int i = 0; i < 4; i++) { d = std::ldexp(std::rand() / static_cast(RAND_MAX), -expn); a += d; expn = expn + 54 + std::rand() % 200; } return a; } FreeFem-sources-4.9/3rdparty/dissection/src/qd/qd_real.h000664 000000 000000 00000026672 14037356732 023256 0ustar00rootroot000000 000000 /* * include/qd_real.h * * This work was supported by the Director, Office of Science, Division * of Mathematical, Information, and Computational Sciences of the * U.S. Department of Energy under contract number DE-AC03-76SF00098. * * Copyright (c) 2000-2007 * * Quad-double precision (>= 212-bit significand) floating point arithmetic * package, written in ANSI C++, taking full advantage of operator overloading. 
* Uses similar techniques as that of David Bailey's double-double package * and that of Jonathan Shewchuk's adaptive precision floating point * arithmetic package. See * * http://www.nersc.gov/~dhbailey/mpdist/mpdist.html * http://www.cs.cmu.edu/~quake/robust.html * * for more details. * * Yozo Hida */ // operator int(), dd_real copysign(), fmax(), lobg(), scalbn() are added // for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki #ifndef _QD_QD_REAL_H #define _QD_QD_REAL_H #include #include #include #include #include struct QD_API qd_real { double x[4]; /* The Components. */ /* Eliminates any zeros in the middle component(s). */ void zero_elim(); void zero_elim(double &e); void renorm(); void renorm(double &e); void quick_accum(double d, double &e); void quick_prod_accum(double a, double b, double &e); qd_real(double x0, double x1, double x2, double x3); explicit qd_real(const double *xx); static const qd_real _2pi; static const qd_real _pi; static const qd_real _3pi4; static const qd_real _pi2; static const qd_real _pi4; static const qd_real _e; static const qd_real _log2; static const qd_real _log10; static const qd_real _nan; static const qd_real _inf; static const double _eps; static const double _min_normalized; static const qd_real _max; static const qd_real _safe_max; static const int _ndigits; qd_real(); qd_real(const char *s); qd_real(const dd_real &dd); qd_real(double d); qd_real(int i); double operator[](int i) const; double &operator[](int i); static void error(const char *msg); bool isnan() const; bool isfinite() const { return QD_ISFINITE(x[0]); } bool isinf() const { return QD_ISINF(x[0]); } static qd_real ieee_add(const qd_real &a, const qd_real &b); static qd_real sloppy_add(const qd_real &a, const qd_real &b); qd_real &operator+=(double a); qd_real &operator+=(const dd_real &a); qd_real &operator+=(const qd_real &a); qd_real &operator-=(double a); qd_real &operator-=(const dd_real &a); qd_real &operator-=(const qd_real &a); static 
qd_real sloppy_mul(const qd_real &a, const qd_real &b); static qd_real accurate_mul(const qd_real &a, const qd_real &b); qd_real &operator*=(double a); qd_real &operator*=(const dd_real &a); qd_real &operator*=(const qd_real &a); static qd_real sloppy_div(const qd_real &a, const dd_real &b); static qd_real accurate_div(const qd_real &a, const dd_real &b); static qd_real sloppy_div(const qd_real &a, const qd_real &b); static qd_real accurate_div(const qd_real &a, const qd_real &b); qd_real &operator/=(double a); qd_real &operator/=(const dd_real &a); qd_real &operator/=(const qd_real &a); qd_real operator^(int n) const; qd_real operator-() const; qd_real operator+() const; qd_real &operator=(double a); qd_real &operator=(const dd_real &a); qd_real &operator=(const char *s); bool is_zero() const; bool is_one() const; bool is_positive() const; bool is_negative() const; static qd_real rand(void); void to_digits(char *s, int &expn, int precision = _ndigits) const; void write(char *s, int len, int precision = _ndigits, bool showpos = false, bool uppercase = false) const; std::string to_string(int precision = _ndigits, int width = 0, std::ios_base::fmtflags fmt = static_cast(0), bool showpos = false, bool uppercase = false, char fill = ' ') const; static int read(const char *s, qd_real &a); /* Debugging methods */ void dump(const std::string &name = "", std::ostream &os = std::cerr) const; void dump_bits(const std::string &name = "", std::ostream &os = std::cerr) const; static qd_real debug_rand(); // added for complex class of LLVM Clang++: 23 Jul.2015 Atsushi Suzuki operator int() { return int(x[0]); } operator int() const { return int(x[0]); } }; namespace std { template <> class numeric_limits : public numeric_limits { public: inline static double epsilon() { return qd_real::_eps; } inline static double min() { return qd_real::_min_normalized; } inline static qd_real max() { return qd_real::_max; } inline static qd_real safe_max() { return qd_real::_safe_max; } static 
const int digits = 209; static const int digits10 = 62; }; } QD_API qd_real polyeval(const qd_real *c, int n, const qd_real &x); QD_API qd_real polyroot(const qd_real *c, int n, const qd_real &x0, int max_iter = 64, double thresh = 0.0); QD_API qd_real qdrand(void); QD_API qd_real sqrt(const qd_real &a); QD_API inline bool isnan(const qd_real &a) { return a.isnan(); } QD_API inline bool isfinite(const qd_real &a) { return a.isfinite(); } QD_API inline bool isinf(const qd_real &a) { return a.isinf(); } /* Computes qd * d where d is known to be a power of 2. This can be done component wise. */ QD_API qd_real mul_pwr2(const qd_real &qd, double d); QD_API qd_real operator+(const qd_real &a, const qd_real &b); QD_API qd_real operator+(const dd_real &a, const qd_real &b); QD_API qd_real operator+(const qd_real &a, const dd_real &b); QD_API qd_real operator+(const qd_real &a, double b); QD_API qd_real operator+(double a, const qd_real &b); QD_API qd_real operator-(const qd_real &a, const qd_real &b); QD_API qd_real operator-(const dd_real &a, const qd_real &b); QD_API qd_real operator-(const qd_real &a, const dd_real &b); QD_API qd_real operator-(const qd_real &a, double b); QD_API qd_real operator-(double a, const qd_real &b); QD_API qd_real operator*(const qd_real &a, const qd_real &b); QD_API qd_real operator*(const dd_real &a, const qd_real &b); QD_API qd_real operator*(const qd_real &a, const dd_real &b); QD_API qd_real operator*(const qd_real &a, double b); QD_API qd_real operator*(double a, const qd_real &b); QD_API qd_real operator/(const qd_real &a, const qd_real &b); QD_API qd_real operator/(const dd_real &a, const qd_real &b); QD_API qd_real operator/(const qd_real &a, const dd_real &b); QD_API qd_real operator/(const qd_real &a, double b); QD_API qd_real operator/(double a, const qd_real &b); QD_API qd_real sqr(const qd_real &a); QD_API qd_real sqrt(const qd_real &a); QD_API qd_real pow(const qd_real &a, int n); QD_API qd_real pow(const qd_real &a, const 
qd_real &b); QD_API qd_real npwr(const qd_real &a, int n); QD_API qd_real nroot(const qd_real &a, int n); QD_API qd_real rem(const qd_real &a, const qd_real &b); QD_API qd_real drem(const qd_real &a, const qd_real &b); QD_API qd_real divrem(const qd_real &a, const qd_real &b, qd_real &r); dd_real to_dd_real(const qd_real &a); double to_double(const qd_real &a); int to_int(const qd_real &a); QD_API bool operator==(const qd_real &a, const qd_real &b); QD_API bool operator==(const qd_real &a, const dd_real &b); QD_API bool operator==(const dd_real &a, const qd_real &b); QD_API bool operator==(double a, const qd_real &b); QD_API bool operator==(const qd_real &a, double b); QD_API bool operator==(int a, const qd_real &b); QD_API bool operator==(const qd_real &a, int b); QD_API bool operator<(const qd_real &a, const qd_real &b); QD_API bool operator<(const qd_real &a, const dd_real &b); QD_API bool operator<(const dd_real &a, const qd_real &b); QD_API bool operator<(double a, const qd_real &b); QD_API bool operator<(const qd_real &a, double b); QD_API bool operator<(int a, const qd_real &b); QD_API bool operator<(const qd_real &a, int b); QD_API bool operator>(const qd_real &a, const qd_real &b); QD_API bool operator>(const qd_real &a, const dd_real &b); QD_API bool operator>(const dd_real &a, const qd_real &b); QD_API bool operator>(double a, const qd_real &b); QD_API bool operator>(const qd_real &a, double b); QD_API bool operator>(int a, const qd_real &b); QD_API bool operator>(const qd_real &a, int b); QD_API bool operator<=(const qd_real &a, const qd_real &b); QD_API bool operator<=(const qd_real &a, const dd_real &b); QD_API bool operator<=(const dd_real &a, const qd_real &b); QD_API bool operator<=(double a, const qd_real &b); QD_API bool operator<=(const qd_real &a, double b); QD_API bool operator<=(int a, const qd_real &b); QD_API bool operator<=(const qd_real &a, int b); QD_API bool operator>=(const qd_real &a, const qd_real &b); QD_API bool operator>=(const 
qd_real &a, const dd_real &b); QD_API bool operator>=(const dd_real &a, const qd_real &b); QD_API bool operator>=(double a, const qd_real &b); QD_API bool operator>=(const qd_real &a, double b); QD_API bool operator>=(int a, const qd_real &b); QD_API bool operator>=(const qd_real &a, int b); QD_API bool operator!=(const qd_real &a, const qd_real &b); QD_API bool operator!=(const qd_real &a, const dd_real &b); QD_API bool operator!=(const dd_real &a, const qd_real &b); QD_API bool operator!=(double a, const qd_real &b); QD_API bool operator!=(const qd_real &a, double b); QD_API bool operator!=(int a, const qd_real &b); QD_API bool operator!=(const qd_real &a, int b); QD_API qd_real fabs(const qd_real &a); QD_API qd_real abs(const qd_real &a); /* same as fabs */ QD_API qd_real ldexp(const qd_real &a, int n); QD_API qd_real nint(const qd_real &a); QD_API qd_real quick_nint(const qd_real &a); QD_API qd_real floor(const qd_real &a); QD_API qd_real ceil(const qd_real &a); QD_API qd_real aint(const qd_real &a); QD_API qd_real sin(const qd_real &a); QD_API qd_real cos(const qd_real &a); QD_API qd_real tan(const qd_real &a); QD_API void sincos(const qd_real &a, qd_real &s, qd_real &c); QD_API qd_real asin(const qd_real &a); QD_API qd_real acos(const qd_real &a); QD_API qd_real atan(const qd_real &a); QD_API qd_real atan2(const qd_real &y, const qd_real &x); QD_API qd_real exp(const qd_real &a); QD_API qd_real log(const qd_real &a); QD_API qd_real log10(const qd_real &a); QD_API qd_real sinh(const qd_real &a); QD_API qd_real cosh(const qd_real &a); QD_API qd_real tanh(const qd_real &a); QD_API void sincosh(const qd_real &a, qd_real &sin_qd, qd_real &cos_qd); QD_API qd_real asinh(const qd_real &a); QD_API qd_real acosh(const qd_real &a); QD_API qd_real atanh(const qd_real &a); QD_API qd_real qdrand(void); QD_API qd_real max(const qd_real &a, const qd_real &b); QD_API qd_real max(const qd_real &a, const qd_real &b, const qd_real &c); QD_API qd_real min(const qd_real &a, const 
qd_real &b); QD_API qd_real min(const qd_real &a, const qd_real &b, const qd_real &c); QD_API qd_real fmod(const qd_real &a, const qd_real &b); QD_API std::ostream &operator<<(std::ostream &s, const qd_real &a); QD_API std::istream &operator>>(std::istream &s, qd_real &a); #ifdef QD_INLINE #include #endif // added for complex class of LLVM Clang++ : 23 Jul.2015 Atsushi Suzuki inline qd_real copysign(const qd_real &x, const qd_real &y) { return (y.x[0] < 0.0) ? ((x.x[0] < 0.0) ? x : (-x)) : ((x.x[0] < 0.0) ? (-x) : x); } inline qd_real fmax(const qd_real &x, const qd_real &y) { return x.x[0] < y.x[0] ? y : x; } inline qd_real logb(const qd_real &y) { return qd_real(logb(y.x[0])); } #if 0 inline qd_real scalbn(const qd_real &x, int n) { return qd_real(scalb(x.x[0], n)); } #endif #endif /* _QD_QD_REAL_H */ FreeFem-sources-4.9/3rdparty/dissection/src/qd/util.cpp000664 000000 000000 00000000477 14037356732 023152 0ustar00rootroot000000 000000 #include #include "util.h" void append_expn(std::string &str, int expn) { int k; str += (expn < 0 ? 
'-' : '+'); expn = std::abs(expn); if (expn >= 100) { k = (expn / 100); str += '0' + k; expn -= 100*k; } k = (expn / 10); str += '0' + k; expn -= 10*k; str += '0' + expn; } FreeFem-sources-4.9/3rdparty/dissection/src/qd/util.h000664 000000 000000 00000000102 14037356732 022600 0ustar00rootroot000000 000000 #include void append_expn(std::string &str, int expn); FreeFem-sources-4.9/3rdparty/f2c/000775 000000 000000 00000000000 14037356732 016574 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/f2c/Makefile000664 000000 000000 00000005743 14037356732 020245 0ustar00rootroot000000 000000 DIRPKG=../pkg LISTINST= ../bin/f2c ../bin/fort77 ../include/f2c.h ../lib/libf2c.a DIRINSTALL=/usr/local F2C_TAR=$(DIRPKG)/f2c.tar all:compile $(F2C_TAR): mkdir -p $(DIRPKG) $(WGET) # wget --passive-ftp ftp://netlib.bell-labs.com/netlib/f2c.tar curl http://netlib.sandia.gov/cgi-bin/netlib/netlibfiles.tar?filename=netlib/f2c -o "$(DIRPKG)/f2c.tar" f2c:$(F2C_TAR) f2c.h-int tar xvf $(F2C_TAR) gunzip -rf f2c/* gunzip -rf f2c/*/* cd f2c;unzip -d libf2c libf2c.zip for i in `find . -name f2c.h`; do cp f2c.h-int $$i; done f2c/libf2c/makefile: Makefile egrep -v 'ld -r|mv [$$]' f2c/libf2c/makefile compile:f2c f2c/libf2c/makefile cd f2c/src; make -f makefile.u f2c cd f2c/libf2c; make compile-10.4:f2c Makefile-MacOs cd f2c/src; make -f makefile.u f2c cd f2c/libf2c; make -f ../../Makefile-MacOs OS=10.4 compile-10.5:f2c Makefile-MacOs cd f2c/src; make -f makefile.u f2c cd f2c/libf2c; make -f ../../Makefile-MacOs OS=10.5 compile-10.6:f2c Makefile-MacOs cd f2c/src; make -f makefile.u f2c cd f2c/libf2c; make -f ../../Makefile-MacOs OS=10.6 install: ../lib ../include ../bin ../bin/fort77 ../bin/mpifort77 -mkdir ../bin cp f2c/src/f2c ../bin/. 
cp f2c/src/f2c.h ../include cp f2c/libf2c/libf2c.a ../lib ../bin/fort77 tt.f rm a.out

# Per-release Mac OS X installs: build the matching library flavour, install
# system-wide through sudo, then install into the local tree.
install-10.4: compile-10.4
	sudo make install-sudo
	make install

install-10.5: compile-10.5
	sudo make install-sudo
	make install

install-10.6: compile-10.6
	sudo make install-sudo
	make install

# Copy f2c, f2c.h and libf2c.a under $(DIRINSTALL), then run the installed
# driver on tt.f and remove the resulting a.out.
install-sudo: $(DIRINSTALL)/bin/fort77 $(DIRINSTALL)/bin/mpifort77
	mkdir -p $(DIRINSTALL)/bin $(DIRINSTALL)/include $(DIRINSTALL)/lib
	cp f2c/src/f2c $(DIRINSTALL)/bin; cp f2c/src/f2c.h $(DIRINSTALL)/include; cp f2c/libf2c/libf2c.a $(DIRINSTALL)/lib
	$(DIRINSTALL)/bin/fort77 tt.f
	rm a.out

# The four rules below generate a driver script from the fort77.sed template
# by substituting the C compiler, include path, library path and f2c location.
# Each rule writes to its own target ($@): the mpifort77 rules previously wrote
# to .../bin/fort77, replacing the $(CC) driver with the mpicc one and never
# creating the mpifort77 file.
../bin/fort77: fort77.sed Makefile
	dd="`pwd`/.." ; \
	sed -e "s;@CC@;$(CC);"g \
	    -e "s;@INC@;-I$$dd/include;g" \
	    -e "s;@LLIBDIR@;-L$$dd/lib;g" \
	    -e "s;@f2c@;$$dd/bin/f2c;g" < fort77.sed >$@
	chmod a+x $@

../bin/mpifort77: fort77.sed Makefile
	dd="`pwd`/.." ; \
	sed -e "s;@CC@;mpicc;"g \
	    -e "s;@INC@;-I$$dd/include;g" \
	    -e "s;@LLIBDIR@;-L$$dd/lib;g" \
	    -e "s;@f2c@;$$dd/bin/f2c;g" < fort77.sed >$@
	chmod a+x $@

$(DIRINSTALL)/bin/fort77: fort77.sed Makefile
	dd="$(DIRINSTALL)/" ; \
	sed -e "s;@CC@;$(CC);"g \
	    -e "s;@INC@;-I$$dd/include;g" \
	    -e "s;@LLIBDIR@;-L$$dd/lib;g" \
	    -e "s;@f2c@;$$dd/bin/f2c;g" < fort77.sed >$@
	chmod a+x $@

$(DIRINSTALL)/bin/mpifort77: fort77.sed Makefile
	dd="$(DIRINSTALL)/" ; \
	sed -e "s;@CC@;mpicc;"g \
	    -e "s;@INC@;-I$$dd/include;g" \
	    -e "s;@LLIBDIR@;-L$$dd/lib;g" \
	    -e "s;@f2c@;$$dd/bin/f2c;g" < fort77.sed >$@
	chmod a+x $@

# Local staging directories.
../bin:
	mkdir ../bin
../lib:
	mkdir ../lib
../include:
	mkdir ../include

clean-local:
	-rm -rf f2c *~ a.out
clean:clean-local
veryclean: clean
	-rm $(LISTINST) $(F2C_TAR)
FreeFem-sources-4.9/3rdparty/f2c/Makefile-MacOs000664 000000 000000 00000016566 14037356732 021252 0ustar00rootroot000000 000000 # Unix makefile: see README.
# For C++, first "make hadd".
# If your compiler does not recognize ANSI C, add
#	-DKR_headers
# to the CFLAGS = line below.
# On Sun and other BSD systems that do not provide an ANSI sprintf, add # -DUSE_STRLEN # to the CFLAGS = line below. # On Linux systems, add # -DNON_UNIX_STDIO # to the CFLAGS = line below. For libf2c.so under Linux, also add # -fPIC # to the CFLAGS = line below. .SUFFIXES: .c .o CC = cc SHELL = /bin/sh ifeq (10.4,$(OS)) CC=gcc-4.0 LIST_ARCH=-arch ppc -arch i386 -arch ppc64 -arch x86_64 SDK=/Developer/SDKs/MacOSX10.4u.sdk endif ifeq (10.5,$(OS)) LIST_ARCH=-arch ppc -arch i386 -arch x86_64 -arch ppc64 SDK=/Developer/SDKs/MacOSX10.5.sdk endif ifeq (10.6,$(OS)) LIST_ARCH= -arch i386 -arch x86_64 SDK=/Developer/SDKs/MacOSX10.6.sdk endif CFLAGS = -O -isysroot $(SDK) $(LIST_ARCH) -mmacosx-version-min=$(OS) -DNO_My_ctype LDFLAGS=-Wl,-syslibroot,$(SDK) $(LIST_ARCH) AR=libtool -static -o LIBDIR=/usr/local/lib # compile, then strip unnecessary symbols .c.o: $(CC) -c -DSkip_f2c_Undefs $(CFLAGS) $*.c ## Under Solaris (and other systems that do not understand ld -x), ## omit -x in the ld line above. ## If your system does not have the ld command, comment out ## or remove both the ld and mv lines above. 
MISC = f77vers.o i77vers.o main.o s_rnge.o abort_.o exit_.o getarg_.o iargc_.o\ getenv_.o signal_.o s_stop.o s_paus.o system_.o cabs.o\ derf_.o derfc_.o erf_.o erfc_.o sig_die.o uninit.o POW = pow_ci.o pow_dd.o pow_di.o pow_hh.o pow_ii.o pow_ri.o pow_zi.o pow_zz.o CX = c_abs.o c_cos.o c_div.o c_exp.o c_log.o c_sin.o c_sqrt.o DCX = z_abs.o z_cos.o z_div.o z_exp.o z_log.o z_sin.o z_sqrt.o REAL = r_abs.o r_acos.o r_asin.o r_atan.o r_atn2.o r_cnjg.o r_cos.o\ r_cosh.o r_dim.o r_exp.o r_imag.o r_int.o\ r_lg10.o r_log.o r_mod.o r_nint.o r_sign.o\ r_sin.o r_sinh.o r_sqrt.o r_tan.o r_tanh.o DBL = d_abs.o d_acos.o d_asin.o d_atan.o d_atn2.o\ d_cnjg.o d_cos.o d_cosh.o d_dim.o d_exp.o\ d_imag.o d_int.o d_lg10.o d_log.o d_mod.o\ d_nint.o d_prod.o d_sign.o d_sin.o d_sinh.o\ d_sqrt.o d_tan.o d_tanh.o INT = i_abs.o i_dim.o i_dnnt.o i_indx.o i_len.o i_mod.o i_nint.o i_sign.o\ lbitbits.o lbitshft.o HALF = h_abs.o h_dim.o h_dnnt.o h_indx.o h_len.o h_mod.o h_nint.o h_sign.o CMP = l_ge.o l_gt.o l_le.o l_lt.o hl_ge.o hl_gt.o hl_le.o hl_lt.o EFL = ef1asc_.o ef1cmc_.o CHAR = f77_aloc.o s_cat.o s_cmp.o s_copy.o I77 = backspac.o close.o dfe.o dolio.o due.o endfile.o err.o\ fmt.o fmtlib.o ftell_.o iio.o ilnw.o inquire.o lread.o lwrite.o\ open.o rdfmt.o rewind.o rsfe.o rsli.o rsne.o sfe.o sue.o\ typesize.o uio.o util.o wref.o wrtfmt.o wsfe.o wsle.o wsne.o xwsne.o QINT = pow_qq.o qbitbits.o qbitshft.o ftell64_.o TIME = dtime_.o etime_.o # If you get an error compiling dtime_.c or etime_.c, try adding # -DUSE_CLOCK to the CFLAGS assignment above; if that does not work, # omit $(TIME) from OFILES = assignment below. # To get signed zeros in write statements on IEEE-arithmetic systems, # add -DSIGNED_ZEROS to the CFLAGS assignment below and add signbit.o # to the end of the OFILES = assignment below. # For INTEGER*8 support (which requires system-dependent adjustments to # f2c.h), add $(QINT) to the OFILES = assignment below... 
OFILES = $(MISC) $(POW) $(CX) $(DCX) $(REAL) $(DBL) $(INT) \ $(HALF) $(CMP) $(EFL) $(CHAR) $(I77) $(TIME) all: f2c.h signal1.h sysdep1.h libf2c.a libf2c.a: $(OFILES) libtool -static -o libf2c.a $(OFILES) ## Shared-library variant: the following rule works on Linux ## systems. Details are system-dependent. Under Linux, -fPIC ## must appear in the CFLAGS assignment when making libf2c.so. ## Under Solaris, use -Kpic in CFLAGS and use "ld -G" instead ## of "cc -shared". libf2c.so: $(OFILES) libtool -dynamic -o libf2c.dylib $(OFILES) # cc -shared -o libf2c.so $(OFILES) ### If your system lacks ranlib, you don't need it; see README. f77vers.o: f77vers.c $(CC) -c f77vers.c i77vers.o: i77vers.c $(CC) -c i77vers.c # To get an "f2c.h" for use with "f2c -C++", first "make hadd" hadd: f2c.h0 f2ch.add cat f2c.h0 f2ch.add >f2c.h # For use with "f2c" and "f2c -A": f2c.h: f2c.h0 cp f2c.h0 f2c.h # You may need to adjust signal1.h and sysdep1.h suitably for your system... signal1.h: signal1.h0 cp signal1.h0 signal1.h sysdep1.h: sysdep1.h0 cp sysdep1.h0 sysdep1.h # If your system lacks onexit() and you are not using an # ANSI C compiler, then you should uncomment the following # two lines (for compiling main.o): #main.o: main.c # $(CC) -c -DNO_ONEXIT -DSkip_f2c_Undefs main.c # On at least some Sun systems, it is more appropriate to # uncomment the following two lines: #main.o: main.c # $(CC) -c -Donexit=on_exit -DSkip_f2c_Undefs main.c install: libf2c.a cp -p libf2c.a $(LIBDIR) #-ranlib $(LIBDIR)/libf2c.a clean: rm -f libf2c.a *.o arith.h signal1.h sysdep1.h backspac.o: fio.h close.o: fio.h dfe.o: fio.h dfe.o: fmt.h due.o: fio.h endfile.o: fio.h rawio.h err.o: fio.h rawio.h fmt.o: fio.h fmt.o: fmt.h iio.o: fio.h iio.o: fmt.h ilnw.o: fio.h ilnw.o: lio.h inquire.o: fio.h lread.o: fio.h lread.o: fmt.h lread.o: lio.h lread.o: fp.h lwrite.o: fio.h lwrite.o: fmt.h lwrite.o: lio.h open.o: fio.h rawio.h rdfmt.o: fio.h rdfmt.o: fmt.h rdfmt.o: fp.h rewind.o: fio.h rsfe.o: fio.h rsfe.o: fmt.h 
rsli.o: fio.h rsli.o: lio.h rsne.o: fio.h rsne.o: lio.h sfe.o: fio.h signbit.o: arith.h sue.o: fio.h uio.o: fio.h uninit.o: arith.h util.o: fio.h wref.o: fio.h wref.o: fmt.h wref.o: fp.h wrtfmt.o: fio.h wrtfmt.o: fmt.h wsfe.o: fio.h wsfe.o: fmt.h wsle.o: fio.h wsle.o: fmt.h wsle.o: lio.h wsne.o: fio.h wsne.o: lio.h xwsne.o: fio.h xwsne.o: lio.h xwsne.o: fmt.h arith.h: arithchk.c $(CC) $(CFLAGS) -DNO_FPINIT arithchk.c -lm ||\ $(CC) -DNO_LONG_LONG $(CFLAGS) -DNO_FPINIT arithchk.c -lm ./a.out >arith.h rm -f a.out arithchk.o check: xsum Notice README abort_.c arithchk.c backspac.c c_abs.c c_cos.c \ c_div.c c_exp.c c_log.c c_sin.c c_sqrt.c cabs.c close.c comptry.bat \ d_abs.c d_acos.c d_asin.c d_atan.c d_atn2.c d_cnjg.c d_cos.c d_cosh.c \ d_dim.c d_exp.c d_imag.c d_int.c d_lg10.c d_log.c d_mod.c \ d_nint.c d_prod.c d_sign.c d_sin.c d_sinh.c d_sqrt.c d_tan.c \ d_tanh.c derf_.c derfc_.c dfe.c dolio.c dtime_.c due.c ef1asc_.c \ ef1cmc_.c endfile.c erf_.c erfc_.c err.c etime_.c exit_.c f2c.h0 \ f2ch.add f77_aloc.c f77vers.c fio.h fmt.c fmt.h fmtlib.c \ fp.h ftell_.c ftell64_.c \ getarg_.c getenv_.c h_abs.c h_dim.c h_dnnt.c h_indx.c h_len.c \ h_mod.c h_nint.c h_sign.c hl_ge.c hl_gt.c hl_le.c hl_lt.c \ i77vers.c i_abs.c i_dim.c i_dnnt.c i_indx.c i_len.c i_mod.c \ i_nint.c i_sign.c iargc_.c iio.c ilnw.c inquire.c l_ge.c l_gt.c \ l_le.c l_lt.c lbitbits.c lbitshft.c libf2c.lbc libf2c.sy lio.h \ lread.c lwrite.c main.c makefile.sy makefile.u makefile.vc \ makefile.wat math.hvc mkfile.plan9 open.c pow_ci.c pow_dd.c \ pow_di.c pow_hh.c pow_ii.c pow_qq.c pow_ri.c pow_zi.c pow_zz.c \ qbitbits.c qbitshft.c r_abs.c r_acos.c r_asin.c r_atan.c r_atn2.c \ r_cnjg.c r_cos.c r_cosh.c r_dim.c r_exp.c r_imag.c r_int.c r_lg10.c \ r_log.c r_mod.c r_nint.c r_sign.c r_sin.c r_sinh.c r_sqrt.c \ r_tan.c r_tanh.c rawio.h rdfmt.c rewind.c rsfe.c rsli.c rsne.c \ s_cat.c s_cmp.c s_copy.c s_paus.c s_rnge.c s_stop.c scomptry.bat sfe.c \ sig_die.c signal1.h0 signal_.c signbit.c sue.c sysdep1.h0 system_.c \ 
typesize.c \ uio.c uninit.c util.c wref.c wrtfmt.c wsfe.c wsle.c wsne.c xwsne.c \ z_abs.c z_cos.c z_div.c z_exp.c z_log.c z_sin.c z_sqrt.c >xsum1.out cmp xsum0.out xsum1.out && mv xsum1.out xsum.out || diff xsum[01].out FreeFem-sources-4.9/3rdparty/f2c/f2c.h-int000664 000000 000000 00000011151 14037356732 020206 0ustar00rootroot000000 000000 /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE /* set integer to int not to long */ typedef int integer; typedef unsigned int uinteger; typedef int logical; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef short int shortlogical; typedef char logical1; typedef char integer1; #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ typedef long long longint; /* system-dependent */ typedef unsigned long long ulongint; /* system-dependent */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef long int flag; typedef long int ftnlen; typedef long int ftnint; #endif /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } 
cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; #define VOID void union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; /*typedef long int Long;*/ /* No longer used; formerly in Namelist */ struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? 
(a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ VOID (*C_fp)(...); typedef /* Double Complex */ VOID (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ VOID (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ VOID (*C_fp)(); typedef /* Double Complex */ VOID (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ VOID (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef VOID C_f; /* complex function */ typedef VOID H_f; /* character function */ typedef VOID Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif FreeFem-sources-4.9/3rdparty/f2c/fort77.sed000664 000000 000000 00000017312 14037356732 020425 0ustar00rootroot000000 000000 #! 
/usr/bin/perl -w # fort77 (compiler driver) script for f2c # For use with gcc under Linux # This code is in the public domain; use at your own risk. # Parse options $version = "1.14a"; $nnflag = '-Nn802'; $tmpdir = $ENV{'TMPDIR'} || '/tmp'; $cpp = 0; $fast_math = 1; $debug = 0; $debugcmd = ""; push(@includes, "@INC@"); # Loop over all options; pull all options from @ARGV and put all # arguments into @argv. This is needed because, apparently, UNIX # compilers acceppt options anywhere on the command line. while ($_ = $ARGV[0]) { shift; if (!/^-/) { if (/\.P$/) { push(@pfiles, $_); } else { push(@argv, $_); } next; } # First, the f2c options. if (/^-[CUuaEhRrz]$/ || /^-I[24]$/ || /^-onetrip$/ || /^-![clPR]$/ || /^-ext$/ || /^-!bs$/ || /^-W[1-9][0-9]*$/ || /^-w8$/ || /^-w66$/ || /^-r8$/ || /^-N[^n][0-9]+$/) { push (@fopts, $_); } elsif (/^-Nn[0-9]+$/) { $nnflag = $_; } # Prototype flags for f2c elsif (/^-Ps?/) { $extract_prototypes ++; push (@fopts, $_); } # Does somebody want to run the preprocessor? elsif (/^-cpp$/) { $cpp++; } # These are common to both f2c and gcc elsif (/^-w$/) { push(@fopts, $_); push(@copts, $_); } # This is for the linker, too... elsif (/^-g$/) { push(@fopts, $_); push(@copts, $_); push(@lopts, $_); $debug ++; } # Special options for the different subprocesses: f for f2c step, # p for (separate) preprocessing, c for C compiler, l for linker. # a is also passed to the C compiler. elsif (/^-Wf,/) { push(@fopts, &parsewx($_)); } elsif (/-Wp,/) { push(@cppopts, &parsewx($_)); } elsif (/-W[ca],/) { push(@copts, &parsewx($_)); } elsif (/-Wl,/) { push(@lopts,&parsewx($_)); } # gcc only options # too many -f and -W options to list them all... # First, let's see wether somebody wants to adhere to the C standard # in Fortran. elsif (/^-fnofast-math$/) { $fast_math = 0; } elsif (/^-m64$/) { push(@copts, $_); push(@lopts, $_); } elsif (/^-m32$/) { push(@copts, $_); push(@lopts, $_); } # The '-f' option to f2c... 
elsif (/^-f$/) { push(@fopts, $_); } elsif (/^-[fWUAm]/ || /^-[Ex]$/ || /^-pipe$/ ) { push(@copts, $_); } # Includes and outputs... elsif (/^-I$/) { (@ARGV > 0) || die "$0: Missing argument to \"$_\"\n"; push(@includes, "-I".shift); } elsif (/^-I./) { push(@includes, $_); } elsif (/^-o$/) { (@ARGV > 0) || die "$0: Missing argument to \"$_\"\n"; $output = shift; } elsif (/^-o(.*)/) { $output = $1; } # Optimization elsif (/^-O/) { push(@copts, $_); push(@lopts, $_); $optimize ++; } # Options for both C compiler and linker elsif (/^-[Og]/ || /^-p$/ || /^-pg$/) { push(@copts, $_); push(@lopts, $_); } elsif (/^-[bV]$/ ) { (@ARGV > 0) || die "$0 : Missing argument to \"$_\"\n"; $arg = shift; push(@copts, $_, $arg); push(@lopts, $_, $arg); } elsif (/^-[bV]./ ) { push(@copts, $_); push(@lopts, $_); } # Linker only options elsif (/^-[lL]$/) { push(@lopts, $_); (@ARGV > 0) || die "$0: Missing argument to \"$_\"\n"; $_ = shift; push(@lopts, $_); } elsif (/^-[lL]./ || /^-nostartfiles$/ || /^-static$/ || /^-shared$/ || /^-symbolic$/) { push(@lopts, $_); } elsif (/^-[cS]$/) { $compile_only = $_; } elsif (/^-D/) { push(@cppopts, $_); } # Are we verbose? elsif (/^-v$/) { $verbose ++; } # Does somebody want to keep the C files around? 
elsif (/^-k$/) { $keep_c ++; } else { die "$0: Illegal option: $_\n"; } } push(@fopts,$nnflag); push(@copts,'-ffast-math') if $optimize && $fast_math; push(@cppopts,@includes); push(@fopts,@includes,"-I."); push(@fopts, @pfiles); if ($verbose) { print STDERR "$0: fort77 Version $version\n"; if ($verbose > 1) { push(@copts,"-v"); push(@lopts,"-v"); push(@cppopts,"-v"); } } @ARGV = @argv; if ($compile_only && $output && (@ARGV>1)) { warn "$0: Warning: $compile_only and -o with mutiple files, ignoring -o\n"; $output = ""; } die "$0: No input files specified\n" unless @ARGV; while ($_ = $ARGV[0]) { shift; $ffile = ""; $cfile = ""; $lfile = ""; $basefile = ""; if (/\.[fF]$/) { $ffile = $_; $basefile = $ffile; } elsif (/\.[cCisSm]$/ || /\.cc$/ || /\.cxx$/) { $cfile = $_; $basefile = $_; } else { push(@lfiles, $_); } if ($ffile) { &check_file_read($ffile); if ($keep_c) { $cfile = ($ffile =~ /([^\/]*\.).$/)[0] . "c"; } else { $seq ++; $cfile = "$tmpdir/fort77-$$-$seq.c"; } if ($debug) { $debugcmd = ' | /usr/bin/perl -p -e \'s/^(#line.*)""/$1"' . $ffile . '"/\' ' } if ($cpp || ($ffile =~ /\.F$/)) { # Backslashes at the end of comment lines confuse cpp... $pipe = "| /lib/cpp -traditional " . join(' ',@cppopts) . " | @f2c@ " . join(' ',@fopts) . $debugcmd . " > $cfile"; print STDERR "$0: Running \"$pipe\"" if $verbose; open(F2C,$pipe); open (FFILE, "$ffile") || die ("$0: Cannot open $ffile: $_\n"); while () { s/([cC*].*)\\$/$1/; print F2C $_; } close(FFILE); close(F2C); $retcode = $? / 256; } else { $retcode = &mysystem("@f2c@ ". join (" ",@fopts). " < ". $ffile . $debugcmd . " > $cfile")/256; } if ($retcode && !$keep_c) { print STDERR "$0: unlinking $cfile\n" if $verbose; unlink $cfile; die "$0: aborting compilation\n"; } # Separate the prototypes out from the C files. if ($extract_prototypes) { $pfile = ($basefile =~ /([^\/]*\.).$/)[0] . 
"P"; open(CFILE, "$cfile") || die ("$0: Cannot open $cfile\n"); # *wdh* while (($line = ) && while (defined($line = ) && ($line !~ '#ifdef P_R_O_T_O_T_Y_P_E_S\n')) { print $line; } if ($_) { open(PFILE, ">$pfile") || die ("$0: Cannot open $pfile\n"); # *wdh* while (($line = ) && ($line !~ '#endif')) { while (defined($line = ) && ($line !~ '#endif')) { print PFILE $line; } close(PFILE); } close(CFILE); } } # C compilation step. if ($cfile) { # *wdh* @command = ("cc",@cppopts,@copts); @command = ("@CC@",@cppopts,@copts); if ($compile_only && $output) { push(@command,'-o',$output,$compile_only); } elsif ((!$compile_only) || ($compile_only eq '-c')) { $lfile = ($basefile =~ /([^\/]*\.).$/)[0] . "o"; push(@command, '-c', '-o', $lfile); } elsif ($compile_only eq '-S') { $sfile = ($basefile =~ /([^\/]*\.).$/)[0] . "s"; push(@command, '-S', '-o', $sfile); } push(@command,$cfile); $retcode = &mysystem(@command)/256; if ($retcode) { die "$0: aborting compilation\n"; } if ($ffile && !$keep_c) { print STDERR "$0: unlinking $cfile\n" if $verbose; unlink $cfile; } if ($lfile) { push (@gener_lfiles, $lfile); push(@lfiles, $lfile); $lfile = ""; } } push (@lfiles, $lfile) if $lfile; } exit if $compile_only; push (@output, "-o", $output) if $output; $retcode = &mysystem("@CC@", @output, @lfiles, @lopts, "@LLIBDIR@","-lf2c", "-lm" ); if (@gener_lfiles) { print STDERR "$0: unlinking ",join(',',@gener_lfiles),"\n" if $verbose; unlink (@gener_lfiles); } exit $retcode; # Basically a system call, except that we want to be verbose if # necessary. 
# Run a command with system(), echoing it to STDERR first when $verbose is
# set.  The arguments are handed to system() unchanged and its return value
# is returned to the caller.
sub mysystem {
    local(@cmd) = @_;
    print STDERR "$0: Running \"" . join(' ', @cmd) . "\"\n" if $verbose;
    # system() treats a one-element list exactly like a single string, so
    # one call covers both the "command line" and the "argument list" form.
    return system(@cmd);
}

# Split a comma-separated string and return every field except the first.
sub parsewx {
    local(@fields) = split(/,/, $_[0]);
    shift(@fields);
    return @fields;
}

# Die with the system error message unless the named file can be opened
# for reading.
sub check_file_read {
    local($path) = @_;
    unless (open(TESTFILE, "$path")) {
        die "Cannot open $path: $!\n";
    }
    close(TESTFILE);
}
FreeFem-sources-4.9/3rdparty/f2c/tt.f000664 000000 000000 00000000063 14037356732 017371 0ustar00rootroot000000 000000 program tt print *,"dgfgdfg" end FreeFem-sources-4.9/3rdparty/ff-petsc/000775 000000 000000 00000000000 14037356732 017631 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/ff-petsc/Makefile000664 000000 000000 00000040167 14037356732 021301 0ustar00rootroot000000 000000 # ====================================================================== # Laboratoire Jacques-Louis Lions # Université Pierre et Marie Curie-Paris6, UMR 7598, Paris, F-75005 France # http://www.ljll.math.upmc.fr/lehyaric # ====================================================================== # This file is part of Freefem++ # # Freefem++ is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Freefem++ is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details.
# # You should have received a copy of the GNU Lesser General Public # License along with Freefem++; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ====================================================================== # headeralh default=0 freefem make multipleauthors start=19/03/10 upmc include Makefile.inc mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) dir3rdparty := $(abspath $(dir $(mkfile_path))/..) DIR_INSTALL_REAL:=$(FF_prefix_petsc)/r DIR_INSTALL_COMPLEX:=$(FF_prefix_petsc)/c W_SUDO:=$(SHELL mkdir -p "$(DESTDIR)$(FF_prefix_petsc)" && test -w "$(DESTDIR)$(FF_prefix_petsc) || echo sudo) ifeq ($(COMPILE_OPENBLAS),openblas) PETSC_OPENBLAS := --download-f2cblaslapack PETSC_OPENBLAS_C := --with-blaslapack-dir=$(DIR_INSTALL_REAL) else PETSC_OPENBLAS = --with-blaslapack-include='$(shell echo $(BLASINC) | sed "s/-I//g")' \ --with-blaslapack-lib='$(BLAS_LAPACK_LIBS)' PETSC_OPENBLAS_C = $(PETSC_OPENBLAS) endif PETSC_DOWNLOAD_C := --with-metis-dir=$(DIR_INSTALL_REAL) \ --with-ptscotch-dir=$(DIR_INSTALL_REAL) \ --with-mmg-dir=$(DIR_INSTALL_REAL) \ --with-parmmg-dir=$(DIR_INSTALL_REAL) \ --with-superlu-dir=$(DIR_INSTALL_REAL) \ --with-suitesparse-dir=$(DIR_INSTALL_REAL) \ --with-parmetis-dir=$(DIR_INSTALL_REAL) \ --with-tetgen-dir=$(DIR_INSTALL_REAL) ifeq ($(PETSC_PKG),yes) pkg_dir=$(abspath $(dir $(mkfile_path))/pkg) PETSC_DOWNLOAD := --download-scalapack=$(pkg_dir)/pkg-scalapack.tar.gz \ --download-metis=$(pkg_dir)/pkg-metis.tar.gz \ --download-ptscotch=$(pkg_dir)/scotch-v6.0.9.tar.gz \ --download-mumps=$(pkg_dir)/pkg-mumps.tar.gz \ --download-hypre=$(pkg_dir)/hypre.tar.gz \ --download-parmetis=$(pkg_dir)/pkg-parmetis.tar.gz \ --download-superlu=$(pkg_dir)/superlu.tar.gz \ --download-suitesparse=$(pkg_dir)/SuiteSparse-5.7.1.tar.gz \ --download-tetgen=$(pkg_dir)/tetgen1.5.1.tar.gz \ --download-slepc=$(pkg_dir)/slepc.tar.gz \ --download-hpddm=$(pkg_dir)/hpddm.tar.gz ifeq ($(FFCMAKE),no) 
PETSC_DOWNLOAD := $(PETSC_DOWNLOAD) --download-cmake=$(pkg_dir)/cmake.tar.gz endif PETSC_DOWNLOAD_C := $(PETSC_DOWNLOAD_C) --download-slepc=$(pkg_dir)/slepc.tar.gz ifeq ($(WIN32DLLTARGET),) PETSC_DOWNLOAD := $(PETSC_DOWNLOAD) --download-hpddm=$(pkg_dir)/hpddm.tar.gz PETSC_DOWNLOAD_C := $(PETSC_DOWNLOAD_C) --download-hpddm=$(pkg_dir)/hpddm.tar.gz endif else PETSC_DOWNLOAD := --download-metis --download-ptscotch \ --download-hypre --download-parmetis \ --download-mmg --download-parmmg \ --download-superlu --download-suitesparse --download-tetgen \ --download-slepc --download-hpddm ifeq ($(FFCMAKE),no) PETSC_DOWNLOAD := $(PETSC_DOWNLOAD) --download-cmake endif PETSC_DOWNLOAD_C := $(PETSC_DOWNLOAD_C) --download-slepc --download-hpddm endif ifeq ($(FF_generic_petsc), yes) FLAGS_MTUNE := -mtune=generic else FLAGS_MTUNE := -mtune=native endif COMMON_FLAGS := MAKEFLAGS='' --with-debugging=0 COPTFLAGS='-O3 $(FLAGS_MTUNE)' CXXOPTFLAGS='-O3 $(FLAGS_MTUNE)' FOPTFLAGS='-O3 $(FLAGS_MTUNE)' --with-cxx-dialect=C++11 --with-ssl=0 --with-x=0 --with-fortran-bindings=0 --with-cudac=0 ifeq ($(WIN32DLLTARGET),) ## Not on windows.... 
ifeq ($(MPICC)$(MPICXX)$(MPIFC),) PETSC_DOWNLOAD += --with-cc='$(CC)' --with-cxx='$(CXX)' --download-mpich PETSC_DOWNLOAD_C += --with-mpi-dir=$(DIR_INSTALL_REAL) ifneq ($(FC),) PETSC_DOWNLOAD += --with-fc='$(FC)' --download-scalapack --download-mumps --download-slepc-configure-arguments="--download-arpack=https://github.com/prj-/arpack-ng/archive/b64dccb.tar.gz" PETSC_DOWNLOAD_C += --with-scalapack-dir=$(DIR_INSTALL_REAL) --with-mumps-dir=$(DIR_INSTALL_REAL) # --download-slepc-configure-arguments="--with-arpack-dir=$(DIR_INSTALL_REAL)" else COMMON_FLAGS += --with-fc=0 endif else COMMON_FLAGS += --with-cc='$(MPICC)' --with-cxx='$(MPICXX)' ifneq ($(MPIFC),) COMMON_FLAGS += --with-fc='$(MPIFC)' PETSC_DOWNLOAD += --download-scalapack --download-mumps --download-slepc-configure-arguments="--download-arpack=https://github.com/prj-/arpack-ng/archive/b64dccb.tar.gz" PETSC_DOWNLOAD_C += --with-scalapack-dir=$(DIR_INSTALL_REAL) --with-mumps-dir=$(DIR_INSTALL_REAL) # --download-slepc-configure-arguments="--with-arpack-dir=$(DIR_INSTALL_REAL)" else COMMON_FLAGS += --with-fc=0 PETSC_DOWNLOAD += --download-superlu_dist PETSC_DOWNLOAD_C += --with-superlu_dist-dir=$(DIR_INSTALL_REAL) endif endif else ## On windows.... 
COMMON_FLAGS += --with-shared-libraries=0 \ --with-cc='$(CC)' --with-cxx='$(CXX)' \ CXXFLAGS='-fno-stack-protector' \ CFLAGS='-fno-stack-protector' \ FFLAGS='$(FCFLAGS)' \ --with-mpi-lib='$(MPI_LIB)' \ --with-mpi-include='$(MPI_INC_DIR)' \ --with-mpiexec='/C/Program\ Files/Microsoft\ MPI/Bin/mpiexec' PETSC_DOWNLOAD += '--download-metis-cmake-arguments=-G "MSYS Makefiles"' \ '--download-parmetis-cmake-arguments=-G "MSYS Makefiles"' \ '--download-mmg-cmake-arguments=-G "MSYS Makefiles"' \ '--download-parmmg-cmake-arguments=-DMPI_GUESS_LIBRARY_NAME=MSMPI -G "MSYS Makefiles"' \ '--download-superlu-cmake-arguments=-G "MSYS Makefiles"' \ '--download-hypre-configure-arguments=--build=x86_64-linux-gnu --host=x86_64-linux-gnu' ifneq ($(FC),) COMMON_FLAGS += --with-fc='$(FC)' PETSC_DOWNLOAD += --download-scalapack --download-mumps --download-slepc-configure-arguments="--download-arpack=https://github.com/prj-/arpack-ng/archive/6d11c37b2dc9110f3f6a434029353ae1c5112227.tar.gz" PETSC_DOWNLOAD_C += --with-scalapack-dir=$(DIR_INSTALL_REAL) --with-mumps-dir=$(DIR_INSTALL_REAL) # --download-slepc-configure-arguments="--with-arpack-dir=$(DIR_INSTALL_REAL)" else COMMON_FLAGS += --with-fc=0 endif endif FLAGS_CONF_PETSC_REAL = $(COMMON_FLAGS) --with-scalar-type=real $(PETSC_OPENBLAS) $(PETSC_DOWNLOAD) FLAGS_CONF_PETSC_COMPLEX = $(COMMON_FLAGS) --with-scalar-type=complex $(PETSC_OPENBLAS_C) $(PETSC_DOWNLOAD_C) all-local: @echo make .. @echo " # To build petsc and slepc do $ make petsc-slepc" @echo " # warning you have no write acces in $(DESTDIR)$(FF_prefix_petsc)" @echo " # need sudo if '$(W_SUDO)' == 'sudo' .." 
@echo " # run under root " @echo " # or create / add access to $(DESTDIR)$(FF_prefix_petsc) " @echo " # do for examples:" @echo " # PETSC_DOWNLOAD = $(PETSC_DOWNLOAD)" @echo " sudo mkdir $(DESTDIR)$(FF_prefix_petsc)" @echo " sudo chown -R $(LOGNAME) $(DESTDIR)$(FF_prefix_petsc)" @echo " make petsc-slepc " DIRPKG=../pkg VERSION=3.15.0 VERSION_SLEPC=3.014.2 PACKAGE=../pkg/petsc-$(VERSION).tar.gz SRCDIR=petsc-$(VERSION) MPI_DIR:=$(shell dirname $(MPI_INC_DIR)) W_MPICC:=$(strip $(shell which mpicc)) SUDO_WITH_ENV=$(if $(SUDO), $(SUDO) -E) ifeq ($(LAPACKLIBS), $(BLASLIBS)) BLAS_LAPACK_LIBS=$(LAPACKLIBS) else BLAS_LAPACK_LIBS=$(LAPACKLIBS) $(BLASLIBS) endif PETSC_DIR:=$(shell pwd)/$(SRCDIR) petsc-slepc: Makefile.inc dir-install WHERE-all recompile: recompile-real recompile-complex recompile-real: -rm */tag-make-real -rm */tag-install-real make WHERE recompile-complex: -rm */tag-make-complex -rm */tag-install-complex @if [ -n "$(MPI_INCLUDE)" ]; then $(MAKE) -f Makefile.complex WHERE-complex ;\ else echo " -- No PETSc, no MPI"; fi dir-install: Makefile.inc -$(SUDO) mkdir -p $(DESTDIR)$(DIR_INSTALL_REAL) -$(SUDO) chown ${USER} $(DESTDIR)$(DIR_INSTALL_REAL) -$(SUDO) mkdir -p $(DESTDIR)$(DIR_INSTALL_COMPLEX) -$(SUDO) chown ${USER} $(DESTDIR)$(DIR_INSTALL_COMPLEX) @if test -w "$(DESTDIR)$(DIR_INSTALL_REAL)" -a -w "$(DESTDIR)$(DIR_INSTALL_COMPLEX)" ; then echo "no need of sudo"; echo > do-sudo; \ else \ echo no write access in $(DESTDIR)$(DIR_INSTALL_REAL) and $(DESTDIR)$(DIR_INSTALL_COMPLEX); \ echo " do: make petsc-slepc SUDO=sudo "; \ echo " or add write access to 2 directory "; \ echo sudo >do-sudo; \ fi test -e "$(DESTDIR)$(DIR_INSTALL_REAL)" -a -e "$(DESTDIR)$(DIR_INSTALL_COMPLEX)" PETSc-real: @if [ -n "$(MPI_INCLUDE)" ]; then $(MAKE) WHERE ;\ else echo " -- No PETSc, no MPI"; fi PETSc-complex: @if [ -n "$(MPI_INCLUDE)" ]; then $(MAKE) -f Makefile.complex WHERE-complex ;\ else echo " -- No PETSc, no MPI, no PETSc real"; fi WHERE:../lib/WHERE.PETSc ../lib/WHERE.SLEPc 
../lib/WHERE.PETSc:$(SRCDIR)/tag-install-real -mkdir ../lib echo 'petsc LD -Wl,-rpath,"$(DIR_INSTALL_REAL)/lib" -L"$(DIR_INSTALL_REAL)/lib" -lpetsc' > $@ echo 'petsc INCLUDE -I"$(DIR_INSTALL_REAL)/include"' >> $@ $(SRCDIR)/tag-make-real:$(SRCDIR)/tag-conf-real cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fr all touch $@ $(SRCDIR)/tag-install-real :$(SRCDIR)/tag-make-real cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fr install -test -x "`type -p otool`" && make changer cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fr check test -e $(DIR_INSTALL_REAL)/include/petsc.h test -e $(DIR_INSTALL_REAL)/lib/petsc/conf/petscvariables touch $@ Makefile.inc:../../config.status Makefile Makefile-PETSc.inc ../../config.status --file="Makefile.inc:Makefile-PETSc.inc" $(SRCDIR)/tag-conf-real:$(SRCDIR)/tag-tar cd $(SRCDIR) && ./configure --prefix=$(DIR_INSTALL_REAL) \ $(FLAGS_CONF_PETSC_REAL) PETSC_ARCH=fr test -f $(SRCDIR)/fr/lib/petsc/conf/petscvariables touch $@ Make-petsc-download.mk:$(SRCDIR)/tag-install-real test -e $(DIR_INSTALL_REAL)/lib/petsc/conf/petscvariables egrep 'PETSC_LIB_BASIC|SCALAPACK_|METIS_|MUMPS_|HPDDM_|TETGEN_|SUPERLU_|MMG_|PTSCOTCH_|SUITESPARSE_' $(DIR_INSTALL_REAL)/lib/petsc/conf/petscvariables | sed 's/-I/ /g'|sort >$@ ifdef COMPLEX_CASE # version COMPLEX..... 
$(SRCDIR)/tag-conf-complex:$(SRCDIR)/tag-tar Make-petsc-download.mk cd $(SRCDIR) && ./configure --prefix=$(DIR_INSTALL_COMPLEX) \ $(FLAGS_CONF_PETSC_COMPLEX) PETSC_ARCH=fc test -f $(SRCDIR)/fc/lib/petsc/conf/petscvariables touch $@ $(SRCDIR)/tag-make-complex:$(SRCDIR)/tag-conf-complex Make-petsc-download.mk cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fc all touch $@ $(SRCDIR)/tag-install-complex :$(SRCDIR)/tag-make-complex cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fc install -test -x "`type -p otool`" && make changec test -e $(DIR_INSTALL_COMPLEX)/include/petsc.h touch $@ WHERE-complex:../lib/WHERE.PETSc-complex ../lib/WHERE.SLEPc-complex ../lib/WHERE.PETSc-complex:$(SRCDIR)/tag-install-complex test -e $(DIR_INSTALL_COMPLEX)/include/petsc.h -mkdir ../lib echo 'petsccomplex LD -Wl,-rpath,"$(DIR_INSTALL_COMPLEX)/lib" -L"$(DIR_INSTALL_COMPLEX)/lib" -lpetsc' > $@ echo 'petsccomplex INCLUDE -I"$(DIR_INSTALL_COMPLEX)/include"' >> $@ echo done $@ ../lib/WHERE.SLEPc-complex: test -e $(DIR_INSTALL_COMPLEX)/include/slepc.h -mkdir ../lib echo 'slepccomplex LD -Wl,-rpath,"$(DIR_INSTALL_COMPLEX)/lib" -L"$(DIR_INSTALL_COMPLEX)/lib" -lslepc' > $@ echo 'slepccomplex INCLUDE -I"$(DIR_INSTALL_COMPLEX)/include"' >> $@ echo done $@ WHERE-all:Makefile WHERE WHERE-complex echo >$@ test -z '$(SCALAPACK_LIB)' || echo scalapack LD $(SCALAPACK_LIB) >>$@ test -z '$(SCALAPACK_INCLUDE)' || echo scalapack INCLUDE -I$(SCALAPACK_INCLUDE) >>$@ test -z '$(METIS_LIB)' || echo metis LD $(METIS_LIB) >>$@ test -z '$(METIS_INCLUDE)' || echo metis INCLUDE -I$(METIS_INCLUDE) >>$@ test -z '$(MUMPS_LIB)' || echo mumps LD $(MUMPS_LIB) >>$@ test -z '$(MUMPS_INCLUDE)' || echo mumps INCLUDE -I$(MUMPS_INCLUDE) >>$@ test -z '$(SUPERLU_LIB)' || echo superlu LD $(SUPERLU_LIB) >>$@ test -z '$(SUPERLU_INCLUDE)' || echo superlu INCLUDE -I$(SUPERLU_INCLUDE) >>$@ test -z '$(MMG_LIB)' || echo mmg LD $(MMG_LIB) >>$@ test -z '$(MMG_INCLUDE)' || echo mmg INCLUDE -I$(MMG_INCLUDE) >>$@ test -z 
'$(PARMMG_LIB)' || echo parmmg LD $(PARMMG_LIB) >>$@ test -z '$(PARMMG_INCLUDE)' || echo parmmg INCLUDE -I$(PARMMG_INCLUDE) >>$@ test -z '$(PTSCOTCH_LIB)' || echo ptscotch LD $(PTSCOTCH_LIB) >>$@ test -z '$(PTSCOTCH_INCLUDE)' || echo ptscotch INCLUDE -I$(PTSCOTCH_INCLUDE) >>$@ test -z '$(TETGEN_LIB)' || echo tetgen LD $(TETGEN_LIB) >>$@ test -z '$(TETGEN_INCLUDE)' || echo tetgen INCLUDE -I$(TETGEN_INCLUDE) >>$@ test -z '$(PARMETIS_LIB)' || echo parmetis LD $(PARMETIS_LIB) >>$@ test -z '$(PARMETIS_INCLUDE)' || echo parmetis INCLUDE -I$(PARMETIS_INCLUDE) >>$@ test -z '$(HPDDM_LIB)' || echo hpddm LD $(HPDDM_LIB) >>$@ test -z '$(HPDDM_INCLUDE)' || echo hpddm INCLUDE -I$(HPDDM_INCLUDE) >>$@ echo 'petsc LD -Wl,-rpath,"$(DIR_INSTALL_REAL)/lib" -L"$(DIR_INSTALL_REAL)/lib" -lpetsc' >>$@ echo 'petsc INCLUDE -I"$(DIR_INSTALL_REAL)/include"' >>$@ echo 'petsccomplex LD -Wl,-rpath,"$(DIR_INSTALL_COMPLEX)/lib" -L"$(DIR_INSTALL_COMPLEX)/lib" -lpetsc' >>$@ echo 'petsccomplex INCLUDE -I"$(DIR_INSTALL_COMPLEX)/include"' >> $@ echo 'slepc LD -Wl,-rpath,"$(DIR_INSTALL_REAL)/lib" -L"$(DIR_INSTALL_REAL)/lib" -lslepc' >>$@ echo 'slepc INCLUDE -I"$(DIR_INSTALL_REAL)/include"' >>$@ echo 'slepccomplex LD -Wl,-rpath,"$(DIR_INSTALL_COMPLEX)/lib" -L"$(DIR_INSTALL_COMPLEX)/lib" -lslepc' >>$@ echo 'slepccomplex INCLUDE -I"$(DIR_INSTALL_COMPLEX)/include"' >>$@ # else # WHERE-complex: @if [ -n "$(MPI_INCLUDE)" ]; then $(MAKE) -f Makefile.complex WHERE-complex ;\ else echo " -- No PETSc, no MPI"; fi WHERE-all:Makefile Make-petsc-download.mk $(MAKE) -f Makefile.complex $@ endif $(SRCDIR)/tag-tar:$(PACKAGE) -tar xzf $(PACKAGE) ifeq ($(WIN32DLLTARGET),) cd petsc-$(VERSION) && patch -p1 < ../petsc-metis.patch && cd - endif cd petsc-$(VERSION) && patch -p1 < ../petsc-suitesparse.patch && cd - touch $@ $(PACKAGE): ../getall -o PETSc -a changec: diri=$(DIR_INSTALL_COMPLEX)/lib ; \ libpp=$$diri/libslepc.$(VERSION_SLEPC).dylib ; \ opblibpp=`otool -L $$libpp | awk '/3rdparty/ {print $$1}'` ; \ 
npblibpp=$$diri/`basename $$opblibpp` ; \ echo -- $$opblibpp ; \ echo -- $$npblibpp ; \ test -n "$$opblibpp" && install_name_tool -change $$opblibpp $$npblibpp $$libpp ; \ otool -L $$libpp | awk '/libpetsc/ {print "**",$$1}' changer: diri=$(DIR_INSTALL_REAL)/lib ; \ libpp=$$diri/libslepc.$(VERSION_SLEPC).dylib ; \ opblibpp=`otool -L $$libpp | awk '/3rdparty/ {print $$1}'` ; \ npblibpp=$$diri/`basename $$opblibpp` ; \ echo -- $$opblibpp ; \ echo -- $$npblibpp ; \ test -n "$$opblibpp" && install_name_tool -change $$opblibpp $$npblibpp $$libpp ; \ otool -L $$libpp | awk '/libpetsc/ {print "**",$$1}' ../lib/WHERE.SLEPc: -mkdir ../lib test -e $(DIR_INSTALL_REAL)/include/slepc.h echo 'slepc LD -Wl,-rpath,"$(DIR_INSTALL_REAL)/lib" -L"$(DIR_INSTALL_REAL)/lib" -lslepc' > $@ echo 'slepc INCLUDE -I"$(DIR_INSTALL_REAL)/include"' >> $@ clean:clean-local clean-local: -cd $(SRCDIR) && $(MAKE) clean -C $(SRCDIR) -rm Makefile.inc FAIRE* ../lib/WHERE.PETSc* ../lib/WHERE.SLEPc* -rm -rf ../include/*PETSc* -rm -rf ../lib/lib*PETSc* -rm -rf $(SRCDIR) -rm -rf $(FF_prefix_petsc) -rm WHERE-all config.log *.done -if test -d $(FF_prefix_petsc) ; then echo " try of remove of $(FF_prefix_petsc) under sudo .."; sudo rm -rf $(FF_prefix_petsc) ; fi -rm do-sudo echo: @echo SUDO: $(SUDO) @echo MTUNE: $(FLAGS_MTUNE) @echo "sudo using user env: $(SUDO_WITH_ENV)" @echo MPI_DIR: $(MPI_DIR) @echo " dir install real :" $(DIR_INSTALL_REAL) @echo " dir install complex :" $(DIR_INSTALL_COMPLEX) @echo " do-sudo auto ???:" $(SHELL cat do-sudo) @echo " BLAS_LAPACK_LIBS: $(BLAS_LAPACK_LIBS)" @echo " # need sudo if '$(W_SUDO)' == 'sudo' .." 
@echo " dir3rdparty: $(dir3rdparty)" -@otool -L $(DIR_INSTALL_COMPLEX)/lib/libslepc.$(VERSION_SLEPC).dylib | awk '/3rdparty/ {print $$1}' -@otool -L $(DIR_INSTALL_REAL)/lib/libslepc.$(VERSION_SLEPC).dylib | awk '/3rdparty/ {print $$1}' install-destdir: cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fr install DESTDIR=$(DESTDIR) cd $(SRCDIR) && $(MAKE) PETSC_DIR=$(PETSC_DIR) PETSC_ARCH=fc install DESTDIR=$(DESTDIR) .NOTPARALLEL: # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/ff-petsc/Makefile-PETSc.inc000664 000000 000000 00000001632 14037356732 022757 0ustar00rootroot000000 000000 abs_top_builddir=@abs_top_builddir@ FF_prefix_petsc=@FF_prefix_petsc@ FF_generic_petsc=@FF_generic_petsc@ CC=@CC@ CXX=@CXX@ # FC : Fortran 90 compiler FC=@FC@ BLASINC=@BLASINC@ FFCMAKE=@ff_cmake@ BLASLIBS=@BLASLIBS@ LAPACKLIBS=@LAPACKLIBS@ COMPILE_OPENBLAS=@COMPILE_OPENBLAS@ # Use: # -DAdd_ if your Fortran compiler adds an underscore at the end # of symbols, # -DAdd__ if your Fortran compiler adds 2 underscores, # # -DUPPER if your Fortran compiler uses uppercase symbols # # leave empty if your Fortran compiler does not change the symbols. 
# CFLAGS=@CFLAGS@ FCFLAGS=@FCFLAGS@ CFLAGSF77=@CFLAGSF77@ MPI_INCLUDE=@MPI_INCLUDE@ MPI_INC_DIR=@MPI_INC_DIR@ MPI_LIB=@MPI_LIB@ MPI_LIBC=@MPI_LIBC@ MPI_LIBFC=@MPI_LIBFC@ MPI_LIB_DIRS=@MPI_LIB_DIRS@ MPICC=@MPICC@ MPICXX=@MPICXX@ MPIFC=@MPIFC@ MPIPROG=@MPIPROG@ MPIRUN=@MPIRUN@ MPISCRIPT=@MPISCRIPT@ prefix=@prefix@ WIN32DLLTARGET=@WIN32DLLTARGET@ FreeFem-sources-4.9/3rdparty/ff-petsc/Makefile.complex000664 000000 000000 00000000101 14037356732 022727 0ustar00rootroot000000 000000 COMPLEX_CASE=true include Make-petsc-download.mk include MakefileFreeFem-sources-4.9/3rdparty/ff-petsc/petsc-metis.patch000664 000000 000000 00000001650 14037356732 023111 0ustar00rootroot000000 000000 diff --git a/config/BuildSystem/config/packages/metis.py b/config/BuildSystem/config/packages/metis.py index 136526da05..2d1c4d3271 100644 --- a/config/BuildSystem/config/packages/metis.py +++ b/config/BuildSystem/config/packages/metis.py @@ -44,6 +44,12 @@ class Configure(config.package.CMakePackage): if self.framework.argDB['download-metis-use-doubleprecision']: args.append('-DMETIS_USE_DOUBLEPRECISION=1') args.append('-DMATH_LIB="'+self.libraries.toStringNoDupes(self.mathlib.lib)+'"') + mpicc = self.framework.getMakeMacro('MPICC_SHOW') + mpicxx = self.framework.getMakeMacro('MPICXX_SHOW') + if mpicc and mpicxx: + args = self.rmArgsStartsWith(args,['-DCMAKE_CXX_COMPILER','-DCMAKE_C_COMPILER']) + args.append('-DCMAKE_C_COMPILER="'+mpicc.split(None, 1)[0]+'"') + args.append('-DCMAKE_CXX_COMPILER="'+mpicxx.split(None, 1)[0]+'"') return args def configureLibrary(self): FreeFem-sources-4.9/3rdparty/ff-petsc/petsc-suitesparse.patch000664 000000 000000 00000003615 14037356732 024342 0ustar00rootroot000000 000000 diff --git b/config/BuildSystem/config/packages/SuiteSparse.py a/config/BuildSystem/config/packages/SuiteSparse.py index e9c7c7d3b2..3de634af59 100644 --- b/config/BuildSystem/config/packages/SuiteSparse.py +++ a/config/BuildSystem/config/packages/SuiteSparse.py @@ -4,7 +4,7 @@ class 
Configure(config.package.Package): def __init__(self, framework): config.package.Package.__init__(self,framework) self.minversion = '5.6.0' - self.version = '5.8.1' + self.version = '5.7.1' self.versioninclude = 'SuiteSparse_config.h' self.versionname = 'SUITESPARSE_MAIN_VERSION.SUITESPARSE_SUB_VERSION.SUITESPARSE_SUBSUB_VERSION' self.gitcommit = 'v'+self.version @@ -59,7 +59,7 @@ class Configure(config.package.Package): ldflags=self.getDynamicLinkerFlags() else: ldflags='' - ldflags += ' '+self.setCompilers.LDFLAGS + ldflags+=self.setCompilers.LDFLAGS # SuiteSparse 5.6.0 makefile has a bug in how it treats LDFLAGS (not using the override directive) ldflags+=" -L\$(INSTALL_LIB)" self.popLanguage() @@ -81,9 +81,6 @@ class Configure(config.package.Package): args.append('INSTALL_DOC='+self.installDir+'/share/doc/suitesparse') args.append('BLAS="'+self.libraries.toString(self.blasLapack.dlib)+'"') args.append('LAPACK="'+self.libraries.toString(self.blasLapack.dlib)+'"') - # fix for bug in SuiteSparse - if self.setCompilers.isDarwin(self.log): - args.append('LDLIBS=""') if self.blasLapack.mangling == 'underscore': flg = '' elif self.blasLapack.mangling == 'caps': @@ -129,7 +126,6 @@ class Configure(config.package.Package): args.append('CF="'+cflags+'"') args.append('CHOLMOD_CONFIG="'+flg+'"') args.append('CUDA=no') - args.append('CUDA_PATH=') args = ' '.join(args) conffile = os.path.join(self.packageDir,self.package+'.petscconf') FreeFem-sources-4.9/3rdparty/fftw/000775 000000 000000 00000000000 14037356732 017070 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/fftw/Makefile.am000664 000000 000000 00000002255 14037356732 021130 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- all-local: $(DOWNLOAD_FFTW) # Downloading and compiling FFTW # ------------------------------ # FFTW information FFTW_VERSION=3.3.8 SRCDIR=fftw-$(FFTW_VERSION) PACKAGE=fftw-$(FFTW_VERSION).tar.gz 
SERVER=http://www.fftw.org THIS=fftw3 fftw: $(SRCDIR)/FAIT WHERE $(MAKE) WHERE $(SRCDIR)/FAIT:$(SRCDIR)/FAIT-1 cd $(SRCDIR) && ./configure --disable-dependency-tracking --disable-fortran --prefix=`cd ../..; pwd` CXX="$(CXX)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP='gcc -E' CXXFLAGS="$(CXXFLAGS)" cd $(SRCDIR) && make cd $(SRCDIR) && make install touch $(SRCDIR)/FAIT $(SRCDIR)/FAIT-1:../pkg/$(PACKAGE) tar xzf ../pkg/$(PACKAGE) touch $(SRCDIR)/FAIT-1 ../pkg/$(PACKAGE): -mkdir ../pkg cd ../pkg;@WGET@ -N $(SERVER)/$(PACKAGE) WHERE:$(SRCDIR)/FAIT -if [ $(SRCDIR)/FAIT ] ; then \ echo $(THIS) LD -L@DIR@/lib -l$(THIS) >../lib/WHERE.$(THIS) ;\ echo $(THIS) INCLUDE -I@DIR@/include >> ../lib/WHERE.$(THIS) ;\ fi clean-local: -rm -rf fftw-* -rm ../include/fftw3.f ../include/fftw3.f03 ../include/fftw3.h ../include/fftw3l.f03 ../include/fftw3q.f03 \ ../lib/libfftw3.a ../lib/libfftw3.la FreeFem-sources-4.9/3rdparty/getall000775 000000 000000 00000024713 14037356732 017327 0ustar00rootroot000000 000000 #!/usr/bin/perl ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . 
# ############################################################################ # SUMMARY : Download third-party packages independently of FF configuration # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : Antoine Le Hyaric # E-MAIL : http://www.ljll.math.upmc.fr/lehyaric use strict; use Getopt::Std; use Digest::MD5 qw(md5_hex); # [[http://perldoc.perl.org/Digest/MD5.html]] # This download script needs to be able to work on platforms that do not have threads (eg Mingw). my $can_use_threads=eval 'use threads;use threads::shared;1'; my %opts; getopts('afho:',\%opts); if($opts{h}){ print <create(\&downloadone,$_)} foreach(@threads){$_->join()} } else{ foreach(@downloads){downloadone($_)} } if($errors ne ''){ print "\n$errors"; exit 1; } sub download{ my($title,$url,$lic,$pkg,$md5,$opts)=@_; # skip packages that have not been requested explicitely return if($only && !defined $packs{$title}); # skip packages that are already present if(-f "pkg/$pkg" && !$opts{f}){ my $md5check=md5_hex(`cat pkg/$pkg`); if( ( $md5 eq "none") || ($md5check eq $md5)){ print "$title $pkg done\n"; return; } else { print "$title $pkg md5 change => reget \n"; } } # we do not store the answers to these questions. To repeat the same downloads without manual input, options "-a" and # "-o names" are provided. if(!$opts{a}){ print "$title: please check the licence at $lic. Do you want to download $url? 
(yN)\n"; my $ans=uc ; chomp $ans; return if $ans ne 'Y'; } # uses [[file:../bin/build/download]] push @downloads,"$url,$pkg,$md5,$opts"; } sub downloadone{ ## correct rescue server for done load my($url,$pkg,$md5,$opts)=split(/,/,$_[0]);# warning just $pkg without pkg/$pkg my $cmd="../bin/build/download $url pkg/$pkg $opts"; print "$cmd\n"; system $cmd; if($?){ print "Download failed from $url of $pkg\n"; my $url="http://pkgs.freefem.org/$pkg"; my $cmd="../bin/build/download $url pkg/$pkg $opts"; print "Try other site: $url\n"; system $cmd; if($?){ print "Download 2 times failed from $url of $pkg\n"; my $url="http://104.46.50.187/pkg/$pkg"; my $cmd="../bin/build/download $url pkg/$pkg $opts"; print "Try (2 times) other site: $url\n"; system $cmd; if ($?){ $errors.="ERROR: $cmd FAILED\n" } } } # check if resulting package contents are valid my $md5check=md5_hex(`cat pkg/$pkg`); if( ( $md5 ne "none") && ($md5check ne $md5)){ print "Download failed (MD5 check) from $url of $pkg\n"; my $url="http://pkgs.freefem.org/$pkg"; my $cmd="../bin/build/download $url pkg/$pkg $opts"; print "Try other site: $url\n"; system $cmd; if($?){ $errors.="ERROR 2: $cmd FAILED\n" } else { $md5check=md5_hex(`cat pkg/$pkg`);} if( ($md5check ne $md5)) { $errors.="ERROR2: INVALID MD5 for $pkg $md5check\n"} } } FreeFem-sources-4.9/3rdparty/gmm/000775 000000 000000 00000000000 14037356732 016702 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/gmm/Makefile000664 000000 000000 00000002607 14037356732 020347 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- include cxxflags # Downloading and compiling FFTW # ------------------------------ PKG=gmm # $(PKG) information SRCDIR=$(PKG)-$(PKG_VERSION) PACKAGE=$(PKG)-$(PKG_VERSION).tar.gz SERVER=http://download.gna.org/getfem/stable PKGDIR=../pkg PKG_VERSION=4.2 INSTALL=../.. 
$(PKG):FAIT FAITwin32-dll-target: echo "On Pure Win32 (to hard to compile) " FAIT: $(MAKE) install touch FAIT $(MAKE) WHERE FAIRE: $(SRCDIR) cd $(SRCDIR) && ./configure --disable-dependency-tracking --prefix=`pwd`/$(INSTALL) CXX="$(CXX)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP='gcc -E' CXXFLAGS="$(CXXFLAGS)" cd $(SRCDIR) && make touch FAIRE install: FAIRE cd $(SRCDIR) && make install $(SRCDIR): $(PKGDIR)/$(PACKAGE) gunzip -c $^ | tar xf - $(PKGDIR)/$(PACKAGE): ../getall -o Gmm++ -a clean: clean-local clean-local: -rm -rf $(PKG)-* FAIT FAIRE $(SRCDIR) -rm ../lib/WHERE.gmm cxxflags: ../Makefile grep 'CXX *=' ../Makefile >cxxflags grep 'CC *=' ../Makefile >>cxxflags grep 'CXXFLAGS *=' ../Makefile >>cxxflags grep 'CFLAGS *=' ../Makefile >>cxxflags grep 'WGET *=' ../Makefile >>cxxflags grep 'WIN32DLLTARGET *=' ../Makefile >>cxxflags WHERE: -@if [ -f FAIT ] ; then \ echo build ../lib/WHERE.gmm ;\ echo gmm INCLUDE -I@DIR@/include > ../lib/WHERE.gmm ;\ echo gmm LD -L@DIR@/lib >> ../lib/WHERE.gmm ;\ fi FreeFem-sources-4.9/3rdparty/headers-sparsesolver.inc000664 000000 000000 00000011351 14037356732 022757 0ustar00rootroot000000 000000 # BLAS #FFBLASINCLUDE = -I/Users/morice/librairie/PATCHVECLIB/ #FFBLASDIRLIBS = #FFBLASLIB = -L/Users/morice/librairie/PATCHVECLIB/ -lwrapperdotblas -framework veclib #FFBLASLIB2 = -lblas # LAPACK #FFLAPACKDIRLIBS = #FFLAPACKLIB = -framework veclib #FFLAPACKLIB2 = -llapack # MPI #FFMPIDIR = /Users/morice/librairie/openmpi-gcc-gfortran-4.4/ #FFMPIINCLUDE = -I/Users/morice/librairie/openmpi-gcc-gfortran-4.4/include/ #FFMPILIB = -L/Users/morice/librairie/openmpi-gcc-gfortran-4.4/lib/ -lmpi -lmpi_cxx -lopen-pal -lopen-rte -lotf -lvt #FFMPIDIRLIBS = /Users/morice/librairie/openmpi-gcc-gfortran-4.4/lib/ #FFMPILIB2 = mpi mpi_cxx open-pal open-rte otf vt # INT #pastix FFVERSIONINT = _long FFCTYPESINT = -DFORCE_LONG -DLONG #scotch FFINTSCOTCH = -DLONG #hips :: -DINTSIZE32, -DINTSIZE64 ou default FFINTHIPS = # particularite # FFLIBOTHERSMUMPS = -framework 
Carbon -framework AppKit #blasdef :: FFBLASDEF = -DUSE_VENDOR_BLAS #### parameter for blacs #### # ------------------------------------------------------------------------ # Allows the user to vary the topologies that the BLACS default topologies # (TOP = ' ') correspond to. If you wish to use a particular topology # (as opposed to letting the BLACS make the choice), uncomment the # following macros, and replace the character in single quotes with the # topology of your choice. # ------------------------------------------------------------------------ # DEFBSTOP = -DDefBSTop="'1'" # DEFCOMBTOP = -DDefCombTop="'1'" FFDEFBSTOP = FFDEFCOMBTOP = # ------------------------------------------------------------------- # If your MPI_Send is locally-blocking, substitute the following line # for the empty macro definition below. # SENDIS = -DSndIsLocBlk # ------------------------------------------------------------------- FFSENDIS = # -------------------------------------------------------------------- # If your MPI handles packing of non-contiguous messages by copying to # another buffer or sending extra bytes, better performance may be # obtained by replacing the empty macro definition below with the # macro definition on the following line. # BUFF = -DNoMpiBuff # -------------------------------------------------------------------- FFBUFF = # ----------------------------------------------------------------------- # If you know something about your system, you may make it easier for the # BLACS to translate between C and fortran communicators. If the empty # macro definition is left alone, this translation will cause the C # BLACS to globally block for MPI_COMM_WORLD on calls to BLACS_GRIDINIT # and BLACS_GRIDMAP. If you choose one of the options for translating # the context, neither the C nor the fortran calls will globally block.
# If you are using MPICH, or a derivative system, you can replace the # empty macro definition below with the following (note that if you let # MPICH do the translation between C and fortran, you must also indicate # here if your system has pointers that are longer than integers. If so, # define -DPOINTER_64_BITS=1.) For help on setting TRANSCOMM, you can # run BLACS/INSTALL/xtc_CsameF77 and BLACS/INSTALL/xtc_UseMpich as # explained in BLACS/INSTALL/README. # TRANSCOMM = -DUseMpich # # If you know that your MPI uses the same handles for fortran and C # communicators, you can replace the empty macro definition below with # the macro definition on the following line. # TRANSCOMM = -DCSameF77 # ----------------------------------------------------------------------- FFTRANSCOMM = -DUseMpi2 # -------------------------------------------------------------------------- # You may choose to have the BLACS internally call either the C or Fortran77 # interface to MPI by varying the following macro. If TRANSCOMM is left # empty, the C interface BLACS_GRIDMAP/BLACS_GRIDINIT will globally-block if # you choose to use the fortran internals, and the fortran interface will # block if you choose to use the C internals. It is recommended that the # user leave this macro definition blank, unless there is a strong reason # to prefer one MPI interface over the other. # WHATMPI = -DUseF77Mpi # WHATMPI = -DUseCMpi # -------------------------------------------------------------------------- FFWHATMPI = # --------------------------------------------------------------------------- # Some early versions of MPICH and its derivatives cannot handle user defined # zero byte data types. If your system has this problem (compile and run # BLACS/INSTALL/xsyserrors to check if unsure), replace the empty macro # definition below with the macro definition on the following line.
# SYSERRORS = -DZeroByteTypeBug # --------------------------------------------------------------------------- FFSYSERRORS = FreeFem-sources-4.9/3rdparty/ipopt/000775 000000 000000 00000000000 14037356732 017255 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/ipopt/Makefile000775 000000 000000 00000005475 14037356732 020733 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- include Makefile.inc all-local: Ipopt MAKEFLAGS += -j1 # Downloading and compiling mumps # ------------------------------ DIR=$(DOWNLOADFF)/ipopt DIRPKG=../pkg SRCDIR=Ipopt-$(VERSION) PACKAGE=$(DIRPKG)/Ipopt-$(VERSION).tgz INSTALL=../.. VERSION=3.12.4 # 3.10.2 URL=http://www.coin-or.org/download/source/Ipopt FHSL=#$(DIRPKG)/ddeps.f $(DIRPKG)/ma27ad.f $(DIRPKG)/mc19d.f WHERE_MUMPSSEQ=$(wildcard ../lib/WHERE.mumpsseq) ifeq ($(WHERE_MUMPSSEQ),) LIBMUMPS=-ldmumps_seq -lzmumps_seq -lmumps_common_seq -lpord_seq -lmpiseq_seq INCMUMPS=/usr/include/mumps_seq else LIBMUMPS=-L$(DOWNLOADFF)/lib -ldmumpsFREEFEM-SEQ -lzmumpsFREEFEM-SEQ -lmumps_commonFREEFEM-SEQ -lpordFREEFEM-SEQ -lmpiseqFREEFEM-SEQ INCMUMPS=$(DOWNLOADFF)/include/mumps_seq endif #/Ipopt-3.10.2.tgz Ipopt: $(SRCDIR)/FAIRE # --enable-static --disable-shared # ALH - 6/1/14 - We need to specify FLIBS explicitely because Ipopt # fails to guess FLIBS correctly on Windows+Cygwin+Mingw64, see [[file:Makefile.inc.in::FLIBS]] $(SRCDIR)/FAIT: $(SRCDIR)/tag-tar # FFCS - disable dependency tracking like in FFCS itself for MinGW compilation (problem with backslashes, see # [[file:../../../../configure.ac::enable_dependency_tracking]]) cd $(SRCDIR) ; \ ./configure --disable-dependency-tracking \ --disable-shared --enable-static \ --with-mumps='$(LIBMUMPS)' \ --without-hsl \ --with-mumps-incdir='$(INCMUMPS)' \ CXX='$(CXX)' CXXFLAGS='$(CXXFLAGS) -I$(INCMUMPS)' \ CC='$(CC)' CFLAGS='$(CFLAGS) -I$(INCMUMPS)' \ F77='$(FC)' FFLAGS='$(FCFLAGS)' \ FLIBS='$(FLIBS)' \ CXXCPP='$(CXXCPP)' 
CPP='$(CXXCPP)' MPICC='' MPICXX='' MPIFC='' \ --with-blas-lib='$(LIBBLAS)' --with-lapack='$(LIBLAPACK)' --prefix='$(DOWNLOADFF)' -rm -rf ../include/coin touch $(SRCDIR)/FAIT # FFCS - avoid remaking install every time install.done: Makefile $(SRCDIR)/FAIT $(MAKE) -C $(SRCDIR) install touch $@ clean-local:: -rm *.done # FFCS - install and WHERE need to be sequential WHERE.done: install.done Makefile echo ipopt LD -L@DIR@/lib -lipopt >$(SRCDIR)/$(INSTALL)/lib/WHERE.Ipopt; echo ipopt INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.Ipopt ; touch $@ Makefile.inc: ../../config.status --file="Makefile.inc:Makefile.inc.in" # FFCS - install and WHERE need to be sequential $(SRCDIR)/FAIRE: install.done WHERE.done touch $@ $(SRCDIR)/$(INSTALL): $(SRCDIR)/tag-tar $(SRCDIR)/tag-tar:$(PACKAGE) $(FHSL) tar xzf $(PACKAGE) # patch -p0 > ALH - 6/1/14 - Ipopt fails to guess FLIBS correctly on Windows+Cygwin+Mingw64 FLIBS=@FLIBS@ FreeFem-sources-4.9/3rdparty/ipopt/patch-IpBlas000664 000000 000000 00000006767 14037356732 021467 0ustar00rootroot000000 000000 --- Ipopt-3.10.2/Ipopt/src/LinAlg/IpBlas.cpp 2010-12-21 22:34:47.000000000 +0100 +++ Ipopt-3.10.2-okk/Ipopt/src/LinAlg/IpBlas.cpp 2012-03-15 15:30:11.000000000 +0100 @@ -8,7 +8,7 @@ #include "IpoptConfig.h" #include "IpBlas.hpp" - +#include // Prototypes for the BLAS routines extern "C" { @@ -57,6 +57,7 @@ int transa_len, int diag_len); } + namespace Ipopt { #ifndef HAVE_CBLAS @@ -65,8 +66,13 @@ Index incY) { ipfint n=size, INCX=incX, INCY=incY; - - return F77_FUNC(ddot,DDOT)(&n, x, &INCX, y, &INCY); + + Number s=0; + if( incX && incY ) s= F77_FUNC(ddot,DDOT)(&n, x, &INCX, y, &INCY) ; + else + for (int i=0,ix=0,iy=0; i../lib/WHERE.pthread-google echo pthread-google INCLUDE -I@DIR@/include/pthread-google >> ../lib/WHERE.pthread-google tag-install:tag-tar cd $(SRCDIR)/src; \ $(CC) -O -c *.c -I../include ;\ $(AR) rcs ../../../lib/libpthread-google.a *.o cp -r $(SRCDIR)/include/. 
../include/pthread-google touch $@ tag-tar: $(PACKAGE) tar zxvf $(PACKAGE) touch $@ $(PACKAGE): ../getall -o libpthread-google -a endif clean: -rm ff-flags tag-* -rm -rf libpthread-google* -rm FAIT $(FAIRE) -rm ../lib/*pthread-google* -rm -rf ../include/pthread-google* ff-flags: ../Makefile Makefile grep 'abs_top_builddir *=' ../Makefile > ff-flags grep 'CC *=' ../Makefile >> ff-flags grep 'CFLAGS *=' ../Makefile >> ff-flags grep 'LDFLAGS *=' ../Makefile >> ff-flags grep 'AR *=' ../Makefile >> ff-flags grep 'ARFLAGS *=' ../Makefile >> ff-flags grep 'RANLIB *=' ../Makefile >> ff-flags grep 'WIN32DLLTARGET *=' ../Makefile >> ff-flags grep 'WGET *=' ../Makefile >> ff-flags grep 'LIBS *=' ../Makefile >> ff-flags FreeFem-sources-4.9/3rdparty/metis/000775 000000 000000 00000000000 14037356732 017243 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/metis/Makefile000664 000000 000000 00000005421 14037356732 020705 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- include Makefile.in all-local: metis # Downloading and compiling Tetgen # ------------------------------ # http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/metis-4.0.tar.gz # Metis information DIRPKG=../pkg SRCDIR=metis-$(metis_VERSION) PACKAGE=$(DIRPKG)/metis-$(metis_VERSION).tar.gz SERVER=http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis #//http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/OLD INSTALL=../.. 
# FFCS - 14/11/11 - version 4.0.3 is not available from netlib anymore metis_VERSION=5.1.0 metis: FAIRE $(SRCDIR)/FAIT: $(MAKE) install touch $(SRCDIR)/FAIT install:$(SRCDIR)/tag-compile # cd $(SRCDIR)/Programs ;$(MAKE) # cd $(SRCDIR) ; $(MAKE) -C install -mkdir -p ../include -mkdir -p ../lib cp $(SRCDIR)/libmetis.a ../lib cp $(SRCDIR)/include/*.h ../include FAIRE: $(SRCDIR)/FAIT $(MAKE) WHERE touch FAIRE Makefile.in: ../../config.status Makefile-metis.in ../../config.status --file="Makefile.in:Makefile-metis.in" $(SRCDIR)/tag-compile: $(SRCDIR)/tags #cd $(SRCDIR)/libmetis ; make -C $(SRCDIR)/libmetis $(OBJS) 'CC=$(CC)' CFLAGS='$(CFLAGS) -I../GKlib -I../include -I.' make -C $(SRCDIR)/GKlib $(OBJS_GK) 'CC=$(CC)' CFLAGS='$(CFLAGS) -I../GKlib -I../include -I. -Dmetis_EXPORTS -D_GNU_SOURCE' mkdir $(SRCDIR)/lib cd $(SRCDIR); $(AR) libmetis.a libmetis/*.o GKlib/*.o touch $(SRCDIR)/tag-compile WHERE: -if [ -f $(SRCDIR)/FAIT ] ; then \ echo metis LD -L@DIR@/lib -lmetis >$(SRCDIR)/$(INSTALL)/lib/WHERE.metis ;\ echo metis INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.metis ;\ fi OBJS=auxapi.o coarsen.o fm.o graph.o kwayrefine.o minconn.o options.o separator.o timing.o \ balance.o compress.o fortran.o initpart.o mcutil.o mincover.o parmetis.o sfm.o util.o \ bucketsort.o contig.o frename.o kmetis.o mesh.o mmd.o pmetis.o srefine.o wspace.o \ checkgraph.o debug.o gklib.o kwayfm.o meshpart.o ometis.o refine.o stat.o OBJS_GK=b64.o error.o fs.o graph.o itemsets.o omp.o random.o sort.o tokenizer.o \ blas.o evaluate.o getopt.o htable.o mcore.o pdb.o rw.o string.o util.o \ csr.o fkvkselect.o gkregex.o io.o memory.o pqueue.o seq.o timers.o # FFCS: patch is necessary for metis 4.0, but not for 4.0.3 $(SRCDIR)/tags: $(PACKAGE) tar xzf $(PACKAGE) patch -p0 #include - #include +#ifndef WIN32 +#include +#endif #include #endif diff -ur metis-5.1.0/GKlib/gk_getopt.h metis-5.1.0-ok/GKlib/gk_getopt.h --- metis-5.1.0/GKlib/gk_getopt.h 2013-03-30 17:24:45.000000000 +0100 +++ 
metis-5.1.0-ok/GKlib/gk_getopt.h 2019-05-10 11:44:42.218710800 +0200 @@ -52,11 +52,11 @@ /* Function prototypes */ -extern int gk_getopt(int __argc, char **__argv, char *__shortopts); -extern int gk_getopt_long(int __argc, char **__argv, char *__shortopts, - struct gk_option *__longopts, int *__longind); -extern int gk_getopt_long_only (int __argc, char **__argv, - char *__shortopts, struct gk_option *__longopts, int *__longind); +extern int gk_getopt(int argc, char **argv, char *shortopts); +extern int gk_getopt_long(int argc, char **argv, char *shortopts, + struct gk_option *longopts, int *longind); +extern int gk_getopt_long_only (int argc, char **argv, + char *shortopts, struct gk_option *longopts, int *longind); FreeFem-sources-4.9/3rdparty/mmg/000775 000000 000000 00000000000 14037356732 016702 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/mmg/Makefile000664 000000 000000 00000005030 14037356732 020340 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- all-local: mmg include ff-flags # Downloading and compiling mmg # ------------------------------- # DIRPKG= ../pkg SRCDIR= . PACKAGE=$(DIRPKG)/mmg.zip INSTALL=. 
mmg_VERSION= mmg:WHERE.done ifeq ($(WIN32DLLTARGET),) CMAKE_GENERATOR := else CMAKE_GENERATOR := -G "MSYS Makefiles" endif FAIT.done:tag-tar -mkdir build cd build && cmake ../mmg-sources \ -DCMAKE_C_COMPILER=$(CC) \ -DCMAKE_C_FLAGS="$(CFLAGS) -fPIC" \ -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_CXX_FLAGS="$(CXXFLAGS) $(CXX11FLAGS) -fPIC" \ -DM_LIB="-lm" -DUSE_ELAS=OFF -DUSE_VTK=OFF \ -DSCOTCH_DIR=$(scotch_dir) \ -DCMAKE_BUILD_TYPE=Release $(CMAKE_GENERATOR) && $(MAKE) touch FAIT.done install.done:FAIT.done cp -r build/include/mmg ../include/mmg cp build/lib/libmmg.a ../lib touch $@ mmg:$(PACKAGE) install:install.done WHERE.done WHERE.done: install.done echo mmg LD -L@DIR@/lib -lmmg >../lib/WHERE.mmg ; echo mmg INCLUDE -I@DIR@/include/>> ../lib/WHERE.mmg ; echo build WHERE ./lib/WHERE.mmg ; touch $@ clean:: -rm WHERE.done FAIRE: FAIT.done install.done tag-tar: $(PACKAGE) -rm -rf mmg-* unzip -q $(PACKAGE) && mv mmg-* mmg-sources # patch -p1 > ff-flags -awk '/^scotch LD /{print "scotch_dir=../../../3rdparty/" }' ../lib/WHERE.scotch >> ff-flags ../../bin/ff-md5 $(PACKAGE) >> ff-flags if diff -q ff-flags ff-flags.old ; then echo No Modif skip compile of mmg ; else touch -c tag-tar; fi .PHONY:$(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/mmg/patch-mmg000664 000000 000000 00000000720 14037356732 020501 0ustar00rootroot000000 000000 --- ./mmg-sources/src/common/mmgcommon.h.in-origine 2020-04-03 09:59:45.000000000 +0200 +++ ./mmg-sources/src/common/mmgcommon.h.in 2020-04-03 10:00:23.000000000 +0200 @@ -43,6 +43,7 @@ #elif defined(__unix__) || defined(__unix) || defined(unix) #include #elif defined(_WIN16) || defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__TOS_WIN__) || defined(__WINDOWS__) +#undef POSIX #ifndef GNU #define _WIN32_WINNT 0x0500 #endif FreeFem-sources-4.9/3rdparty/mmg3d/000775 000000 000000 00000000000 14037356732 017131 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/mmg3d/Makefile000664 000000 000000 00000007325 14037356732 
020600 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- all-local: mmg3d include ff-flags # Downloading and compiling mmg3d # ------------------------------- # DIRPKG= ../pkg SRCDIR= ./mmg3d4 PACKAGE=$(DIRPKG)/mmg3d4.0.tgz INSTALL=.. mmg3d_VERSION= # mmg3d pas sur internet LIBMMG3D=$(INSTALL)/lib/libmmg3d-v4.a OPT=4 # size of the PKG file ( this file change See Cecile.) FFCS - 19/2/13 - the test for the file size (`stat -f "%z" file`) # is not portable from MacOS, so just leave it out in FFCS (SIZEPKG is still useful to force a remake when the package # changes) SIZEPKG=158547 OBJS= analar.o chkmsh.o hash.o memory.o optcte.o outqua.o simu44.o swap44.o zaldy.o \ analarcutting.o chrono.o heap.o mmg3d1.o optlap.o pattern.o simu56.o swap56.o \ baryct.o colpoi.o inout.o mmg3d4.o optlen.o quality.o simu68.o swap68.o \ boulep.o coquil.o length.o mmg3d9.o optlentet.o queue.o simu710.o swap710.o \ bucket.o cutelt.o librnbg.o movevertex.o optra4.o ratio.o solmap.o swapar.o \ delaunay.o locate.o optbdry.o opttet.o scalem.o spledg.o swaptet.o \ cenrad.o eigenv.o matrix.o optcoq.o opttyp.o simu23.o swap23.o typelt.o OBJSNOP = cendel.o swapar.o # FFCS - simplify Makefile structure mmg3d:WHERE.done mmg3d4/FAIT-4.done:tag-tar-$(SIZEPKG) echo "#define COMPIL " '"' `date` '"' > ./mmg3d4/build/sources/compil.date cd mmg3d4/build/sources/; $(MAKE) CC='$(CC)' CFLAGS='$(CNOFLAGS) -g' $(OBJSNOP) cd mmg3d4/build/sources/; $(MAKE) CC='$(CC)' CFLAGS='$(CFLAGS) -g' $(OBJS) cd mmg3d4/build/sources/mmg3dmain; $(CC) -c $(CFLAGS) mmg3d.c -I.. 
$(AR) $(ARFLAGS) $(LIBMMG3D) mmg3d4/build/sources/*.o mmg3d4/build/sources/mmg3dmain/mmg3d.o # # FFCS - ranlib required on Windows 7 64 bits # $(RANLIB) $(LIBMMG3D) -$(CC) $(CNOFLAGS) mmg3d4/build/sources/mmg3dmain/mmg3d.o mmg3d4/build/sources/*.o -o ../bin/mmg3d $(STD_LIBS) touch mmg3d4/FAIT-4.done # FFCS - simplify Makefile structure install-4.done:mmg3d4/FAIT-4.done -mkdir ../include/mmg3d-v4/ cp mmg3d4/build/sources/*.h ../include/mmg3d-v4/ touch $@ clean:: -rm install-4.done mmg3d-4:$(PACKAGE) install:install-4.done WHERE.done # FFCS - keep simplest makefile structure for automatic recompilations WHERE.done: install-4.done echo mmg3d-v4 LD -L@DIR@/lib -lmmg3d-v4 >../lib/WHERE.mmg3d ; echo mmg3d-v4 INCLUDE -I@DIR@/include/mmg3d-v4>> ../lib/WHERE.mmg3d ; echo build WHERE ./lib/WHERE.mmg3d ; touch $@ clean:: -rm WHERE.done FAIRE: mmg3d4/FAIT-4.done install-4.done # FFCS - keep it simple tag-tar-$(SIZEPKG): $(PACKAGE) patch-mmg3dv4.diff -rm -rf mmg3d4 tar xzf $(PACKAGE) -rm mmg3d4/build/libexamples/main. touch mmg3d4/build/sources/dataff.h # # ALH - clean-up all CR/LF to make patching more successful # ../../bin/build/cleancrlf mmg3d4 # cd mmg3d4;patch -p1 <../patch-mmg3dv4.diff cat mmg3d4/build/sources/mmg3dConfig.h touch tag-tar-$(SIZEPKG) # cp makefile-mmg3d.inc $(SRCDIR)/makefile $(PACKAGE): ../getall -o MMG3D -a clean:: -rm ff-flags # # FFCS - make sure that all directories are cleaned. 
Thisis especially important under Windows because there is no # compilation dependencies control there (see # [[file:c:/cygwin/home/alh/ffcs/dist/configure.ac::dependency_tracking]]) # -rm -r mmg3d4 -rm FAIT* mmg* flags-* tag-tar* #FH -rm $(PACKAGE) ff-flags: ../Makefile Makefile grep 'abs_top_builddir *=' ../Makefile > ff-flags grep 'CC *=' ../Makefile >> ff-flags grep 'CFLAGS *=' ../Makefile >> ff-flags grep 'LDFLAGS *=' ../Makefile >> ff-flags grep 'AR *=' ../Makefile >> ff-flags grep 'ARFLAGS *=' ../Makefile >> ff-flags grep 'RANLIB *=' ../Makefile >> ff-flags grep 'WGET *=' ../Makefile >> ff-flags grep 'STD_LIBS *=' ../Makefile >> ff-flags grep 'CNOFLAGS *=' ../Makefile >> ff-flags .PHONY:$(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/mmg3d/makefile-mmg3d.inc000664 000000 000000 00000001634 14037356732 022412 0ustar00rootroot000000 000000 include ../ff-flags # working dirs MMG3DDIR = $(abs_top_builddir)/3rdprty/mmg3d/mmg3dlib EXEDIR = $(MMG3DDIR)/bin/ SRCDIR = $(MMG3DDIR)/sources/ OBJDIR = $(MMG3DDIR)/objects/ ARCDIR = $(MMG3DDIR)/archives DIRDIR = $(MMG3DDIR)/objects $(OBJDIR) $(ARCDIR) INCDIR = -I$(MMG3DDIR)/sources/ LDLDIR = VPATH = $(SRCDIR) # objects list src = $(wildcard $(SRCDIR)/*.c) header = $(wildcard $(SRCDIR)/*.h) objs = $(patsubst $(SRCDIR)%,$(OBJDIR)%,$(src:.c=.o)) lib = $(OBJDIR)/libmmg3d.a #.SILENT: $(OBJDIR)/%.o: $(SRCDIR)/%.c $(CC) $(OPT64) $(INCDIR) $(CFLAGS) -c $< -o $@ $(lib): $(objs) @echo 'Fin Archivage' $@ $(objs):$(header) $(DIRDIR): @[ -d $@ ] || mkdir $@ lib: $(DIRDIR) $(objs) $(AR) $(ARFLAGS) $(lib) $(OBJDIR)/*.o $(RANLIB) $(lib) @echo 'Fin Archivage' $@ clean: -rm $(objs) libmmg3d.a tar: $(DIRDIR) tar czf $(ARCDIR)/$(prog).`date +"%Y.%m.%d"`.tgz sources makefile target: $(EXEDIR)/$(prog) FreeFem-sources-4.9/3rdparty/mmg3d/patch-mmg3dv4.diff000664 000000 000000 00000314237 14037356732 022353 0ustar00rootroot000000 000000 >diff -r -u mmg3d4/build/sources/analarcutting.c mmg3d4-new/build/sources/analarcutting.c --- 
mmg3d4/build/sources/analarcutting.c 2012-12-19 16:05:32.000000000 +0100 +++ mmg3d4-new/build/sources/analarcutting.c 2013-01-18 16:33:45.000000000 +0100 @@ -307,7 +307,7 @@ printf("6 cut : %8d\n",n6); printf("---------------------------\n"); */ if ( !na ) return(na); -#warning check memory allocation + // #warning check memory allocation //printf("%d cut init --- nb tet %d\n",na,mesh->ne); return(na); diff -r -u mmg3d4/build/sources/cutelt.c mmg3d4-new/build/sources/cutelt.c --- mmg3d4/build/sources/cutelt.c 2012-12-19 16:05:32.000000000 +0100 +++ mmg3d4-new/build/sources/cutelt.c 2013-01-18 16:35:41.000000000 +0100 @@ -305,8 +305,7 @@ // } // return(1); // } -int ddebug=0; - +extern int ddebug; int MMG_decouphex(pMesh mesh, pHedge hed,int k,int* p,int ref) { pTetra pt; int i,nu1,nu2; diff -r -u mmg3d4/build/sources/dataff.h mmg3d4-new/build/sources/dataff.h --- mmg3d4/build/sources/dataff.h 2013-01-18 21:52:48.000000000 +0100 +++ mmg3d4-new/build/sources/dataff.h 2013-01-18 18:50:04.000000000 +0100 @@ -0,0 +1,60 @@ +/* + * dataff.h + * + * + * Created by Fr\E9d\E9ric Hecht on 19/06/11. + * Copyright 2011 UPMC. All rights reserved. + * + + */ + +enum ff_data_type { + ff_id_vertex =0, + ff_id_seg =1, + ff_id_tria=2, + ff_id_tet =3, + + ff_id_prism =5, + ff_id_hex =6, + ff_id_quad =7, + ff_id_corner=8 +} ; + + +typedef struct DataFF +{ + const char * meshname; + const char * movename; + const char * solname; + int imprim; + int memory; + int np; // nb of vertices in/out + int typesol; // 1 iso , 6 : m11; m12,m13,m22,m23,m33 + void * mesh; + double * sol; /* metric :size typesol*np */ + double * mov; /* displac. 
size :3*np */ + void (*set_mesh)(void *dataff,int *data,int ldata); + void (*end_mesh)(void *dataff); + void (*set_v)(void *dataff,int i,double *xyz,int lab); + void (*set_elmt)(void *dataff,int ff_id,int i,int *k,int lab); + void (*get_mesh)(void *dataff,int *data,int ldata); + void (*get_v3)(void *dataff,int i,double *xyz,int *lab); + void (*get_elmt)(void *dataff,int ff_id,int i,int *k,int *lab); +} DataFF; + +#ifdef __cplusplus +extern "C" { +#endif + int mainmmg3d(int argc,char *argv[],DataFF *dataff); + +#ifdef __cplusplus +} +#endif +/* + m11 = met[0] + m12 = met[1] + m13 = met[2] + m22 = met[3] + m23 = met[4] + m33 = met[5] +*/ diff -r -u mmg3d4/build/sources/delaunay.c mmg3d4-new/build/sources/delaunay.c --- mmg3d4/build/sources/delaunay.c 2012-12-19 16:05:32.000000000 +0100 +++ mmg3d4-new/build/sources/delaunay.c 2013-01-18 16:32:41.000000000 +0100 @@ -728,7 +728,7 @@ if ( ppt->tag & M_UNUSED ) return(0); tref = mesh->tetra[list->tetra[1]/6].ref; -#warning remove this test + // #warning remove this test for (k=1; k<=lon; k++) if(tref!=mesh->tetra[list->tetra[k]/6].ref) printf("pbs coquil %d %d tet %d\n",tref,mesh->tetra[list->tetra[k]/6].ref,list->tetra[k]/6); diff -r -u mmg3d4/build/sources/libmmg3d.h mmg3d4-new/build/sources/libmmg3d.h --- mmg3d4/build/sources/libmmg3d.h 2012-12-19 16:05:36.000000000 +0100 +++ mmg3d4-new/build/sources/libmmg3d.h 2013-01-18 16:32:41.000000000 +0100 @@ -118,12 +118,12 @@ typedef MMG_Sol * MMG_pSol; /* inout */ -int MMG_loadMesh(MMG_pMesh ,char *); -int MMG_loadSol(MMG_pSol ,char *,int ); -int MMG_loadVect(MMG_pMesh ,char *,int ); -int MMG_saveMesh(MMG_pMesh ,char *); -int MMG_saveSol(MMG_pMesh ,MMG_pSol ,char *); -int MMG_saveVect(MMG_pMesh ,char *); +int MMG_loadMesh(MMG_pMesh ,char *,void *); +int MMG_loadSol(MMG_pSol ,char *,int ,void *); +int MMG_loadVect(MMG_pMesh ,char *,int ,void *); +int MMG_saveMesh(MMG_pMesh ,char *,void *); +int MMG_saveSol(MMG_pMesh ,MMG_pSol ,char *,void *); +int MMG_saveVect(MMG_pMesh ,char 
*,void *); #ifdef __cplusplus namespace mmg3d{ diff -r -u mmg3d4/build/sources/mesh.h mmg3d4-new/build/sources/mesh.h --- mmg3d4/build/sources/mesh.h 2012-12-19 16:05:36.000000000 +0100 +++ mmg3d4-new/build/sources/mesh.h 2013-01-18 16:32:41.000000000 +0100 @@ -405,17 +405,17 @@ /* function pointers */ typedef int (*MMG_Swap)(pMesh ,pSol ,pList ); -MMG_Swap MMG_swpptr; -double (*MMG_length)(double *,double *,double *,double *); -double (*MMG_caltet)(pMesh ,pSol ,int ); -double (*MMG_calte1)(pMesh ,pSol ,int ); -int (*MMG_caltet2)(pMesh ,pSol ,int ,int ,double ,double *); -int (*MMG_cavity)(pMesh ,pSol ,int ,int ,pList ,int ); -int (*MMG_buckin)(pMesh ,pSol ,pBucket ,int ); -int (*MMG_optlen)(pMesh ,pSol ,double ,int ); -int (*MMG_interp)(double *,double *,double *,double ); -int (*MMG_optlentet)(pMesh ,pSol ,pQueue ,double ,int ,int ); -int (*MMG_movevertex)(pMesh ,pSol ,int ,int ); +extern MMG_Swap MMG_swpptr; +extern double (*MMG_length)(double *,double *,double *,double *); +extern double (*MMG_caltet)(pMesh ,pSol ,int ); +extern double (*MMG_calte1)(pMesh ,pSol ,int ); +extern int (*MMG_caltet2)(pMesh ,pSol ,int ,int ,double ,double *); +extern int (*MMG_cavity)(pMesh ,pSol ,int ,int ,pList ,int ); +extern int (*MMG_buckin)(pMesh ,pSol ,pBucket ,int ); +extern int (*MMG_optlen)(pMesh ,pSol ,double ,int ); +extern int (*MMG_interp)(double *,double *,double *,double ); +extern int (*MMG_optlentet)(pMesh ,pSol ,pQueue ,double ,int ,int ); +extern int (*MMG_movevertex)(pMesh ,pSol ,int ,int ); #endif diff -r -u mmg3d4/build/sources/mmg3d4.c mmg3d4-new/build/sources/mmg3d4.c --- mmg3d4/build/sources/mmg3d4.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/mmg3d4.c 2013-01-18 18:28:05.000000000 +0100 @@ -3,32 +3,32 @@ Co-auteurs : Cecile Dobrzynski et Pascal Frey. Propriétaires :IPB - UPMC -INRIA. 
-Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, +Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, diffusé sous les termes et conditions de la licence publique générale de GNU -Version 3 ou toute version ultérieure. +Version 3 ou toute version ultérieure. Ce fichier est une partie de MMG3D. MMG3D est un logiciel libre ; vous pouvez le redistribuer et/ou le modifier suivant les termes de la licence publique générale de GNU Version 3 ou toute version ultérieure. -MMG3D est distribué dans l'espoir qu'il sera utile, mais SANS -AUCUNE GARANTIE ; sans même garantie de valeur marchande. +MMG3D est distribué dans l'espoir qu'il sera utile, mais SANS +AUCUNE GARANTIE ; sans même garantie de valeur marchande. Voir la licence publique générale de GNU pour plus de détails. -MMG3D est diffusé en espérant qu’il sera utile, -mais SANS AUCUNE GARANTIE, ni explicite ni implicite, -y compris les garanties de commercialisation ou -d’adaptation dans un but spécifique. +MMG3D est diffusé en espérant qu’il sera utile, +mais SANS AUCUNE GARANTIE, ni explicite ni implicite, +y compris les garanties de commercialisation ou +d’adaptation dans un but spécifique. Reportez-vous à la licence publique générale de GNU pour plus de détails. -Vous devez avoir reçu une copie de la licence publique générale de GNU -en même temps que ce document. +Vous devez avoir reçu une copie de la licence publique générale de GNU +en même temps que ce document. Si ce n’est pas le cas, aller voir . /**************************************************************************** Initial software: MMG3D Version 4.0 Co-authors: Cecile Dobrzynski et Pascal Frey. Owners: IPB - UPMC -INRIA. 
-Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, -spread under the terms and conditions of the license GNU General Public License +Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, +spread under the terms and conditions of the license GNU General Public License as published Version 3, or (at your option) any later version. This file is part of MMG3D @@ -41,26 +41,26 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with MMG3D. If not, see . +along with MMG3D. If not, see . ****************************************************************************/ #include "mesh.h" -int MMG_npuiss,MMG_nvol,MMG_npres; -int MMG_nlen,MMG_ncal,MMG_ntopo,MMG_nex; -int MMG_npuisstot,MMG_nvoltot,MMG_nprestot; -int MMG_npdtot; -int MMG_nplen,MMG_npref,MMG_bouffe; +extern int MMG_npuiss,MMG_nvol,MMG_npres; +extern int MMG_nlen,MMG_ncal,MMG_ntopo,MMG_nex; +extern int MMG_npuisstot,MMG_nvoltot,MMG_nprestot; +extern int MMG_npdtot; +extern int MMG_nplen,MMG_npref,MMG_bouffe; int ddebug; int MMG_mmg3d4(pMesh mesh,pSol sol,int *alert) { Hedge hash; - pBucket bucket; + pBucket bucket; double declic; - int base,na,nd,ns,nna,nnd,nns,dd,it,nf,maxtou; - double lmoy,LLLONG; - int k; - pTetra pt; + int base,na,nd,ns,nna,nnd,nns,dd,it,nf,maxtou; + double lmoy,LLLONG; + int k; + pTetra pt; if ( abs(mesh->info.imprim) > 3 ) fprintf(stdout," ** SIZE OPTIMIZATION\n"); if ( mesh->info.imprim < 0 ) { @@ -73,82 +73,82 @@ maxtou = 10; nna = nns = nnd = 0; it = 0; - declic = 3. / ALPHAD; + declic = 3. 
/ ALPHAD; lmoy = 10.; LLLONG = 1.5; - + nna = 10; - do { - na = nd = ns = 0; + do { + na = nd = ns = 0; if(0) ddebug = 1; else ddebug = 0; - + if(it && !(it%2) ) { bucket = MMG_newBucket(mesh,M_MAX(mesh->info.bucksiz,BUCKSIZ)); if ( !bucket ) return(0); - //MMG_saveMesh(mesh,"avtana.mesh"); - MMG_analar(mesh,sol,bucket,&na,&nd,&nf,alert); - //MMG_saveMesh(mesh,"apresana.mesh"); - if ( abs(mesh->info.imprim) > 5 ) - fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); - - M_free(bucket->head); - M_free(bucket->link); - M_free(bucket); - + //MMG_saveMesh(mesh,"avtana.mesh",0); + MMG_analar(mesh,sol,bucket,&na,&nd,&nf,alert); + //MMG_saveMesh(mesh,"apresana.mesh",0); + if ( abs(mesh->info.imprim) > 5 ) + fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); + + M_free(bucket->head); + M_free(bucket->link); + M_free(bucket); + } else { - ++mesh->flag; + ++mesh->flag; } - //printf("IT %d $$$$$$$$$$$ LLLONG %9.3f\n",it,LLLONG); - nna = nns = nnd = 0; - + //printf("IT %d $$$$$$$$$$$ LLLONG %9.3f\n",it,LLLONG); + nna = nns = nnd = 0; + /*splitting*/ if ( !mesh->info.noinsert && (!*alert) ) { /* store points on edges */ if ( !MMG_zaldy4(&hash,mesh->np) ) { - if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM.\n"); - *alert = 2; - break; + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM.\n"); + *alert = 2; + break; } - nna = MMG_analarcutting(mesh,sol,&hash,alert,&lmoy,LLLONG); + nna = MMG_analarcutting(mesh,sol,&hash,alert,&lmoy,LLLONG); if ( abs(mesh->info.imprim) > 5 ) { printf("lmoy %9.5f\n",lmoy); } /*puts("--------------------------------------"); - puts("--------------------------------------"); - puts("--------------------------------------"); - */ + puts("--------------------------------------"); + puts("--------------------------------------"); + */ if ( *alert ) { - fprintf(stdout," \n\n ** UNABLE TO CUT (analarcutting)\n"); - fprintf(stdout," ** RETRY WITH -m > %6d \n\n",mesh->info.memory); 
- MMG_saveMesh(mesh,"crash.mesh"); - MMG_saveSol(mesh,sol,"crash.sol"); - exit(0); + fprintf(stdout," \n\n ** UNABLE TO CUT (analarcutting)\n"); + fprintf(stdout," ** RETRY WITH -m > %6d \n\n",mesh->info.memory); + MMG_saveMesh(mesh,"crash.mesh",0); + MMG_saveSol(mesh,sol,"crash.sol",0); + exit(0); } - M_free(hash.item); + M_free(hash.item); } - else if ( *alert ) nna = 0; - /* adjacencies */ + else if ( *alert ) nna = 0; + /* adjacencies */ if ( nna /*|| it == (maxtou-1)*/ ) { mesh->nt = 0; if ( !MMG_hashTetra(mesh) ) return(0); if ( !MMG_markBdry(mesh) ) return(0); } - // printf("chkmsh\n"); - // MMG_unscaleMesh(mesh,sol); - // MMG_saveMesh(mesh,"chk.mesh"); + // printf("chkmsh\n"); + // MMG_unscaleMesh(mesh,sol); + // MMG_saveMesh(mesh,"chk.mesh",0); //MMG_chkmsh(mesh,1,-1); - //if(it==1)exit(0); - /* delaunization */ - if ( !mesh->info.noswap && (nna || na) ) { + //if(it==1)exit(0); + /* delaunization */ + if ( !mesh->info.noswap && (nna || na) ) { nns = MMG_cendel(mesh,sol,declic,base); } /* deletion */ /*if ( 0 && nna ) { nnd = MMG_colvert(mesh,sol,base); - } */ + } */ if ( nna+nnd+nns && abs(mesh->info.imprim) > 3 ) fprintf(stdout," %7d INSERTED %7d REMOVED %7d FLIPPED\n",nna+na,nnd+nd,nns); - + } while ( na+nd+nns+nna+nnd > 0 && ++it < maxtou && lmoy > 1.3); @@ -161,80 +161,80 @@ MMG_prilen(mesh,sol); } - //return(1); - //MMG_saveMesh(mesh,"aprescut.mesh"); - fprintf(stdout," ---\n"); - + //return(1); + //MMG_saveMesh(mesh,"aprescut.mesh",0); + fprintf(stdout," ---\n"); + /*analyze standard*/ - base = mesh->flag; - *alert = 0; + base = mesh->flag; + *alert = 0; - nna = 0; - nnd = 0; - nf = 0; - it = 0; - maxtou = 100; - MMG_npdtot=0; - MMG_npuisstot=0; - MMG_nprestot=0; - MMG_nvoltot=0; - - /* 2. 
field points */ - if ( mesh->info.imprim < -4 ) { - MMG_prilen(mesh,sol); - fprintf(stdout," -- FIELD POINTS\n"); - } - - /* create filter */ - bucket = MMG_newBucket(mesh,M_MAX(mesh->info.bucksiz,BUCKSIZ)); - if ( !bucket ) return(0); - - do { - MMG_analar(mesh,sol,bucket,&na,&nd,&nf,alert); - nna += na; - nnd += nd; - if ( *alert ) { - if ( nd < 1000 ) break; - else *alert = 0; - } - if ( it > 5 ) { - dd = abs(nd-na); - if ( dd < 5 || dd < 0.05*nd ) break; - else if ( it > 12 && nd >= na ) break; - } - if ( na+nd && abs(mesh->info.imprim) > 3 ) - fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); - // MMG_saveMesh(mesh,"chk.mesh"); - // //if(it==1) exit(0); - } - while ( na+nd > 0 && ++it < maxtou ); - - if ( nna+nnd && abs(mesh->info.imprim) < 3 ) { - fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); - } - - if(MMG_npdtot>0) { - fprintf(stdout," REJECTED : %5d\n",MMG_npdtot); - fprintf(stdout," VOL : %6.2f %% %5d \n", - 100*(MMG_nvoltot/(float) - MMG_npdtot),MMG_nvoltot); - fprintf(stdout," PUISS : %6.2f %% %5d \n", - 100*(MMG_npuisstot/(float) MMG_npdtot),MMG_npuisstot); - fprintf(stdout," PROCHE : %6.2f %% %5d \n", - 100*(MMG_nprestot/(float) MMG_npuisstot),MMG_nprestot); + nna = 0; + nnd = 0; + nf = 0; + it = 0; + maxtou = 100; MMG_npdtot=0; MMG_npuisstot=0; + MMG_nprestot=0; MMG_nvoltot=0; - } - if ( mesh->info.imprim < 0 ) { - MMG_outqua(mesh,sol); - MMG_prilen(mesh,sol); - } - M_free(bucket->head); - M_free(bucket->link); - M_free(bucket); + /* 2. 
field points */ + if ( mesh->info.imprim < -4 ) { + MMG_prilen(mesh,sol); + fprintf(stdout," -- FIELD POINTS\n"); + } + + /* create filter */ + bucket = MMG_newBucket(mesh,M_MAX(mesh->info.bucksiz,BUCKSIZ)); + if ( !bucket ) return(0); + + do { + MMG_analar(mesh,sol,bucket,&na,&nd,&nf,alert); + nna += na; + nnd += nd; + if ( *alert ) { + if ( nd < 1000 ) break; + else *alert = 0; + } + if ( it > 5 ) { + dd = abs(nd-na); + if ( dd < 5 || dd < 0.05*nd ) break; + else if ( it > 12 && nd >= na ) break; + } + if ( na+nd && abs(mesh->info.imprim) > 3 ) + fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); + // MMG_saveMesh(mesh,"chk.mesh",0); + // //if(it==1) exit(0); + } + while ( na+nd > 0 && ++it < maxtou ); + + if ( nna+nnd && abs(mesh->info.imprim) < 3 ) { + fprintf(stdout," %7d INSERTED %7d REMOVED %7d FILTERED\n",na,nd,nf); + } + + if(MMG_npdtot>0) { + fprintf(stdout," REJECTED : %5d\n",MMG_npdtot); + fprintf(stdout," VOL : %6.2f %% %5d \n", + 100*(MMG_nvoltot/(float) + MMG_npdtot),MMG_nvoltot); + fprintf(stdout," PUISS : %6.2f %% %5d \n", + 100*(MMG_npuisstot/(float) MMG_npdtot),MMG_npuisstot); + fprintf(stdout," PROCHE : %6.2f %% %5d \n", + 100*(MMG_nprestot/(float) MMG_npuisstot),MMG_nprestot); + MMG_npdtot=0; + MMG_npuisstot=0; + MMG_nvoltot=0; + } + if ( mesh->info.imprim < 0 ) { + MMG_outqua(mesh,sol); + MMG_prilen(mesh,sol); + } + M_free(bucket->head); + M_free(bucket->link); + M_free(bucket); + return(1); } diff -r -u mmg3d4/build/sources/mmg3dConfig.h mmg3d4-new/build/sources/mmg3dConfig.h --- mmg3d4/build/sources/mmg3dConfig.h 2012-12-19 16:05:36.000000000 +0100 +++ mmg3d4-new/build/sources/mmg3dConfig.h 2013-01-18 16:32:41.000000000 +0100 @@ -2,4 +2,4 @@ #define Tutorial_VERSION_MAJOR #define Tutorial_VERSION_MINOR -#define USE_SCOTCH +/* #undef USE_SCOTCH */ diff -r -u mmg3d4/build/sources/mmg3dlib/mmg3dlib.c mmg3d4-new/build/sources/mmg3dlib/mmg3dlib.c --- mmg3d4/build/sources/mmg3dlib/mmg3dlib.c 2012-12-19 16:06:03.000000000 +0100 
+++ mmg3d4-new/build/sources/mmg3dlib/mmg3dlib.c 2013-01-18 16:32:41.000000000 +0100 @@ -385,7 +385,7 @@ if ( !MMG_hashTetra(mesh) ) return(1); if ( !MMG_markBdry(mesh) ) return(1); if (abs(mesh->info.option)==10) { - MMG_saveMesh(mesh,"tetra.mesh"); + MMG_saveMesh(mesh,"tetra.mesh",0); return(0); } if ( !sol->np) { @@ -431,7 +431,7 @@ if ( abs(info->option) == 9 ) { if(!MMG_mmg3d9(mesh,sol,&alert)) { if ( !MMG_unscaleMesh(mesh,sol) ) return(1); - MMG_saveMesh(mesh,"errormoving.mesh"); + MMG_saveMesh(mesh,"errormoving.mesh",0); //MMG_saveSol(mesh,sol,mesh->outf); return(1); } diff -r -u mmg3d4/build/sources/optlen.c mmg3d4-new/build/sources/optlen.c --- mmg3d4/build/sources/optlen.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/optlen.c 2013-01-18 16:32:41.000000000 +0100 @@ -48,7 +48,7 @@ #define HQCOEF 0.9 #define HCRIT 0.98 -double MMG_rao(pMesh mesh,int k,int inm); +double MMG_rao(pMesh mesh,int k,FILE* ); int MMG_optlen_ani(pMesh mesh,pSol sol,double declic,int base) { pTetra pt,pt1; pPoint ppa,ppb; diff -r -u mmg3d4/build/sources/pattern.c mmg3d4-new/build/sources/pattern.c --- mmg3d4/build/sources/pattern.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/pattern.c 2013-01-18 18:41:02.000000000 +0100 @@ -47,7 +47,7 @@ unsigned char MMG_arfa[3][4] = { {2,0,1,3}, {1,2,0,3}, {0,1,2,3} }; -extern int MMG_permar[10][4]; +extern int MMG_permar[12][4]; extern int MMG_pointar[64][2]; extern int ddebug; //insert ip on ia-ib diff -r -u mmg3d4/build/sources/quality.c mmg3d4-new/build/sources/quality.c --- mmg3d4/build/sources/quality.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/quality.c 2013-01-18 16:32:41.000000000 +0100 @@ -46,7 +46,7 @@ #include "mesh.h" -double MMG_rao(pMesh mesh,int k,int inm); +double MMG_rao(pMesh mesh,int k,FILE* inm) ; double MMG_caltetrao(pMesh mesh,pSol sol,int iel) { return(MMG_rao(mesh,iel,0)); } diff -r -u mmg3d4/build/sources/ratio.c mmg3d4-new/build/sources/ratio.c --- 
mmg3d4/build/sources/ratio.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/ratio.c 2013-01-18 16:32:41.000000000 +0100 @@ -365,7 +365,7 @@ fprintf(stdout," ELEMENT %d (%d) %d %d %d %d\n", iel,ielreal,pt->v[0],pt->v[1],pt->v[2],pt->v[3]); - if ( abs(mesh->info.imprim) < 5 ) return; + if ( abs(mesh->info.imprim) < 5 ) return (1) ; fprintf(stdout,"\n HISTOGRAMM\n"); for (k=1; k<9; k++) { diff -r -u mmg3d4/build/sources/sproto.h mmg3d4-new/build/sources/sproto.h --- mmg3d4/build/sources/sproto.h 2012-12-19 16:05:36.000000000 +0100 +++ mmg3d4-new/build/sources/sproto.h 2013-01-18 16:32:41.000000000 +0100 @@ -67,13 +67,13 @@ int MMG_inEdge(pHedge ,int *,int *,int *); int MMG_markBdry(pMesh ); -/* inout */ -int MMG_loadMesh(pMesh ,char *); -int MMG_loadSol(pSol ,char *,int ); -int MMG_loadVect(pMesh ,char *,int ); -int MMG_saveMesh(pMesh ,char *); -int MMG_saveSol(pMesh ,pSol ,char *); -int MMG_saveVect(pMesh ,char *); +/* inout add param F.H. june 2011 (dataff) */ +int MMG_loadMesh(pMesh ,char *,void *); +int MMG_loadSol(pSol ,char *,int ,void *); +int MMG_loadVect(pMesh ,char *,int ,void *); +int MMG_saveMesh(pMesh ,char *,void *); +int MMG_saveSol(pMesh ,pSol ,char *,void *); +int MMG_saveVect(pMesh ,char *,void *); int MMG_loctet(pMesh ,int ,int ,double *,double *); int MMG_computeMetric(pMesh ,pSol ,int ,double * ); diff -r -u mmg3d4/build/sources/swapar.c mmg3d4-new/build/sources/swapar.c --- mmg3d4/build/sources/swapar.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-new/build/sources/swapar.c 2013-01-18 18:43:38.000000000 +0100 @@ -1,106 +1,107 @@ -/**************************************************************************** -Logiciel initial: MMG3D Version 4.0 -Co-auteurs : Cecile Dobrzynski et Pascal Frey. -Propriétaires :IPB - UPMC -INRIA. - -Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, -diffusé sous les termes et conditions de la licence publique générale de GNU -Version 3 ou toute version ultérieure. 
- -Ce fichier est une partie de MMG3D. -MMG3D est un logiciel libre ; vous pouvez le redistribuer et/ou le modifier -suivant les termes de la licence publique générale de GNU -Version 3 ou toute version ultérieure. -MMG3D est distribué dans l'espoir qu'il sera utile, mais SANS -AUCUNE GARANTIE ; sans même garantie de valeur marchande. -Voir la licence publique générale de GNU pour plus de détails. -MMG3D est diffusé en espérant qu’il sera utile, -mais SANS AUCUNE GARANTIE, ni explicite ni implicite, -y compris les garanties de commercialisation ou -d’adaptation dans un but spécifique. -Reportez-vous à la licence publique générale de GNU pour plus de détails. -Vous devez avoir reçu une copie de la licence publique générale de GNU -en même temps que ce document. -Si ce n’est pas le cas, aller voir . -/**************************************************************************** -Initial software: MMG3D Version 4.0 -Co-authors: Cecile Dobrzynski et Pascal Frey. -Owners: IPB - UPMC -INRIA. - -Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, -spread under the terms and conditions of the license GNU General Public License -as published Version 3, or (at your option) any later version. - -This file is part of MMG3D -MMG3D is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3 of the License, or -(at your option) any later version. -MMG3D is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with MMG3D. If not, see . 
-****************************************************************************/ -#include "mesh.h" - -int MMG_swapar(pMesh mesh,pSol sol,pQueue q,List *list,int lon,double crit,double declic) { - pTetra pt; - int i,l,jel,ncas,ddebug,iadr; - - MMG_swpptr = 0; - ncas = 0; - if ( !MMG_getnElt(mesh,10) ) return(-1); - if(0 && list->tetra[1]/6==2352) ddebug=1; - else ddebug=0; - - switch(lon) { - case 3: - ncas = MMG_simu32(mesh,sol,list,crit); - break; - case 4: - ncas = MMG_simu44(mesh,sol,list,crit); - break; - case 5: - ncas = MMG_simu56(mesh,sol,list,crit); - break; - case 6: - ncas = MMG_simu68(mesh,sol,list,crit); - break; - case 7: - ncas = MMG_simu710(mesh,sol,list,crit); - break; - default: - return(0); - } - if(ddebug) printf("on fait swap %d\n",ncas); - if ( ncas && MMG_swpptr ) { - if(ddebug) MMG_saveMesh(mesh,"avt.mesh"); - for (l=1; l<=lon; l++) { - jel = list->tetra[l]/6; - pt = &mesh->tetra[jel]; - if(ddebug) { - printf("tet %d : %d %d %d %d -- %d %d %d %d %d %d\n",jel,pt->v[0],pt->v[1],pt->v[2],pt->v[3], - pt->bdryinfo[0],pt->bdryinfo[1],pt->bdryinfo[2],pt->bdryinfo[3],pt->bdryinfo[4],pt->bdryinfo[5]); - - } - MMG_kiudel(q,jel); - } - lon = MMG_swpptr(mesh,sol,list); - assert(lon); - if(!lon) return(0); - - for (l=1; l<=lon; l++) { - jel = list->tetra[l]; - pt = &mesh->tetra[jel]; - if ( pt->qual >= declic ) MMG_kiuput(q,jel); - for (i=0; i<4; i++) mesh->point[pt->v[i]].flag = mesh->flag; - - } - if(ddebug) {MMG_saveMesh(mesh,"sw.mesh"); exit(0);} - return(1); - } - - return(0); -} +/**************************************************************************** +Logiciel initial: MMG3D Version 4.0 +Co-auteurs : Cecile Dobrzynski et Pascal Frey. +Propriétaires :IPB - UPMC -INRIA. + +Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, +diffusé sous les termes et conditions de la licence publique générale de GNU +Version 3 ou toute version ultérieure. + +Ce fichier est une partie de MMG3D. 
+MMG3D est un logiciel libre ; vous pouvez le redistribuer et/ou le modifier +suivant les termes de la licence publique générale de GNU +Version 3 ou toute version ultérieure. +MMG3D est distribué dans l'espoir qu'il sera utile, mais SANS +AUCUNE GARANTIE ; sans même garantie de valeur marchande. +Voir la licence publique générale de GNU pour plus de détails. +MMG3D est diffusé en espérant qu’il sera utile, +mais SANS AUCUNE GARANTIE, ni explicite ni implicite, +y compris les garanties de commercialisation ou +d’adaptation dans un but spécifique. +Reportez-vous à la licence publique générale de GNU pour plus de détails. +Vous devez avoir reçu une copie de la licence publique générale de GNU +en même temps que ce document. +Si ce n’est pas le cas, aller voir . +/**************************************************************************** +Initial software: MMG3D Version 4.0 +Co-authors: Cecile Dobrzynski et Pascal Frey. +Owners: IPB - UPMC -INRIA. + +Copyright © 2004-2005-2006-2007-2008-2009-2010-2011, +spread under the terms and conditions of the license GNU General Public License +as published Version 3, or (at your option) any later version. + +This file is part of MMG3D +MMG3D is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. +MMG3D is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +You should have received a copy of the GNU General Public License +along with MMG3D. If not, see . 
+****************************************************************************/ +#include "mesh.h" + +int MMG_swapar(pMesh mesh,pSol sol,pQueue q,List *list,int lon,double crit,double declic) { + pTetra pt; + int i,l,jel,ncas,ddebug,iadr; + + MMG_swpptr = 0; + ncas = 0; + if ( !MMG_getnElt(mesh,10) ) return(-1); + if(0 && list->tetra[1]/6==2352) ddebug=1; + else ddebug=0; + + switch(lon) { + case 3: + ncas = MMG_simu32(mesh,sol,list,crit); + break; + case 4: + ncas = MMG_simu44(mesh,sol,list,crit); + break; + case 5: + ncas = MMG_simu56(mesh,sol,list,crit); + break; + case 6: + ncas = MMG_simu68(mesh,sol,list,crit); + break; + case 7: + ncas = MMG_simu710(mesh,sol,list,crit); + break; + default: + return(0); + } + if(ddebug) printf("on fait swap %d\n",ncas); + if ( ncas && MMG_swpptr ) { + if(ddebug) MMG_saveMesh(mesh,"avt.mesh",0); + for (l=1; l<=lon; l++) { + jel = list->tetra[l]/6; + pt = &mesh->tetra[jel]; + if(ddebug) { + printf("tet %d : %d %d %d %d -- %d %d %d %d %d %d\n",jel,pt->v[0],pt->v[1],pt->v[2],pt->v[3], + pt->bdryinfo[0],pt->bdryinfo[1],pt->bdryinfo[2],pt->bdryinfo[3],pt->bdryinfo[4],pt->bdryinfo[5]); + + } + MMG_kiudel(q,jel); + } + lon = MMG_swpptr(mesh,sol,list); + assert(lon); + if(!lon) return(0); + + for (l=1; l<=lon; l++) { + jel = list->tetra[l]; + pt = &mesh->tetra[jel]; + if ( pt->qual >= declic ) MMG_kiuput(q,jel); + for (i=0; i<4; i++) mesh->point[pt->v[i]].flag = mesh->flag; + + } + if(ddebug) {MMG_saveMesh(mesh,"sw.mesh",0); exit(0);} + return(1); + } + + return(0); +} + diff -u mmg3d4/build/sources/inout.c mmg3d4-ok/build/sources/inout.c --- mmg3d4/build/sources/inout.c 2012-12-19 16:05:32.000000000 +0100 +++ mmg3d4-ok/build/sources/inout.c 2013-07-11 11:43:47.000000000 +0200 @@ -44,6 +44,7 @@ along with MMG3D. If not, see . 
****************************************************************************/ #include "mesh.h" +#include "dataff.h" extern short MMG_imprim; @@ -100,35 +101,467 @@ return(out); } + +int MMG_loadMeshff(pMesh mesh,char *filename,DataFF *dataff) { + + + Hedge hed,hed2; + pPoint ppt; + pTetra pt; + pTria pt1; + int i,j,k; + int nhex=0, npris=0, netmp=0,nq=0, pp[10] , ned=0, ncor=0; + int p0,p1,p2,p3,p4,p5,p6,ref ; + int data[10],ldata=10; + dataff->get_mesh(dataff,data,10); + mesh->np = data[ff_id_vertex]; + ned = data[ff_id_seg]; + mesh->nt = data[ff_id_tria]; + mesh->ne = data[ff_id_tet]; + netmp=mesh->ne; + nhex=data[ff_id_hex]; + npris=data[ff_id_prism]; + nq=data[ff_id_quad]; + ncor= data[ff_id_corner]; + + if( nhex || npris) { + printf("mmg3d ff interface , hex or prism NOT SUPPORTED to day (sorry FH) \n"); + goto L0; + } + + mesh->ncor = 0; + mesh->ver =1; + if ( abs(mesh->info.option)==10 ) { + fprintf(stdout," -- READING %8d HEXA %8d PRISMS\n",nhex,npris); + if(!mesh->ne) netmp = 0; + mesh->ne += 6*nhex + 3*npris; + } + + if ( abs(mesh->info.imprim) > 5 ) + fprintf(stdout," -- READING DATA for ff interface "); + + if ( !mesh->np || !mesh->ne ) { + fprintf(stdout," ** MISSING DATA yy\n"); + goto L0; ; + } + if ( !MMG_zaldy(mesh) ) goto L0; + + /* read mesh vertices */ + mesh->npfixe = mesh->np; + + for (k=1; k<=mesh->np; k++) { + ppt = &mesh->point[k]; + dataff->get_v3(dataff,k,ppt->c,&ppt->ref); + ppt->tag = M_UNUSED; + } + + /* read mesh triangles */ + mesh->ntfixe = mesh->nt; + + for (k=1; k<=mesh->nt; k++) { + pt1 = &mesh->tria[k]; + dataff->get_elmt(dataff,ff_id_tria,k,pt1->v,&pt1->ref); + + } + + + + /* read mesh quads (option 10)*/ + if(abs(mesh->info.option)==10) { + fprintf(stdout," QUADS READING %d\n",nq); + mesh->ntfixe += 4*nq; + for (k=1; k<=nq; k++) { + dataff->get_elmt(dataff, ff_id_quad ,k,pp,&ref); + + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = pp[0]; + pt1->v[1] = pp[1]; + pt1->v[2] = pp[2]; + pt1->ref = ref; + pt1 = 
&mesh->tria[++mesh->nt]; + pt1->v[0] = pp[0]; + pt1->v[1] = pp[2]; + pt1->v[2] = pp[3]; + pt1->ref = ref; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = pp[0]; + pt1->v[1] = pp[1]; + pt1->v[2] = pp[3]; + pt1->ref = ref; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = pp[1]; + pt1->v[1] = pp[2]; + pt1->v[2] = pp[3]; + pt1->ref = ref; + + } + } + + /*read and store edges*/ + if (ned) { + if ( !MMG_zaldy4(&hed,ned) ) { + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : EDGES IGNORED\n"); + ned = 0; + } + mesh->ned = ned; + + for (k=1; k<=ned; k++) { + dataff->get_elmt(dataff, ff_id_seg ,k,pp,&ref); + + if(MMG_edgePut(&hed,pp[0],pp[1],2)>1) { + fprintf(stdout," ## WARNING DOUBLE EDGE : %d %d\n",pp[0],pp[1]); + } + } + } + + /* read mesh tetrahedra */ + mesh->nefixe = mesh->ne; + + + for (k=1; k<=netmp; k++) { + pt = &mesh->tetra[k]; + dataff->get_elmt(dataff,ff_id_tet,k,pt->v,&ref); + pt->ref = ref;//0;//ref ; + for(i=0 ; i<4 ; i++) + pt->bdryref[i] = -1; + + if (ned) { int nu1,nu2; + for(i=0 ; i<6 ; i++) { + nu1 = pt->v[MMG_iare[i][0]]; + nu2 = pt->v[MMG_iare[i][1]]; + pt->bdryinfo[i] = MMG_edgePoint(&hed,nu1,nu2); + } + + } else { + for(i=0 ; i<6 ; i++) + pt->bdryinfo[i] = 0; + } + } + if (ned) M_free(hed.item); + + /*read corners*/ + if (ncor) { + + mesh->ncor = ncor; + for (k=1; k<=ncor; k++) { + dataff->get_elmt(dataff,ff_id_corner,k,&ref,0); + + ppt = &mesh->point[ref]; + ppt->geom = M_CORNER; + } + } +#ifdef XXXXXXXXXXXXXXX + if ( abs(mesh->info.option)==10 ) { + if(bin) { + printf("NOT SUPPORTED\n"); + exit(0); + } + if ( !MMG_zaldy4(&hed2,3*npris+6*nhex) ) { + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : PRISM IGNORED\n"); + npris = 0; + nhex = 0; + } + + /*read hexa and transform to tetra*/ + rewind(inm); + fseek(inm,posnhex,SEEK_SET); + for (k=1; k<=nhex; k++) { + fscanf(inm,"%d %d %d %d %d %d %d %d %d",&p0,&p1,&p2,&p3,&p4,&p5,&p6,&p7,&ref); + //fscanf(inm,"%d %d %d %d %d %d %d %d 
%d",&p0,&p4,&p2,&p1,&p3,&p5,&p6,&p7,&ref); + //printf("hex %d : %d %d %d %d %d %d %d %d\n",k,p0,p1,p2,p3,p4,p5,p6,p7); + MMG_cuthex(mesh,&hed2,netmp+(k-1)*6,p0,p1,p2,p3,p4,p5,p6,p7,ref); + } + + /*read prism and transform to tetra + ---> compatibility pbs ==> hash edge and switch case*/ + rewind(inm); + fseek(inm,posnpris,SEEK_SET); + nimp = 0; + ne = netmp+6*nhex; + for (k=1; k<=npris; k++) { + fscanf(inm,"%d %d %d %d %d %d %d",&p0,&p1,&p2,&p3,&p4,&p5,&ref); + if(!MMG_cutprism(mesh,&hed2,ne,p0,p1,p2,p3,p4,p5,ref)) + { + if(mesh->info.imprim < 0 ) fprintf(stdout,"DECOMPOSITION PRISM INVALID \n\n"); + mesh->ne += 5; + ne += 8; + nimp++; + continue; + } + ne += 3; + } + if(abs(mesh->info.imprim) > 3 )fprintf(stdout," %d INVALID DECOMPOSITION\n\n",nimp); + } +#endif + if ( abs(mesh->info.imprim) > 3 ) { + fprintf(stdout," NUMBER OF GIVEN VERTICES %8d\n",mesh->npfixe); + if ( mesh->ntfixe ) + fprintf(stdout," NUMBER OF GIVEN TRIANGLES %8d\n",mesh->ntfixe); + fprintf(stdout," NUMBER OF GIVEN TETRAHEDRA %8d\n",mesh->nefixe); + if ( ncor ) + fprintf(stdout," NUMBER OF GIVEN CORNERS %8d\n",ncor); + if ( ned ) + fprintf(stdout," NUMBER OF GIVEN EDGES %8d\n",ned); + } + // MMG_saveMesh(mesh,"XXXXX.mesh",0); + dataff->mesh=0; // used + return 1; +L0: + dataff->mesh=0;// used + return 1; +} + +int MMG_loadSolff(pSol sol,char *filename,int npmax,DataFF *dataff) { + + double tmp , *dsol ; + int binch,bdim,iswp; + int k,i,isol,type,bin,dim,btyp,bpos; + long posnp; + char *ptr,data[128],chaine[128]; + if( ! dataff->sol){ + fprintf(stdout," ** MISSING DATA metrix ff \n"); + return(1); + } + dsol = dataff->sol; + dataff->sol=0;// used + + + btyp = (dataff->typesol== 6) ? 3: dataff->typesol ; + sol->np= dataff->np; + + if ( !sol->np ) { + fprintf(stdout," ** MISSING DATA zz\n"); + return(1); + } + + if ( btyp!= 1 && btyp!=3 ) { + fprintf(stdout," ** DATA IGNORED (ff) btyp=%d\n",btyp); + sol->np = 0; + return(1); + } + + sol->offset = (btyp==1) ? 
1 : 6; + + if ( abs(MMG_imprim) > 5 ) + fprintf(stdout," -- READING DATA FILE(ff) %s\n",data); + + if ( !sol->np ) { + fprintf(stdout," ** MISSING DATA no metrix \n"); + return(0); + } + sol->npfixe = sol->np; + sol->npmax = npmax; + if ( !MMG_zaldy3(sol) ) return(0); + + /* read mesh solutions */ + sol->npfixe = sol->np; + + for (k=1; k<=sol->np; k++) { + isol = (k-1) * sol->offset + 1; + for (i=0; ioffset; i++) + sol->met[isol + i] = *dsol++; + + } + + if ( abs(MMG_imprim) > 3 ) + fprintf(stdout," NUMBER OF GIVEN DATA %8d\n",sol->npfixe); + + + return(1); + +} +/* load solution (metric) */ +int MMG_loadVectff(pMesh mesh,char *filename,int npmax,DataFF *dataff) { + + + pDispl pd; + int binch,bdim,iswp; + int k,i,type,bin,dim,btyp,bpos,iadr; + long posnp; + char *ptr,data[128],chaine[128]; + double *fsol = dataff->mov; + dataff->mov=0;// used + + pd = mesh->disp; + pd->np =mesh->np ; + + if ( !pd->np || !fsol ) { + fprintf(stdout," ** MISSING DATA dep ff %d %p\n",pd->np, fsol); + return(0); + } + + + if ( abs(mesh->info.imprim) > 5 ) + fprintf(stdout," -- COPY DATA form ff interface %s\n",data); + + /* read mesh solutions */ + for (k=1; k<=pd->np; k++) { + iadr = (k - 1) * 3 + 1; + + for (i=0; i<3; i++) { + pd->mv[iadr + i] = *fsol++; + } + } + + + + if ( abs(mesh->info.imprim) > 3 ) + fprintf(stdout," NUMBER OF GIVEN DATA %8d\n",pd->np); + return(1); + + +} +int MMG_saveMeshff(pMesh mesh,char *filename,DataFF *dataff) { + + pPoint ppt; + pTetra pt; + pTria pt1; + int j,k,np,nc,k0; + int data[10],ldata=10; + int kn[10]; + np = 0; + nc = 0; + // compress vertex ... fist case ... + for (k=1; k<=mesh->np; k++) { + ppt = &mesh->point[k]; + if ( ppt->tag & M_UNUSED ) continue; + ppt->tmp = ++np; + /* if ( ppt->geom & M_CORNER ) cor[nc++] = ppt->tmp; */ + } + + /* seacrch vertex not in tet ???? 
*/ + { + int kk=0,npp=np; + for (k=1; k<=mesh->np; k++) + { + ppt = &mesh->point[k]; + if ((ppt->tag & M_UNUSED )) ppt->tmp =-2; + else ppt->tmp =-1; + + } + for ( k=1; k<=mesh->ne; k++) + { + pt = &mesh->tetra[k]; + if ( !pt->v[0] ) continue; + k0++; + for(j=0;j<4;++j) + mesh->point[pt->v[j]].tmp=0; + } + np=0; + for (k=1; k<=mesh->np; k++) + { + ppt = &mesh->point[k]; + + if ( ppt->tmp ==0) + ppt->tmp = ++np; + else + { + + if (ppt->tmp==-1) + { + kk++; + ppt->tag |= M_UNUSED; + } + ppt->tmp=0; + } + } + if(kk) printf(" mmg3d: Strange nb of point %d not in tet is not zero (correct by FH)!\n" , kk); + + } + + mesh->nt=0; + if(! MMG_markBdry(mesh)) + mesh->nt=0; + data[ff_id_vertex]=np ; + //data[ff_id_seg]=ned ; + data[ff_id_tria]=mesh->nt ; + int ne=0; + for ( k=1; k<=mesh->ne; k++) + { + pt = &mesh->tetra[k]; + if ( !pt->v[0] ) continue; + ne++; + } + + data[ff_id_tet]=ne ; + printf(" mmg3d: nbp = %d, nb tet %d\n",np,ne); + //data[ff_id_hex]=nhex; + //data[ff_id_prism]=npris; + //data[ff_id_quad]=nq; + // data[ff_id_corner]=ncor; + + dataff->set_mesh(dataff,data,10); + + + for ( k=1; k<=mesh->np; k++) + { + ppt = &mesh->point[k]; + if ( ! 
ppt->tmp ) continue; + dataff->set_v(dataff, ppt->tmp,ppt->c,ppt->ref); + } + + + for (k=1; k<=mesh->nt; k++) + { + pt1 = &mesh->tria[k]; + for(j=0;j<3;++j) + kn[j] = mesh->point[pt1->v[j]].tmp; + dataff->set_elmt(dataff,ff_id_tria,k,kn,pt1->ref); + + } + k0=0; + for ( k=1; k<=mesh->ne; k++) + { + + pt = &mesh->tetra[k]; + if ( !pt->v[0] ) continue; + k0++; + for(j=0;j<4;++j) + kn[j] = mesh->point[pt->v[j]].tmp; + dataff->set_elmt(dataff,ff_id_tet,k0,kn,pt->ref); + + } + dataff->end_mesh(dataff); + return 1; +} +int MMG_saveSolff (pMesh mesh,pSol sol,char *filename,DataFF *dataff) { + return 1; +} +/*save the node speed : coornew-coorold/dt*/ +int MMG_saveVectff(pMesh mesh,char *filename,DataFF *dataff) { + return 1; +} /* read mesh data */ -int MMG_loadMesh(pMesh mesh,char *filename) { +int MMG_loadMesh(pMesh mesh,char *filename,void *dataff) { FILE* inm; Hedge hed,hed2; pPoint ppt; pTetra pt; pHexa ph,listhexa; pTria pt1; - int k,dim,ref,bin,bpos,i,tmp; - int *adjahex; + int k,dim,ref,bin,bpos,i,tmp; + int *adjahex; long posnp,posnt,posne,posnhex,posnpris,posncor,posned,posnq; char *ptr,data[128],chaine[128]; int nhex,npris,netmp,ncor,ned,nq; - int p0,p1,p2,p3,p4,p5,p6,p7; - int binch,bdim,iswp,nu1,nu2,nimp,ne,nbado; - float fc; - double volhex,volref; + int p0,p1,p2,p3,p4,p5,p6,p7; + int binch,bdim,iswp,nu1,nu2,nimp,ne,nbado; + float fc; + double volhex,volref; int iadr,reorient; - + if(dataff) + return MMG_loadMeshff( mesh,filename,(DataFF*) dataff); + posnp = posnt = posne = posnhex = posnpris = 0; netmp = ncor = ned = 0; bin = 0; iswp = 0; - mesh->np = mesh->nt = mesh->ne = mesh->ncor = 0; + mesh->np = mesh->nt = mesh->ne = mesh->ncor = 0; npris = nhex = nq = 0; - + strcpy(data,filename); - ptr = strstr(data,".mesh"); + ptr = strstr(data,".mesh"); if ( !ptr ) { strcat(data,".meshb"); if( !(inm = fopen(data,"rb")) ) { @@ -136,8 +569,8 @@ *ptr = '\0'; strcat(data,".mesh"); if( !(inm = fopen(data,"r")) ) { - fprintf(stderr," ** %s NOT FOUND.\n",data); - 
return(0); + fprintf(stderr," ** %s NOT FOUND.\n",data); + return(0); } } else { bin = 1; @@ -147,194 +580,194 @@ ptr = strstr(data,".meshb"); if( !ptr ) { if( !(inm = fopen(data,"r")) ) { - fprintf(stderr," ** %s NOT FOUND.\n",data); - return(0); - } + fprintf(stderr," ** %s NOT FOUND.\n",data); + return(0); + } } else { bin = 1; if( !(inm = fopen(data,"rb")) ) { - fprintf(stderr," ** %s NOT FOUND.\n",data); - return(0); + fprintf(stderr," ** %s NOT FOUND.\n",data); + return(0); } - - } + + } } fprintf(stdout," %%%% %s OPENED\n",data); if (!bin) { - strcpy(chaine,"D"); - while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { + strcpy(chaine,"D"); + while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { if(!strncmp(chaine,"MeshVersionFormatted",strlen("MeshVersionFormatted"))) { - fscanf(inm,"%d",&mesh->ver); - continue; + fscanf(inm,"%d",&mesh->ver); + continue; } else if(!strncmp(chaine,"Dimension",strlen("Dimension"))) { - fscanf(inm,"%d",&dim); - if(dim!=3) { - fprintf(stdout,"BAD DIMENSION : %d\n",dim); - return(0); - } - continue; + fscanf(inm,"%d",&dim); + if(dim!=3) { + fprintf(stdout,"BAD DIMENSION : %d\n",dim); + return(0); + } + continue; } else if(!strncmp(chaine,"Vertices",strlen("Vertices"))) { - fscanf(inm,"%d",&mesh->np); - posnp = ftell(inm); - continue; + fscanf(inm,"%d",&mesh->np); + posnp = ftell(inm); + continue; } else if(!strncmp(chaine,"Triangles",strlen("Triangles"))) { - fscanf(inm,"%d",&mesh->nt); - posnt = ftell(inm); - continue; + fscanf(inm,"%d",&mesh->nt); + posnt = ftell(inm); + continue; } else if(!strncmp(chaine,"Tetrahedra",strlen("Tetrahedra"))) { - fscanf(inm,"%d",&mesh->ne); - netmp = mesh->ne; - posne = ftell(inm); - continue; - } else if(!strncmp(chaine,"Hexahedra",strlen("Hexahedra"))) { - assert(abs(mesh->info.option)==10); - fscanf(inm,"%d",&nhex); - //nhex=0; - posnhex = ftell(inm); - continue; - } else if(!strncmp(chaine,"Pentahedra",strlen("Pentahedra"))) { - 
assert(abs(mesh->info.option)==10); - fscanf(inm,"%d",&npris); - //npris=0; - posnpris = ftell(inm); - continue; - } else if(!strncmp(chaine,"Corners",strlen("Corners"))) { - fscanf(inm,"%d",&ncor); - posncor = ftell(inm); - continue; - } else if(!strncmp(chaine,"Edges",strlen("Edges"))) { - fscanf(inm,"%d",&ned); - posned = ftell(inm); - continue; - } else if(abs(mesh->info.option)==10 && !strncmp(chaine,"Quadrilaterals",strlen("Quadrilaterals"))) { - fscanf(inm,"%d",&nq); - posnq = ftell(inm); - continue; - } - } + fscanf(inm,"%d",&mesh->ne); + netmp = mesh->ne; + posne = ftell(inm); + continue; + } else if(!strncmp(chaine,"Hexahedra",strlen("Hexahedra"))) { + assert(abs(mesh->info.option)==10); + fscanf(inm,"%d",&nhex); + //nhex=0; + posnhex = ftell(inm); + continue; + } else if(!strncmp(chaine,"Pentahedra",strlen("Pentahedra"))) { + assert(abs(mesh->info.option)==10); + fscanf(inm,"%d",&npris); + //npris=0; + posnpris = ftell(inm); + continue; + } else if(!strncmp(chaine,"Corners",strlen("Corners"))) { + fscanf(inm,"%d",&ncor); + posncor = ftell(inm); + continue; + } else if(!strncmp(chaine,"Edges",strlen("Edges"))) { + fscanf(inm,"%d",&ned); + posned = ftell(inm); + continue; + } else if(abs(mesh->info.option)==10 && !strncmp(chaine,"Quadrilaterals",strlen("Quadrilaterals"))) { + fscanf(inm,"%d",&nq); + posnq = ftell(inm); + continue; + } + } } else { bdim = 0; fread(&mesh->ver,sw,1,inm); - iswp=0; - if(mesh->ver==16777216) - iswp=1; + iswp=0; + if(mesh->ver==16777216) + iswp=1; else if(mesh->ver!=1) { fprintf(stdout,"BAD FILE ENCODING\n"); - } - fread(&mesh->ver,sw,1,inm); - if(iswp) mesh->ver = MMG_swapbin(mesh->ver); - while(fread(&binch,sw,1,inm)!=0 && binch!=54 ) { - if(iswp) binch=MMG_swapbin(binch); - if(binch==54) break; + } + fread(&mesh->ver,sw,1,inm); + if(iswp) mesh->ver = MMG_swapbin(mesh->ver); + while(fread(&binch,sw,1,inm)!=0 && binch!=54 ) { + if(iswp) binch=MMG_swapbin(binch); + if(binch==54) break; if(!bdim && binch==3) { //Dimension - 
fread(&bdim,sw,1,inm); //NulPos=>20 - if(iswp) bdim=MMG_swapbin(bdim); - fread(&bdim,sw,1,inm); - if(iswp) bdim=MMG_swapbin(bdim); - if(bdim!=3) { - fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); - exit(0); - return(1); - } - continue; + fread(&bdim,sw,1,inm); //NulPos=>20 + if(iswp) bdim=MMG_swapbin(bdim); + fread(&bdim,sw,1,inm); + if(iswp) bdim=MMG_swapbin(bdim); + if(bdim!=3) { + fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); + exit(0); + return(1); + } + continue; } else if(!mesh->np && binch==4) { //Vertices - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&mesh->np,sw,1,inm); - if(iswp) mesh->np=MMG_swapbin(mesh->np); - posnp = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&mesh->np,sw,1,inm); + if(iswp) mesh->np=MMG_swapbin(mesh->np); + posnp = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; } else if(!mesh->nt && binch==6) {//Triangles - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&mesh->nt,sw,1,inm); - if(iswp) mesh->nt=MMG_swapbin(mesh->nt); - posnt = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else if(!mesh->ne && binch==8) { - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&mesh->ne,sw,1,inm); - if(iswp) mesh->ne=MMG_swapbin(mesh->ne); - netmp = mesh->ne; - posne = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else if(!nhex && binch==10) { - assert(abs(mesh->info.option)==10); - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&nhex,sw,1,inm); - if(iswp) nhex=MMG_swapbin(nhex); - posnhex = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else if(!npris && binch==9) { - assert(abs(mesh->info.option)==10); - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&npris,sw,1,inm); - if(iswp) npris=MMG_swapbin(npris); - posnpris = 
ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else if(!ncor && binch==13) { - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&ncor,sw,1,inm); - if(iswp) ncor=MMG_swapbin(ncor); - posncor = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else if(!ned && binch==5) { //Edges - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - fread(&ned,sw,1,inm); - if(iswp) ned=MMG_swapbin(ned); - posned = ftell(inm); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - continue; - } else { - //printf("on traite ? %d\n",binch); - fread(&bpos,sw,1,inm); //NulPos - if(iswp) bpos=MMG_swapbin(bpos); - //printf("on avance... Nulpos %d\n",bpos); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - } - } - + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&mesh->nt,sw,1,inm); + if(iswp) mesh->nt=MMG_swapbin(mesh->nt); + posnt = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else if(!mesh->ne && binch==8) { + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&mesh->ne,sw,1,inm); + if(iswp) mesh->ne=MMG_swapbin(mesh->ne); + netmp = mesh->ne; + posne = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else if(!nhex && binch==10) { + assert(abs(mesh->info.option)==10); + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&nhex,sw,1,inm); + if(iswp) nhex=MMG_swapbin(nhex); + posnhex = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else if(!npris && binch==9) { + assert(abs(mesh->info.option)==10); + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&npris,sw,1,inm); + if(iswp) npris=MMG_swapbin(npris); + posnpris = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else if(!ncor && binch==13) { + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&ncor,sw,1,inm); + if(iswp) ncor=MMG_swapbin(ncor); + posncor 
= ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else if(!ned && binch==5) { //Edges + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + fread(&ned,sw,1,inm); + if(iswp) ned=MMG_swapbin(ned); + posned = ftell(inm); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + continue; + } else { + //printf("on traite ? %d\n",binch); + fread(&bpos,sw,1,inm); //NulPos + if(iswp) bpos=MMG_swapbin(bpos); + //printf("on avance... Nulpos %d\n",bpos); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + } + } + } if ( abs(mesh->info.option)==10 ) { - fprintf(stdout," -- READING %8d HEXA %8d PRISMS\n",nhex,npris); - if(!mesh->ne) netmp = 0; - mesh->ne += 6*nhex + 3*npris; + fprintf(stdout," -- READING %8d HEXA %8d PRISMS\n",nhex,npris); + if(!mesh->ne) netmp = 0; + mesh->ne += 6*nhex + 3*npris; } if ( abs(mesh->info.imprim) > 5 ) fprintf(stdout," -- READING DATA FILE %s\n",data); if ( !mesh->np || !mesh->ne ) { - fprintf(stdout," ** MISSING DATA\n"); + fprintf(stdout," ** MISSING DAT qqA\n"); return(0); } - if(abs(mesh->info.option)==10) { //allocation + if(abs(mesh->info.option)==10) { //allocation listhexa = (pHexa)M_calloc(nhex+1,sizeof(Hexa),"allochexa"); - assert(listhexa); - adjahex = (int*)M_calloc(6*nhex+7,sizeof(int),"allocadjhexa"); - assert(adjahex); - } + assert(listhexa); + adjahex = (int*)M_calloc(6*nhex+7,sizeof(int),"allocadjhexa"); + assert(adjahex); + } if ( !MMG_zaldy(mesh) ) return(0); /* read mesh vertices */ mesh->npfixe = mesh->np; @@ -342,35 +775,35 @@ fseek(inm,posnp,SEEK_SET); for (k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; - if (mesh->ver < 2) { /*float*/ + if (mesh->ver < 2) { /*float*/ if (!bin) { - for (i=0 ; i<3 ; i++) { - fscanf(inm,"%f",&fc); - ppt->c[i] = (double) fc; - } - fscanf(inm,"%d",&ppt->ref); + for (i=0 ; i<3 ; i++) { + fscanf(inm,"%f",&fc); + ppt->c[i] = (double) fc; + } + fscanf(inm,"%d",&ppt->ref); } else { - for (i=0 ; i<3 ; i++) { - fread(&fc,sw,1,inm); - if(iswp) fc=MMG_swapf(fc); - ppt->c[i] = 
(double) fc; - } - fread(&ppt->ref,sw,1,inm); - if(iswp) ppt->ref=MMG_swapbin(ppt->ref); + for (i=0 ; i<3 ; i++) { + fread(&fc,sw,1,inm); + if(iswp) fc=MMG_swapf(fc); + ppt->c[i] = (double) fc; + } + fread(&ppt->ref,sw,1,inm); + if(iswp) ppt->ref=MMG_swapbin(ppt->ref); } } else { - if (!bin) - fscanf(inm,"%lf %lf %lf %d",&ppt->c[0],&ppt->c[1],&ppt->c[2],&ppt->ref); + if (!bin) + fscanf(inm,"%lf %lf %lf %d",&ppt->c[0],&ppt->c[1],&ppt->c[2],&ppt->ref); else { - for (i=0 ; i<3 ; i++) { - fread(&ppt->c[i],sd,1,inm); - if(iswp) ppt->c[i]=MMG_swapd(ppt->c[i]); - } - fread(&ppt->ref,sw,1,inm); - if(iswp) ppt->ref=MMG_swapbin(ppt->ref); - } - } - ppt->tag = M_UNUSED; + for (i=0 ; i<3 ; i++) { + fread(&ppt->c[i],sd,1,inm); + if(iswp) ppt->c[i]=MMG_swapd(ppt->c[i]); + } + fread(&ppt->ref,sw,1,inm); + if(iswp) ppt->ref=MMG_swapbin(ppt->ref); + } + } + ppt->tag = M_UNUSED; } /* read mesh triangles */ @@ -378,227 +811,226 @@ rewind(inm); fseek(inm,posnt,SEEK_SET); for (k=1; k<=mesh->nt; k++) { - pt1 = &mesh->tria[k]; + pt1 = &mesh->tria[k]; if (!bin) fscanf(inm,"%d %d %d %d",&pt1->v[0],&pt1->v[1],&pt1->v[2],&pt1->ref); else { - for (i=0 ; i<3 ; i++) { - fread(&pt1->v[i],sw,1,inm); - if(iswp) pt1->v[i]=MMG_swapbin(pt1->v[i]); - } - fread(&pt1->ref,sw,1,inm); - if(iswp) pt1->ref=MMG_swapbin(pt1->ref); - } - } - /* read mesh quads (option 10)*/ - if(abs(mesh->info.option)==10) { - fprintf(stdout," QUADS READING %d\n",nq); + for (i=0 ; i<3 ; i++) { + fread(&pt1->v[i],sw,1,inm); + if(iswp) pt1->v[i]=MMG_swapbin(pt1->v[i]); + } + fread(&pt1->ref,sw,1,inm); + if(iswp) pt1->ref=MMG_swapbin(pt1->ref); + } + } + /* read mesh quads (option 10)*/ + if(abs(mesh->info.option)==10) { + fprintf(stdout," QUADS READING %d\n",nq); mesh->ntfixe += 4*nq; rewind(inm); fseek(inm,posnq,SEEK_SET); for (k=1; k<=nq; k++) { if (!bin) - fscanf(inm,"%d %d %d %d %d",&p0,&p1,&p2,&p3,&ref); + fscanf(inm,"%d %d %d %d %d",&p0,&p1,&p2,&p3,&ref); else { - fread(&p0,sw,1,inm); - if(iswp) p0=MMG_swapbin(p0); - 
fread(&p1,sw,1,inm); - if(iswp) p1=MMG_swapbin(p1); - fread(&p2,sw,1,inm); - if(iswp) p2=MMG_swapbin(p2); - fread(&p3,sw,1,inm); - if(iswp) p3=MMG_swapbin(p3); - fread(&pt1->ref,sw,1,inm); - if(iswp) ref=MMG_swapbin(ref); - } + fread(&p0,sw,1,inm); + if(iswp) p0=MMG_swapbin(p0); + fread(&p1,sw,1,inm); + if(iswp) p1=MMG_swapbin(p1); + fread(&p2,sw,1,inm); + if(iswp) p2=MMG_swapbin(p2); + fread(&p3,sw,1,inm); + if(iswp) p3=MMG_swapbin(p3); + fread(&pt1->ref,sw,1,inm); + if(iswp) ref=MMG_swapbin(ref); + } /*creation of 4 triangles per quads because we don't know how hexa will be cut*/ - pt1 = &mesh->tria[++mesh->nt]; - pt1->v[0] = p0; - pt1->v[1] = p1; - pt1->v[2] = p2; - pt1->ref = ref; - pt1 = &mesh->tria[++mesh->nt]; - pt1->v[0] = p0; - pt1->v[1] = p2; - pt1->v[2] = p3; - pt1->ref = ref; - pt1 = &mesh->tria[++mesh->nt]; - pt1->v[0] = p0; - pt1->v[1] = p1; - pt1->v[2] = p3; - pt1->ref = ref; - pt1 = &mesh->tria[++mesh->nt]; - pt1->v[0] = p1; - pt1->v[1] = p2; - pt1->v[2] = p3; - pt1->ref = ref; - - } - } - /*read and store edges*/ - if (ned) { - if ( !MMG_zaldy4(&hed,ned) ) { - if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : EDGES IGNORED\n"); - ned = 0; - } - mesh->ned = ned; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = p0; + pt1->v[1] = p1; + pt1->v[2] = p2; + pt1->ref = ref; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = p0; + pt1->v[1] = p2; + pt1->v[2] = p3; + pt1->ref = ref; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = p0; + pt1->v[1] = p1; + pt1->v[2] = p3; + pt1->ref = ref; + pt1 = &mesh->tria[++mesh->nt]; + pt1->v[0] = p1; + pt1->v[1] = p2; + pt1->v[2] = p3; + pt1->ref = ref; + + } + } + /*read and store edges*/ + if (ned) { + if ( !MMG_zaldy4(&hed,ned) ) { + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : EDGES IGNORED\n"); + ned = 0; + } + mesh->ned = ned; rewind(inm); - fseek(inm,posned,SEEK_SET); - for (k=1; k<=ned; k++) { + fseek(inm,posned,SEEK_SET); + for (k=1; k<=ned; k++) { if (!bin) - 
fscanf(inm,"%d %d %d",&nu1,&nu2,&ref); + fscanf(inm,"%d %d %d",&nu1,&nu2,&ref); else { - fread(&nu1,sw,1,inm); - if(iswp) nu1=MMG_swapbin(nu1); - fread(&nu2,sw,1,inm); - if(iswp) nu2=MMG_swapbin(nu2); - fread(&ref,sw,1,inm); - if(iswp) ref=MMG_swapbin(ref); - } - if(MMG_edgePut(&hed,nu1,nu2,2)>1) { - fprintf(stdout," ## WARNING DOUBLE EDGE : %d %d\n",nu1,nu2); - } - mesh->point[nu1].geom = M_RIDGE_GEO; - mesh->point[nu2].geom = M_RIDGE_GEO; + fread(&nu1,sw,1,inm); + if(iswp) nu1=MMG_swapbin(nu1); + fread(&nu2,sw,1,inm); + if(iswp) nu2=MMG_swapbin(nu2); + fread(&ref,sw,1,inm); + if(iswp) ref=MMG_swapbin(ref); + } + if(MMG_edgePut(&hed,nu1,nu2,2)>1) { + fprintf(stdout," ## WARNING DOUBLE EDGE : %d %d\n",nu1,nu2); + } + mesh->point[nu1].geom = M_RIDGE_GEO; + mesh->point[nu2].geom = M_RIDGE_GEO; } } /* read mesh tetrahedra */ mesh->nefixe = mesh->ne; rewind(inm); - fseek(inm,posne,SEEK_SET); - reorient = 0; - for (k=1; k<=netmp; k++) { + fseek(inm,posne,SEEK_SET); + reorient = 0; + for (k=1; k<=netmp; k++) { pt = &mesh->tetra[k]; - if (!bin) - fscanf(inm,"%d %d %d %d %d",&pt->v[0],&pt->v[1],&pt->v[2],&pt->v[3],&ref); - else { - - for (i=0 ; i<4 ; i++) { - fread(&pt->v[i],sw,1,inm); - if(iswp) pt->v[i]=MMG_swapbin(pt->v[i]); - } - fread(&ref,sw,1,inm); - if(iswp) ref=MMG_swapbin(ref); - } + if (!bin) + fscanf(inm,"%d %d %d %d %d",&pt->v[0],&pt->v[1],&pt->v[2],&pt->v[3],&ref); + else { + + for (i=0 ; i<4 ; i++) { + fread(&pt->v[i],sw,1,inm); + if(iswp) pt->v[i]=MMG_swapbin(pt->v[i]); + } + fread(&ref,sw,1,inm); + if(iswp) ref=MMG_swapbin(ref); + } pt->ref = ref;//0;//ref ; /*check orientation*/ - volref = MMG_voltet(mesh,k); - if(volref < 0) { - reorient++; - tmp = pt->v[2]; - pt->v[2] = pt->v[3]; - pt->v[3] = tmp; - } - + volref = MMG_voltet(mesh,k); + if(volref < 0) { + if(!reorient) { + fprintf(stdout,"\n $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ \n"); + fprintf(stdout," BAD ORIENTATION : vol < 0 -- Some tetra will be reoriented\n"); + 
fprintf(stdout," $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ \n\n"); + reorient = 1; + } + tmp = pt->v[2]; + pt->v[2] = pt->v[3]; + pt->v[3] = tmp; + } + for(i=0 ; i<4 ; i++) - pt->bdryref[i] = -1; - - if (ned) { - for(i=0 ; i<6 ; i++) { - nu1 = pt->v[MMG_iare[i][0]]; - nu2 = pt->v[MMG_iare[i][1]]; - pt->bdryinfo[i] = MMG_edgePoint(&hed,nu1,nu2); - } - - } else { - for(i=0 ; i<6 ; i++) - pt->bdryinfo[i] = 0; - } + pt->bdryref[i] = -1; + + if (ned) { + for(i=0 ; i<6 ; i++) { + nu1 = pt->v[MMG_iare[i][0]]; + nu2 = pt->v[MMG_iare[i][1]]; + pt->bdryinfo[i] = MMG_edgePoint(&hed,nu1,nu2); + } + + } else { + for(i=0 ; i<6 ; i++) + pt->bdryinfo[i] = 0; + } } - if(reorient) { - fprintf(stdout,"\n $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ \n"); - fprintf(stdout," BAD ORIENTATION : vol < 0 -- %8d tetra reoriented\n",reorient); - fprintf(stdout," $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ \n\n"); - reorient = 1; - } - if (ned) M_free(hed.item); + if (ned) M_free(hed.item); - /*read corners*/ + /*read corners*/ if (ncor) { rewind(inm); - fseek(inm,posncor,SEEK_SET); + fseek(inm,posncor,SEEK_SET); mesh->ncor = ncor; - for (k=1; k<=ncor; k++) { + for (k=1; k<=ncor; k++) { if (!bin) - fscanf(inm,"%d",&ref); + fscanf(inm,"%d",&ref); else { - fread(&ref,sw,1,inm); - if(iswp) ref=MMG_swapbin(ref); - } + fread(&ref,sw,1,inm); + if(iswp) ref=MMG_swapbin(ref); + } ppt = &mesh->point[ref]; - ppt->geom = M_CORNER; - } + ppt->geom = M_CORNER; + } } - - - if ( abs(mesh->info.option)==10 ) { + + + if ( abs(mesh->info.option)==10 ) { if(bin) { printf("NOT SUPPORTED\n"); exit(0); - } - if ( !MMG_zaldy4(&hed2,3*npris+6*nhex) ) { - if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : PRISM IGNORED\n"); - npris = 0; - nhex = 0; - } + } + if ( !MMG_zaldy4(&hed2,3*npris+6*nhex) ) { + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : PRISM IGNORED\n"); + npris = 0; + nhex = 0; + } /*read 
hexa and transform to tetra*/ rewind(inm); - fseek(inm,posnhex,SEEK_SET); - nbado = 0; + fseek(inm,posnhex,SEEK_SET); + nbado = 0; for (k=1; k<=nhex; k++) { - ph = &listhexa[k]; - fscanf(inm,"%d %d %d %d %d %d %d %d %d",&ph->v[0],&ph->v[1],&ph->v[2],&ph->v[3],&ph->v[4],&ph->v[5],&ph->v[6],&ph->v[7],&ph->ref); - //fscanf(inm,"%d %d %d %d %d %d %d %d %d",&p0,&p4,&p2,&p1,&p3,&p5,&p6,&p7,&ref); - //printf("hex %d : %d %d %d %d %d %d %d %d\n",k,p0,p1,p2,p3,p4,p5,p6,p7); + ph = &listhexa[k]; + fscanf(inm,"%d %d %d %d %d %d %d %d %d",&ph->v[0],&ph->v[1],&ph->v[2],&ph->v[3],&ph->v[4],&ph->v[5],&ph->v[6],&ph->v[7],&ph->ref); + //fscanf(inm,"%d %d %d %d %d %d %d %d %d",&p0,&p4,&p2,&p1,&p3,&p5,&p6,&p7,&ref); + //printf("hex %d : %d %d %d %d %d %d %d %d\n",k,p0,p1,p2,p3,p4,p5,p6,p7); //check orientability of the hexahedra : vol of tet p0 p1 p3 p4 - volhex = MMG_quickvol(mesh->point[ph->v[0]].c,mesh->point[ph->v[1]].c,mesh->point[ph->v[2]].c,mesh->point[ph->v[3]].c); - if(k==1) { - volref = volhex; - //printf("vol %e\n",volref); - } - else { - if(volref*volhex < 0) { - fprintf(stdout,"BAD ORIENTATION OF HEXAHEDRON %d : %d %d %d %d %d %d %d %d\n",k,p0,p1,p2,p3,p4,p5,p6,p7); - nbado++; - tmp = ph->v[3]; - ph->v[3] = ph->v[1]; - ph->v[1] = tmp; - tmp = ph->v[5]; - ph->v[5] = ph->v[7]; - ph->v[7] = tmp; - } - } - // MMG_cuthex(mesh,&hed2,netmp+(k-1)*6,ph->v[0],ph->v[1],ph->v[2],ph->v[3],ph->v[4],ph->v[5],ph->v[6],ph->v[7],ph->ref); - } - fprintf(stdout,"%8d HEXA REORIENTED\n",nbado); + volhex = MMG_quickvol(mesh->point[ph->v[0]].c,mesh->point[ph->v[1]].c,mesh->point[ph->v[2]].c,mesh->point[ph->v[3]].c); + if(k==1) { + volref = volhex; + //printf("vol %e\n",volref); + } + else { + if(volref*volhex < 0) { + fprintf(stdout,"BAD ORIENTATION OF HEXAHEDRON %d : %d %d %d %d %d %d %d %d\n",k,p0,p1,p2,p3,p4,p5,p6,p7); + nbado++; + tmp = ph->v[3]; + ph->v[3] = ph->v[1]; + ph->v[1] = tmp; + tmp = ph->v[5]; + ph->v[5] = ph->v[7]; + ph->v[7] = tmp; + } + } + // 
MMG_cuthex(mesh,&hed2,netmp+(k-1)*6,ph->v[0],ph->v[1],ph->v[2],ph->v[3],ph->v[4],ph->v[5],ph->v[6],ph->v[7],ph->ref); + } + fprintf(stdout,"%8d HEXA REORIENTED\n",nbado); - if(!MMG_hashHexa(listhexa,adjahex,nhex)) return(0); - MMG_cuthex(mesh,&hed2,listhexa,adjahex,nhex,netmp); + if(!MMG_hashHexa(listhexa,adjahex,nhex)) return(0); + MMG_cuthex(mesh,&hed2,listhexa,adjahex,nhex,netmp); /*read prism and transform to tetra - ---> compatibility pbs ==> hash edge and switch case*/ + ---> compatibility pbs ==> hash edge and switch case*/ rewind(inm); - fseek(inm,posnpris,SEEK_SET); - nimp = 0; - ne = netmp+6*nhex; + fseek(inm,posnpris,SEEK_SET); + nimp = 0; + ne = netmp+6*nhex; for (k=1; k<=npris; k++) { - fscanf(inm,"%d %d %d %d %d %d %d",&p0,&p1,&p2,&p3,&p4,&p5,&ref); - if(!MMG_cutprism(mesh,&hed2,ne,p0,p1,p2,p3,p4,p5,ref)) - { - if(mesh->info.imprim < 0 ) fprintf(stdout,"DECOMPOSITION PRISM INVALID \n\n"); - mesh->ne += 5; - ne += 8; - nimp++; - continue; - } - ne += 3; + fscanf(inm,"%d %d %d %d %d %d %d",&p0,&p1,&p2,&p3,&p4,&p5,&ref); + if(!MMG_cutprism(mesh,&hed2,ne,p0,p1,p2,p3,p4,p5,ref)) + { + if(mesh->info.imprim < 0 ) fprintf(stdout,"DECOMPOSITION PRISM INVALID \n\n"); + mesh->ne += 5; + ne += 8; + nimp++; + continue; + } + ne += 3; } - if(abs(mesh->info.imprim) > 3 )fprintf(stdout," %d INVALID DECOMPOSITION\n\n",nimp); + if(abs(mesh->info.imprim) > 3 )fprintf(stdout," %d INVALID DECOMPOSITION\n\n",nimp); } - + if ( abs(mesh->info.imprim) > 3 && abs(mesh->info.option)!=10 ) { fprintf(stdout," NUMBER OF GIVEN VERTICES %8d\n",mesh->npfixe); if ( mesh->ntfixe ) @@ -609,24 +1041,26 @@ if ( ned ) fprintf(stdout," NUMBER OF GIVEN EDGES %8d\n",ned); } - fclose(inm); - return(1); + fclose(inm); + return(1); } /* load solution (metric) */ -int MMG_loadSol(pSol sol,char *filename,int npmax) { - FILE *inm; +int MMG_loadSol(pSol sol,char *filename,int npmax,void *dataff) { + FILE *inm; float fsol; - double tmp; + double tmp; int binch,bdim,iswp; int 
k,i,isol,type,bin,dim,btyp,bpos; long posnp; char *ptr,data[128],chaine[128]; - - posnp = 0; + if(dataff) + return MMG_loadSolff( sol,filename,npmax,(DataFF*) dataff); + + posnp = 0; bin = 0; - iswp = 0; + iswp = 0; strcpy(data,filename); ptr = strstr(data,".mesh"); @@ -645,94 +1079,94 @@ } fprintf(stdout," %%%% %s OPENED\n",data); - - if(!bin) { + + if(!bin) { strcpy(chaine,"DDD"); - while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { + while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { if(!strncmp(chaine,"Dimension",strlen("Dimension"))) { - fscanf(inm,"%d",&dim); - if(dim!=3) { - fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); - return(1); - } - continue; + fscanf(inm,"%d",&dim); + if(dim!=3) { + fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); + return(1); + } + continue; } else if(!strncmp(chaine,"SolAtVertices",strlen("SolAtVertices"))) { - fscanf(inm,"%d",&sol->np); - fscanf(inm,"%d",&type); - if(type!=1) { - fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); - return(1); - } - fscanf(inm,"%d",&btyp); - posnp = ftell(inm); - break; - } - } - } else { - fread(&binch,sw,1,inm); - iswp=0; - if(binch==16777216) iswp=1; + fscanf(inm,"%d",&sol->np); + fscanf(inm,"%d",&type); + if(type!=1) { + fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); + return(1); + } + fscanf(inm,"%d",&btyp); + posnp = ftell(inm); + break; + } + } + } else { + fread(&binch,sw,1,inm); + iswp=0; + if(binch==16777216) iswp=1; else if(binch!=1) { fprintf(stdout,"BAD FILE ENCODING\n"); - } - fread(&sol->ver,sw,1,inm); - if(iswp) sol->ver = MMG_swapbin(sol->ver); + } + fread(&sol->ver,sw,1,inm); + if(iswp) sol->ver = MMG_swapbin(sol->ver); while(fread(&binch,sw,1,inm)!=EOF && binch!=54 ) { - if(iswp) binch=MMG_swapbin(binch); - if(binch==54) break; + if(iswp) binch=MMG_swapbin(binch); + if(binch==54) break; if(binch==3) { //Dimension - fread(&bdim,sw,1,inm); //NulPos=>20 - if(iswp) bdim=MMG_swapbin(bdim); - 
fread(&bdim,sw,1,inm); - if(iswp) bdim=MMG_swapbin(bdim); - if(bdim!=3) { - fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); - exit(0); - return(1); - } - continue; + fread(&bdim,sw,1,inm); //NulPos=>20 + if(iswp) bdim=MMG_swapbin(bdim); + fread(&bdim,sw,1,inm); + if(iswp) bdim=MMG_swapbin(bdim); + if(bdim!=3) { + fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); + exit(0); + return(1); + } + continue; } else if(binch==62) { //SolAtVertices - fread(&binch,sw,1,inm); //NulPos - if(iswp) binch=MMG_swapbin(binch); - fread(&sol->np,sw,1,inm); - if(iswp) sol->np=MMG_swapbin(sol->np); - fread(&binch,sw,1,inm); //nb sol - if(iswp) binch=MMG_swapbin(binch); - if(binch!=1) { - fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); - return(1); - } - fread(&btyp,sw,1,inm); //typsol - if(iswp) btyp=MMG_swapbin(btyp); - posnp = ftell(inm); - break; + fread(&binch,sw,1,inm); //NulPos + if(iswp) binch=MMG_swapbin(binch); + fread(&sol->np,sw,1,inm); + if(iswp) sol->np=MMG_swapbin(sol->np); + fread(&binch,sw,1,inm); //nb sol + if(iswp) binch=MMG_swapbin(binch); + if(binch!=1) { + fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); + return(1); + } + fread(&btyp,sw,1,inm); //typsol + if(iswp) btyp=MMG_swapbin(btyp); + posnp = ftell(inm); + break; } else { - fread(&bpos,sw,1,inm); //Pos - if(iswp) bpos=MMG_swapbin(bpos); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - } - } - - } + fread(&bpos,sw,1,inm); //Pos + if(iswp) bpos=MMG_swapbin(bpos); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + } + } + + } if ( !sol->np ) { - fprintf(stdout," ** MISSING DATA\n"); + fprintf(stdout," ** MISSING DATA ss\n"); return(1); } if ( btyp!= 1 && btyp!=3 ) { - fprintf(stdout," ** DATA IGNORED\n"); + fprintf(stdout," ** DATA IGNORED %d\n",btyp); sol->np = 0; return(1); } - + sol->offset = (btyp==1) ? 
1 : 6; if ( abs(MMG_imprim) > 5 ) fprintf(stdout," -- READING DATA FILE %s\n",data); if ( !sol->np ) { - fprintf(stdout," ** MISSING DATA\n"); + fprintf(stdout," ** MISSING DATA xx\n"); return(0); } sol->npfixe = sol->np; @@ -742,31 +1176,31 @@ /* read mesh solutions */ sol->npfixe = sol->np; rewind(inm); - fseek(inm,posnp,SEEK_SET); + fseek(inm,posnp,SEEK_SET); for (k=1; k<=sol->np; k++) { isol = (k-1) * sol->offset + 1; - if (sol->ver == 1) { + if (sol->ver == 1) { for (i=0; ioffset; i++) { - if(!bin){ - fscanf(inm,"%f",&fsol); - sol->met[isol + i] = (double) fsol; - } else { - fread(&fsol,sw,1,inm); - if(iswp) fsol=MMG_swapf(fsol); - sol->met[isol + i] = (double) fsol; - } - } + if(!bin){ + fscanf(inm,"%f",&fsol); + sol->met[isol + i] = (double) fsol; + } else { + fread(&fsol,sw,1,inm); + if(iswp) fsol=MMG_swapf(fsol); + sol->met[isol + i] = (double) fsol; + } + } } else { for (i=0; ioffset; i++) { - if(!bin){ - fscanf(inm,"%lf",&sol->met[isol + i]); + if(!bin){ + fscanf(inm,"%lf",&sol->met[isol + i]); - } else { - fread(&sol->met[isol + i],sd,1,inm); - if(iswp) sol->met[isol + i]=MMG_swapd(sol->met[isol + i]); - } - } - } + } else { + fread(&sol->met[isol + i],sd,1,inm); + if(iswp) sol->met[isol + i]=MMG_swapd(sol->met[isol + i]); + } + } + } /* MMG_swap data */ if ( sol->offset == 6 ) { tmp = sol->met[isol + 2]; @@ -778,13 +1212,13 @@ if ( abs(MMG_imprim) > 3 ) fprintf(stdout," NUMBER OF GIVEN DATA %8d\n",sol->npfixe); - fclose(inm); - return(1); + fclose(inm); + return(1); } -int MMG_loadVect(pMesh mesh,char *filename,int npmax) { - FILE *inm; +int MMG_loadVect(pMesh mesh,char *filename,int npmax,void *dataff) { + FILE *inm; pDispl pd; float fsol; int binch,bdim,iswp; @@ -792,10 +1226,13 @@ long posnp; char *ptr,data[128],chaine[128]; - pd = mesh->disp; + if(dataff) + return MMG_loadVectff( mesh, filename,npmax,(DataFF*) dataff); - posnp = 0; - bin = 0; + pd = mesh->disp; + + posnp = 0; + bin = 0; iswp = 0; strcpy(data,filename); @@ -815,76 +1252,76 @@ } 
fprintf(stdout," %%%% %s OPENED\n",data); - - if(!bin) { + + if(!bin) { strcpy(chaine,"DDD"); - while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { + while(fscanf(inm,"%s",&chaine[0])!=EOF && strncmp(chaine,"End",strlen("End")) ) { if(!strncmp(chaine,"Dimension",strlen("Dimension"))) { - fscanf(inm,"%d",&dim); - if(dim!=3) { - fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); - return(1); - } - continue; + fscanf(inm,"%d",&dim); + if(dim!=3) { + fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); + return(1); + } + continue; } else if(!strncmp(chaine,"SolAtVertices",strlen("SolAtVertices"))) { - fscanf(inm,"%d",&pd->np); - fscanf(inm,"%d",&type); - if(type!=1) { - fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); - return(1); - } - fscanf(inm,"%d",&btyp); - posnp = ftell(inm); - break; - } - } - } else { - fread(&pd->ver,sw,1,inm); - iswp=0; - if(pd->ver==16777216) iswp=1; + fscanf(inm,"%d",&pd->np); + fscanf(inm,"%d",&type); + if(type!=1) { + fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); + return(1); + } + fscanf(inm,"%d",&btyp); + posnp = ftell(inm); + break; + } + } + } else { + fread(&pd->ver,sw,1,inm); + iswp=0; + if(pd->ver==16777216) iswp=1; else if(pd->ver!=1) { fprintf(stdout,"BAD FILE ENCODING\n"); - } - fread(&pd->ver,sw,1,inm); - if(iswp) pd->ver = MMG_swapbin(pd->ver); + } + fread(&pd->ver,sw,1,inm); + if(iswp) pd->ver = MMG_swapbin(pd->ver); while(fread(&binch,sw,1,inm)!=EOF && binch!=54 ) { - if(iswp) binch=MMG_swapbin(binch); - if(binch==54) break; + if(iswp) binch=MMG_swapbin(binch); + if(binch==54) break; if(binch==3) { //Dimension - fread(&bdim,sw,1,inm); //Pos=>20 - if(iswp) bdim=MMG_swapbin(bdim); - fread(&bdim,sw,1,inm); - if(iswp) bdim=MMG_swapbin(bdim); - if(bdim!=3) { - fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); - exit(0); - return(1); - } - continue; + fread(&bdim,sw,1,inm); //Pos=>20 + if(iswp) bdim=MMG_swapbin(bdim); + fread(&bdim,sw,1,inm); + if(iswp) bdim=MMG_swapbin(bdim); + 
if(bdim!=3) { + fprintf(stdout,"BAD SOL DIMENSION : %d\n",dim); + exit(0); + return(1); + } + continue; } else if(binch==62) { //SolAtVertices - fread(&binch,sw,1,inm); //Pos - if(iswp) binch=MMG_swapbin(binch); - fread(&pd->np,sw,1,inm); - if(iswp) pd->np=MMG_swapbin(pd->np); - fread(&binch,sw,1,inm); //nb sol - if(iswp) binch=MMG_swapbin(binch); - if(binch!=1) { - fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); - return(1); - } - fread(&btyp,sw,1,inm); //typsol - if(iswp) btyp=MMG_swapbin(btyp); - posnp = ftell(inm); - break; + fread(&binch,sw,1,inm); //Pos + if(iswp) binch=MMG_swapbin(binch); + fread(&pd->np,sw,1,inm); + if(iswp) pd->np=MMG_swapbin(pd->np); + fread(&binch,sw,1,inm); //nb sol + if(iswp) binch=MMG_swapbin(binch); + if(binch!=1) { + fprintf(stdout,"SEVERAL SOLUTION => IGNORED : %d\n",type); + return(1); + } + fread(&btyp,sw,1,inm); //typsol + if(iswp) btyp=MMG_swapbin(btyp); + posnp = ftell(inm); + break; } else { - fread(&bpos,sw,1,inm); //Pos - if(iswp) bpos=MMG_swapbin(bpos); - rewind(inm); - fseek(inm,bpos,SEEK_SET); - } - } - - } + fread(&bpos,sw,1,inm); //Pos + if(iswp) bpos=MMG_swapbin(bpos); + rewind(inm); + fseek(inm,bpos,SEEK_SET); + } + } + + } if ( !pd->np ) { fprintf(stdout," ** MISSING DATA\n"); return(0); @@ -895,7 +1332,7 @@ } if ( btyp != 2 ) { - fprintf(stdout," ** DATA IGNORED\n"); + fprintf(stdout," ** DATA IGNORED %d !=2\n",btyp); return(0); } @@ -907,47 +1344,50 @@ fseek(inm,posnp,SEEK_SET); for (k=1; k<=pd->np; k++) { iadr = (k - 1) * 3 + 1; - if (pd->ver < 2) { + if (pd->ver < 2) { for (i=0; i<3; i++) { - if(!bin){ - fscanf(inm,"%f",&fsol); - pd->mv[iadr + i] = (double) fsol; - } else { - fread(&fsol,sw,1,inm); - if(iswp) fsol=MMG_swapf(fsol); - pd->mv[iadr + i] = (double) fsol; - } - } + if(!bin){ + fscanf(inm,"%f",&fsol); + pd->mv[iadr + i] = (double) fsol; + } else { + fread(&fsol,sw,1,inm); + if(iswp) fsol=MMG_swapf(fsol); + pd->mv[iadr + i] = (double) fsol; + } + } } else { for (i=0; i<3; i++) { - if(!bin){ - 
fscanf(inm,"%lf",&pd->mv[iadr + i]); - } else { - fread(&pd->mv[iadr + i],sd,1,inm); - if(iswp) pd->mv[iadr + i]=MMG_swapd(pd->mv[iadr + i]); - } - } - } + if(!bin){ + fscanf(inm,"%lf",&pd->mv[iadr + i]); + } else { + fread(&pd->mv[iadr + i],sd,1,inm); + if(iswp) pd->mv[iadr + i]=MMG_swapd(pd->mv[iadr + i]); + } + } + } } if ( abs(mesh->info.imprim) > 3 ) fprintf(stdout," NUMBER OF GIVEN DATA %8d\n",pd->np); - fclose(inm); + fclose(inm); return(1); } /* save mesh to disk */ -int MMG_saveMesh(pMesh mesh,char *filename) { - FILE* inm; - Hedge hed; +int MMG_saveMesh(pMesh mesh,char *filename,void *dataff) { + FILE* inm; + Hedge hed; pPoint ppt; pTria pt1; pTetra pt; int i,k,np,ne,nc,ned,*cor,*ed,ref,bin,bpos; - char *ptr,data[128],chaine[128]; + char *ptr,data[128],chaine[128]; int binch,nu1,nu2; + if(dataff) + return MMG_saveMeshff( mesh, filename,(DataFF*) dataff); + mesh->ver = 2; //double precision bin = 0; strcpy(data,filename); @@ -959,28 +1399,28 @@ *ptr = '\0'; strcat(data,".mesh"); if( !(inm = fopen(data,"w")) ) { - fprintf(stderr," ** UNABLE TO OPEN %s.\n",data); - return(0); + fprintf(stderr," ** UNABLE TO OPEN %s.\n",data); + return(0); } } else { - bin = 1; + bin = 1; } } - else { + else { ptr = strstr(data,".meshb"); if( ptr ) bin = 1; if( !(inm = fopen(data,"w")) ) { fprintf(stderr," ** UNABLE TO OPEN %s.\n",data); return(0); - } + } } fprintf(stdout," %%%% %s OPENED\n",data); /*entete fichier*/ if(!bin) { - strcpy(&chaine[0],"MeshVersionFormatted 2\n"); + strcpy(&chaine[0],"MeshVersionFormatted 2\n"); fprintf(inm,"%s",chaine); - strcpy(&chaine[0],"\n\nDimension 3\n"); + strcpy(&chaine[0],"\n\nDimension 3\n"); fprintf(inm,"%s ",chaine); } else { binch = 1; //MeshVersionFormatted @@ -993,33 +1433,38 @@ fwrite(&bpos,sw,1,inm); binch = 3; fwrite(&binch,sw,1,inm); - + } /* compact vertices */ - if(mesh->ncor) { + if(mesh->ncor) { cor = (int*) M_calloc(mesh->ncor,sizeof(int),"MMG_savemesh"); - assert(cor); + assert(cor); } - if(mesh->ned) { - if ( 
!MMG_zaldy4(&hed,mesh->ned) ) { - if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : EXPORT EDGES IGNORED\n"); - mesh->ned = 0; - } + if(mesh->ned) { + if ( !MMG_zaldy4(&hed,mesh->ned) ) { + if ( mesh->info.ddebug ) fprintf(stdout," ## MEMORY ALLOCATION PROBLEM : EXPORT EDGES IGNORED\n"); + mesh->ned = 0; + } ed = (int*)M_calloc(2*mesh->ned,sizeof(int),"MMG_savemesh"); - assert(ed); + assert(ed); } - np = 0; + np = 0; nc = 0; for (k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; - if ( ppt->tag & M_UNUSED ) continue; - ppt->tmp = ++np; + if ( ppt->tag & M_UNUSED ) continue; + ppt->tmp = ++np; if ( ppt->geom & M_CORNER ) cor[nc++] = ppt->tmp; + } + //assert(mesh->ncor==nc); + if(mesh->ncor!=nc) { + fprintf(stdout,"WARNING: some corners have been added or deleted\n"); + mesh->ncor = nc; } - assert(mesh->ncor==nc); + if(!bin) { - strcpy(&chaine[0],"\n\nVertices\n"); + strcpy(&chaine[0],"\n\nVertices\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d\n",np); } else { @@ -1027,27 +1472,27 @@ fwrite(&binch,sw,1,inm); bpos += 12+(1+3*mesh->ver)*4*np; //NullPos fwrite(&bpos,sw,1,inm); - fwrite(&np,sw,1,inm); + fwrite(&np,sw,1,inm); } for(k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; - if ( ppt->tag & M_UNUSED ) continue; - //if(ppt->tmp==52453) printf("point %d --> %d\n",ppt->tmp,k); + if ( ppt->tag & M_UNUSED ) continue; + //if(ppt->tmp==52453) printf("point %d --> %d\n",ppt->tmp,k); if(!bin) { fprintf(inm,"%.15lg %.15lg %.15lg %d\n",ppt->c[0],ppt->c[1],ppt->c[2],ppt->ref); } else { - fwrite((unsigned char*)&ppt->c[0],sd,1,inm); - fwrite((unsigned char*)&ppt->c[1],sd,1,inm); - fwrite((unsigned char*)&ppt->c[2],sd,1,inm); - fwrite((unsigned char*)&ppt->ref,sw,1,inm); + fwrite((unsigned char*)&ppt->c[0],sd,1,inm); + fwrite((unsigned char*)&ppt->c[1],sd,1,inm); + fwrite((unsigned char*)&ppt->c[2],sd,1,inm); + fwrite((unsigned char*)&ppt->ref,sw,1,inm); } } - /* rebuild triangles tabular and write triangles */ + /* rebuild triangles tabular and write 
triangles */ mesh->nt = 0; if(MMG_markBdry(mesh)) { if(!bin) { - strcpy(&chaine[0],"\n\nTriangles\n"); + strcpy(&chaine[0],"\n\nTriangles\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d \n",mesh->nt); } else { @@ -1055,49 +1500,48 @@ fwrite(&binch,sw,1,inm); bpos += 12+16*mesh->nt; //Pos fwrite(&bpos,sw,1,inm); - fwrite(&mesh->nt,sw,1,inm); + fwrite(&mesh->nt,sw,1,inm); } for (k=1; k<=mesh->nt; k++) { pt1 = &mesh->tria[k]; - ref = pt1->ref; + ref = pt1->ref; if(!bin) { - //if(ref==0) printf("tr %d bad ref!!\n",k); - fprintf(inm,"%d %d %d %d\n",mesh->point[pt1->v[0]].tmp,mesh->point[pt1->v[1]].tmp - ,mesh->point[pt1->v[2]].tmp,ref); + fprintf(inm,"%d %d %d %d\n",mesh->point[pt1->v[0]].tmp,mesh->point[pt1->v[1]].tmp + ,mesh->point[pt1->v[2]].tmp,ref); } else { - fwrite(&mesh->point[pt1->v[0]].tmp,sw,1,inm); - fwrite(&mesh->point[pt1->v[1]].tmp,sw,1,inm); - fwrite(&mesh->point[pt1->v[2]].tmp,sw,1,inm); - fwrite(&ref,sw,1,inm); + fwrite(&mesh->point[pt1->v[0]].tmp,sw,1,inm); + fwrite(&mesh->point[pt1->v[1]].tmp,sw,1,inm); + fwrite(&mesh->point[pt1->v[2]].tmp,sw,1,inm); + fwrite(&ref,sw,1,inm); } } - } - + } + /* write tetrahedra */ - ne = 0; - ned = 0; - //printf("avt %d\n",mesh->ned); + ne = 0; + ned = 0; + //printf("avt %d\n",mesh->ned); for (k=1; k<=mesh->ne; k++) { pt = &mesh->tetra[k]; - if ( !pt->v[0] ) continue; - if(mesh->ned) { - for (i=0 ; i<6 ; i++) { - if (pt->bdryinfo[i]) { - nu1 = pt->v[MMG_iare[i][0]]; - nu2 = pt->v[MMG_iare[i][1]]; - if (MMG_edgePut(&hed,nu1,nu2,2)<=1) { - ed[2*ned] = (mesh->point[nu1]).tmp; - ed[2*ned + 1] = (mesh->point[nu2]).tmp; - ned++; - } - } - } - } - ne++; + if ( !pt->v[0] ) continue; + if(mesh->ned) { + for (i=0 ; i<6 ; i++) { + if (pt->bdryinfo[i]) { + nu1 = pt->v[MMG_iare[i][0]]; + nu2 = pt->v[MMG_iare[i][1]]; + if (MMG_edgePut(&hed,nu1,nu2,2)<=1) { + ed[2*ned] = (mesh->point[nu1]).tmp; + ed[2*ned + 1] = (mesh->point[nu2]).tmp; + ned++; + } + } + } + } + ne++; } - //printf("ned %d\n",ned); + //printf("ned %d\n",ned); if(!bin) 
{ - strcpy(&chaine[0],"\n\nTetrahedra\n"); + strcpy(&chaine[0],"\n\nTetrahedra\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d\n",ne); } else { @@ -1105,29 +1549,29 @@ fwrite(&binch,sw,1,inm); bpos += 12 + 20*ne;//Pos fwrite(&bpos,sw,1,inm); - fwrite((unsigned char*)&ne,sw,1,inm); - } - ne=0; + fwrite((unsigned char*)&ne,sw,1,inm); + } + ne=0; for (k=1; k<=mesh->ne; k++) { pt = &mesh->tetra[k]; - if ( !pt->v[0] ) continue; - ne++; - ref = pt->ref; + if ( !pt->v[0] ) continue; + ne++; + ref = pt->ref; if(!bin) { fprintf(inm,"%d %d %d %d %d\n",mesh->point[pt->v[0]].tmp,mesh->point[pt->v[1]].tmp - ,mesh->point[pt->v[2]].tmp,mesh->point[pt->v[3]].tmp,ref); + ,mesh->point[pt->v[2]].tmp,mesh->point[pt->v[3]].tmp,ref); } else { - fwrite(&mesh->point[pt->v[0]].tmp,sw,1,inm); - fwrite(&mesh->point[pt->v[1]].tmp,sw,1,inm); - fwrite(&mesh->point[pt->v[2]].tmp,sw,1,inm); - fwrite(&mesh->point[pt->v[3]].tmp,sw,1,inm); - fwrite(&ref,sw,1,inm); + fwrite(&mesh->point[pt->v[0]].tmp,sw,1,inm); + fwrite(&mesh->point[pt->v[1]].tmp,sw,1,inm); + fwrite(&mesh->point[pt->v[2]].tmp,sw,1,inm); + fwrite(&mesh->point[pt->v[3]].tmp,sw,1,inm); + fwrite(&ref,sw,1,inm); } - } - + } + if(mesh->ned) { if(!bin) { - strcpy(&chaine[0],"\n\nEdges\n"); + strcpy(&chaine[0],"\n\nEdges\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d\n",ned); } else { @@ -1135,50 +1579,50 @@ fwrite(&binch,sw,1,inm); bpos += 12 + 3*4*ned;//Pos fwrite(&bpos,sw,1,inm); - fwrite((unsigned char*)&ned,sw,1,inm); - } - for (k=0; kncor); } else { binch = 13; //Corners fwrite(&binch,sw,1,inm); - bpos += 12 + 4*mesh->ncor;//Pos + bpos += 12 + 4*mesh->ncor;//Pos fwrite(&bpos,sw,1,inm); - fwrite((unsigned char*)&mesh->ncor,sw,1,inm); + fwrite((unsigned char*)&mesh->ncor,sw,1,inm); } for (k=0; kncor; k++) { if(!bin) { fprintf(inm,"%d \n",cor[k]); } else { - fwrite(&cor[k],sw,1,inm); + fwrite(&cor[k],sw,1,inm); } - } + } /*fin fichier*/ if(!bin) { - strcpy(&chaine[0],"\n\nEnd\n"); + strcpy(&chaine[0],"\n\nEnd\n"); fprintf(inm,"%s",chaine); 
} else { binch = 54; //End fwrite(&binch,sw,1,inm); } - fclose(inm); + fclose(inm); if(mesh->ncor) M_free(cor); if ( mesh->info.imprim ) { fprintf(stdout," NUMBER OF GIVEN VERTICES %8d\n",mesh->npfixe); @@ -1193,20 +1637,22 @@ if ( mesh->ned ) fprintf(stdout," TOTAL NUMBER OF EDGES %8d\n",ned); } - //if(ned!=mesh->ned) exit(0); + //if(ned!=mesh->ned) exit(0); return(1); } -int MMG_saveSol(pMesh mesh,pSol sol,char *filename) { +int MMG_saveSol(pMesh mesh,pSol sol,char *filename,void *dataff) { FILE* inm; pPoint ppt; float fsol; double tmp; int i,k,nbl,isol,bin,bpos,typ; - char *ptr,data[128],chaine[128]; + char *ptr,data[128],chaine[128]; int binch; + if(dataff) + return MMG_saveSolff( mesh, sol, filename,(DataFF*) dataff); if ( !sol->np ) return(1); bin = 1; @@ -1219,24 +1665,24 @@ *ptr = '\0'; bin = 0; } else { - ptr = strstr(data,".solb"); - if ( ptr ) { - *ptr = '\0'; - bin = 1; + ptr = strstr(data,".solb"); + if ( ptr ) { + *ptr = '\0'; + bin = 1; } else { - ptr = strstr(data,".sol"); - if ( ptr ) { - *ptr = '\0'; - bin = 0; - } - } - } + ptr = strstr(data,".sol"); + if ( ptr ) { + *ptr = '\0'; + bin = 0; + } + } + } } - if ( bin ) + if ( bin ) strcat(data,".solb"); else strcat(data,".sol"); - + sol->ver = 2; if( bin && !(inm = fopen(data,"wb")) ) { fprintf(stderr," ** UNABLE TO OPEN %s.\n",data); @@ -1251,9 +1697,9 @@ /*entete fichier*/ if(!bin) { - strcpy(&chaine[0],"MeshVersionFormatted 2\n"); + strcpy(&chaine[0],"MeshVersionFormatted 2\n"); fprintf(inm,"%s",chaine); - strcpy(&chaine[0],"\n\nDimension 3\n"); + strcpy(&chaine[0],"\n\nDimension 3\n"); fprintf(inm,"%s ",chaine); } else { binch = 1; //MeshVersionFormatted @@ -1266,19 +1712,19 @@ fwrite(&bpos,sw,1,inm); binch = 3; fwrite(&binch,sw,1,inm); - + } switch(sol->offset) { case 1: - typ = 1; - break; + typ = 1; + break; case 6: - typ = 3; + typ = 3; break; default: - fprintf(stdout," ** DATA IGNORED\n"); + fprintf(stdout," ** DATA IGNORED not 1 ou 6 == %d\n",sol->offset); return(0); } @@ -1287,11 
+1733,11 @@ for (k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; if ( ppt->tag & M_UNUSED ) continue; - nbl++; + nbl++; } - + if(!bin) { - strcpy(&chaine[0],"\n\nSolAtVertices\n"); + strcpy(&chaine[0],"\n\nSolAtVertices\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d\n",nbl); fprintf(inm,"%d %d\n",1,typ); @@ -1300,7 +1746,7 @@ fwrite(&binch,sw,1,inm); bpos += 20+(sol->offset*sol->ver)*4*nbl; //Pos fwrite(&bpos,sw,1,inm); - fwrite(&nbl,sw,1,inm); + fwrite(&nbl,sw,1,inm); binch = 1; //nb sol fwrite(&binch,sw,1,inm); binch = typ; //typ sol @@ -1317,34 +1763,34 @@ sol->met[isol + 3] = tmp; } if (sol->ver < 2) { - if(!bin) { - for (i=0; ioffset; i++) { - fsol = (float) sol->met[isol + i]; - fprintf(inm,"%f ",fsol); - } - fprintf(inm,"\n"); + if(!bin) { + for (i=0; ioffset; i++) { + fsol = (float) sol->met[isol + i]; + fprintf(inm,"%f ",fsol); + } + fprintf(inm,"\n"); } else { - for (i=0; ioffset; i++) { - fsol = (float) sol->met[isol + i]; - fwrite(&fsol,sw,1,inm); - } + for (i=0; ioffset; i++) { + fsol = (float) sol->met[isol + i]; + fwrite(&fsol,sw,1,inm); + } } } else { - if(!bin) { - for (i=0; ioffset; i++) - fprintf(inm,"%.15lg ",sol->met[isol + i]); - fprintf(inm,"\n"); + if(!bin) { + for (i=0; ioffset; i++) + fprintf(inm,"%.15lg ",sol->met[isol + i]); + fprintf(inm,"\n"); } else { - for (i=0; ioffset; i++) - fwrite(&sol->met[isol + i],sd,1,inm); + for (i=0; ioffset; i++) + fwrite(&sol->met[isol + i],sd,1,inm); } - + } } - + /*fin fichier*/ if(!bin) { - strcpy(&chaine[0],"\n\nEnd\n"); + strcpy(&chaine[0],"\n\nEnd\n"); fprintf(inm,"%s",chaine); } else { binch = 54; //End @@ -1355,14 +1801,16 @@ } /*save the node speed : coornew-coorold/dt*/ -int MMG_saveVect(pMesh mesh,char *filename) { - FILE* inm; +int MMG_saveVect(pMesh mesh,char *filename,void *dataff) { + FILE* inm; pDispl pd; pPoint ppt; double dsol,dd; int i,k,nbl,bin,bpos,typ; - char *ptr,data[128],chaine[128]; + char *ptr,data[128],chaine[128]; unsigned char binch; + if(dataff) + return MMG_saveVectff( 
mesh, filename,(DataFF*) dataff); pd = mesh->disp; pd->ver = 2; @@ -1378,7 +1826,7 @@ bin = 0; } } - if ( bin ) + if ( bin ) strcat(data,".o.solb"); else strcat(data,".o.sol"); @@ -1395,9 +1843,9 @@ /*entete fichier*/ if(!bin) { - strcpy(&chaine[0],"MeshVersionFormatted 2\n"); + strcpy(&chaine[0],"MeshVersionFormatted 2\n"); fprintf(inm,"%s",chaine); - strcpy(&chaine[0],"\n\nDimension 3\n"); + strcpy(&chaine[0],"\n\nDimension 3\n"); fprintf(inm,"%s ",chaine); } else { binch = 1; //MeshVersionFormatted @@ -1410,20 +1858,20 @@ fwrite(&bpos,sw,1,inm); binch = 3; fwrite(&binch,sw,1,inm); - + } - typ = 2; + typ = 2; /* write data */ nbl = 0; for (k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; if ( ppt->tag & M_UNUSED ) continue; - nbl++; + nbl++; } - + if(!bin) { - strcpy(&chaine[0],"\n\nSolAtVertices\n"); + strcpy(&chaine[0],"\n\nSolAtVertices\n"); fprintf(inm,"%s",chaine); fprintf(inm,"%d\n",nbl); fprintf(inm,"%d %d\n",1,typ); @@ -1432,34 +1880,34 @@ fwrite(&binch,sw,1,inm); bpos += 20+(3*pd->ver)*4*nbl; //Pos fwrite(&bpos,sw,1,inm); - fwrite(&nbl,sw,1,inm); + fwrite(&nbl,sw,1,inm); binch = 1; //nb sol fwrite(&binch,sw,1,inm); binch = typ; //typ sol fwrite(&binch,sw,1,inm); - } - - - dd = mesh->info.delta / (double)PRECI; + } + + + dd = mesh->info.delta / (double)PRECI; fprintf(stdout," DT %e\n",mesh->info.dt); for (k=1; k<=mesh->np; k++) { ppt = &mesh->point[k]; - if ( ppt->tag & M_UNUSED ) continue; + if ( ppt->tag & M_UNUSED ) continue; for (i=0 ; i<3 ; i++) { - dsol = (ppt->c[i] - mesh->disp->cold[3*(k-1) + 1 + i]* dd - mesh->info.min[i])/mesh->info.dt; - if(!bin) { - fprintf(inm,"%.15lg ",dsol); + dsol = (ppt->c[i] - mesh->disp->cold[3*(k-1) + 1 + i]* dd - mesh->info.min[i])/mesh->info.dt; + if(!bin) { + fprintf(inm,"%.15lg ",dsol); } else { - fwrite(&dsol,sd,1,inm); + fwrite(&dsol,sd,1,inm); } } - if (!bin) fprintf(inm,"\n"); + if (!bin) fprintf(inm,"\n"); } - - + + /*fin fichier*/ if(!bin) { - strcpy(&chaine[0],"\n\nEnd\n"); + strcpy(&chaine[0],"\n\nEnd\n"); 
fprintf(inm,"%s",chaine); } else { binch = 54; //End --- mmg3d4/build/sources/mmg3d9.c 2012-12-19 16:05:33.000000000 +0100 +++ mmg3d4-nnew/build/sources/mmg3d9.c 2014-03-05 18:37:03.000000000 +0100 @@ -382,7 +382,7 @@ fprintf(stdout," ** MOVING MESH\n"); /*alloc et init metold*/ - sol->metold = (double*)M_calloc(sol->npmax+1,sol->offset*sizeof(double),"MMG_mmg3d9"); + if(!sol->metold) sol->metold = (double*)M_calloc(sol->npmax+1,sol->offset*sizeof(double),"MMG_mmg3d9"); assert(sol->metold); mesh->disp->cold = (double*)M_calloc(3*(mesh->npmax + 1),sizeof(double),"MMG_mmg3d9"); assert(mesh->disp->cold); --- mmg3d4/build/sources/mmg3dmain/mmg3d.c 2012-12-19 16:05:53.000000000 +0100 +++ mmg3d4-new/build/sources/mmg3dmain/mmg3d.c 2014-04-22 16:37:41.000000000 +0200 @@ -46,7 +46,7 @@ #include "compil.date" #include "mesh.h" #include "eigenv.h" - +#include "dataff.h" TIM_mytime MMG_ctim[TIMEMAX]; short MMG_imprim; @@ -310,7 +310,8 @@ strcpy(sol->name,mesh->name); } if ( mesh->outf == NULL ) { - mesh->outf = (char *)calloc(128,sizeof(char)); + static char stbub [256]; + mesh->outf = stbub; /*(char *)calloc(128,sizeof(char)); */ assert(mesh->outf); strcpy(mesh->outf,mesh->name); ptr = strstr(mesh->outf,".mesh"); @@ -397,6 +398,7 @@ 100.*ttot/ttim[0],call[0],ttot/(float)call[0]); } fprintf(stdout,"\n ELAPSED TIME %.2f SEC. 
(%.2f)\n",ttim[0],ttot); + fflush(stdout); } @@ -433,8 +435,7 @@ return(1); } - -int main(int argc,char *argv[]) { +int mainmmg3d(int argc,char *argv[],DataFF *dataff) { pMesh mesh; pSol sol; Info *info; @@ -442,7 +443,7 @@ int k,iadr,i,jj,kk,ii; double lambda[3],v[3][3],*mold,*m,declic,maxLES,calLES; fprintf(stdout," -- MMG3d, Release %s (%s) \n",M_VER,M_REL); - fprintf(stdout," Copyright (c) LJLL/IMB, 2010\n"); + fprintf(stdout," Copyright (c) LJLL/IMB, 2014\n"); fprintf(stdout," %s\n",COMPIL); signal(SIGABRT,excfun); @@ -451,7 +452,7 @@ signal(SIGSEGV,excfun); signal(SIGTERM,excfun); signal(SIGINT,excfun); - atexit(endcod); + if(dataff==0) atexit(endcod); TIM_tminit(MMG_ctim,TIMEMAX); TIM_chrono(ON,&MMG_ctim[0]); @@ -479,15 +480,27 @@ info->dt = 1.; info->bdry = 0; info->optles = 0; - + /* modif F. Hecht ..*/ + if(dataff) + { + mesh->name=dataff->meshname; + mesh->move=dataff->movename; + sol->name=dataff->solname; + /* printf(" #### %p %p %p --- \n",mesh->name,mesh->move,sol->name); */ + info->imprim=dataff->imprim; + info->memory=dataff->memory; + } + /* end modf */ if ( !parsar(argc,argv,mesh,sol) ) return(1); MMG_imprim = info->imprim; /* load data */ if ( MMG_imprim ) fprintf(stdout,"\n -- INPUT DATA\n"); TIM_chrono(ON,&MMG_ctim[1]); - if ( !MMG_loadMesh(mesh,mesh->name) ) return(1); - if ( !MMG_loadSol(sol,sol->name,mesh->npmax) ) return(1); + /* modif FH. for interface with ff++ add dataff param */ + if ( !MMG_loadMesh(mesh,mesh->name,dataff) ) return(1); + if ( !MMG_loadSol(sol,sol->name,mesh->npmax,dataff ) ) return(1); + if ( sol->np && sol->np != mesh->np ) { fprintf(stdout," ## WARNING: WRONG SOLUTION NUMBER. 
IGNORED\n"); sol->np = 0; @@ -495,7 +508,7 @@ if ( !parsop(mesh) ) return(1); - if ( abs(info->option) == 9 && !MMG_loadVect(mesh,mesh->move,mesh->np) ) return(0); + if ( abs(info->option) == 9 && !MMG_loadVect(mesh,mesh->move,mesh->np,dataff) ) return(0); if ( !MMG_setfunc(sol->offset) ) return(1); if ( !MMG_scaleMesh(mesh,sol) ) return(1); @@ -527,7 +540,7 @@ if ( !MMG_hashTetra(mesh) ) return(1); if ( !MMG_markBdry(mesh) ) return(1); if (abs(mesh->info.option)==10) { - MMG_saveMesh(mesh,"tetra.mesh"); + MMG_saveMesh(mesh,"tetra.mesh",dataff); return(0); } @@ -571,8 +584,8 @@ if ( abs(info->option) == 9 ) { if(!MMG_mmg3d9(mesh,sol,&alert)) { if ( !MMG_unscaleMesh(mesh,sol) ) return(1); - MMG_saveMesh(mesh,mesh->outf); - MMG_saveSol(mesh,sol,mesh->outf); + MMG_saveMesh(mesh,mesh->outf,dataff); + MMG_saveSol(mesh,sol,mesh->outf,dataff); return(1); } /*puts("appel 1"); @@ -678,18 +691,18 @@ fprintf(stdout,"\n ## WARNING: INCOMPLETE MESH %d , %d\n", mesh->np,mesh->ne); - if ( MMG_imprim ) fprintf(stdout,"\n -- WRITING DATA FILE %s\n",mesh->outf); + if ( MMG_imprim && !dataff) fprintf(stdout,"\n -- WRITING DATA FILE %s\n",mesh->outf); TIM_chrono(ON,&MMG_ctim[1]); if ( !MMG_unscaleMesh(mesh,sol) ) return(1); - MMG_saveMesh(mesh,mesh->outf); + MMG_saveMesh(mesh,mesh->outf,dataff); if ( info->option == 9 ) { - MMG_saveSol(mesh,sol,mesh->outf); - MMG_saveVect(mesh,mesh->move); + MMG_saveSol(mesh,sol,mesh->outf,dataff); + MMG_saveVect(mesh,mesh->move,dataff); } else - MMG_saveSol(mesh,sol,mesh->outf); + MMG_saveSol(mesh,sol,mesh->outf,dataff); TIM_chrono(OFF,&MMG_ctim[1]); - if ( MMG_imprim ) fprintf(stdout," -- WRITING COMPLETED\n"); + if ( MMG_imprim && !dataff) fprintf(stdout," -- WRITING COMPLETED\n"); /* free mem */ M_free(mesh->point); @@ -697,14 +710,42 @@ M_free(mesh->tetra); /*la desallocation de ce pointeur plante dans certains cas...*/ M_free(mesh->adja); + /* free FH thank to Iztok Bajc */ + /* if( mesh->outf ) free( mesh->outf ); */ + if(sol->metold ) 
M_free(sol->metold ); + if(mesh->disp->cold ) M_free(mesh->disp->cold); + /* ---- */ M_free(mesh->disp->alpha); M_free(mesh->disp->mv); M_free(mesh->disp); - if ( sol->npfixe ) M_free(sol->met); + if ( sol->met ) M_free(sol->met); M_free(sol); if ( MMG_imprim < -4 || info->ddebug ) M_memDump(); M_free(mesh); + if(MMG_imprim && dataff ) endcod(); + if( M_memLeak() > 1000) + { + M_memDump(); + } return(0); } + +int main(int argc,char *argv[]) { + return mainmmg3d( argc,argv,0); +} +/* + def + */ + MMG_Swap MMG_swpptr; + double (*MMG_length)(double *,double *,double *,double *); + double (*MMG_caltet)(pMesh ,pSol ,int ); + double (*MMG_calte1)(pMesh ,pSol ,int ); + int (*MMG_caltet2)(pMesh ,pSol ,int ,int ,double ,double *); + int (*MMG_cavity)(pMesh ,pSol ,int ,int ,pList ,int ); + int (*MMG_buckin)(pMesh ,pSol ,pBucket ,int ); + int (*MMG_optlen)(pMesh ,pSol ,double ,int ); + int (*MMG_interp)(double *,double *,double *,double ); + int (*MMG_optlentet)(pMesh ,pSol ,pQueue ,double ,int ,int ); + int (*MMG_movevertex)(pMesh ,pSol ,int ,int ); --- mmg3d4/build/sources/eigenv.c 2012-12-19 16:05:32.000000000 +0100 +++ mmg3d4-new/build/sources/eigenv.c 2016-01-28 12:10:07.000000000 +0100 @@ -48,7 +48,7 @@ #include /* seeking 1.e-05 accuracy */ -#define EPSD 1.e-15 +#define EPSD 1.e-12 #define EPSD2 1.e-10 #define EPS6 5.e-06 #define EPS 1.e-06 FreeFem-sources-4.9/3rdparty/mshmet/000775 000000 000000 00000000000 14037356732 017417 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/mshmet/Makefile000775 000000 000000 00000005562 14037356732 021072 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- all-local: mshmet include ff-flags # Downloading and compiling mshmet # ------------------------------- # DIRPKG= ../pkg SRCDIR= ./mshmet$(mshmet_VERSION) #-$(mshmet_VERSION) PACKAGE=$(DIRPKG)/mshmet$(mshmet_VERSION).tgz SERVER=http://www.ann.jussieu.fr/~frey/ftp/archives/ INSTALL=../.. 
mshmet_VERSION=.2012.04.25 FAIRE=$(SRCDIR)/FAIRE # ---------------------- # mshmetlib MSHMET_DIR = $(abs_top_builddir)/3rdparty/mshmet/mshmet$(mshmet_VERSION) MSHMET_SRCDIRNOLIB = $(MSHMET_DIR)/sources MSHMET_SRCDIR = $(MSHMET_DIR)/sourceslib MSHMET_OBJDIR = $(MSHMET_DIR)/objects mshmet: $(FAIRE) $(FAIRE):$(SRCDIR)/FAIT $(MAKE) WHERE $(SRCDIR)/$(INSTALL) touch $(FAIRE) $(SRCDIR)/FAIT:$(SRCDIR)/tag-tar cd $(MSHMET_DIR); $(MAKE) touch $(SRCDIR)/FAIT # FFCS - libMesh is also required by yams, so we move all the rules to [[file:../../../Makefile.am]] install:$(SRCDIR)/FAIT cp $(MSHMET_SRCDIR)/mshmetlib.h $(SRCDIR)/$(INSTALL)/include/mshmetlib.h cp $(MSHMET_OBJDIR)/libmshmet.a $(SRCDIR)/$(INSTALL)/lib/libmshmet.a # FFCS - simplify makefile structure for automatic rebuilds WHERE:install echo mshmet LD -L@DIR@/lib -lmshmet >$(SRCDIR)/$(INSTALL)/lib/WHERE.mshmet echo mshmet INCLUDE -I@DIR@/include>> $(SRCDIR)/$(INSTALL)/lib/WHERE.mshmet $(SRCDIR)/tag-tar: $(PACKAGE) mshmetlib-internal.h mshmetlib.c mshmetlib.h -mkdir mshmet$(mshmet_VERSION) cd mshmet$(mshmet_VERSION); tar xzf ../$(PACKAGE) -mkdir $(MSHMET_SRCDIR) cp $(MSHMET_SRCDIRNOLIB)/*.c $(MSHMET_SRCDIRNOLIB)/*.h $(MSHMET_SRCDIR) cp $(MSHMET_SRCDIRNOLIB)/compil.date $(MSHMET_SRCDIR) cp mshmetlib-internal.h mshmetlib.c mshmetlib.h $(MSHMET_SRCDIR)/ cp ../../src/libMesh/chrono.h $(MSHMET_SRCDIR) rm $(MSHMET_SRCDIR)/mshmet.c rm $(MSHMET_SRCDIR)/chrono.c cp ../../src/libMesh/eigenv.c $(MSHMET_SRCDIR) cp Makefile-mshmet.inc $(MSHMET_DIR)/makefile cd $(MSHMET_SRCDIR); patch -p2 < ../../mshmet.2011.03.06.patch # # Patch for i586 developed by John Hunt (14/2/13) # cd $(MSHMET_SRCDIR); patch -p2 < ../../mshmet.2012.04.25_i586.patch touch $(SRCDIR)/tag-tar $(PACKAGE): ../getall -o mshmet -a clean: -rm ff-flags # FFCS - make sure that all directories are cleaned. 
This is especially important under Windows because there is no # compilation dependencies control there (see # [[file:c:/cygwin/home/alh/ffcs/dist/configure.ac::dependency_tracking]]) -rm -rf mshmet.????.??.?? -rm FAIT $(FAIRE) ff-flags: ../Makefile grep 'abs_top_builddir *=' ../Makefile >> ff-flags grep 'CC *=' ../Makefile >> ff-flags grep 'CFLAGS *=' ../Makefile >> ff-flags grep 'LDFLAGS *=' ../Makefile >> ff-flags grep 'AR *=' ../Makefile >> ff-flags grep 'ARFLAGS *=' ../Makefile >> ff-flags grep 'RANLIB *=' ../Makefile >> ff-flags grep 'WGET *=' ../Makefile >> ff-flags grep 'mshmet_VERSION *=' ./Makefile >> ff-flags .PHONY: $(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/mshmet/Makefile-mshmet.inc000664 000000 000000 00000002253 14037356732 023124 0ustar00rootroot000000 000000 include ../ff-flags MSHMETDIR = $(abs_top_builddir)/3rdparty/mshmet/mshmet$(mshmet_VERSION) # working dirs EXEDIR = #$(MSHMETDIR)/executable SRCDIR = $(MSHMETDIR)/sourceslib OBJDIR = $(MSHMETDIR)/objects/$(ARCHI) ARCDIR = $(MSHMETDIR)/archives DIRDIR = $(EXEDIR) $(OBJDIR) $(ARCDIR) VPATH = $(SRCDIR) INCDIR = -I$(MSHMETDIR)/sourceslib/ -I$(abs_top_builddir)/src/libMesh/ LDLDIR = -L$(abs_top_builddir)/src/libMesh/ -lMesh # objects list src = $(wildcard $(SRCDIR)/*.c) header = $(wildcard $(SRCDIR)/*.h) objs = $(patsubst $(SRCDIR)%,$(OBJDIR)%,$(src:.c=.o)) prog = mshmet lib = $(OBJDIR)/libmshmet.a #.SILENT: $(OBJDIR)/%.o: $(SRCDIR)/%.c $(CC) $(OPT64) $(INCDIR) $(CFLAGS) -DSERIAL -c $< -o $@ $(EXEDIR)/$(prog):$(DIRDIR) $(objs) echo "#define COMPIL " '"' `date` '"' > $(SRCDIR)/compil.date #$(CC) -c $(CFLAGS) $(INCDIR) $(SRCDIR)/mshmet.c -o $(OBJDIR)/mshmet.o #$(CC) $(LDFLAGS) $(OPT64) $(LDLDIR) $(objs) -o $@ -lm $(AR) $(ARFLAGS) $(lib) $(objs) $(RANLIB) $(lib) $(objs):$(header) $(DIRDIR): @[ -d $@ ] || mkdir $@ clean: -rm $(objs) $(EXEDIR)/$(prog) tar:$(DIRDIR) tar czf $(ARCDIR)/$(prog).`date +"%Y.%m.%d"`.tgz sources makefile target: $(EXEDIR)/$(prog)
FreeFem-sources-4.9/3rdparty/mshmet/mshmet.2011.03.06.patch000664 000000 000000 00000004041 14037356732 022763 0ustar00rootroot000000 000000 --- mshmet.2011.03.06/sources/mshmet.h 2011-02-11 19:41:49.000000000 +0100 +++ mshmet.2011.03.06/sourceslib/mshmet.h 2011-11-16 22:04:20.000000000 +0100 @@ -33,7 +33,7 @@ #define MS_MAX(a,b) ( ((a) < (b)) ? (b) : (a) ) #define MS_MIN(a,b) ( ((a) < (b)) ? (a) : (b) ) -char idir[5]; +extern char idir[5]; typedef struct { double c[3]; @@ -135,17 +135,17 @@ pHash hashEdge_3d(pMesh mesh); pHash hashEdge_2d(pMesh mesh); -int (*boulep)(pMesh ,int ,int ,int *); -int (*hashel)(pMesh ); -int (*gradLS)(pMesh ,pSol ,int ,int ); -int (*hessLS)(pMesh ,pSol ,int ,int ); -int (*avgval)(pMesh ,pSol ,int ); -int (*clsval)(pMesh ,pSol ,int ); -int (*nrmhes)(pMesh ,pSol ,int ); -int (*redsim)(double *,double *,double *); -int (*defmet)(pMesh ,pSol ,int ); -double (*getSol)(pSol ,int ,int ); -int (*metrLS)(pMesh mesh,pSol ); -int (*lissag)(pMesh ,pSol , int ,int ); +extern int (*boulep)(pMesh ,int ,int ,int *); +extern int (*hashel)(pMesh ); +extern int (*gradLS)(pMesh ,pSol ,int ,int ); +extern int (*hessLS)(pMesh ,pSol ,int ,int ); +extern int (*avgval)(pMesh ,pSol ,int ); +extern int (*clsval)(pMesh ,pSol ,int ); +extern int (*nrmhes)(pMesh ,pSol ,int ); +extern int (*redsim)(double *,double *,double *); +extern int (*defmet)(pMesh ,pSol ,int ); +extern double (*getSol)(pSol ,int ,int ); +extern int (*metrLS)(pMesh mesh,pSol ); +extern int (*lissag)(pMesh ,pSol , int ,int ); #endif --- mshmet.2012.04.25/sources/lplib3.c 2013-04-10 11:53:17.000000000 +0200 +++ mshmet.2012.04.25/sourceslib/lplib3.c 2013-04-10 11:54:49.000000000 +0200 @@ -30,7 +30,10 @@ #include #include #include "lplib3.h" - +#ifdef __FreeBSD__ +#include +#include +#endif /*----------------------------------------------------------*/ /* Defines */ @@ -1250,6 +1253,7 @@ * NPROC environment variable (BSD/OS, CrayOS) * sysctl hw.ncpu or kern.smp.cpus */ + int ncpu; if (pmc_init() == 
0 && (ncpu = pmc_ncpu()) != -1) maxthreads = ncpu; else FreeFem-sources-4.9/3rdparty/mshmet/mshmet.2012.04.25_i586.patch000664 000000 000000 00000000517 14037356732 023545 0ustar00rootroot000000 000000 --- mshmet.2012.04.25/sourceslib/libmesh5.c.orig 2013-02-12 17:47:00.093678985 +0000 +++ mshmet.2012.04.25/sourceslib/libmesh5.c 2013-02-12 17:48:27.083684096 +0000 @@ -1381,7 +1381,7 @@ int IntVal; long pos; - if(msh->ver >= 3) + if ( (msh->ver >= 3) && (sizeof(long) == 8) ) ScaDblWrd(msh, (unsigned char*)&pos); else { FreeFem-sources-4.9/3rdparty/mshmet/mshmetlib-internal.h000664 000000 000000 00000000564 14037356732 023373 0ustar00rootroot000000 000000 typedef MSHMET_Point Point; typedef MSHMET_Tetra Tetra; typedef MSHMET_Tria Tria; typedef MSHMET_Mesh Mesh; typedef MSHMET_Sol Sol; typedef MSHMET_Deriv Deriv; typedef MSHMET_pPoint pPoint; typedef MSHMET_pTetra pTetra; typedef MSHMET_pDeriv pDeriv; typedef MSHMET_pTria pTria; typedef MSHMET_Info Info; typedef MSHMET_pMesh pMesh; typedef MSHMET_pSol pSol; FreeFem-sources-4.9/3rdparty/mshmet/mshmetlib.c000664 000000 000000 00000021526 14037356732 021555 0ustar00rootroot000000 000000 /* mshmetlib.c mshmetlib(int option, ...) to use mshmet via a library * compute metric based on hessian * j.morice LJLL 2010 * Copyright (c) LJLL, 2010. 
*/ #include "mshmet.h" #include "compil.date" extern long verbosity; char idir[5] = {0,1,2,0,1}; mytime mshmet_ctim[TIMEMAX]; int (*boulep)(pMesh ,int ,int ,int *); int (*hashel)(pMesh ); int (*gradLS)(pMesh ,pSol ,int ,int ); int (*hessLS)(pMesh ,pSol ,int ,int ); int (*avgval)(pMesh ,pSol ,int ); int (*clsval)(pMesh ,pSol ,int ); int (*nrmhes)(pMesh ,pSol ,int ); int (*redsim)(double *,double *,double *); int (*defmet)(pMesh ,pSol ,int ); double (*getSol)(pSol ,int ,int ); int (*metrLS)(pMesh mesh,pSol ); int (*lissag)(pMesh ,pSol , int ,int ); static void mshmet_excfun(int sigid) { fprintf(stdout,"\n Unexpected error:"); fflush(stdout); switch(sigid) { case SIGABRT: fprintf(stdout," Abnormal stop\n"); exit(1); case SIGFPE: fprintf(stdout," Floating-point exception\n"); exit(1); case SIGILL: fprintf(stdout," Illegal instruction\n"); exit(1); case SIGSEGV: fprintf(stdout," Segmentation fault\n"); exit(1); case SIGTERM: case SIGINT: //fprintf(stdout," Program killed\n"); exit(1); fprintf(stdout," Abnormal end\n"); exit(1); } exit(1); } /* static void usage(char *prog) { fprintf(stdout,"\n usage: %s filein[.mesh] [solin[.sol]] [fileout.sol] -eps x -hmin y -hmax z -v -iso -norm\n",prog); fprintf(stdout,"\n** Generic options :\n"); fprintf(stdout,"-d Turn on debug mode\n"); fprintf(stdout,"-h Print this message\n"); fprintf(stdout,"-ls Build levelset metric\n"); fprintf(stdout,"-v [n] Tune level of verbosity\n"); fprintf(stdout,"-m file Use metric file\n"); fprintf(stdout,"\n** Specific options : \n"); fprintf(stdout," -eps : tolerance\n"); fprintf(stdout," -hmin: min size\n"); fprintf(stdout," -hmax: max size\n"); fprintf(stdout," -iso : isotropic\n"); fprintf(stdout," -w : relative width for LS (0name); ptr = strstr(data,".sol"); if ( ptr ) *ptr = '\0'; strcat(data,".mhes"); in = fopen(data,"r"); if ( !in ) { strcpy(data,"DEFAULT.hmet"); in = fopen(data,"r"); if ( !in ) { if ( mesh->info.imprim < 0 ) fprintf(stdout," %%%% DEFAULT VALUES (%g %g %g)\n", 
mesh->info.eps,mesh->info.hmin,mesh->info.hmax); return(1); } } fprintf(stdout," %%%% %s FOUND\n",data); while ( !feof(in) ) { ret = fscanf(in,"%s",key); if ( !ret || feof(in) ) break; for (i=0; iinfo.hmin = dummy; } else if ( !strcmp(key,"hmax") ) { fscanf(in,"%f",&dummy); mesh->info.hmax = dummy; } else if ( !strcmp(key,"eps") ) { fscanf(in,"%f",&dummy); mesh->info.eps = dummy; } else if ( !strcmp(key,"iso") ) { mesh->info.iso = 1; } else if ( !strcmp(key,"norm") ) { fscanf(in,"%d",&mesh->info.nnu); } else if ( key[0] == '#' ) { fgets(key,255,in); } else fprintf(stderr," unrecognized keyword : %s\n",key); } fclose(in); return(1); } */ static void mshmet_stats(pMesh mesh,pSol sol) { fprintf(stdout," NUMBER OF GIVEN VERTICES %8d\n",mesh->np); if ( mesh->nt ) fprintf(stdout," NUMBER OF GIVEN TRIANGLES %8d\n",mesh->nt); if ( mesh->ne ) fprintf(stdout," NUMBER OF GIVEN TETRAHEDRA %8d\n",mesh->ne); fprintf(stdout," NUMBER OF GIVEN DATA %8d\n",sol->np); } static void mshmet_endcod() { double ttot,ttim[TIMEMAX]; int k,call[TIMEMAX]; chrono(OFF,&mshmet_ctim[0]); for (k=0; k 0.01 ) { fprintf(stdout,"\n -- CPU REQUIREMENTS\n"); fprintf(stdout," in/out %8.2f %% %3d. calls, %7.2f sec/call\n", 100.*ttim[1]/ttim[0],call[1],ttim[1]/(float)call[1]); fprintf(stdout," analysis %8.2f %% %3d. calls, %7.2f sec/call\n", 100.*ttim[2]/ttim[0],call[2],ttim[2]/(float)call[2]); fprintf(stdout," metric %8.2f %% %3d. calls, %7.2f sec/call\n", 100.*ttim[3]/ttim[0],call[3],ttim[3]/(float)call[3]); fprintf(stdout," total %8.2f %% %3d. calls, %7.2f sec/call\n", 100.*ttot/ttim[0],call[0],ttot/(float)call[0]); } fprintf(stdout,"\n ELAPSED TIME %.2f SEC. 
(%.2f)\n",ttim[0],ttot); } /* set function pointers */ /* set function pointers */ void MSHMET_setfunc(pMesh mesh) { if ( mesh->dim == 2 ) { boulep = boulep_2d; hashel = hashel_2d; gradLS = gradLS_2d; hessLS = hessLS_2d; getSol = getSol_2d; avgval = avgval_2d; clsval = clsval_2d; nrmhes = nrmhes_2d; defmet = defmet_2d; redsim = redsim_2d; metrLS = metrLS_2d; lissag = lissag_2d; } else { if ( mesh->ne > 0 ) { /* 3d */ boulep = boulep_3d; hashel = hashel_3d; gradLS = gradLS_3d; hessLS = hessLS_3d; getSol = getSol_3d; avgval = avgval_3d; clsval = clsval_3d; nrmhes = nrmhes_3d; defmet = defmet_3d; redsim = redsim_3d; metrLS = metrLS_3d; lissag = lissag_3d; } else { /* surface mesh */ boulep = boulep_2d; hashel = hashel_2d; lissag = lissag_2d; avgval = avgval_3d; clsval = clsval_3d; nrmhes = nrmhes_3d; getSol = getSol_3d; redsim = redsim_3d; gradLS = gradLS_s; hessLS = hessLS_s; defmet = defmet_s; metrLS = metrLS_3d; } } } int MSHMET_mshmet(int intopt[7], double fopt[4], pMesh mesh, pSol sol){ Info *info; if ( intopt[4] ) { fprintf(stdout," -- MSHMET, Release %s (%s) \n",MS_VER,MS_REL); fprintf(stdout," %s\n",MS_CPY); fprintf(stdout," %s\n",COMPIL); } /* trap exceptions */ signal(SIGABRT,mshmet_excfun); signal(SIGFPE,mshmet_excfun); signal(SIGILL,mshmet_excfun); signal(SIGSEGV,mshmet_excfun); signal(SIGTERM,mshmet_excfun); signal(SIGINT,mshmet_excfun); //atexit(mshmet_endcod); tminit(mshmet_ctim,TIMEMAX); chrono(ON,&mshmet_ctim[0]); chrono(ON,&mshmet_ctim[1]); /* default */ info = &mesh->info; info->hmin = (float) fopt[0]; // 0.01; info->hmax = (float) fopt[1]; // 1.0; info->eps = (float) fopt[2]; // 0.01; info->width = (float) fopt[3]; // 0.05; info->nnu = intopt[0]; // 0; info->iso = intopt[1]; // 0; info->ls = intopt[2]; // 0; info->ddebug = intopt[3]; // 0; info->imprim = intopt[4]; // 10; info->nlis = intopt[5]; // 0; info->bin = 1; // pas besoin c'est pour le fichier info->nsol = -1; //-1; // pas besoin ==> on peut prendre plusieurs solutions en meme temps ??? 
info->metric = intopt[6]; // 0; // metric given besoin ??? MSHMET_setfunc(mesh); chrono(OFF,&mshmet_ctim[1]); if ( mesh->info.imprim ) { mshmet_stats(mesh,sol); fprintf(stdout," -- DATA READING COMPLETED. %.2f sec.\n",gttime(mshmet_ctim[1])); fprintf(stdout,"\n %s\n MODULE MSHMET-LJLL : %s (%s)\n %s\n", MS_STR,MS_VER,MS_REL,MS_STR); } /* analysis */ chrono(ON,&mshmet_ctim[2]); if ( mesh->info.imprim ) fprintf(stdout,"\n -- PHASE 1 : ANALYSIS\n"); if ( abs(mesh->info.imprim) > 4 ) { fprintf(stdout," ** SETTING ADJACENCIES\n"); fflush(stdout); } if ( !scaleMesh(mesh,sol) ) return(1); if ( !hashel(mesh) ) exit(1); chrono(OFF,&mshmet_ctim[2]); if ( mesh->info.imprim ) fprintf(stdout," -- PHASE 1 COMPLETED. %.2f sec.\n",gttime(mshmet_ctim[2])); /* metric */ chrono(ON,&mshmet_ctim[3]); if ( mesh->info.imprim ) fprintf(stdout,"\n -- PHASE 2 : METRIC\n"); if ( !mshme1(mesh,sol) ) exit(1); chrono(OFF,&mshmet_ctim[3]); if ( mesh->info.imprim ) fprintf(stdout," -- PHASE 2 COMPLETED. %.2f sec.\n",gttime(mshmet_ctim[3])); if ( mesh->info.imprim ) fprintf(stdout,"\n %s\n END OF MODULE MSHMET \n %s\n",MS_STR,MS_STR); /* sol->outn="zzzz"; if ( !saveMet(sol,&mesh->info,sol->outn) ) exit(1); */ if ( mesh->info.imprim ) { mshmet_endcod(); fprintf(stdout,"\n %s\n END OF MODULE MSHMET \n %s\n",MS_STR,MS_STR); } if ( mesh->info.imprim < -4 || mesh->info.ddebug ) M_memDump(); return(0); } FreeFem-sources-4.9/3rdparty/mshmet/mshmetlib.h000664 000000 000000 00000003604 14037356732 021557 0ustar00rootroot000000 000000 typedef struct { /* double aire,rins; double c[3]; int s,nv,mark; unsigned char b,h; */ double c[3]; int s,nv,mark; unsigned char b,h; } MSHMET_Point; typedef MSHMET_Point * MSHMET_pPoint; typedef struct { int v[3]; int mark; /* double aire; int v[3]; int mark;*/ } MSHMET_Tria; typedef MSHMET_Tria * MSHMET_pTria; typedef struct { int v[4]; int mark; } MSHMET_Tetra; typedef MSHMET_Tetra * MSHMET_pTetra; typedef struct { double delta; double min[3],max[3]; float 
eps,hmin,hmax,width,ani,hgrad,map; int nnu,nsol,nlis; char imprim,ddebug,iso,bin,metric,ls; /* double delta; double min[3],max[3]; float eps,hmin,hmax,width; int nnu,nsol,nlis; char imprim,ddebug,iso,bin,metric,ls; */ } MSHMET_Info; typedef struct { /* int np,nt,ne,ver,dim; int *adja,mark; char *name,*mname; MSHMET_pPoint point; MSHMET_pTria tria; MSHMET_pTetra tetra; MSHMET_Info info; */ int np,nt,ne,ver,dim; int *adja,mark; char *name,*mname; MSHMET_pPoint point; MSHMET_pTria tria; MSHMET_pTetra tetra; MSHMET_Info info; } MSHMET_Mesh; typedef MSHMET_Mesh * MSHMET_pMesh; typedef struct { int np,ver,dim,type,size,typtab[GmfMaxTyp]; double *sol,*met,*hes,*grd,*nn,umin,umax; char *name,*outn,*mapname; /* version 2.0 int np,ver,dim,type,size,typtab[GmfMaxTyp]; double *sol,*met,umin,umax; char *name,*outn; */ } MSHMET_Sol; typedef MSHMET_Sol * MSHMET_pSol; typedef struct { double grd[3]; double hes[6]; } MSHMET_Deriv; typedef MSHMET_Deriv * MSHMET_pDeriv; #ifdef __cplusplus namespace mshmet{ extern "C" { #endif int MSHMET_mshmet(int intopt[7], double fopt[4], MSHMET_pMesh mesh, MSHMET_pSol sol); #ifdef __cplusplus }} #endif FreeFem-sources-4.9/3rdparty/mumps-seq/000775 000000 000000 00000000000 14037356732 020051 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/mumps-seq/Makefile000664 000000 000000 00000005770 14037356732 021522 0ustar00rootroot000000 000000 # ====================================================================== # Laboratoire Jacques-Louis Lions # Université Pierre et Marie Curie-Paris6, UMR 7598, Paris, F-75005 France # http://www.ljll.math.upmc.fr/lehyaric # ====================================================================== # This file is part of Freefem++ # # Freefem++ is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. 
# # Freefem++ is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with Freefem++; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ====================================================================== # headeralh default=0 freefem make multipleauthors start=19/03/10 upmc # Downloading and compiling extra libraries # ----------------------------------------- include Makefile.inc all-local: mumps mumps:../lib/WHERE.mumpsseq # Downloading and compiling mumps # ------------------------------- DIRPKG=../pkg SRCDIR=MUMPS_$(VERSION) PACKAGE1=$(DIRPKG)/MUMPS_$(VERSION).tar.gz INSTALL=../.. VERSION=5.0.2 # FFCS - install and WHERE need to be done sequentially, even in parallel builds ../lib/WHERE.mumpsseq: install.done echo mumps_seq LD -L@DIR@/lib -ldmumpsFREEFEM-SEQ -lzmumpsFREEFEM-SEQ -lmumps_commonFREEFEM-SEQ \ -lpordFREEFEM-SEQ $(LIBSPTHREAD) > ../lib/WHERE.mumpsseq echo mumps_seq INCLUDE -I@DIR@/include/mumps_seq >> ../lib/WHERE.mumpsseq echo libseq LD -L@DIR@/lib -lmpiseqFREEFEM-SEQ >>../lib/WHERE.mumpsseq echo libseq INCLUDE -I@DIR@/include -I@DIR@/include/mumps_seq >> ../lib/WHERE.mumpsseq install.done: $(SRCDIR)/FAIT -mkdir -p ../include/mumps_seq cp $(SRCDIR)/include/*.h ../include/mumps_seq cp $(SRCDIR)/libseq/*.h ../include/mumps_seq -mkdir -p ../lib cp $(SRCDIR)/lib/*.a ../lib/ cp $(SRCDIR)/libseq/libmpiseqFREEFEM-SEQ.a ../lib/ touch $@ $(SRCDIR)/FAIT:Makefile.inc $(SRCDIR)/tag-tar cp Makefile.inc $(SRCDIR) # # FFCS - 22/5/12 - Mumps has difficulties compiling d & z in parallel # cd $(SRCDIR) && $(MAKE) d cd $(SRCDIR) && $(MAKE) z touch $@ Makefile.inc: ../../config.status Makefile Makefile-mumps-$(VERSION).inc ../../config.status 
--file="Makefile.inc:Makefile-mumps-$(VERSION).inc" $(SRCDIR)/tag-tar:$(PACKAGE1) tar xzf $(PACKAGE1) touch $@ $(PACKAGE1): ../getall -o MUMPS -a clean: -rm Makefile.inc FAIRE* *~ -rm -rf ../include/*mumps* -rm -rf ../lib/lib*mumps*SEQ* ../lib/libpord*SEQ.a ../lib/libmpiseq*SEQ.a ../lib/WHERE.mumpsseq -rm -rf $(SRCDIR) -rm *.done config.log # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/mumps-seq/Makefile-mumps-5.0.2.inc000664 000000 000000 00000013366 14037356732 024071 0ustar00rootroot000000 000000 abs_top_builddir=@abs_top_builddir@ DOWNLOADFF= $(abs_top_builddir)/3rdparty # # This file is part of MUMPS 4.10.0, built on Tue May 10 12:56:32 UTC 2011 # ################################################################################ # # Makefile.inc.generic # # This defines some parameters dependent on your platform; you should # look for the approriate file in the directory ./Make.inc/ and copy it # into a file called Makefile.inc. For example, from the MUMPS root # directory, use # "cp Make.inc/Makefile.inc.generic ./Makefile.inc" # (see the main README file for details) # # If you do not find any suitable Makefile in Makefile.inc, use this file: # "cp Make.inc/Makefile.inc.generic ./Makefile.inc" and modify it according # to the comments given below. If you manage to build MUMPS on a new platform, # and think that this could be useful to others, you may want to send us # the corresponding Makefile.inc file. # ################################################################################ ######################################################################## #Begin orderings # # NOTE that PORD is distributed within MUMPS by default. If you would like to # use other orderings, you need to obtain the corresponding package and modify # the variables below accordingly. 
# For example, to have Metis available within MUMPS: # 1/ download Metis and compile it # 2/ uncomment (suppress # in first column) lines # starting with LMETISDIR, LMETIS # 3/ add -Dmetis in line ORDERINGSF # ORDERINGSF = -Dpord -Dmetis # 4/ Compile and install MUMPS # make clean; make (to clean up previous installation) # # Metis/ParMetis and SCOTCH/PT-SCOTCH (ver 5.1 and later) orderings are now available for MUMPS. # SCOTCHDIR = $(DOWNLOADFF)/ ISCOTCH = #-I$(SCOTCHDIR)/include/scotch # You have to choose one among the following two lines depending on # the type of analysis you want to perform. If you want to perform only # sequential analysis choose the first (remember to add -Dscotch in the ORDERINGSF # variable below); for both parallel and sequential analysis choose the second # line (remember to add -Dptscotch in the ORDERINGSF variable below) #LSCOTCH = -L$(SCOTCHDIR)/lib/scotch/ -lesmumps -lscotch -lscotcherr LSCOTCH = #-L$(SCOTCHDIR)/lib -lptesmumps -lptscotch -lptscotcherr LPORDDIR = $(topdir)/PORD/lib/ IPORD = -I$(topdir)/PORD/include/ LPORD = -L$(LPORDDIR) -lpord LMETISDIR = $(DOWNLOADFF) #IMETIS = # Metis doesn't need include files (Fortran interface avail.) # You have to choose one among the following two lines depending on # the type of analysis you want to perform. If you want to perform only # sequential analysis choose the first (remember to add -Dmetis in the ORDERINGSF # variable below); for both parallel and sequential analysis choose the second # line (remember to add -Dparmetis in the ORDERINGSF variable below) LMETIS = # -L$(LMETISDIR)/lib -lparmetis -lmetis # The following variables will be used in the compilation process. # Please note that -Dptscotch and -Dparmetis imply -Dscotch and -Dmetis respectively. 
#ORDERINGSF = -Dscotch -Dmetis -Dpord -Dptscotch -Dparmetis ORDERINGSF = -Dpord ORDERINGSC = $(ORDERINGSF) LORDERINGS = $(LMETIS) $(LPORD) $(LSCOTCH) IORDERINGSF = $(ISCOTCH) IORDERINGSC = $(IMETIS) $(IPORD) $(ISCOTCH) #End orderings ######################################################################## # DEFINE HERE SOME COMMON COMMANDS, THE COMPILER NAMES, ETC... # PLAT : use it to add a default suffix to the generated libraries PLAT = FREEFEM-SEQ # Library extension, + C and Fortran "-o" option # may be different under Windows LIBEXT = .a OUTC = -o OUTF = -o # RM : remove files RM = /bin/rm -f # CC : C compiler CC = @CC@ # FC : Fortran 90 compiler FC = @FC@ # FL : Fortran linker FL = @FC@ # AR : Archive object in a library # keep a space at the end if options have to be separated from lib name AR = @AR@ @ARFLAGS@ # RANLIB : generate index of an archive file # (optional: use "RANLIB = echo" in case of problem) RANLIB = @RANLIB@ #RANLIB = echo # SCALAP should define the SCALAPACK and BLACS libraries. #SCALAPDIR = $(DOWNLOADFF)/lib/scalapack/ #SCALAP = $(FFBLASLIB) -L$(SCALAPDIR) -lscalapack -L$(DOWNLOADFF)/lib/blacs/ -lblacs_MPI-$(PLAT)-0 -lblacsF77init_MPI-$(PLAT)-0 -lblacsCinit_MPI-$(PLAT)-0 # INCLUDE DIRECTORY FOR MPI #INCPAR = $(FFMPIINCLUDE) # LIBRARIES USED BY THE PARALLEL VERSION OF MUMPS: $(SCALAP) and MPI LIBPAR = #$(SCALAP) $(FFMPILIB) # The parallel version is not concerned by the next two lines.
# They are related to the sequential library provided by MUMPS, # to use instead of ScaLAPACK and MPI. INCSEQ = -I$(DOWNLOADFF)/mumps-seq/MUMPS_5.0.2/libseq LIBSEQ = -L$(DOWNLOADFF)//mumps-seq/MUMPS_5.0.2/libseq -lmpiseq$(PLAT) # DEFINE HERE YOUR BLAS LIBRARY LIBBLAS = @BLASLIBS@ # DEFINE YOUR PTHREAD LIBRARY LIBOTHERS = @LIBSPTHREAD@ @FLIBS@ $(FFLIBOTHERSMUMPS) LIBSPTHREAD = @LIBSPTHREAD@ # FORTRAN/C COMPATIBILITY: # Use: # -DAdd_ if your Fortran compiler adds an underscore at the end # of symbols, # -DAdd__ if your Fortran compiler adds 2 underscores, # # -DUPPER if your Fortran compiler uses uppercase symbols # # leave empty if your Fortran compiler does not change the symbols. # CDEFS = @CFLAGSF77@ #COMPILER OPTIONS OPTF = -O @FFLAGS@ OPTC = -O -I. @CFLAGS@ OPTL = -O @FFLAGS@ # CHOOSE BETWEEN USING THE SEQUENTIAL OR THE PARALLEL VERSION. #Sequential: INCS = $(INCSEQ) LIBS = $(LIBSEQ) LIBSEQNEEDED = libseqneeded #Parallel: #INCS = $(INCPAR) #LIBS = $(LIBPAR) #LIBSEQNEEDED = libseqneeded WGET = @WGET@ # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/mumps/000775 000000 000000 00000000000 14037356732 017263 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/mumps/Makefile000664 000000 000000 00000006023 14037356732 020724 0ustar00rootroot000000 000000 # ====================================================================== # Laboratoire Jacques-Louis Lions # Université Pierre et Marie Curie-Paris6, UMR 7598, Paris, F-75005 France # http://www.ljll.math.upmc.fr/lehyaric # ====================================================================== # This file is part of Freefem++ # # Freefem++ is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version.
# # Freefem++ is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with Freefem++; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ====================================================================== # headeralh default=0 freefem make multipleauthors start=19/03/10 upmc include Makefile.inc all-local: mumps # Downloading and compiling mumps # ------------------------------- DIRPKG=../pkg SRCDIR=MUMPS_$(VERSION) PACKAGE1=$(DIRPKG)/MUMPS_$(VERSION).tar.gz VERSION=5.0.2 mumps:../lib/WHERE.mumps $(SRCDIR)/FAIT: $(SRCDIR)/tag-tar cp Makefile.inc $(SRCDIR) # # ALH - 'd' and 'z' versions need to be made sequentially # cd $(SRCDIR);make d cd $(SRCDIR);make z touch $@ # ALH - WHERE was not built properly during a full compile from a clean source. This goal has been rewritten to solve # this issue. 
../lib/WHERE.mumps:install.done echo mumps LD -L@DIR@/lib -ldmumpsFREEFEM -lzmumpsFREEFEM -lmumps_commonFREEFEM -lpordFREEFEM $(LIBSPTHREAD) > $@ echo mumps INCLUDE -I@DIR@/include >> $@ install.done:$(SRCDIR)/FAIT -mkdir -p ../include/libseq cp $(SRCDIR)/include/*.h ../include/ cp $(SRCDIR)/libseq/*.h ../include/libseq -mkdir -p ../lib cp $(SRCDIR)/lib/*.a ../lib/ cp $(SRCDIR)/libseq/libmpiseqFREEFEM.a ../lib/ touch $@ FAIRE-$(VERSION):WHERE touch FAIRE-$(VERSION) # ALH - "make d" and "make z" need to be done sequentially $(SRCDIR)/FAIT:$(SRCDIR)/tag-tar Makefile.inc cp Makefile.inc $(SRCDIR) cd $(SRCDIR) && $(MAKE) d cd $(SRCDIR) && $(MAKE) z touch $@ Makefile.inc:../../config.status Makefile Makefile-mumps-$(VERSION).inc ../../config.status --file="Makefile.inc:Makefile-mumps-$(VERSION).inc" $(SRCDIR)/tag-tar:$(PACKAGE1) tar xzf $(PACKAGE1) #patch -d MUMPS_$(VERSION) -p 1 < MUMPS_$(VERSION).patch touch $(SRCDIR)/tag-tar clean:clean-local clean-local: #FH -cd $(SRCDIR) && $(MAKE) clean -C $(SRCDIR) -rm Makefile.inc FAIRE* ../lib/WHERE.mumps -rm -rf ../include/*mumps* -rm -rf ../lib/lib*mumpsFREEFEM* ../lib/libpordFREEFEM.a ../lib/libmpiseqFREEFEM.a -rm -rf $(SRCDIR) # -rm $(PACKAGE1) -rm config.log *.done # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/mumps/Makefile-mumps-5.0.2.inc000664 000000 000000 00000013545 14037356732 023302 0ustar00rootroot000000 000000 abs_top_builddir=@abs_top_builddir@ DOWNLOADFF= $(abs_top_builddir)/3rdparty # # This file is part of MUMPS 5.0.1, built on Tue May 10 12:56:32 UTC 2011 # ################################################################################ # # Makefile.inc.generic # # This defines some parameters dependent on your platform; you should # look for the approriate file in the directory ./Make.inc/ and copy it # into a file called Makefile.inc. 
For example, from the MUMPS root # directory, use # "cp Make.inc/Makefile.inc.generic ./Makefile.inc" # (see the main README file for details) # # If you do not find any suitable Makefile in Makefile.inc, use this file: # "cp Make.inc/Makefile.inc.generic ./Makefile.inc" and modify it according # to the comments given below. If you manage to build MUMPS on a new platform, # and think that this could be useful to others, you may want to send us # the corresponding Makefile.inc file. # ################################################################################ ######################################################################## #Begin orderings # # NOTE that PORD is distributed within MUMPS by default. If you would like to # use other orderings, you need to obtain the corresponding package and modify # the variables below accordingly. # For example, to have Metis available within MUMPS: # 1/ download Metis and compile it # 2/ uncomment (suppress # in first column) lines # starting with LMETISDIR, LMETIS # 3/ add -Dmetis in line ORDERINGSF # ORDERINGSF = -Dpord -Dmetis # 4/ Compile and install MUMPS # make clean; make (to clean up previous installation) # # Metis/ParMetis and SCOTCH/PT-SCOTCH (ver 5.1 and later) orderings are now available for MUMPS. # SCOTCHDIR = $(DOWNLOADFF)/ ISCOTCH = -I$(SCOTCHDIR)/include/scotch # You have to choose one among the following two lines depending on # the type of analysis you want to perform. 
If you want to perform only # sequential analysis choose the first (remember to add -Dscotch in the ORDERINGSF # variable below); for both parallel and sequential analysis choose the second # line (remember to add -Dptscotch in the ORDERINGSF variable below) #LSCOTCH = -L$(SCOTCHDIR)/lib/scotch/ -lesmumps -lscotch -lscotcherr LSCOTCH = -L$(SCOTCHDIR)/lib -lptesmumps -lptscotch -lptscotcherr LPORDDIR = $(topdir)/PORD/lib/ IPORD = -I$(topdir)/PORD/include/ LPORD = -L$(LPORDDIR) -lpord LMETISDIR = $(DOWNLOADFF) #IMETIS = # Metis doesn't need include files (Fortran interface avail.) # You have to choose one among the following two lines depending on # the type of analysis you want to perform. If you want to perform only # sequential analysis choose the first (remember to add -Dmetis in the ORDERINGSF # variable below); for both parallel and sequential analysis choose the second # line (remember to add -Dparmetis in the ORDERINGSF variable below) LMETIS = #-L$(LMETISDIR)/lib -lparmetis -lmetis # The following variables will be used in the compilation process. # Please note that -Dptscotch and -Dparmetis imply -Dscotch and -Dmetis respectively. ORDERINGSF = -Dscotch -Dpord #ORDERINGSF = -Dscotch -Dmetis -Dpord -Dptscotch -Dparmetis #ORDERINGSF = -Dpord ORDERINGSC = $(ORDERINGSF) LORDERINGS = $(LMETIS) $(LPORD) $(LSCOTCH) IORDERINGSF = $(ISCOTCH) IORDERINGSC = $(IMETIS) $(IPORD) $(ISCOTCH) #End orderings ######################################################################## # DEFINE HERE SOME COMMON COMMANDS, THE COMPILER NAMES, ETC... 
# PLAT : use it to add a default suffix to the generated libraries PLAT = FREEFEM # Library extension, + C and Fortran "-o" option # may be different under Windows LIBEXT = .a OUTC = -o OUTF = -o # RM : remove files RM = /bin/rm -f # CC : C compiler CC = @CC@ # FC : Fortran 90 compiler FC = @FC@ # FL : Fortran linker FL = @FC@ # AR : Archive object in a library # keep a space at the end if options have to be separated from lib name AR = @AR@ @ARFLAGS@ # RANLIB : generate index of an archive file # (optional: use "RANLIB = echo" in case of problem) RANLIB = @RANLIB@ #RANLIB = echo # SCALAP should define the SCALAPACK and BLACS libraries. SCALAPDIR = $(DOWNLOADFF)/lib/scalapack/ SCALAP = $(FFBLASLIB) -L$(SCALAPDIR) -lscalapack -L$(DOWNLOADFF)/lib/blacs/ -lblacs_MPI-$(PLAT)-0 -lblacsF77init_MPI-$(PLAT)-0 -lblacsCinit_MPI-$(PLAT)-0 # INCLUDE DIRECTORY FOR MPI INCPAR = $(FFMPIINCLUDE) # LIBRARIES USED BY THE PARALLEL VERSION OF MUMPS: $(SCALAP) and MPI LIBPAR = $(SCALAP) $(FFMPILIB) # The parallel version is not concerned by the next two lines. # They are related to the sequential library provided by MUMPS, # to use instead of ScaLAPACK and MPI. INCSEQ = -I$(topdir)/libseq LIBSEQ = -L$(topdir)/libseq -lmpiseq # DEFINE HERE YOUR BLAS LIBRARY LIBBLAS = @BLASLIBS@ # DEFINE YOUR PTHREAD LIBRARY LIBOTHERS = @LIBSPTHREAD@ @FLIBS@ $(FFLIBOTHERSMUMPS) LIBSPTHREAD = @LIBSPTHREAD@ # FORTRAN/C COMPATIBILITY: # Use: # -DAdd_ if your Fortran compiler adds an underscore at the end # of symbols, # -DAdd__ if your Fortran compiler adds 2 underscores, # # -DUPPER if your Fortran compiler uses uppercase symbols # # leave empty if your Fortran compiler does not change the symbols. # CDEFS = @CFLAGSF77@ #COMPILER OPTIONS # FFCS - 23/4/13 - -fno-range-check required on MinGW to compile with # Microsoft MPI. mpif.h contains INTEGER MPI_FLOAT_INT / PARAMETER # (MPI_FLOAT_INT=z'8c000000') which requires this. OPTF = -O @FFLAGS@ @MPI_INCLUDE@ @NO_RANGE_CHECK@ OPTC = -O -I.
@CFLAGS@ @MPI_INCLUDE@ OPTL = -O @FFLAGS@ @MPI_LIBFC@ # CHOOSE BETWEEN USING THE SEQUENTIAL OR THE PARALLEL VERSION. #Sequential: #INCS = $(INCSEQ) #LIBS = $(LIBSEQ) #LIBSEQNEEDED = libseqneeded #Parallel: INCS = $(INCPAR) LIBS = $(LIBPAR) LIBSEQNEEDED = libseqneeded WGET = @WGET@ FreeFem-sources-4.9/3rdparty/nlopt/000775 000000 000000 00000000000 14037356732 017256 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/nlopt/Make.inc000664 000000 000000 00000000165 14037356732 020630 0ustar00rootroot000000 000000 FFDOWNLOAD=@abs_builddir@/3rdparty CC=@CC@ CXX=@CXX@ WGET=@WGET@ CXXFLAGS=@CXXFLAGS@ CFLAGS=@CFLAGS@ CXXCPP=@CXXCPP@ FreeFem-sources-4.9/3rdparty/nlopt/Makefile000664 000000 000000 00000003005 14037356732 020714 0ustar00rootroot000000 000000 URL="http://ab-initio.mit.edu/nlopt/nlopt-2.2.4.tar.gz" SRCDIR=nlopt-2.2.4 TARGZ=../pkg/$(SRCDIR).tar.gz -include Make.defs all: FAIRE FAIRE: ../pkg $(SRCDIR)/FAIT $(SRCDIR)/FAIT: $(TARGZ) tar zxf $(TARGZ) # # FFCS - 30/11/11 - "--disable-dependency-tracking": dependencies pose problem on mixed Cygwin/MinGW setups # because of slashes and backslashes # # FFCS - 22/10/12 - Fred - "--without-octave": patch for one machine that has octave installed on it: building of # nlopt fails because it wants to install the octave components. Not a bad idea in general but probably not when # building ffcs. # #remove c++11 flags for windows FH ..
cxxflags=`echo $(CXXFLAGS) |sed -e 's/[-]std=c[+][+]11//g'` ; \ cd $(SRCDIR) && ./configure --disable-dependency-tracking --with-cxx --prefix="$(FFDOWNLOAD)" CXX="$(CXX)" \ CXXFLAGS="$(CXXFLAGS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$$cxxflag" "--without-threadlocal" "--without-octave" # # FFCS - 30/11/11 - we need to know when the building process breaks # cd $(SRCDIR) && $(MAKE) install touch $(SRCDIR)/FAIT $(MAKE) WHERE ../pkg: mkdir $@ $(TARGZ): ../getall -o NLopt -a clean-local: rm -rf $(SRCDIR) Make.defs rm -f ../lib/libnlopt_cxx* ../lib/WHERE.nlopt rm -f ../include/nlopt* clean:clean-local WHERE: -if [ -f $(SRCDIR)/FAIT ] ; then \ echo nlopt LD -L@DIR@/lib -lnlopt_cxx >../lib/WHERE.nlopt ;\ echo nlopt INCLUDE -I@DIR@/include >> ../lib/WHERE.nlopt ;\ fi Make.defs: ../../config.status Makefile Make.inc ../../config.status --file="Make.defs:Make.inc" FreeFem-sources-4.9/3rdparty/parmetis/000775 000000 000000 00000000000 14037356732 017746 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/parmetis/Makefile-parmetis.in000664 000000 000000 00000001367 14037356732 023644 0ustar00rootroot000000 000000 # Which compiler to use CC = @CC@ LD= @CC@ # What optimization level to use OPTFLAGS = @CFLAGS@ CFLAGS = @CFLAGS@ MPI_INCLUDE=@MPI_INCLUDE@ MPI_LIBC=@MPI_LIBC@ MPIRUN=@MPIRUN@ # What options to be used by the compiler # FFCS - add path to mpi.h (required for MacOS 10.8 + MacPorts OpenMPI) COPTIONS = -DHAVE_CONFIG_H -I. -I../../../.. 
-I'@MPI_INC_DIR@' @FFMETIS_CFLAGS@ #-D_MSC_VER # What options to be used by the loader LDOPTIONS = @LDFLAGS@ XTRALIBS= @MPI_LIBC@ # What archiving to use AR = @AR@ @ARFLAGS@ # What to use for indexing the archive RANLIB = @RANLIB@ #RANLIB = ar -ts #RANLIB = WGET = @WGET@ # hack to set -DUSE_GKREGEX if no regex.h => FF_HAVE_REGEX_H=0 USE_GKREGEX0=-DUSE_GKREGEX FLAGS_REGEX_H=$(USE_GKREGEX@FF_HAVE_REGEX_H@) FreeFem-sources-4.9/3rdparty/parmetis/makefile000775 000000 000000 00000015213 14037356732 021453 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- include Makefile.in all-local: parmetis # Downloading and compiling Tetgen # ------------------------------ # http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.tar.gz # Parmetis information DIRPKG=../pkg SRCDIR=parmetis-$(parmetis_VERSION) PACKAGE=$(DIRPKG)/ParMetis-$(parmetis_VERSION).tar.gz SERVER=http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis INSTALL=../.. parmetis_VERSION=4.0.3 parmetis: FAIRE test -z "$(MPIRUN)" ||$(MAKE) FAIRE-mpi $(SRCDIR)/FAIT: $(MAKE) install.done test -z "$(MPIRUN)" ||$(MAKE) $(SRCDIR)/FAIT-mpi -test -z "$(MPIRUN)" && rm $(SRCDIR)/FAIT-mpi touch $(SRCDIR)/FAIT $(SRCDIR)/FAIT-mpi: make install-mpi.done touch $(SRCDIR)/FAIT-mpi install.done:$(SRCDIR)/tag-compile -mkdir -p ../include -mkdir -p ../lib #test -z "$(MPIRUN)" || cp $(SRCDIR)/libparmetis.a ../lib cp $(SRCDIR)/libmetis.a ../lib cp $(SRCDIR)/include/*.h ../include cp $(SRCDIR)/metis/include/*.h ../include touch $@ install-mpi.done:$(SRCDIR)/tag-compile-mpi cp $(SRCDIR)/libparmetis.a ../lib touch $@ FAIRE: $(SRCDIR)/FAIT $(MAKE) WHERE test -z "$(MPIRUN)" || $(MAKE) WHERE-mpi touch FAIRE FAIRE-mpi: $(SRCDIR)/FAIT-mpi $(MAKE) WHERE-mpi touch FAIRE-mpi METISCFLAGS=$(FLAGS_REGEX_H) $(CFLAGS) -I../GKlib -I../include -I. -D__thread= PMETISCFLAGS=$(MPI_INCLUDE) $(FLAGS_REGEX_H) $(CFLAGS) -I../metis/GKlib -I../metis/include -I../include -I. 
-D__thread= $(SRCDIR)/tag-compile: $(SRCDIR)/tags make -C $(SRCDIR)/metis/libmetis $(OBJS_METIS) "CC=$(CC)" "CFLAGS=$(METISCFLAGS)" make -C $(SRCDIR)/metis/GKlib $(OBJS_GK) "CC=$(CC)" CFLAGS="$(METISCFLAGS) -Dmetis_EXPORTS" # test -z "$(MPIRUN)" || make -C $(SRCDIR)/libparmetis $(OBJS_PARMETIS) "CC=$(CC)" "CFLAGS=$(PMETISCFLAGS)" -mkdir $(SRCDIR)/lib cd $(SRCDIR); $(AR) libmetis.a metis/libmetis/*.o metis/GKlib/*.o # test -z "$(MPIRUN)" ||(cd $(SRCDIR); $(AR) libparmetis.a libparmetis/*.o ) touch $(SRCDIR)/tag-compile $(SRCDIR)/tag-compile-mpi: $(SRCDIR)/tags make -C $(SRCDIR)/libparmetis $(OBJS_PARMETIS) "CC=$(CC)" "CFLAGS=$(PMETISCFLAGS)" -mkdir $(SRCDIR)/lib # cd $(SRCDIR); $(AR) libmetis.a metis/libmetis/*.o metis/GKlib/*.o (cd $(SRCDIR); $(AR) libparmetis.a libparmetis/*.o ) touch $(SRCDIR)/tag-compile-mpi WHERE: install.done -if [ -f $(SRCDIR)/FAIT ] ; then \ echo metis LD -L@DIR@/lib -lmetis >$(SRCDIR)/$(INSTALL)/lib/WHERE.metis ;\ echo metis INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.metis ;\ fi WHERE-mpi: install-mpi.done -if [ -f $(SRCDIR)/FAIT-mpi ] ; then \ echo parmetis LD -L@DIR@/lib -lparmetis -lmetis >$(SRCDIR)/$(INSTALL)/lib/WHERE.parmetis ;\ echo parmetis INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.parmetis ;\ fi OBJS_METIS=auxapi.o coarsen.o fm.o graph.o kwayrefine.o minconn.o options.o separator.o timing.o \ balance.o compress.o fortran.o initpart.o mcutil.o mincover.o parmetis.o sfm.o util.o \ bucketsort.o contig.o frename.o kmetis.o mesh.o mmd.o pmetis.o srefine.o wspace.o \ checkgraph.o debug.o gklib.o kwayfm.o meshpart.o ometis.o refine.o stat.o OBJS_GK=b64.o error.o fs.o graph.o itemsets.o omp.o random.o sort.o tokenizer.o \ blas.o evaluate.o getopt.o htable.o mcore.o pdb.o rw.o string.o util.o \ csr.o fkvkselect.o gkregex.o io.o memory.o pqueue.o seq.o timers.o OBJS_PARMETIS=akwayfm.o diffutil.o initpart.o move.o renumber.o wave.o \ ametis.o frename.o kmetis.o msetup.o rmetis.o weird.o \ balancemylink.o 
gkmetis.o kwayrefine.o node_refine.o selectq.o wspace.o \ comm.o gkmpi.o match.o ometis.o serial.o xyzpart.o \ csrmatch.o graph.o mdiffusion.o pspases.o stat.o \ ctrl.o initbalance.o mesh.o redomylink.o timer.o \ debug.o initmsection.o mmetis.o remap.o util.o # FFCS: patch is necessary for metis 4.0, but not for 4.0.3 $(SRCDIR)/tags: $(PACKAGE) tar xzf $(PACKAGE) patch -p1 ../include/parmetis/metis.h # # FFCS - 23/5/12 - cannot keep name libmetis.a because it is identical to the library created by # [[file:../metis/Makefile]] and library path mechanisms at link time pick one for the other on MinGW. mv $(SRCDIR)/$(INSTALL)/lib/parmetis/libmetis.a $(SRCDIR)/$(INSTALL)/lib/parmetis/libmetis.a # -cd $(SRCDIR)/Programs && make touch $@ # FFCS - simpler makefile WHERE.done:install.done echo parmetis LD -L@DIR@/lib -lparmetis -lmetis >$(SRCDIR)/$(INSTALL)/lib/WHERE.parmetis ; echo parmetis INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.parmetis ; touch $@ Makefile.in: ../../config.status Makefile-parmetis.in ../../config.status --file="Makefile.in:Makefile-parmetis.in" $(SRCDIR)/tags-old: $(PACKAGE) tar xzf $(PACKAGE) mv $(SRCDIR)/Programs/Makefile $(SRCDIR)/Programs/Makefile-orig sed -e 's;BINDIR = .*$$;BINDIR = ../$(INSTALL)/bin;' \ -e 's;../libparmetis.a;../$(INSTALL)/lib/parmetis/libparmetis.a;' \ -e 's;../libmetis.a;../$(INSTALL)/lib/parmetis/libmetis.a;' \ -e 's;-L[.][.];-L../$(INSTALL)/lib/parmetis;' \ <$(SRCDIR)/Programs/Makefile-orig \ >$(SRCDIR)/Programs/Makefile mv $(SRCDIR)/ParMETISLib/Makefile $(SRCDIR)/ParMETISLib/Makefile-orig sed -e 's;../libparmetis.a;../$(INSTALL)/lib/parmetis/libparmetis.a;' \ -e 's;../libmetis.a;../$(INSTALL)/lib/parmetis/libmetis.a;' \ <$(SRCDIR)/ParMETISLib/Makefile-orig \ >$(SRCDIR)/ParMETISLib/Makefile mv $(SRCDIR)/METISLib/Makefile $(SRCDIR)/METISLib/Makefile-orig sed -e 's;../libparmetis.a;../$(INSTALL)/lib/parmetis/libparmetis.a;' \ -e 's;../libmetis.a;../$(INSTALL)/lib/parmetis/libmetis.a;' \ 
<$(SRCDIR)/METISLib/Makefile-orig \ >$(SRCDIR)/METISLib/Makefile touch $(SRCDIR)/tags $(PACKAGE): ../getall -o ParMETIS -a # FFCS - 23/5/12 - corrected bug in removing .a from ../lib clean: -cd $(SRCDIR) && $(MAKE) realclean -C $(SRCDIR) -rm Makefile.in $(SRCDIR)/tags -rm -rf ../lib/parmetis -rm -rf ../include/parmetis # -rm -rf ../include/metis -rm lib/libmetis.* ib/libparmetis.* -rm include/metis.h include/parmetis.h -rm -rf $(SRCDIR) -rm *.done -rm FAIRE* WHERE* FreeFem-sources-4.9/3rdparty/parmetis/parmetis-4.0.3.patch000664 000000 000000 00000002327 14037356732 023257 0ustar00rootroot000000 000000 --- a/parmetis-4.0.3/metis/GKlib/gk_arch.h 2015-03-12 18:55:30.000000000 +0100 +++ b/parmetis-4.0.3/metis/GKlib/gk_arch.h 2015-03-12 20:27:25.000000000 +0100 @@ -41,7 +41,9 @@ #endif #include #include - #include +#ifndef __WIN32__ + #include +#endif #include #endif --- a/parmetis-4.0.3/metis/GKlib/gk_getopt.h 2013-03-30 17:24:50.000000000 +0100 +++ b/parmetis-4.0.3/metis/GKlib/gk_getopt.h 2015-03-26 14:26:48.000000000 +0100 @@ -52,11 +52,11 @@ /* Function prototypes */ -extern int gk_getopt(int __argc, char **__argv, char *__shortopts); -extern int gk_getopt_long(int __argc, char **__argv, char *__shortopts, - struct gk_option *__longopts, int *__longind); -extern int gk_getopt_long_only (int __argc, char **__argv, - char *__shortopts, struct gk_option *__longopts, int *__longind); +extern int gk_getopt(int argc, char **argv, char *shortopts); +extern int gk_getopt_long(int argc, char **argv, char *shortopts, + struct gk_option *longopts, int *longind); +extern int gk_getopt_long_only (int argc, char **argv, + char *shortopts, struct gk_option *longopts, int *longind); FreeFem-sources-4.9/3rdparty/parmmg/000775 000000 000000 00000000000 14037356732 017405 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/parmmg/Makefile000664 000000 000000 00000006535 14037356732 021056 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # 
----------------------------------------- all-local: parmmg include ff-flags # Downloading and compiling parmmg # ------------------------------- # DIRPKG= ../pkg SRCDIR= . PACKAGE=$(DIRPKG)/parmmg.zip INSTALL=. parmmg_VERSION= parmmg:WHERE.done ifeq ($(WIN32DLLTARGET),) CMAKE_GUESS := else CMAKE_GUESS := -DMPI_GUESS_LIBRARY_NAME=MSMPI -G "MSYS Makefiles" endif FAIT.done:tag-tar -mkdir build cd build && cmake ../parmmg-sources \ -DCMAKE_C_COMPILER=$(CC) \ -DCMAKE_C_FLAGS="$(CFLAGS) -fPIC" \ -DCMAKE_CXX_COMPILER="$(CXX)" \ -DCMAKE_CXX_FLAGS="$(CXXFLAGS) $(CXX11FLAGS) -fPIC" \ -DCMAKE_Fortran_COMPILER="$(FC)" \ -DMPI_C_COMPILER="$(MPICC)" \ -DMPI_CXX_COMPILER="$(MPICXX)" \ -DMPI_Fortran_COMPILER="$(MPIFC)" \ -DMPIEXEC_EXECUTABLE="$(MPIRUN)" \ -DDOWNLOAD_MMG=OFF \ -DDOWNLOAD_METIS=OFF \ -DMMG_DIR=../../mmg/mmg-sources \ -DMMG_BUILDDIR=../../mmg/build \ -DSCOTCH_DIR=$(scotch_dir) \ -DMETIS_DIR=$(metis_dir) -DUSE_VTK=OFF \ -DCMAKE_BUILD_TYPE=Release $(CMAKE_GUESS) && $(MAKE) touch FAIT.done install.done:FAIT.done cp -r build/include/parmmg ../include/parmmg cp build/lib/libparmmg.a ../lib touch $@ clean:: -rm install.done FAIT.done parmmg:$(PACKAGE) install:install.done WHERE.done WHERE.done: install.done echo parmmg LD -L@DIR@/lib -lparmmg >../lib/WHERE.parmmg ; echo parmmg INCLUDE -I@DIR@/include/>> ../lib/WHERE.parmmg ; echo build WHERE ./lib/WHERE.parmmg ; touch $@ clean:: -rm WHERE.done FAIRE: FAIT.done install.done tag-tar: $(PACKAGE) -rm -rf ParMmg-* parmmg-sources unzip -q $(PACKAGE) && mv ParMmg-* parmmg-sources touch tag-tar $(PACKAGE): FORCE ../getall -o parmmg -a FORCE: clean:: -rm ff-flags -rm FAIT* -rm -rf ParMmg-* parmmg-sources build -rm flags-* tag-tar* -rm -rf ../include/parmmg -rm ../lib/libparmmg.a ff-flags: ../Makefile Makefile ../getall ../getall -o parmmg -a -cp -f ff-flags ff-flags.old grep 'abs_top_builddir *=' ../Makefile > ff-flags grep 'CC *=' ../Makefile >> ff-flags grep 'FC *=' ../Makefile >> ff-flags grep 'CFLAGS *=' ../Makefile >> 
ff-flags grep 'CXX *=' ../Makefile >> ff-flags grep 'CXXFLAGS *=' ../Makefile >> ff-flags grep 'CXX11FLAGS *=' ../Makefile >> ff-flags grep 'LDFLAGS *=' ../Makefile >> ff-flags grep 'AR *=' ../Makefile >> ff-flags grep 'ARFLAGS *=' ../Makefile >> ff-flags grep 'RANLIB *=' ../Makefile >> ff-flags grep 'WGET *=' ../Makefile >> ff-flags grep 'STD_LIBS *=' ../Makefile >> ff-flags grep 'CNOFLAGS *=' ../Makefile >> ff-flags grep 'FF_prefix_petsc_real *=' ../Makefile >> ff-flags grep 'MPICC *=' ../Makefile >> ff-flags grep 'MPICXX *=' ../Makefile >> ff-flags grep 'MPIFC *=' ../Makefile >> ff-flags grep 'MPIRUN *=' ../Makefile >> ff-flags grep 'WIN32DLLTARGET *=' ../Makefile >> ff-flags -awk '/^scotch LD /&&/petsc/ {print "scotch_dir=$$(FF_prefix_petsc_real)" }' ../../plugin/seq/WHERE_LIBRARY-config >> ff-flags -awk '/^scotch LD /{print "scotch_dir=../../../3rdparty/" }' ../lib/WHERE.scotch >> ff-flags -awk '/^metis LD /&&/petsc/ {print "metis_dir=$$(FF_prefix_petsc_real)" }' ../../plugin/seq/WHERE_LIBRARY-config >> ff-flags -awk '/^metis LD /{print "metis_dir=../../../3rdparty/" }' ../lib/WHERE.metis >> ff-flags ../../bin/ff-md5 $(PACKAGE) >> ff-flags if diff -q ff-flags ff-flags.old ; then echo No Modif skip compile of parmmg ; else touch -c tag-tar; fi .PHONY:$(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/scalapack/000775 000000 000000 00000000000 14037356732 020044 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/scalapack/Makefile000664 000000 000000 00000004351 14037356732 021507 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- include SLmake.inc all-local: scalapack # FFCS: parallel compilation crashes on Win32 (same archive updated from 2 different parallel makes) # Downloading and compiling scalapack # ------------------------------ # http://www.netlib.org/scalapack/ # Hips information DIRPKG=../pkg SRCDIR=scalapack-2.1.0 PACKAGE1=$(DIRPKG)/scalapack-2.1.0.tgz 
SERVER=http://www.netlib.org/scalapack/ INSTALL=../.. # FFCS: shorten argument length to avoid "sh: ../ar: Argument list too long" under Cygwin DIR1 = TOOLS/LAPACK/*.o DIR2 = TOOLS/*.o DIR3 = PBLAS/SRC/PBBLAS/*.o DIR4 = PBLAS/SRC/PTZBLAS/*.o DIR5 = PBLAS/SRC/PTOOLS/*.o DIR6 = PBLAS/SRC/*.o DIR7 = REDIST/SRC/*.o DIR8 = SRC/*.o DIR9 = BLACS/SRC/*.o BLACS/SRC/*.oo scalapack: FAIRE # FFCS: shorten argument length to avoid "sh: ../ar: Argument list too long" under Cygwin $(SRCDIR)/FAIT-202:$(SRCDIR)/tag-tar cp SLmake.inc $(SRCDIR) cd $(SRCDIR);$(MAKE) MAKEFLAGS=-j1 lib rm $(SRCDIR)/libscalapack.a cd $(SRCDIR) && $(ARCH) $(ARCHFLAGS) libscalapack.a $(DIR1) $(DIR2) $(DIR3) $(DIR4) $(DIR5) $(DIR6) $(DIR7) $(DIR8) $(DIR9) $(RANLIB) $(SRCDIR)/libscalapack.a touch $(SRCDIR)/FAIT-202 install: mkdir -p ../include cp $(SRCDIR)/SRC/*.h ../include mkdir -p ../lib cp $(SRCDIR)/libscalapack.a ../lib WHERE: if [ -f $(SRCDIR)/FAIT-202 ] ; then \ make install; \ echo scalapack LD -L@DIR@/lib -lscalapack >../lib/WHERE.scalapack ;\ echo scalapack INCLUDE -I@DIR@/include >> ../lib/WHERE.scalapack ;\ fi FAIRE: $(SRCDIR)/FAIT-202 $(MAKE) WHERE touch FAIRE SLmake.inc: ../../config.status Makefile SLmake-scalapack.inc ../../config.status --file="SLmake.inc:SLmake-scalapack.inc" $(SRCDIR)/tag-tar: $(PACKAGE1) gunzip -c $(PACKAGE1) | tar xf - touch $(SRCDIR)/tag-tar $(PACKAGE1): ../getall -o ScaLAPACK -a clean: # FFCS - need to clean completely even in case of error -rm SLmake.inc FAIRE FAIT # FFCS - make sure that all directories are cleaned. 
This is especially important under Windows because there is no # compilation dependencies control there (see # [[file:c:/cygwin/home/alh/ffcs/dist/configure.ac::dependency_tracking]]) -rm -rf scalapack-* -rm config.log # -rm $(PACKAGE1) .PHONY:$(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/scalapack/SLmake-scalapack.inc000664 000000 000000 00000003522 14037356732 023635 0ustar00rootroot000000 000000 ############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: October 13, 2011 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # # C preprocessor definitions: set CDEFS to one of the following: # # -DNoChange (fortran subprogram names are lower case without any suffix) # -DUpCase (fortran subprogram names are upper case without any suffix) # -DAdd_ (fortran subprogram names are lower case with "_" appended) CDEFS = @CFLAGSF77@ -DNO_IEEE $(USEMPI) # # The fortran and C compilers, loaders, and their flags # FC = @MPIFC@ CC = @MPICC@ NOOPT = -O0 @CNOFLAGS@ # FFCS - some return statements without value cause trouble on MacOS # FFCS - add path to mpi.h (required for MacOS 10.8 + MacPorts OpenMPI) # FFCS - added @CNOFLAGS@ according to upstream changes CCFLAGS = -O3 -Wreturn-type @CFLAGS@ -I'@MPI_INC_DIR@' @CNOFLAGS@ -Wno-implicit-function-declaration FCFLAGS = -O3 @CNOFLAGS@ FCLOADER = $(FC) CCLOADER = $(CC) FCLOADFLAGS = $(FCFLAGS) CCLOADFLAGS = $(CCFLAGS) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine.
If your system has no ranlib, set RANLIB = echo # ARCH = @AR@ ARCHFLAGS = @ARFLAGS@ RANLIB = @RANLIB@ # # The name of the ScaLAPACK library to be created # SCALAPACKLIB = libscalapack.a # # BLAS, LAPACK (and possibly other) libraries needed for linking test programs # BLASLIB = $(FFBLASLIB) LAPACKLIB = $(FFLAPACKLIB) LIBS = $(LAPACKLIB) $(BLASLIB) WGET = @WGET@ .NOTPARRALEL: FreeFem-sources-4.9/3rdparty/scotch/000775 000000 000000 00000000000 14037356732 017405 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/scotch/Makefile000664 000000 000000 00000010707 14037356732 021052 0ustar00rootroot000000 000000 # ====================================================================== # Laboratoire Jacques-Louis Lions # Université Pierre et Marie Curie-Paris6, UMR 7598, Paris, F-75005 France # http://www.ljll.math.upmc.fr/lehyaric # ====================================================================== # This file is part of Freefem++ # # Freefem++ is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Freefem++ is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with Freefem++; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ====================================================================== # headeralh default=0 freefem make multipleauthors start=19/03/10 upmc # Downloading and compiling extra libraries # ----------------------------------------- include Makefile.inc all-local: scotch # Downloading and compiling scotch # ------------------------------- # https://gforge.inria.fr/frs/download.php/file/34099/scotch_6.0.4.tar.gz # http://gforge.inria.fr/frs/download.php/23391/scotch_5.1.7_esmumps.tar.gz # Scotch information DIRPKG=../pkg SRCDIR=scotch-v$(scotch_VERSION) PACKAGE=$(DIRPKG)/scotch-v$(scotch_VERSION_LOCAL).tar.gz SERVER=https://gforge.inria.fr/frs/download.php/file/34099 #https://gforge.inria.fr/frs/download.php/file/34099/scotch_6.0.4.tar.gz #https://gforge.inria.fr/frs/download.php/23391 INSTALL=../.. scotch_VERSION=6.1.0 scotch_VERSION_LOCAL=6.1.0 # trick to no in MPI on NOT .. NOTESTMPI= test -z "$(CCP)" scotch: FAIRE $(SRCDIR)/AFAIRE: $(MAKE) install $(NOTESTMPI) || $(MAKE) install-mpi FAIRE: $(SRCDIR)/FAIT $(MAKE) WHERE $(NOTESTMPI) || $(MAKE) WHERE-mpi touch FAIRE Makefile.inc: ../../config.status Makefile-scotch.inc Makefile ../../config.status --file="Makefile.inc:Makefile-scotch.inc" # DCOMMON_TIMING_OLD # FFCS - 16/1/13 - what was "$(CCP)" used for? 
$(SRCDIR)/FAIT: $(SRCDIR) Makefile.inc cp Makefile.inc $(SRCDIR)/src cd $(SRCDIR)/src && $(MAKE) scotch esmumps touch $(SRCDIR)/FAIT $(SRCDIR)/FAIT-mpi: $(SRCDIR) Makefile.inc cp Makefile.inc $(SRCDIR)/src cd $(SRCDIR)/src && $(MAKE) -j1 ptscotch ptesmumps touch $(SRCDIR)/FAIT-mpi install:$(SRCDIR)/FAIT mkdir -p $(SRCDIR)/$(INSTALL)/include/scotch cp $(SRCDIR)/include/*.h $(SRCDIR)/$(INSTALL)/include/scotch/ cp $(SRCDIR)/lib/*.a $(SRCDIR)/$(INSTALL)/lib/ install-mpi:$(SRCDIR)/FAIT-mpi mkdir -p $(SRCDIR)/$(INSTALL)/include/scotch cp $(SRCDIR)/include/*.h $(SRCDIR)/$(INSTALL)/include/scotch/ cp $(SRCDIR)/lib/*.a $(SRCDIR)/$(INSTALL)/lib/ # ALH - 16/1/13 - simplifying make rules WHERE:$(SRCDIR)/FAIT $(MAKE) install -rm $(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo scotch LD -L@DIR@/lib -lesmumps -lscotch -lscotcherr >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch # echo scotchmetis LD -L@DIR@/lib -lscotchmetis >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo scotch INCLUDE -I@DIR@/include/scotch >> $(SRCDIR)/$(INSTALL)/lib/WHERE.scotch WHERE-mpi:$(SRCDIR)/FAIT-mpi $(MAKE) install-mpi -rm $(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo ptscotch LD -L@DIR@/lib -lesmumps -lptscotch -lptscotcherr >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch # echo ptscotchparmetis LD -L@DIR@/lib -lesmumps -lptscotchparmetis >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo ptscotch INCLUDE -I@DIR@/include/scotch >> $(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo scotch LD -L@DIR@/lib -lesmumps -lscotch -lscotcherr >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch # echo scotchmetis LD -L@DIR@/lib -lscotchmetis >>$(SRCDIR)/$(INSTALL)/lib/WHERE.scotch echo scotch INCLUDE -I@DIR@/include/scotch >> $(SRCDIR)/$(INSTALL)/lib/WHERE.scotch $(SRCDIR): $(PACKAGE) tar xzf $(PACKAGE) touch $(SRCDIR) $(PACKAGE): ../getall -o Scotch -a # FFCS - 28/3/13 - more cleaning # the 'cd' already enters $(SRCDIR)/src, so realclean is run there without an extra -C (which would point at a nested, non-existent path) clean-local: -cd $(SRCDIR)/src && $(MAKE) realclean -rm config.log -rm -rf ../include/scotch/ -rm -rf ../lib/*scotch* # FH -rm $(PACKAGE) clean: clean-local -rm 
Makefile.inc -rm -rf $(SRCDIR) -rm FAIRE .PHONY:$(SRCDIR)/$(INSTALL) # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/scotch/Makefile-scotch.inc000664 000000 000000 00000003625 14037356732 023104 0ustar00rootroot000000 000000 # ALH - avoid blocking when Makefile.inc is not built yet #include @abs_top_builddir@/3rdparty/headers-sparsesolver.inc EXE = LIB = .a OBJ = .o #DIRMPI = @FFMPIDIR@ #MAKE = make AR = @AR@ ARFLAGS = @ARFLAGS@ CAT = cat CCS = @CC@ CCP = @MPICC@ CCD = @CC@ -I'@MPI_INC_DIR@' FFINTSCOTCH = -DINTSIZE32 -DIDXSIZE32 -g # FFCS - 16/4/13 - some plain C files seem to need access to mpi.h too # FFCS - 23/4/13 - remove thread and fork dependency on Windows with # -DCOMMON_STUB_FORK without -DCOMMON_PTHREAD # [[file:scotch_5.1_esmumps/INSTALL.TXT::COMMON_STUB_FORK]] # s/-DCOMMON_TIMING_OLD/-Du_int32_t='unsigned int'/" -e "s/-DCOMMON_RANDOM_FIXED_SEED/-DCOMMON_RANDOM_RAND -DCOMMON_RANDOM_FIXED_SEED -D'pipe(pfds)=_pipe(pfds,1024,0x8000)' CFLAGSWIN = -DCOMMON_PTHREAD -DCOMMON_PTHREAD_BARRIER -DCOMMON_RANDOM_FIXED_SEED -DCOMMON_STUB_FORK -DSCOTCH_RENAME -D'pipe(pfds)=_pipe(pfds,1024,0x8000)' #-DHAVE_SYS_TIME_H -DCOMMON_STUB_FORK -Du_int32_t='unsigned int' -DSCOTCH_RENAME -DCOMMON_RANDOM_RAND -DCOMMON_RANDOM_FIXED_SEED -D'pipe(pfds)=_pipe(pfds,1024,0x8000)' # CFLAGSSTD = -DCOMMON_STUB_FORK -DCOMMON_RANDOM_FIXED_SEED -DCOMMON_TIMING_OLD -DSCOTCH_RENAME -DSCOTCH_DETERMINISTIC # reomve -DSCOTCH_PTHREAD due to (1): ERROR: SCOTCH_dgraphInit: Scotch compiled with SCOTCH_PTHREAD and program not launched with MPI_THREAD_MULTIPLE # in SUPERLU_DIST solver .. FH. 
Sep 2016 v 3.48 CFLAGSSTD = -DCOMMON_PTHREAD -DCOMMON_PTHREAD_BARRIER -DCOMMON_RANDOM_FIXED_SEED -DCOMMON_TIMING_OLD -DSCOTCH_RENAME ifeq (@WIN32DLLTARGET@,) CFLAGS = @CFLAGS@ -I'@MPI_INC_DIR@' $(FFINTSCOTCH) -Drestrict=__restrict $(CFLAGSSTD) else CFLAGS = @CFLAGS@ -I'@MPI_INC_DIR@' $(FFINTSCOTCH) -Drestrict=__restrict $(CFLAGSWIN) endif LDLIBS = @LDFLAGS@ -lm @MPI_LIBC@ @LIBSPTHREAD@ LDFLAGS = @LDFLAGS@ -lm @MPI_LIBC@ @LIBSPTHREAD@ #-L/usr/local/lib/gcc/ CP = cp LEX = @LEX@ LN = ln MKDIR = mkdir MV = mv RANLIB = @RANLIB@ YACC = @YACC@ WGET = @WGET@ FreeFem-sources-4.9/3rdparty/superlu/000775 000000 000000 00000000000 14037356732 017621 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/superlu/Makefile000775 000000 000000 00000011770 14037356732 021272 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- all-local: superlu # Downloading and compiling Tetgen # ------------------------------ # SUPERLU information #http://crd.lbl.gov/~xiaoye/SuperLU/superlu_4.0.tar.gz DIRPKG=../pkg SRCDIR=superlu-sources PACKAGE=superlu-5.2.2.zip PACKAGE_PATH=$(DIRPKG)/$(PACKAGE) SERVER=https://github.com/xiaoyeli/superlu/ # http://crd-legacy.lbl.gov/~xiaoye/SuperLU/superlu_4.3.tar.gz INSTALL=../.. 
SUPERLU_VERSION=5.2.2 SUPERLULIB=../$(INSTALL)/lib/libsuperlu_$(SUPERLU_VERSION).a include ff-FLAGS ### return machine parameters SCAUX = smach.o DZAUX = dmach.o ### LAPACK LAAUX = #lsame.o xerbla.o SLASRC = #slacon.o DLASRC = #dlacon.o CLASRC = #clacon.o scsum1.o icmax1.o ZLASRC = #zlacon.o dzsum1.o izmax1.o SCLAUX = #slamch.o DZLAUX = #dlamch.o ### SuperLU ALLAUX = superlu_timer.o util.o memory.o get_perm_c.o mmd.o \ sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o \ heap_relax_snode.o colamd.o \ ilu_relax_snode.o ilu_heap_relax_snode.o mark_relax.o \ mc64ad.o qselect.o input_error.o smach.o dmach.o SLUSRC = \ sgssv.o sgssvx.o \ ssp_blas2.o ssp_blas3.o sgscon.o \ slangs.o sgsequ.o slaqgs.o spivotgrowth.o \ sgsrfs.o sgstrf.o sgstrs.o scopy_to_ucol.o \ ssnode_dfs.o ssnode_bmod.o \ spanel_dfs.o spanel_bmod.o \ sreadhb.o sreadrb.o sreadtriple.o \ scolumn_dfs.o scolumn_bmod.o spivotL.o spruneL.o \ smemory.o sutil.o smyblas2.o \ sgsisx.o sgsitrf.o sldperm.o \ ilu_sdrop_row.o ilu_ssnode_dfs.o \ ilu_scolumn_dfs.o ilu_spanel_dfs.o ilu_scopy_to_ucol.o \ ilu_spivotL.o sdiagonal.o slacon2.o DLUSRC = \ dgssv.o dgssvx.o \ dsp_blas2.o dsp_blas3.o dgscon.o \ dlangs.o dgsequ.o dlaqgs.o dpivotgrowth.o \ dgsrfs.o dgstrf.o dgstrs.o dcopy_to_ucol.o \ dsnode_dfs.o dsnode_bmod.o dpanel_dfs.o dpanel_bmod.o \ dreadhb.o dreadrb.o dreadtriple.o \ dcolumn_dfs.o dcolumn_bmod.o dpivotL.o dpruneL.o \ dmemory.o dutil.o dmyblas2.o \ dgsisx.o dgsitrf.o dldperm.o \ ilu_ddrop_row.o ilu_dsnode_dfs.o \ ilu_dcolumn_dfs.o ilu_dpanel_dfs.o ilu_dcopy_to_ucol.o \ ilu_dpivotL.o ddiagonal.o dlacon2.o ## dgstrsL.o dgstrsU.o CLUSRC = \ scomplex.o cgssv.o cgssvx.o csp_blas2.o csp_blas3.o cgscon.o \ clangs.o cgsequ.o claqgs.o cpivotgrowth.o \ cgsrfs.o cgstrf.o cgstrs.o ccopy_to_ucol.o \ csnode_dfs.o csnode_bmod.o \ cpanel_dfs.o cpanel_bmod.o \ creadhb.o creadrb.o creadtriple.o \ ccolumn_dfs.o ccolumn_bmod.o cpivotL.o cpruneL.o \ cmemory.o cutil.o cmyblas2.o \ cgsisx.o cgsitrf.o cldperm.o \ ilu_cdrop_row.o 
ilu_csnode_dfs.o \ ilu_ccolumn_dfs.o ilu_cpanel_dfs.o ilu_ccopy_to_ucol.o \ ilu_cpivotL.o cdiagonal.o clacon2.o scsum1.o icmax1.o ZLUSRC = \ dcomplex.o zgssv.o zgssvx.o zsp_blas2.o zsp_blas3.o zgscon.o \ zlangs.o zgsequ.o zlaqgs.o zpivotgrowth.o \ zgsrfs.o zgstrf.o zgstrs.o zcopy_to_ucol.o \ zsnode_dfs.o zsnode_bmod.o \ zpanel_dfs.o zpanel_bmod.o \ zreadhb.o zreadrb.o zreadtriple.o \ zcolumn_dfs.o zcolumn_bmod.o zpivotL.o zpruneL.o \ zmemory.o zutil.o zmyblas2.o \ zgsisx.o zgsitrf.o zldperm.o \ ilu_zdrop_row.o ilu_zsnode_dfs.o \ ilu_zcolumn_dfs.o ilu_zpanel_dfs.o ilu_zcopy_to_ucol.o \ ilu_zpivotL.o zdiagonal.o zlacon2.o dzsum1.o izmax1.o OBJ_SUPERLU = $(ALLAUX) $(LAAUX) \ $(SLUSRC) $(SLASRC) $(SCLAUX) \ $(DLUSRC) $(DLASRC) $(DZLAUX) \ $(CLUSRC) $(CLASRC) \ $(ZLUSRC) $(ZLASRC) superlu: FAIT # FFCS - 18/6/12 - depend on SRCDIR to remake all when package version changes FAIT:$(SRCDIR) $(MAKE) install WHERE touch FAIT FAIRE:$(SRCDIR) cd $(SRCDIR)/SRC; $(MAKE) -f ../../ff-FLAGS $(OBJ_SUPERLU) touch FAIRE # FFCS - FAIRE and WHERE need to be done sequentially, even in a parallel build (corrected by Cico, 1/3/12) install: FAIRE $(MAKE) WHERE cd $(SRCDIR)/SRC; $(AR) $(ARFLAGS) $(SUPERLULIB) $(OBJ_SUPERLU) cd $(SRCDIR)/SRC; $(RANLIB) $(SUPERLULIB) cp $(SRCDIR)/SRC/s*h $(SRCDIR)/$(INSTALL)/include WHERE: echo superlu LD -L@DIR@/lib -lsuperlu_$(SUPERLU_VERSION) >$(SRCDIR)/$(INSTALL)/lib/WHERE.superlu echo superlu INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.superlu $(SRCDIR)/$(INSTALL): mkdir $(SRCDIR)/$(INSTALL) $(SRCDIR): $(PACKAGE_PATH) unzip -q $(PACKAGE_PATH) && mv superlu-* superlu-sources patch -p0 >$@ grep '^BLAS[A-Z ]*=' ../Makefile|grep = >>$@ grep '^AR *=' ../Makefile|grep = >>$@ grep '^ARFLAGS *=' ../Makefile|grep = >>$@ grep '^RANLIB *=' ../Makefile|grep = >>$@ grep '^WGET *=' ../Makefile|grep = >>$@ FreeFem-sources-4.9/3rdparty/superlu/make.inc000664 000000 000000 00000002016 14037356732 021230 0ustar00rootroot000000 000000 
############################################################################ # # Program: SuperLU # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: October 2, 1995 # # Modified: February 4, 1997 Version 1.0 # November 15, 1997 Version 1.1 # September 1, 1999 Version 2.0 # ############################################################################ # # # The name of the libraries to be created/linked to # TMGLIB = $(INSTALL)/../lib/libtmglib.a SUPERLULIB = $(INSTALL)/../lib/libsuperlu_3.1.a BLASLIB = $(BLASLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = $(AR) ARCHFLAGS = $(ARFLAGS) FORTRAN = f77 FFLAGS = -O LOADER = $(CC) LOADOPTS = # # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # CDEFS = -DAdd_ # # The directory in which Matlab is installed # MATLAB = /usr/sww/pkg/matlab FreeFem-sources-4.9/3rdparty/superlu/patch-superlu-5.2.2000664 000000 000000 00000004261 14037356732 023005 0ustar00rootroot000000 000000 diff -ur OO-superlu-sources-origin/SRC/csnode_bmod.c superlu-sources/SRC/csnode_bmod.c --- OO-superlu-sources-origin/SRC/csnode_bmod.c 2020-10-18 00:23:21.000000000 +0200 +++ superlu-sources/SRC/csnode_bmod.c 2021-02-17 09:02:06.000000000 +0100 @@ -33,6 +33,12 @@ #include "slu_cdefs.h" +/* + * Function prototypes + */ +void cusolve(int, int, complex*, complex*); +void clsolve(int, int, complex*, complex*); +void cmatvec(int, int, int, complex*, complex*, complex*); /*! \brief Performs numeric block updates within the relaxed snode. 
diff -ur OO-superlu-sources-origin/SRC/dsnode_bmod.c superlu-sources/SRC/dsnode_bmod.c --- OO-superlu-sources-origin/SRC/dsnode_bmod.c 2020-10-18 00:23:21.000000000 +0200 +++ superlu-sources/SRC/dsnode_bmod.c 2021-02-17 09:00:45.000000000 +0100 @@ -33,6 +33,12 @@ #include "slu_ddefs.h" +/* + * Function prototypes + */ +void dusolve(int, int, double*, double*); +void dlsolve(int, int, double*, double*); +void dmatvec(int, int, int, double*, double*, double*); /*! \brief Performs numeric block updates within the relaxed snode. diff -ur OO-superlu-sources-origin/SRC/ssnode_bmod.c superlu-sources/SRC/ssnode_bmod.c --- OO-superlu-sources-origin/SRC/ssnode_bmod.c 2020-10-18 00:23:21.000000000 +0200 +++ superlu-sources/SRC/ssnode_bmod.c 2021-02-17 08:58:12.000000000 +0100 @@ -33,6 +33,12 @@ #include "slu_sdefs.h" +/* + * Function prototypes + */ +void susolve(int, int, float*, float*); +void slsolve(int, int, float*, float*); +void smatvec(int, int, int, float*, float*, float*); /*! \brief Performs numeric block updates within the relaxed snode. diff -ur OO-superlu-sources-origin/SRC/zsnode_bmod.c superlu-sources/SRC/zsnode_bmod.c --- OO-superlu-sources-origin/SRC/zsnode_bmod.c 2020-10-18 00:23:21.000000000 +0200 +++ superlu-sources/SRC/zsnode_bmod.c 2021-02-17 09:04:15.000000000 +0100 @@ -33,6 +33,12 @@ #include "slu_zdefs.h" +/* + * Function prototypes + */ +void zusolve(int, int, doublecomplex*, doublecomplex*); +void zlsolve(int, int, doublecomplex*, doublecomplex*); +void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*); /*! \brief Performs numeric block updates within the relaxed snode. 
FreeFem-sources-4.9/3rdparty/tetgen/000775 000000 000000 00000000000 14037356732 017410 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/tetgen/Makefile000775 000000 000000 00000005071 14037356732 021056 0ustar00rootroot000000 000000 # and compiling extra libraries # ----------------------------------------- include cxxflags all-local: tetgen # Downloading and compiling Tetgen # ------------------------------ # Tetgen information DIRPKG=../pkg tetgen_VERSION=1.5.1-beta1 SRCDIR=tetgen$(tetgen_VERSION) PACKAGE=$(DIRPKG)/tetgen$(tetgen_VERSION).tar.gz # FFCS - 6/11/12 - curl is not able to follow redirections from http://tetgen.org/files #SERVER=http://wias-berlin.de/software/tetgen/files INSTALL=../.. tetgen: FAIRE FAIRE:install.done WHERE.done touch FAIRE # ALH - FFCS - 18/12/8 - need '-fPIC' on Linux64 because the .a will be used in a .so (by examples++-load/tetgen.cpp). # FFCS - 30/11/10 - need ranlib on Win64. The PATH is setup so that mingw/ranlib is called fait-tags.done:tar-tags.done cxxflags cd $(SRCDIR);$(CXX) $(CXXFLAGS) -O3 -fPIC -DSELF_CHECK -DNDEBUG -DTETLIBRARY -c tetgen.cxx cd $(SRCDIR);$(CXX) $(CXXFLAGS) -O0 -fPIC -DSELF_CHECK -DNDEBUG -DTETLIBRARY -c predicates.cxx touch fait-tags.done WHERE:WHERE.done WHERE.done: fait-tags.done echo tetgen LD -L@DIR@/lib -ltet >$(SRCDIR)/$(INSTALL)/lib/WHERE.tetgen echo tetgen INCLUDE -I@DIR@/include >> $(SRCDIR)/$(INSTALL)/lib/WHERE.tetgen touch $@ clean-local:: -rm WHERE.done # FFCS - avoid remaking install every time install.done:fait-tags.done cd $(SRCDIR);$(AR) $(ARFLAGS) $(INSTALL)/lib/libtet.a tetgen.o predicates.o ranlib $(SRCDIR)/$(INSTALL)/lib/libtet.a cp $(SRCDIR)/tetgen.h $(SRCDIR)/$(INSTALL)/include touch $@ clean-local:: -rm install.done tar-tags.done: $(PACKAGE) gunzip -c $(PACKAGE)|tar zxf - # FFCS: needs to patch tetgen because mingw64 has 4-byte longs touch tar-tags.done # cd tetgen1.4.2;patch -p1 <../tetgen1.4.2.patch $(PACKAGE): ../getall -o TetGen -a clean-local:: -rm -rf tetgen1.?.* 
*.done clean: -rm $(SRCDIR) -rm *.done -rm -rf tetgen1.?.* cxxflags cxxflags: ../Makefile Makefile ../getall ../getall -o TetGen -a -mv -f cxxflags cxxflags.old grep 'CXX *=' ../Makefile >cxxflags grep 'CC *=' ../Makefile >>cxxflags # FFCS - 10/5/12 - bug under Windows if -O3 is specified tetgen never returns. It could also be the case under # Ubuntu. All optimisation options are removed for safety. grep 'CXXFLAGS *=' ../Makefile | sed 's/ -O[0-9]* / /g' >>cxxflags grep 'WGET *=' ../Makefile >>cxxflags grep 'AR *=' ../Makefile >>cxxflags grep 'ARFLAGS *=' ../Makefile >>cxxflags ../../bin/ff-md5 $(PACKAGE) >> cxxflags if diff -q cxxflags cxxflags.old ; then echo No Modif skip compile of tetgen ; mv -f cxxflags.old cxxflags; else touch -c tar-tags.done; fi .PHONY:$(SRCDIR)/$(INSTALL)FreeFem-sources-4.9/3rdparty/umfpack/000775 000000 000000 00000000000 14037356732 017550 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/umfpack/AMD_Make.include000664 000000 000000 00000005762 14037356732 022465 0ustar00rootroot000000 000000 #------------------------------------------------------------------------------ # Include file for GNU make or original make (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # You can edit these definitions, or select and and edit a specfic # Make.(architecture) file, below. This same file is used for configuring # both AMD and UMFPACK. AMD is a stand-alone package. UMFPACK requires AMD, # and for simplicity of configuring both packages, UMFPACK and AMD share this # configuration file (and all files in the AMD/Make directory). To configure # AMD, or both AMD and UMFPACK, you only need to edit this one file (and # optionaly, one of the ../Make/Make. files below). # NOTE: -DNBLAS and other BLAS configurations are ignored by AMD, since AMD # does not use the BLAS. This flag is here because this file, and the # ../Make/Make.* files, are shared by UMFPACK (which requires AMD). 
If you # use AMD but not UMFPACK, then you can ignore any BLAS-related configuration # settings. #CFLAGS = -O RANLIB = ranlib LIB = -lm RM = rm -f MV = mv -f #F77 = f77 #F77FLAGS = -O F77LIB = AR = ar #------------------------------------------------------------------------------ # for the AMD and UMFPACK mexFunctions (-DNBLAS and -DNUTIL for UMFPACK only) #------------------------------------------------------------------------------ # MATLAB 6.0 or later (default) MEX = mex -inline -O # MATLAB 6.0 or later (no divide-by-zero) # MEX = mex -inline -O -DNO_DIVIDE_BY_ZERO # MATLAB 5 (no BLAS, do not try to use utMalloc, utFree, and utRealloc) # MEX = mex -inline -O -DNBLAS -DNUTIL #------------------------------------------------------------------------------ # for UMFPACK only (BLAS configuration): #------------------------------------------------------------------------------ # The default is to NOT use the BLAS. UMFPACK will be slow, but this is more # portable. Try this option first, then use your architecture-specific # configuration, below, to add the BLAS library. AMD ignores the -DNBLAS flag. #CONFIG = -DNBLAS #------------------------------------------------------------------------------ # Architecture-specific configuration (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # Select your architecture by un-commenting the appropriate line. The include # file can redefine any of the above definitions, or other definitions. Use # CC = ... to redefine the name of your C compiler, for example. Without # any specific changes, this Makefile should work on nearly all systems. 
# include ../Make/Make.linux # include ../Make/Make.sgi # include ../Make/Make.solaris # include ../Make/Make.alpha # include ../Make/Make.rs6000 #------------------------------------------------------------------------------ # remove object files and profile output #------------------------------------------------------------------------------ clean: - $(RM) *.o *.obj *.ln *.bb *.bbg *.da *.c.tcov *.c.gcov gmon.out *.bak FreeFem-sources-4.9/3rdparty/umfpack/Makefile.am000664 000000 000000 00000015557 14037356732 021621 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . # ############################################################################ # SUMMARY : Downloading and compiling extra libraries for Umfpack # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : ... # E-MAIL : ... 
# see [[file:../../configure.ac::DOWNLOAD_UMFPACK]] all-local: $(DOWNLOAD_UMFPACK) EXTRA_DIST= SuiteSparse_config.mk.in # Downloading and compiling UMFPACK # --------------------------------- DIRPKG=../pkg SUITESPARSE_TGZ=$(DIRPKG)/SuiteSparse-4.4.4.tar.gz UMFPACKLIB=../lib/libumfpack.a AMDLIB=../lib/libamd.a CAMDLIB=../lib/libcamd.a CHOLMODLIB=../lib/libcholmod.a COLAMDLIB=../lib/libcolamd.a SUITESPARSECONFIGLIB=../lib/libsuitesparseconfig.a # ALH - 4/9/13 - parallel make crashes ("pipe from processes is a directory"?). make sure that everything is run # sequentially umfpack: $(MAKE) MAKEFLAGS= $(AMDLIB) $(MAKE) MAKEFLAGS= $(UMFPACKLIB) $(MAKE) MAKEFLAGS= $(SUITESPARSECONFIGLIB) # # ALH - 19/9/13 - cholmod part built sequentially # $(MAKE) MAKEFLAGS= $(CAMDLIB) $(MAKE) MAKEFLAGS= $(COLAMDLIB) $(MAKE) MAKEFLAGS= $(AMDLIB) $(MAKE) MAKEFLAGS= $(CHOLMODLIB) $(MAKE) MAKEFLAGS= $(SUITESPARSECONFIGLIB) # FFCS: SuiteSparse/*/Source are not able to compile in parallel from scratch ("pipe from processes is a # directory"?). But specifying "make -j 1" is not enough (another error pops up). Use "$(MAKE) MAKEFLAGS=" instead # (using "make MAKEFLAGS=" still produces an error on Cygwin). 
$(SUITESPARSECONFIGLIB): SuiteSparse/FF cd SuiteSparse/SuiteSparse_config && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/SuiteSparse_config/SuiteSparse_config.h ../include cp SuiteSparse/SuiteSparse_config/libsuitesparseconfig.a ../lib/libsuitesparseconfig.a $(RANLIB) ../lib/libsuitesparseconfig.a $(UMFPACKLIB): SuiteSparse/FF cd SuiteSparse/UMFPACK/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/UMFPACK/Include/*.h ../include cp -f SuiteSparse/SuiteSparse_config/SuiteSparse_config.h ../include cp SuiteSparse/UMFPACK/Lib/libumfpack.a ../lib/libumfpack.a $(RANLIB) ../lib/libumfpack.a $(AMDLIB): SuiteSparse/FF cd SuiteSparse/AMD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/AMD/Include/*.h ../include cp SuiteSparse/AMD/Lib/libamd.a ../lib/libamd.a $(RANLIB) ../lib/libamd.a $(CAMDLIB): SuiteSparse/FF cd SuiteSparse/CAMD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/CAMD/Include/*.h ../include cp SuiteSparse/CAMD/Lib/libcamd.a ../lib/libcamd.a $(RANLIB) ../lib/libcamd.a $(CHOLMODLIB): SuiteSparse/FF cd SuiteSparse/CHOLMOD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/CHOLMOD/Include/*.h ../include cp SuiteSparse/CHOLMOD/Lib/libcholmod.a ../lib/libcholmod.a $(RANLIB) ../lib/libcholmod.a $(COLAMDLIB): SuiteSparse/FF cd SuiteSparse/COLAMD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/COLAMD/Include/*.h ../include cp SuiteSparse/COLAMD/Lib/libcolamd.a ../lib/libcolamd.a $(RANLIB) ../lib/libcolamd.a UMFPACKv4.4: tar xzf UMFPACKv4.4.tar.gz # The 'lib' goal is replaced with 'libb' to avoid problems with the existing 'Lib' subdirectory on case-insensitive file # systems the lib depend of the Makefile to force the reconstruction if the parameter change UMFPACKv4.4/AMD/Makefile2 UMFPACKv4.4/UMFPACK/Makefile2: UMFPACKv4.4 sed 's/lib:/libb:/' < UMFPACKv4.4/UMFPACK/`basename $@ 2` >$@ SuiteSparse/DATE:$(SUITESPARSE_TGZ) tar zxf 
$(SUITESPARSE_TGZ) touch SuiteSparse/DATE SuiteSparse/FF:SuiteSparse/DATE SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk touch SuiteSparse/FF SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk:SuiteSparse/DATE Makefile SuiteSparse_config.mk.in ../../config.status --file="SuiteSparse_config.mk:SuiteSparse_config.mk.in" -if diff SuiteSparse_config.mk SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk >/dev/null 2>&1; then \ echo " same flags => no recompilation ! " ; \ else \ echo " recompile umfpack (some flags change) => clean umfpack colmod amd " ;\ mkdir -p SuiteSparse/SuiteSparse_config ;\ cp SuiteSparse_config.mk SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk ; \ (cd SuiteSparse/UMFPACK && make clean); \ (cd SuiteSparse/CHOLMOD && make clean); \ (cd SuiteSparse/COLAMD && make clean); \ (cd SuiteSparse/CAMD && make clean); \ (cd SuiteSparse/AMD && make clean); \ find . -name '*.exe'|xargs rm; \ fi; touch SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk UMFPACK_Make.m4: Makefile UMFPACKv4.4 m4 -DFF_CC="$(CC)" \ -DFF_CFLAGS="@CPPFLAGS@ @CFLAGS@ @BLASINC@" \ -DFF_LIB="@BLASLIB@ @LIBS@" \ -DFF_CONFIG="@FF_UMFPACK_CONFIG@" \ UMFPACK_Make.m4 >Make.include -if diff Make.include UMFPACKv4.4/UMFPACK/Make/Make.include >/dev/null 2>&1; then \ echo " same flags => no recompilation ! " ; \ else \ echo " recompile umfpack (some flags change) => clean umfpack" ;\ cp Make.include UMFPACKv4.4/UMFPACK/Make/Make.include; \ cd UMFPACKv4.4/UMFPACK && make clean; \ find . -name '*.exe'|xargs rm; \ fi; UMFPACKv4.4.tar.gz: @WGET@ -N http://www.cise.ufl.edu/research/sparse/umfpack/v4.4/UMFPACKv4.4.tar.gz $(SUITESPARSE_TGZ): ../getall -o SuiteSparse -a # remove the archives and headers copied into ../lib and ../include by the rules above (CHOLMOD headers are cholmod*.h, COLAMD headers are colamd*.h) clean-local: -rm $(AMDLIB) $(CAMDLIB) $(CHOLMODLIB) $(COLAMDLIB) $(SUITESPARSECONFIGLIB) -rm ../include/amd*.h -rm ../include/umfpack*.h -rm ../include/cholmod*.h -rm ../include/camd*.h -rm ../include/colamd*.h -rm ../include/SuiteSparse_config.h -rm -rf UMFPACKv4.?.tar.gz UMFPACKv4.? 
-rm SuiteSparse*gz -rm -rf SuiteSparse # -rm ../pkg/SuiteSparse-* -rm SuiteSparse_config.mk # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/umfpack/Makefile.am-4.5.5000664 000000 000000 00000017250 14037356732 022260 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . # ############################################################################ # SUMMARY : Downloading and compiling extra libraries for Umfpack # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : ... # E-MAIL : ... # see [[file:../../configure.ac::DOWNLOAD_UMFPACK]] all-local: $(DOWNLOAD_UMFPACK) EXTRA_DIST= SuiteSparse_config.mk.in # Downloading and compiling UMFPACK # --------------------------------- DIRPKG=../pkg SUITESPARSE_TGZ=$(DIRPKG)/SuiteSparse-4.5.5.tar.gz UMFPACKLIB=../lib/libumfpack.a AMDLIB=../lib/libamd.a CAMDLIB=../lib/libcamd.a CHOLMODLIB=../lib/libcholmod.a COLAMDLIB=../lib/libcolamd.a KLULIB=../lib/libklu.a BTFLIB=../lib/libbtf.a SUITESPARSECONFIGLIB=../lib/libsuitesparseconfig.a # ALH - 4/9/13 - parallel make crashes ("pipe from processes is a directory"?). 
make sure that everything is run # sequentially umfpack: $(MAKE) $(AMDLIB) $(MAKE) $(UMFPACKLIB) $(MAKE) $(SUITESPARSECONFIGLIB) # # ALH - 19/9/13 - cholmod part built sequentially # $(MAKE) $(CAMDLIB) $(MAKE) $(COLAMDLIB) $(MAKE) $(CHOLMODLIB) $(MAKE) $(BTFLIB) $(MAKE) $(KLULIB) $(MAKE) $(SUITESPARSECONFIGLIB) $(MAKE) install.done # FFCS: SuiteSparse/*/Source are not able to compile in parallel from scratch ("pipe from processes is a # directory"?). But specifying "make -j 1" is not enough (another error pops up). Use "$(MAKE) MAKEFLAGS=" instead # (using "make MAKEFLAGS=" still produces an error on Cygwin). $(SUITESPARSECONFIGLIB): SuiteSparse/FF cd SuiteSparse/SuiteSparse_config && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/SuiteSparse_config/SuiteSparse_config.h ../include cp SuiteSparse/SuiteSparse_config/libsuitesparseconfig.a ../lib/libsuitesparseconfig.a $(RANLIB) ../lib/libsuitesparseconfig.a $(UMFPACKLIB): SuiteSparse/FF cd SuiteSparse/UMFPACK/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/UMFPACK/Include/*.h ../include cp -f SuiteSparse/SuiteSparse_config/SuiteSparse_config.h ../include cp SuiteSparse/UMFPACK/Lib/libumfpack.a ../lib/libumfpack.a $(RANLIB) ../lib/libumfpack.a $(AMDLIB): SuiteSparse/FF cd SuiteSparse/AMD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/AMD/Include/*.h ../include cp SuiteSparse/AMD/Lib/libamd.a ../lib/libamd.a $(RANLIB) ../lib/libamd.a $(CAMDLIB): SuiteSparse/FF cd SuiteSparse/CAMD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/CAMD/Include/*.h ../include cp SuiteSparse/CAMD/Lib/libcamd.a ../lib/libcamd.a $(RANLIB) ../lib/libcamd.a $(CHOLMODLIB): SuiteSparse/FF cd SuiteSparse/CHOLMOD/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/CHOLMOD/Include/*.h ../include cp SuiteSparse/CHOLMOD/Lib/libcholmod.a ../lib/libcholmod.a $(RANLIB) ../lib/libcholmod.a $(COLAMDLIB): SuiteSparse/FF cd SuiteSparse/COLAMD/Lib && 
$(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/COLAMD/Include/*.h ../include cp SuiteSparse/COLAMD/Lib/libcolamd.a ../lib/libcolamd.a $(RANLIB) ../lib/libcolamd.a $(KLULIB): SuiteSparse/FF cd SuiteSparse/KLU/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/KLU/Include/*.h ../include cp SuiteSparse/KLU/Lib/libklu.a ../lib/libklu.a $(RANLIB) ../lib/libklu.a $(BTFLIB): SuiteSparse/FF cd SuiteSparse/BTF/Lib && $(MAKE) MAKEFLAGS= mkdir -p ../include ../lib cp -f SuiteSparse/BTF/Include/*.h ../include cp SuiteSparse/BTF/Lib/libbtf.a ../lib/libbtf.a $(RANLIB) ../lib/libbtf.a UMFPACKv4.4: tar xzf UMFPACKv4.4.tar.gz # The 'lib' goal is replaced with 'libb' to avoid problems with the existing 'Lib' subdirectory on case-insensitive file # systems the lib depend of the Makefile to force the reconstruction if the parameter change UMFPACKv4.4/AMD/Makefile2 UMFPACKv4.4/UMFPACK/Makefile2: UMFPACKv4.4 sed 's/lib:/libb:/' < UMFPACKv4.4/UMFPACK/`basename $@ 2` >$@ SuiteSparse/DATE:$(SUITESPARSE_TGZ) tar zxf $(SUITESPARSE_TGZ) touch SuiteSparse/DATE SuiteSparse/FF:SuiteSparse/DATE SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk touch SuiteSparse/FF SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk:SuiteSparse/DATE Makefile SuiteSparse_config.mk.in ../../config.status --file="SuiteSparse_config.mk:SuiteSparse_config.mk.in" -if diff SuiteSparse_config.mk SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk 2>&1 >/dev/null; then \ echo " same flags => no recompilation ! 
" ; \ else \ echo " recompile umfpack (some flags change) => clean umfpack colmod amd " ;\ mkdir -p SuiteSparse/SuiteSparse_config ;\ cp SuiteSparse_config.mk SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk ; \ (cd SuiteSparse/UMFPACK && make clean); \ (cd SuiteSparse/CHOLMOD && make clean); \ (cd SuiteSparse/COLAMD && make clean); \ (cd SuiteSparse/CAMD && make clean); \ (cd SuiteSparse/AMD && make clean); \ (cd SuiteSparse/BTF && make clean); \ (cd SuiteSparse/KLU && make clean); \ find . -name '*.exe'|xargs rm; \ fi; touch SuiteSparse/SuiteSparse_config/SuiteSparse_config.mk UMFPACK_Make.m4: Makefile UMFPACKv4.4 m4 -DFF_CC="$(CC)" \ -DFF_CFLAGS="@CPPFLAGS@ @CFLAGS@ @BLASINC@" \ -DFF_LIB="@BLASLIB@ @LIBS@" \ -DFF_CONFIG="@FF_UMFPACK_CONFIG@" \ UMFPACK_Make.m4 >Make.include -if diff Make.include UMFPACKv4.4/UMFPACK/Make/Make.include 2>&1 >/dev/null; then \ echo " same flags => no recompilation ! " ; \ else \ echo " recompile umfpack (some flags change) => clean umfpack" ;\ cp Make.include UMFPACKv4.4/UMFPACK/Make/Make.include; \ cd UMFPACKv4.4/UMFPACK && make clean; \ find . -name '*.exe'|xargs rm; \ fi; UMFPACKv4.4.tar.gz: @WGET@ -N http://www.cise.ufl.edu/research/sparse/umfpack/v4.4/UMFPACKv4.4.tar.gz $(SUITESPARSE_TGZ): ../getall -o SuiteSparse -a ../lib/WHERE.suitesparse: echo suitesparse LD -L@DIR@/lib -lumfpack -lklu -lcholmod -lbtf -lccolamd -lcolamd -lcamd -lamd -lsuitesparseconfig > $@ echo suitesparse INCLUDE -I@DIR@/include >> $@ install.done:../lib/WHERE.suitesparse touch install.done clean-local: -rm $(AMDLIB) $(CAMDLIB) $(CHOLMODLIB) $(COLAMDLIB) $(SUITESPARSECONFIGLIB) $(KLULIB) $(BTFLIB) -rm ../include/amd*.h -rm ../include/umfpack*.h -rm ../include/colmod*.h -rm ../include/camd*.h -rm ../include/amd*.h -rm ../include/klu*.h -rm ../include/btf*.h -rm ../include/SuiteSparse_config.h -rm -rf UMFPACKv4.?.tar.gz UMFPACKv4.? 
-rm SuiteSparse*gz -rm -rf SuiteSparse # -rm ../pkg/SuiteSparse-* -rm SuiteSparse_config.mk # Local Variables: # mode:makefile # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/3rdparty/umfpack/SuiteSparse_config.mk-4.5.5000664 000000 000000 00000064164 14037356732 024357 0ustar00rootroot000000 000000 #=============================================================================== # SuiteSparse_config.mk: common configuration file for the SuiteSparse #=============================================================================== # This file contains all configuration settings for all packages in SuiteSparse, # except for CSparse (which is stand-alone) and the packages in MATLAB_Tools. SUITESPARSE_VERSION = 4.5.5 #=============================================================================== # Options you can change without editing this file: #=============================================================================== # To list the options you can modify at the 'make' command line, type # 'make config', which also lists their default values. You can then # change them with 'make OPTION=value'. For example, to use an INSTALL # path of /my/path, and to use your own BLAS and LAPACK libraries, do: # # make install INSTALL=/my/path BLAS=-lmyblas LAPACK=-lmylapackgoeshere # # which will install the package into /my/path/lib and /my/path/include, # and use -lmyblas -lmylapackgoes here when building the demo program. #=============================================================================== # Defaults for any system #=============================================================================== #--------------------------------------------------------------------------- # SuiteSparse root directory #--------------------------------------------------------------------------- # Most Makefiles are in SuiteSparse/Pkg/Lib or SuiteSparse/Pkg/Demo, so # the top-level of SuiteSparse is in ../.. unless otherwise specified. 
# This is true for all but the SuiteSparse_config package. SUITESPARSE ?= $(realpath $(CURDIR)/../..) #--------------------------------------------------------------------------- # installation location #--------------------------------------------------------------------------- # For "make install" and "make uninstall", the default location is # SuiteSparse/lib, SuiteSparse/include, and # SuiteSparse/share/doc/suitesparse-x.y.z # If you do this: # make install INSTALL=/usr/local # then the libraries are installed in /usr/local/lib, include files in # /usr/local/include, and documentation in # /usr/local/share/doc/suitesparse-x.y.z. # You can instead specify the install location of each of these 3 components # separately, via (for example): # make install INSTALL_LIB=/yada/mylibs INSTALL_INCLUDE=/yoda/myinc \ # INSTALL_DOC=/solo/mydox # which puts the libraries in /yada/mylibs, include files in /yoda/myinc, # and documentation in /solo/mydox. INSTALL ?= @abs_srcdir@/3rdparty/ INSTALL_LIB ?= $(INSTALL)/lib INSTALL_INCLUDE ?= $(INSTALL)/include INSTALL_DOC ?= $(INSTALL)/share/doc/suitesparse-$(SUITESPARSE_VERSION) #--------------------------------------------------------------------------- # optimization level #--------------------------------------------------------------------------- OPTIMIZATION ?= -O3 #--------------------------------------------------------------------------- # statement coverage for */Tcov #--------------------------------------------------------------------------- ifeq ($(TCOV),yes) # Each package has a */Tcov directory for extensive testing, including # statement coverage. The Tcov tests require Linux and gcc, and use # the vanilla BLAS. 
For those tests, the packages use 'make TCOV=yes', # which overrides the following settings: MKLROOT = AUTOCC = no CC = @CC@ # C++ sources are built with the configured C++ compiler CXX = @CXX@ CFLAGS += @CFLAGS@ BLAS = @BLASLIBS@ LAPACK = @LAPACKLIBS@ CFLAGS += --coverage OPTIMIZATION = -g # --coverage is needed at link time as well, so that the gcov runtime is linked in LDFLAGS += --coverage endif #--------------------------------------------------------------------------- # CFLAGS for the C/C++ compiler #--------------------------------------------------------------------------- # The CF macro is used by SuiteSparse Makefiles as a combination of # CFLAGS, CPPFLAGS, TARGET_ARCH, and system-dependent settings. CF ?= $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) $(OPTIMIZATION) -fexceptions -fPIC #--------------------------------------------------------------------------- # OpenMP is used in CHOLMOD #--------------------------------------------------------------------------- # with gcc, enable OpenMP directives via -fopenmp # This is not supported on Darwin, so this string is cleared, below. CFOPENMP ?= #-fopenmp #--------------------------------------------------------------------------- # compiler #--------------------------------------------------------------------------- # By default, look for the Intel compilers. If present, they are used # instead of $(CC), $(CXX), and $(F77). 
To disable this feature and # use the $(CC), $(CXX), and $(F77) compilers, use 'make AUTOCC=no' AUTOCC ?= no ifneq ($(AUTOCC),no) ifneq ($(shell which icc 2>/dev/null),) # use the Intel icc compiler for C codes, and -qopenmp for OpenMP CC = icc -D_GNU_SOURCE CXX = $(CC) CFOPENMP = -qopenmp -I$(MKLROOT)/include LDFLAGS += -openmp endif ifneq ($(shell which ifort 2>/dev/null),) # use the Intel ifort compiler for Fortran codes F77 = ifort endif endif #--------------------------------------------------------------------------- # code formatting (for Tcov on Linux only) #--------------------------------------------------------------------------- PRETTY ?= grep -v "^\#" | indent -bl -nce -bli0 -i4 -sob -l120 #--------------------------------------------------------------------------- # required libraries #--------------------------------------------------------------------------- # SuiteSparse requires the BLAS, LAPACK, and -lm (Math) libraries. # It places its shared *.so libraries in SuiteSparse/lib. # Linux also requires the -lrt library (see below) LDLIBS ?= -lm LDFLAGS += -L$(INSTALL_LIB) # See http://www.openblas.net for a recent and freely available optimized # BLAS. LAPACK is at http://www.netlib.org/lapack/ . You can use the # standard Fortran LAPACK along with OpenBLAS to obtain very good # performance. This script can also detect if the Intel MKL BLAS is # installed. 
LAPACK ?= -llapack ifndef BLAS ifdef MKLROOT # use the Intel MKL for BLAS and LAPACK # using static linking: # BLAS = -Wl,--start-group \ # $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a \ # $(MKLROOT)/lib/intel64/libmkl_core.a \ # $(MKLROOT)/lib/intel64/libmkl_intel_thread.a \ # -Wl,--end-group -lpthread -lm # using dynamic linking: BLAS = -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -lpthread -lm LAPACK = else # use the OpenBLAS at http://www.openblas.net BLAS = -lopenblas endif endif # For ACML, use this instead: # make BLAS='-lacml -lgfortran' #--------------------------------------------------------------------------- # shell commands #--------------------------------------------------------------------------- # ranlib, and ar, for generating libraries. If you don't need ranlib, # just change it to RANLIB = echo RANLIB ?= ranlib ARCHIVE ?= $(AR) $(ARFLAGS) CP ?= cp -f MV ?= mv -f #--------------------------------------------------------------------------- # Fortran compiler (not required for 'make' or 'make library') #--------------------------------------------------------------------------- # A Fortran compiler is optional. Only required for the optional Fortran # interfaces to AMD and UMFPACK. Not needed by 'make' or 'make install' F77 ?= @FC@ F77FLAGS ?= $(FFLAGS) $(OPTIMIZATION) #--------------------------------------------------------------------------- # NVIDIA CUDA configuration for CHOLMOD and SPQR #--------------------------------------------------------------------------- # CUDA is detected automatically, and used if found. 
To disable CUDA, # use CUDA=no CUDA=no ifneq ($(CUDA),no) CUDA_PATH = $(shell which nvcc 2>/dev/null | sed "s/\/bin\/nvcc//") endif ifeq ($(wildcard $(CUDA_PATH)),) # CUDA is not present CUDA_PATH = GPU_BLAS_PATH = GPU_CONFIG = CUDART_LIB = CUBLAS_LIB = CUDA_INC_PATH = CUDA_INC = NVCC = echo NVCCFLAGS = else # with CUDA for CHOLMOD and SPQR GPU_BLAS_PATH = $(CUDA_PATH) # GPU_CONFIG must include -DGPU_BLAS to compile SuiteSparse for the # GPU. You can add additional GPU-related flags to it as well. # with 4 cores (default): GPU_CONFIG = -DGPU_BLAS # For example, to compile CHOLMOD for 10 CPU cores when using the GPU: # GPU_CONFIG = -DGPU_BLAS -DCHOLMOD_OMP_NUM_THREADS=10 CUDART_LIB = $(CUDA_PATH)/lib64/libcudart.so CUBLAS_LIB = $(CUDA_PATH)/lib64/libcublas.so CUDA_INC_PATH = $(CUDA_PATH)/include/ CUDA_INC = -I$(CUDA_INC_PATH) NVCC = $(CUDA_PATH)/bin/nvcc NVCCFLAGS = -Xcompiler -fPIC -O3 \ -gencode=arch=compute_30,code=sm_30 \ -gencode=arch=compute_35,code=sm_35 \ -gencode=arch=compute_50,code=sm_50 \ -gencode=arch=compute_50,code=compute_50 endif #--------------------------------------------------------------------------- # UMFPACK configuration: #--------------------------------------------------------------------------- # Configuration for UMFPACK. See UMFPACK/Source/umf_config.h for details. # # -DNBLAS do not use the BLAS. UMFPACK will be very slow. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF do not use the Sun Perf. Library on Solaris # -DNRECIPROCAL do not multiply by the reciprocal # -DNO_DIVIDE_BY_ZERO do not divide by zero # -DNCHOLMOD do not use CHOLMOD as a ordering method. If -DNCHOLMOD is # included in UMFPACK_CONFIG, then UMFPACK does not rely on # CHOLMOD, CAMD, CCOLAMD, COLAMD, and METIS. 
UMFPACK_CONFIG ?= # For example, uncomment this line to compile UMFPACK without CHOLMOD: # UMFPACK_CONFIG = -DNCHOLMOD # or use 'make UMFPACK_CONFIG=-DNCHOLMOD' #--------------------------------------------------------------------------- # CHOLMOD configuration #--------------------------------------------------------------------------- # CHOLMOD Library Modules, which appear in -lcholmod # Core requires: none # Check requires: Core # Cholesky requires: Core, AMD, COLAMD. optional: Partition, Supernodal # MatrixOps requires: Core # Modify requires: Core # Partition requires: Core, CCOLAMD, METIS. optional: Cholesky # Supernodal requires: Core, BLAS, LAPACK # # CHOLMOD test/demo Modules (these do not appear in -lcholmod): # Tcov requires: Core, Check, Cholesky, MatrixOps, Modify, Supernodal # optional: Partition # Valgrind same as Tcov # Demo requires: Core, Check, Cholesky, MatrixOps, Supernodal # optional: Partition # # Configuration flags: # -DNCHECK do not include the Check module. # -DNCHOLESKY do not include the Cholesky module. # -DNPARTITION do not include the Partition module. # also do not include METIS. # -DNCAMD do not use CAMD & CCOLAMD in Partition Module. # -DNMATRIXOPS do not include the MatrixOps module. # -DNMODIFY do not include the Modify module. # -DNSUPERNODAL do not include the Supernodal module. # # -DNPRINT do not print anything. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF for Solaris only. 
If defined, do not use the Sun # Performance Library # -DGPU_BLAS enable the use of the CUDA BLAS CHOLMOD_CONFIG ?= -DNPARTITION #--------------------------------------------------------------------------- # SuiteSparseQR configuration: #--------------------------------------------------------------------------- # The SuiteSparseQR library can be compiled with the following options: # # -DNPARTITION do not include the CHOLMOD partition module # -DNEXPERT do not include the functions in SuiteSparseQR_expert.cpp # -DHAVE_TBB enable the use of Intel's Threading Building Blocks # -DGPU_BLAS enable the use of the CUDA BLAS SPQR_CONFIG ?= $(GPU_CONFIG) # to compile with Intel's TBB, use TBB=-ltbb -DSPQR_CONFIG=-DHAVE_TBB TBB ?= # TBB = -ltbb -DSPQR_CONFIG=-DHAVE_TBB # TODO: this *mk file should auto-detect the presence of Intel's TBB, # and set the compiler flags accordingly. #=============================================================================== # System-dependent configurations #=============================================================================== #--------------------------------------------------------------------------- # determine what system we are on #--------------------------------------------------------------------------- # To disable these auto configurations, use 'make UNAME=custom' ifndef UNAME ifeq ($(OS),Windows_NT) # Cygwin Make on Windows has an $(OS) variable, but not uname. # Note that this option is untested. UNAME = Windows else # Linux and Darwin (Mac OSX) have been tested. 
UNAME := $(shell uname) endif endif #--------------------------------------------------------------------------- # Linux #--------------------------------------------------------------------------- ifeq ($(UNAME),Linux) # add the realtime library, librt, and SuiteSparse/lib LDLIBS += -lrt -Wl,-rpath=$(INSTALL_LIB) endif #--------------------------------------------------------------------------- # Mac #--------------------------------------------------------------------------- ifeq ($(UNAME), Darwin) # To compile on the Mac, you must install Xcode. Then do this at the # command line in the Terminal, before doing 'make': # xcode-select --install CF += -fno-common BLAS = -framework Accelerate LAPACK = -framework Accelerate # OpenMP is not yet supported by default in clang CFOPENMP = endif #--------------------------------------------------------------------------- # Solaris #--------------------------------------------------------------------------- ifeq ($(UNAME), SunOS) # Using the Sun compiler and the Sun Performance Library # This hasn't been tested recently. # I leave it here in case you need it. It likely needs updating. CF += -fast -KPIC -xc99=%none -xlibmieee -xlibmil -m64 -Xc F77FLAGS = -O -fast -KPIC -dalign -xlibmil -m64 BLAS = -xlic_lib=sunperf LAPACK = # Using the GCC compiler and the reference BLAS ## CC = gcc ## CXX = g++ ## MAKE = gmake ## BLAS = -lrefblas -lgfortran ## LAPACK = -llapack endif #--------------------------------------------------------------------------- # IBM AIX #--------------------------------------------------------------------------- ifeq ($(UNAME), AIX) # hasn't been tested for a very long time... # I leave it here in case you need it. It likely needs updating. 
CF += -O4 -qipa -qmaxmem=16384 -q64 -qproto -DBLAS_NO_UNDERSCORE F77FLAGS = -O4 -qipa -qmaxmem=16384 -q64 BLAS = -lessl LAPACK = endif #=============================================================================== # finalize the CF compiler flags #=============================================================================== CF += $(CFOPENMP) #=============================================================================== # internal configuration #=============================================================================== # The user should not have to change these definitions, and they are # not displayed by 'make config' #--------------------------------------------------------------------------- # for removing files not in the distribution #--------------------------------------------------------------------------- # remove object files, but keep compiled libraries via 'make clean' CLEAN = *.o *.obj *.ln *.bb *.bbg *.da *.tcov *.gcov gmon.out *.bak *.d \ *.gcda *.gcno *.aux *.bbl *.blg *.log *.toc *.dvi *.lof *.lot # also remove compiled libraries, via 'make distclean' PURGE = *.so* *.a *.dll *.dylib *.dSYM # location of TCOV test output TCOV_TMP ?= /tmp #=============================================================================== # Building the shared and static libraries #=============================================================================== # How to build/install shared and static libraries for Mac and Linux/Unix. # This assumes that LIBRARY and VERSION have already been defined by the # Makefile that includes this file. 
SO_OPTS = $(LDFLAGS) ifeq ($(UNAME),Windows) # Cygwin Make on Windows (untested) AR_TARGET = $(LIBRARY).lib SO_PLAIN = $(LIBRARY).dll SO_MAIN = $(LIBRARY).$(SO_VERSION).dll SO_TARGET = $(LIBRARY).$(VERSION).dll SO_INSTALL_NAME = echo else # Mac or Linux/Unix AR_TARGET = $(LIBRARY).a ifeq ($(UNAME),Darwin) # Mac SO_PLAIN = $(LIBRARY).dylib SO_MAIN = $(LIBRARY).$(SO_VERSION).dylib SO_TARGET = $(LIBRARY).$(VERSION).dylib SO_OPTS += -dynamiclib -compatibility_version $(SO_VERSION) \ -current_version $(VERSION) \ -shared -undefined dynamic_lookup # When a Mac *.dylib file is moved, this command is required # to change its internal name to match its location in the filesystem: SO_INSTALL_NAME = install_name_tool -id else # Linux and other variants of Unix SO_PLAIN = $(LIBRARY).so SO_MAIN = $(LIBRARY).so.$(SO_VERSION) SO_TARGET = $(LIBRARY).so.$(VERSION) SO_OPTS += -shared -Wl,-soname -Wl,$(SO_MAIN) -Wl,--no-undefined # Linux/Unix *.so files can be moved without modification: SO_INSTALL_NAME = echo endif endif #=============================================================================== # Configure CHOLMOD/Partition module with METIS, CAMD, and CCOLAMD #=============================================================================== # By default, SuiteSparse uses METIS 5.1.0 in the SuiteSparse/metis-5.1.0 # directory. SuiteSparse's interface to METIS is only through the # SuiteSparse/CHOLMOD/Partition module, which also requires SuiteSparse/CAMD # and SuiteSparse/CCOLAMD. # # If you wish to use your own pre-installed copy of METIS, use the MY_METIS_LIB # and MY_METIS_INC options passed to 'make'. For example: # make MY_METIS_LIB=-lmetis # make MY_METIS_LIB=/home/myself/mylibraries/libmetis.so # make MY_METIS_LIB='-L/home/myself/mylibraries -lmetis' # If you need to tell the compiler where to find the metis.h include file, # then add MY_METIS_INC=/home/myself/metis-5.1.0/include as well, which points # to the directory containing metis.h. 
If metis.h is already installed in # a location known to the compiler (/usr/local/include/metis.h for example) # then you do not need to add MY_METIS_INC. I_WITH_PARTITION = LIB_WITH_PARTITION = CONFIG_PARTITION = -DNPARTITION -DNCAMD # check if CAMD/CCOLAMD and METIS are requested and available ifeq (,$(findstring -DNCAMD, $(CHOLMOD_CONFIG))) # CAMD and CCOLAMD are requested. See if they are available in # SuiteSparse/CAMD and SuiteSparse/CCOLAMD ifneq (, $(wildcard $(SUITESPARSE)/CAMD)) ifneq (, $(wildcard $(SUITESPARSE)/CCOLAMD)) # CAMD and CCOLAMD are requested and available LIB_WITH_PARTITION = -lccolamd -lcamd I_WITH_PARTITION = -I$(SUITESPARSE)/CCOLAMD/Include -I$(SUITESPARSE)/CAMD/Include CONFIG_PARTITION = -DNPARTITION # check if METIS is requested and available ifeq (,$(findstring -DNPARTITION, $(CHOLMOD_CONFIG))) # METIS is requested. See if it is available. ifneq (,$(MY_METIS_LIB)) # METIS 5.1.0 is provided elsewhere, and we are not using # SuiteSparse/metis-5.1.0. To do so, we link with # $(MY_METIS_LIB) and add the -I$(MY_METIS_INC) option for # the compiler. The latter can be empty if you have METIS # installed in a place where the compiler can find the # metis.h include file by itself without any -I option # (/usr/local/include/metis.h for example). 
LIB_WITH_PARTITION += $(MY_METIS_LIB) ifneq (,$(MY_METIS_INC)) I_WITH_PARTITION += -I$(MY_METIS_INC) endif CONFIG_PARTITION = else # see if METIS is in SuiteSparse/metis-5.1.0 ifneq (, $(wildcard $(SUITESPARSE)/metis-5.1.0)) # SuiteSparse/metis5.1.0 is available ifeq ($(UNAME), Darwin) LIB_WITH_PARTITION += $(SUITESPARSE)/lib/libmetis.dylib else LIB_WITH_PARTITION += -lmetis endif I_WITH_PARTITION += -I$(SUITESPARSE)/metis-5.1.0/include CONFIG_PARTITION = endif endif endif endif endif endif #=============================================================================== # display configuration #=============================================================================== ifeq ($(LIBRARY),) # placeholders, for 'make config' in the top-level SuiteSparse LIBRARY=PackageNameWillGoHere VERSION=x.y.z SO_VERSION=x endif # 'make config' lists the primary installation options config: @echo ' ' @echo '----------------------------------------------------------------' @echo 'SuiteSparse package compilation options:' @echo '----------------------------------------------------------------' @echo ' ' @echo 'SuiteSparse Version: ' '$(SUITESPARSE_VERSION)' @echo 'SuiteSparse top folder: ' '$(SUITESPARSE)' @echo 'Package: LIBRARY= ' '$(LIBRARY)' @echo 'Version: VERSION= ' '$(VERSION)' @echo 'SO version: SO_VERSION= ' '$(SO_VERSION)' @echo 'System: UNAME= ' '$(UNAME)' @echo 'Install directory: INSTALL= ' '$(INSTALL)' @echo 'Install libraries in: INSTALL_LIB= ' '$(INSTALL_LIB)' @echo 'Install include files in: INSTALL_INCLUDE=' '$(INSTALL_INCLUDE)' @echo 'Install documentation in: INSTALL_DOC= ' '$(INSTALL_DOC)' @echo 'Optimization level: OPTIMIZATION= ' '$(OPTIMIZATION)' @echo 'BLAS library: BLAS= ' '$(BLAS)' @echo 'LAPACK library: LAPACK= ' '$(LAPACK)' @echo 'Intel TBB library: TBB= ' '$(TBB)' @echo 'Other libraries: LDLIBS= ' '$(LDLIBS)' @echo 'static library: AR_TARGET= ' '$(AR_TARGET)' @echo 'shared library (full): SO_TARGET= ' '$(SO_TARGET)' @echo 'shared library (main): 
SO_MAIN= ' '$(SO_MAIN)' @echo 'shared library (short): SO_PLAIN= ' '$(SO_PLAIN)' @echo 'shared library options: SO_OPTS= ' '$(SO_OPTS)' @echo 'shared library name tool: SO_INSTALL_NAME=' '$(SO_INSTALL_NAME)' @echo 'ranlib, for static libs: RANLIB= ' '$(RANLIB)' @echo 'static library command: ARCHIVE= ' '$(ARCHIVE)' @echo 'copy file: CP= ' '$(CP)' @echo 'move file: MV= ' '$(MV)' @echo 'remove file: RM= ' '$(RM)' @echo 'pretty (for Tcov tests): PRETTY= ' '$(PRETTY)' @echo 'C compiler: CC= ' '$(CC)' @echo 'C++ compiler: CXX= ' '$(CXX)' @echo 'CUDA compiler: NVCC= ' '$(NVCC)' @echo 'CUDA root directory: CUDA_PATH= ' '$(CUDA_PATH)' @echo 'OpenMP flags: CFOPENMP= ' '$(CFOPENMP)' @echo 'C/C++ compiler flags: CF= ' '$(CF)' @echo 'LD flags: LDFLAGS= ' '$(LDFLAGS)' @echo 'Fortran compiler: F77= ' '$(F77)' @echo 'Fortran flags: F77FLAGS= ' '$(F77FLAGS)' @echo 'Intel MKL root: MKLROOT= ' '$(MKLROOT)' @echo 'Auto detect Intel icc: AUTOCC= ' '$(AUTOCC)' @echo 'UMFPACK config: UMFPACK_CONFIG= ' '$(UMFPACK_CONFIG)' @echo 'CHOLMOD config: CHOLMOD_CONFIG= ' '$(CHOLMOD_CONFIG)' @echo 'SuiteSparseQR config: SPQR_CONFIG= ' '$(SPQR_CONFIG)' @echo 'CUDA library: CUDART_LIB= ' '$(CUDART_LIB)' @echo 'CUBLAS library: CUBLAS_LIB= ' '$(CUBLAS_LIB)' @echo 'METIS and CHOLMOD/Partition configuration:' @echo 'Your METIS library: MY_METIS_LIB= ' '$(MY_METIS_LIB)' @echo 'Your metis.h is in: MY_METIS_INC= ' '$(MY_METIS_INC)' @echo 'METIS is used via the CHOLMOD/Partition module, configured as follows.' 
@echo 'If the next line has -DNPARTITION then METIS will not be used:' @echo 'CHOLMOD Partition config: ' '$(CONFIG_PARTITION)' @echo 'CHOLMOD Partition libs: ' '$(LIB_WITH_PARTITION)' @echo 'CHOLMOD Partition include:' '$(I_WITH_PARTITION)' ifeq ($(TCOV),yes) @echo 'TCOV=yes, for extensive testing only (gcc, g++, vanilla BLAS)' endif FreeFem-sources-4.9/3rdparty/umfpack/SuiteSparse_config.mk.in000664 000000 000000 00000034710 14037356732 024307 0ustar00rootroot000000 000000 #=============================================================================== # SuiteSparse_config.mk: common configuration file for the SuiteSparse #=============================================================================== # This file contains all configuration settings for all packages authored or # co-authored by Tim Davis: # # Package Version Description # ------- ------- ----------- # AMD 1.2 or later approximate minimum degree ordering # COLAMD 2.4 or later column approximate minimum degree ordering # CCOLAMD 1.0 or later constrained column approximate minimum degree ordering # CAMD any constrained approximate minimum degree ordering # UMFPACK 4.5 or later sparse LU factorization, with the BLAS # CHOLMOD any sparse Cholesky factorization, update/downdate # KLU 0.8 or later sparse LU factorization, BLAS-free # BTF 0.8 or later permutation to block triangular form # LDL 1.2 or later concise sparse LDL' # CXSparse any extended version of CSparse (int/long, real/complex) # SuiteSparseQR any sparse QR factorization # RBio 2.0 or later read/write sparse matrices in Rutherford-Boeing format # # By design, this file is NOT included in the CSparse makefile. # That package is fully stand-alone. CSparse is primarily for teaching; # production code should use CXSparse. # # The SuiteSparse_config directory and the above packages should all appear in # a single directory, in order for the Makefile's within each package to find # this file. 
# # To enable an option of the form "# OPTION = ...", edit this file and # delete the "#" in the first column of the option you wish to use. # # The use of METIS 4.0.1 is optional. To exclude METIS, you must compile with # CHOLMOD_CONFIG set to -DNPARTITION. See below for details. However, if you # do not have a metis-4.0 directory inside the SuiteSparse directory, the # */Makefile's that optionally rely on METIS will automatically detect this # and compile without METIS. #------------------------------------------------------------------------------ # Generic configuration #------------------------------------------------------------------------------ # Using standard definitions from the make environment, typically: # # CC cc C compiler # CXX g++ C++ compiler # CFLAGS [ ] flags for C and C++ compiler # CPPFLAGS [ ] flags for C and C++ compiler # TARGET_ARCH [ ] target architecture # FFLAGS [ ] flags for Fortran compiler # RM rm -f delete a file # AR ar create a static *.a library archive # ARFLAGS rv flags for ar # MAKE make make itself (sometimes called gmake) # # You can redefine them here, but by default they are used from the # default make environment. # C and C++ compiler flags. The first three are standard for *.c and *.cpp # Add -DNTIMER if you do not use any timing routines (otherwise -lrt is required). # CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O3 -fexceptions -fPIC -DNTIMER CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O3 -fexceptions -fPIC -DNTIMER # ranlib, and ar, for generating libraries. If you don't need ranlib, # just change it to RANLIB = echo AR = @AR@ RANLIB = @RANLIB@ ARFLAGS =@ARFLAGS@ ARCHIVE = $(AR) $(ARFLAGS) # copy and move a file CP = cp -f MV = mv -f # Fortran compiler (not required for 'make' or 'make library') F77 = gfortran F77FLAGS = $(FFLAGS) -O F77LIB = # C and Fortran libraries. Remove -lrt if you don't have it. LIB = -lm # Using the following requires CF = ... -DNTIMER on POSIX C systems. 
# LIB = -lm # For "make install" INSTALL_LIB = @abs_srcdir@/3rdparty/lib INSTALL_INCLUDE = @abs_srcdir@/3rdparty/include # Which version of MAKE you are using (default is "make") # MAKE = make # MAKE = gmake #------------------------------------------------------------------------------ # BLAS and LAPACK configuration: #------------------------------------------------------------------------------ # UMFPACK and CHOLMOD both require the BLAS. CHOLMOD also requires LAPACK. # See Kazushige Goto's BLAS at http://www.cs.utexas.edu/users/flame/goto/ or # http://www.tacc.utexas.edu/~kgoto/ for the best BLAS to use with CHOLMOD. # LAPACK is at http://www.netlib.org/lapack/ . You can use the standard # Fortran LAPACK along with Goto's BLAS to obtain very good performance. # CHOLMOD gets a peak numeric factorization rate of 3.6 Gflops on a 3.2 GHz # Pentium 4 (512K cache, 4GB main memory) with the Goto BLAS, and 6 Gflops # on a 2.5Ghz dual-core AMD Opteron. # These settings will probably not work, since there is no fixed convention for # naming the BLAS and LAPACK library (*.a or *.so) files. # This is probably slow ... it might connect to the Standard Reference BLAS: BLAS = @BLASLIBS@ LAPACK = @LAPACKLIBS@ # NOTE: this next option for the "Goto BLAS" has nothing to do with a "goto" # statement. Rather, the Goto BLAS is written by Dr. Kazushige Goto. # Using the Goto BLAS: # BLAS = -lgoto -lgfortran -lgfortranbegin # BLAS = -lgoto2 -lgfortran -lgfortranbegin -lpthread # Using non-optimized versions: # BLAS = -lblas_plain -lgfortran -lgfortranbegin # LAPACK = -llapack_plain # BLAS = -lblas_plain -lgfortran -lgfortranbegin # LAPACK = -llapack # The BLAS might not contain xerbla, an error-handling routine for LAPACK and # the BLAS. Also, the standard xerbla requires the Fortran I/O library, and # stops the application program if an error occurs. 
A C version of xerbla # distributed with this software (SuiteSparse_config/xerbla/libcerbla.a) # includes a Fortran-callable xerbla routine that prints nothing and does not # stop the application program. This is optional. # XERBLA = ../../SuiteSparse_config/xerbla/libcerbla.a # If you wish to use the XERBLA in LAPACK and/or the BLAS instead, # use this option: XERBLA = # If you wish to use the Fortran SuiteSparse_config/xerbla/xerbla.f instead, # use this: # XERBLA = ../../SuiteSparse_config/xerbla/libxerbla.a #------------------------------------------------------------------------------ # GPU configuration for CHOLMOD, using the CUDA BLAS #------------------------------------------------------------------------------ # no cuda GPU_BLAS_PATH = GPU_CONFIG = # with cuda BLAS acceleration for CHOLMOD # GPU_BLAS_PATH=/usr/local/cuda # GPU_CONFIG=-DGPU_BLAS -I$(GPU_BLAS_PATH)/include #------------------------------------------------------------------------------ # METIS, optionally used by CHOLMOD #------------------------------------------------------------------------------ # If you do not have METIS, or do not wish to use it in CHOLMOD, you must # compile CHOLMOD with the -DNPARTITION flag. # The path is relative to where it is used, in CHOLMOD/Lib, CHOLMOD/MATLAB, etc. # You may wish to use an absolute path. METIS is optional. Compile # CHOLMOD with -DNPARTITION if you do not wish to use METIS. METIS_PATH = ../../metis-4.0 METIS = ../../metis-4.0/libmetis.a #------------------------------------------------------------------------------ # UMFPACK configuration: #------------------------------------------------------------------------------ # Configuration flags for UMFPACK. See UMFPACK/Source/umf_config.h for details. # # -DNBLAS do not use the BLAS. UMFPACK will be very slow. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF do not use the Sun Perf. 
Library (default is use it on Solaris) # -DNRECIPROCAL do not multiply by the reciprocal # -DNO_DIVIDE_BY_ZERO do not divide by zero # -DNCHOLMOD do not use CHOLMOD as a ordering method. If -DNCHOLMOD is # included in UMFPACK_CONFIG, then UMFPACK does not rely on # CHOLMOD, CAMD, CCOLAMD, COLAMD, and METIS. UMFPACK_CONFIG = # uncomment this line to compile UMFPACK without CHOLMOD: # UMFPACK_CONFIG = -DNCHOLMOD #------------------------------------------------------------------------------ # CHOLMOD configuration #------------------------------------------------------------------------------ # CHOLMOD Library Modules, which appear in libcholmod.a: # Core requires: none # Check requires: Core # Cholesky requires: Core, AMD, COLAMD. optional: Partition, Supernodal # MatrixOps requires: Core # Modify requires: Core # Partition requires: Core, CCOLAMD, METIS. optional: Cholesky # Supernodal requires: Core, BLAS, LAPACK # # CHOLMOD test/demo Modules (all are GNU GPL, do not appear in libcholmod.a): # Tcov requires: Core, Check, Cholesky, MatrixOps, Modify, Supernodal # optional: Partition # Valgrind same as Tcov # Demo requires: Core, Check, Cholesky, MatrixOps, Supernodal # optional: Partition # # Configuration flags: # -DNCHECK do not include the Check module. License GNU LGPL # -DNCHOLESKY do not include the Cholesky module. License GNU LGPL # -DNPARTITION do not include the Partition module. License GNU LGPL # also do not include METIS. # -DNGPL do not include any GNU GPL Modules in the CHOLMOD library: # -DNMATRIXOPS do not include the MatrixOps module. License GNU GPL # -DNMODIFY do not include the Modify module. License GNU GPL # -DNSUPERNODAL do not include the Supernodal module. License GNU GPL # # -DNPRINT do not print anything. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF for Solaris only. 
If defined, do not use the Sun # Performance Library CHOLMOD_CONFIG = -DNPARTITION -DNGPL $(GPU_CONFIG) # uncomment this line to compile CHOLMOD without METIS: # CHOLMOD_CONFIG = -DNPARTITION #------------------------------------------------------------------------------ # SuiteSparseQR configuration: #------------------------------------------------------------------------------ # The SuiteSparseQR library can be compiled with the following options: # # -DNPARTITION do not include the CHOLMOD partition module # -DNEXPERT do not include the functions in SuiteSparseQR_expert.cpp # -DHAVE_TBB enable the use of Intel's Threading Building Blocks (TBB) # default, without timing, without TBB: SPQR_CONFIG = # with TBB: # SPQR_CONFIG = -DHAVE_TBB # This is needed for IBM AIX: (but not for C codes, just C++) # SPQR_CONFIG = -DBLAS_NO_UNDERSCORE # with TBB, you must select this: # TBB = -ltbb # without TBB: TBB = #------------------------------------------------------------------------------ # Linux #------------------------------------------------------------------------------ # Using default compilers: CC = @CC@ CFLAGS = @CFLAGS@ # CF = $(CFLAGS) -O3 -fexceptions # alternatives: # CF = $(CFLAGS) -g -fexceptions \ -Wall -W -Wshadow -Wmissing-prototypes -Wstrict-prototypes \ -Wredundant-decls -Wnested-externs -Wdisabled-optimization -ansi \ -funit-at-a-time # CF = $(CFLAGS) -O3 -fexceptions \ -Wall -W -Werror -Wshadow -Wmissing-prototypes -Wstrict-prototypes \ -Wredundant-decls -Wnested-externs -Wdisabled-optimization -ansi # CF = $(CFLAGS) -O3 -fexceptions -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE # CF = $(CFLAGS) -O3 # CF = $(CFLAGS) -O3 -g -fexceptions # CF = $(CFLAGS) -g -fexceptions \ -Wall -W -Wshadow \ -Wredundant-decls -Wdisabled-optimization -ansi # consider: # -fforce-addr -fmove-all-movables -freduce-all-givs -ftsp-ordering # -frename-registers -ffast-math -funroll-loops # Using the Goto BLAS: # BLAS = -lgoto -lfrtbegin -lg2c $(XERBLA) -lpthread # Using 
Intel's icc and ifort compilers: # (does not work for mexFunctions unless you add a mexopts.sh file) # F77 = ifort # CC = icc # CF = $(CFLAGS) -O3 -xN -vec_report=0 # CF = $(CFLAGS) -g # 64bit: # F77FLAGS = -O -m64 # CF = $(CFLAGS) -O3 -fexceptions -m64 # BLAS = -lgoto64 -lfrtbegin -lg2c -lpthread $(XERBLA) # LAPACK = -llapack64 # SUSE Linux 10.1, AMD Opteron, with GOTO Blas # F77 = gfortran # BLAS = -lgoto_opteron64 -lgfortran # SUSE Linux 10.1, Intel Pentium, with GOTO Blas # F77 = gfortran # BLAS = -lgoto -lgfortran #------------------------------------------------------------------------------ # Mac #------------------------------------------------------------------------------ # As recommended by macports, http://suitesparse.darwinports.com/ # I've tested them myself on Mac OSX 10.6.1 and 10.6.8 (Snow Leopard), # on my MacBook Air, and they work fine. # F77 = gfortran # CF = $(CFLAGS) -O3 -fno-common -fexceptions -DNTIMER # BLAS = -framework Accelerate # LAPACK = -framework Accelerate # LIB = -lm #------------------------------------------------------------------------------ # Solaris #------------------------------------------------------------------------------ # 32-bit # CF = $(CFLAGS) -KPIC -dalign -xc99=%none -Xc -xlibmieee -xO5 -xlibmil -m32 # 64-bit # CF = $(CFLAGS) -fast -KPIC -xc99=%none -xlibmieee -xlibmil -m64 -Xc # FFLAGS = -fast -KPIC -dalign -xlibmil -m64 # The Sun Performance Library includes both LAPACK and the BLAS: # BLAS = -xlic_lib=sunperf # LAPACK = #------------------------------------------------------------------------------ # Compaq Alpha #------------------------------------------------------------------------------ # 64-bit mode only # CF = $(CFLAGS) -O2 -std1 # BLAS = -ldxml # LAPACK = #------------------------------------------------------------------------------ # IBM RS 6000 #------------------------------------------------------------------------------ # BLAS = -lessl # LAPACK = # 32-bit mode: # CF = $(CFLAGS) -O4 -qipa 
-qmaxmem=16384 -qproto # F77FLAGS = -O4 -qipa -qmaxmem=16384 # 64-bit mode: # CF = $(CFLAGS) -O4 -qipa -qmaxmem=16384 -q64 -qproto # F77FLAGS = -O4 -qipa -qmaxmem=16384 -q64 #------------------------------------------------------------------------------ # SGI IRIX #------------------------------------------------------------------------------ # BLAS = -lscsl # LAPACK = # 32-bit mode # CF = $(CFLAGS) -O # 64-bit mode (32 bit int's and 64-bit long's): # CF = $(CFLAGS) -64 # F77FLAGS = -64 # SGI doesn't have ranlib # RANLIB = echo #------------------------------------------------------------------------------ # AMD Opteron (64 bit) #------------------------------------------------------------------------------ # BLAS = -lgoto_opteron64 -lg2c # LAPACK = -llapack_opteron64 # SUSE Linux 10.1, AMD Opteron # F77 = gfortran # BLAS = -lgoto_opteron64 -lgfortran # LAPACK = -llapack_opteron64 #------------------------------------------------------------------------------ # remove object files and profile output #------------------------------------------------------------------------------ CLEAN = *.o *.obj *.ln *.bb *.bbg *.da *.tcov *.gcov gmon.out *.bak *.d *.gcda *.gcno FreeFem-sources-4.9/3rdparty/umfpack/UFconfig_mk.m4000664 000000 000000 00000025230 14037356732 022203 0ustar00rootroot000000 000000 #=============================================================================== # UFconfig.mk: common configuration file for the SuiteSparse #=============================================================================== # This file contains all configuration settings for all packages authored or # co-authored by Tim Davis at the University of Florida: # # Package Version Description # ------- ------- ----------- # AMD 1.2 or later approximate minimum degree ordering # COLAMD 2.4 or later column approximate minimum degree ordering # CCOLAMD 1.0 or later constrained column approximate minimum degree ordering # CAMD any constrained approximate minimum degree ordering # UMFPACK 
4.5 or later sparse LU factorization, with the BLAS # CHOLMOD any sparse Cholesky factorization, update/downdate # KLU 0.8 or later sparse LU factorization, BLAS-free # BTF 0.8 or later permutation to block triangular form # LDL 1.2 or later concise sparse LDL' # LPDASA any linear program solve (dual active set algorithm) # # The UFconfig directory and the above packages should all appear in a single # directory, in order for the Makefile's within each package to find this file. # # To enable an option of the form "# OPTION = ...", edit this file and # delete the "#" in the first column of the option you wish to use. #------------------------------------------------------------------------------ # Generic configuration #------------------------------------------------------------------------------ # C compiler and compiler flags: These will normally not give you optimal # performance. You should select the optimization parameters that are best # for your system. On Linux, use "CFLAGS = -O3 -fexceptions" for example. CC = FF_CC CFLAGS = -I../../AMD/Include -I../../UFconfig FF_CFLAGS # ranlib, and ar, for generating libraries RANLIB = FF_RANLIB AR = FF_AR FF_ARFLAGS # delete and rename a file RM = rm -f MV = mv -f # Fortran compiler (not normally required) F77 = FF_F77 F77FLAGS =FF_F77FLAGS F77LIB = #FF_F77LIB # C and Fortran libraries LIB = FF_LIB # For compiling MATLAB mexFunctions MEX = mex -O # Which version of MAKE you are using (default is "make") # MAKE = make # MAKE = gmake #------------------------------------------------------------------------------ # BLAS and LAPACK configuration: #------------------------------------------------------------------------------ # UMFPACK and CHOLMOD both require the BLAS. CHOLMOD also requires LAPACK. # See Kazushige Goto's BLAS at http://www.cs.utexas.edu/users/flame/goto/ or # http://www.tacc.utexas.edu/~kgoto/ for the best BLAS to use with CHOLMOD. # LAPACK is at http://www.netlib.org/lapack/ . 
You can use the standard # Fortran LAPACK along with Goto's BLAS to obtain very good performance. # CHOLMOD gets a peak numeric factorization rate of 3.6 Gflops on a 3.2 GHz # Pentium 4 (512K cache, 4GB main memory) with the Goto BLAS, and 6 Gflops # on a 2.5Ghz dual-core AMD Opteron. # These settings will probably not work, since there is no fixed convention for # naming the BLAS and LAPACK library (*.a or *.so) files. Assume the Goto # BLAS are available. #BLAS = -lgoto -lgfortran -lgfortranbegin #LAPACK = -llapack # The BLAS might not contain xerbla, an error-handling routine for LAPACK and # the BLAS. Also, the standard xerbla requires the Fortran I/O library, and # stops the application program if an error occurs. A C version of xerbla # distributed with this software (UFconfig/xerbla/libcerbla.a) includes a # Fortran-callable xerbla routine that prints nothing and does not stop the # application program. This is optional. # XERBLA = ../../UFconfig/xerbla/libcerbla.a # If you wish to use the XERBLA in LAPACK and/or the BLAS instead, # use this option: #XERBLA = # If you wish to use the Fortran UFconfig/xerbla/xerbla.f instead, use this: # XERBLA = ../../UFconfig/xerbla/libxerbla.a #------------------------------------------------------------------------------ # METIS, optionally used by CHOLMOD #------------------------------------------------------------------------------ # If you do not have METIS, or do not wish to use it in CHOLMOD, you must # compile CHOLMOD with the -DNPARTITION flag. You must also use the # "METIS =" option, below. # The path is relative to where it is used, in CHOLMOD/Lib, CHOLMOD/MATLAB, etc. # You may wish to use an absolute path. METIS is optional. Compile # CHOLMOD with -DNPARTITION if you do not wish to use METIS. 
METIS_PATH = # FF_DWL/include METIS = # FF_DWL/lib/metis/libmetis.a # If you use CHOLMOD_CONFIG = -DNPARTITION then you must use the following # options: # METIS_PATH = # METIS = #------------------------------------------------------------------------------ # UMFPACK configuration: #------------------------------------------------------------------------------ # Configuration flags for UMFPACK. See UMFPACK/Source/umf_config.h for details. # # -DNBLAS do not use the BLAS. UMFPACK will be very slow. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF do not use the Sun Perf. Library (default is use it on Solaris) # -DNPOSIX do not use POSIX routines sysconf and times. # -DGETRUSAGE use getrusage # -DNO_TIMER do not use any timing routines # -DNRECIPROCAL do not multiply by the reciprocal # -DNO_DIVIDE_BY_ZERO do not divide by zero UMFPACK_CONFIG = FF_CONFIG #------------------------------------------------------------------------------ # CHOLMOD configuration #------------------------------------------------------------------------------ # CHOLMOD Library Modules, which appear in libcholmod.a: # Core requires: none # Check requires: Core # Cholesky requires: Core, AMD, COLAMD. optional: Partition, Supernodal # MatrixOps requires: Core # Modify requires: Core # Partition requires: Core, CCOLAMD, METIS. optional: Cholesky # Supernodal requires: Core, BLAS, LAPACK # # CHOLMOD test/demo Modules (all are GNU GPL, do not appear in libcholmod.a): # Tcov requires: Core, Check, Cholesky, MatrixOps, Modify, Supernodal # optional: Partition # Valgrind same as Tcov # Demo requires: Core, Check, Cholesky, MatrixOps, Supernodal # optional: Partition # # Configuration flags: # -DNCHECK do not include the Check module. License GNU LGPL # -DNCHOLESKY do not include the Cholesky module. License GNU LGPL # -DNPARTITION do not include the Partition module. License GNU LGPL # also do not include METIS. 
# -DNGPL do not include any GNU GPL Modules in the CHOLMOD library: # -DNMATRIXOPS do not include the MatrixOps module. License GNU GPL # -DNMODIFY do not include the Modify module. License GNU GPL # -DNSUPERNODAL do not include the Supernodal module. License GNU GPL # # -DNPRINT do not print anything. # -D'LONGBLAS=long' or -DLONGBLAS='long long' defines the integers used by # LAPACK and the BLAS (defaults to 'int') # -DNSUNPERF for Solaris only. If defined, do not use the Sun # Performance Library CHOLMOD_CONFIG = -DNPARTITION #------------------------------------------------------------------------------ # Linux #------------------------------------------------------------------------------ # Using default compilers: # CC = gcc # CFLAGS = -O3 # alternatives: # CFLAGS = -g -fexceptions \ # -Wall -W -Wshadow -Wmissing-prototypes -Wstrict-prototypes \ # -Wredundant-decls -Wnested-externs -Wdisabled-optimization -ansi # CFLAGS = -O3 -fexceptions \ # -Wall -W -Werror -Wshadow -Wmissing-prototypes -Wstrict-prototypes \ # -Wredundant-decls -Wnested-externs -Wdisabled-optimization -ansi #CFLAGS = -O3 -fexceptions -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE # CFLAGS = -O3 # consider: # -fforce-addr -fmove-all-movables -freduce-all-givs -ftsp-ordering # -frename-registers -ffast-math -funroll-loops # Using the Goto BLAS: # BLAS = -lgoto -lfrtbegin -lg2c $(XERBLA) -lpthread # Using Intel's icc and ifort compilers: # (does not work for mexFunctions unless you add a mexopts.sh file) # F77 = ifort # CC = icc # CFLAGS = -O3 -xN -vec_report=0 # CFLAGS = -g # old (broken): CFLAGS = -ansi -O3 -ip -tpp7 -xW -vec_report0 # 64bit: # F77FLAGS = -O -m64 # CFLAGS = -O3 -fexceptions -m64 # BLAS = -lgoto64 -lfrtbegin -lg2c -lpthread $(XERBLA) # LAPACK = -llapack64 # SUSE Linux 10.1, AMD Opteron # F77 = gfortran # BLAS = -lgoto_opteron64 -lgfortran # SUSE Linux 10.1, Intel Pentium # F77 = gfortran # BLAS = -lgoto -lgfortran 
#------------------------------------------------------------------------------ # Solaris #------------------------------------------------------------------------------ # 32-bit # CFLAGS = -KPIC -dalign -xc99=%none -Xc -xlibmieee -xO5 -xlibmil # 64-bit # CFLAGS = -KPIC -dalign -xc99=%none -Xc -xlibmieee -xO5 -xlibmil -xarch=v9 # BLAS = -xlic_lib=sunperf # LAPACK = #------------------------------------------------------------------------------ # Compaq Alpha #------------------------------------------------------------------------------ # 64-bit mode only # CFLAGS = -O2 -std1 # BLAS = -ldxml # LAPACK = #------------------------------------------------------------------------------ # Macintosh #------------------------------------------------------------------------------ # CC = gcc # CFLAGS = -O3 -fno-common -no-cpp-precomp -fexceptions # LIB = -lstdc++ # BLAS = -framework Accelerate # LAPACK = -framework Accelerate #------------------------------------------------------------------------------ # IBM RS 6000 #------------------------------------------------------------------------------ # BLAS = -lessl # LAPACK = # 32-bit mode: # CFLAGS = -O4 -qipa -qmaxmem=16384 -qproto # F77FLAGS = -O4 -qipa -qmaxmem=16384 # 64-bit mode: # CFLAGS = -O4 -qipa -qmaxmem=16384 -q64 -qproto # F77FLAGS = -O4 -qipa -qmaxmem=16384 -q64 # AR = ar -X64 #------------------------------------------------------------------------------ # SGI IRIX #------------------------------------------------------------------------------ # BLAS = -lscsl # LAPACK = # 32-bit mode # CFLAGS = -O # 64-bit mode (32 bit int's and 64-bit long's): # CFLAGS = -64 # F77FLAGS = -64 # SGI doesn't have ranlib # RANLIB = echo #------------------------------------------------------------------------------ # AMD Opteron (64 bit) #------------------------------------------------------------------------------ # BLAS = -lgoto_opteron64 -lg2c # LAPACK = -llapack_opteron64 # SUSE Linux 10.1, AMD Opteron # F77 = gfortran # BLAS = 
-lgoto_opteron64 -lgfortran # LAPACK = -llapack_opteron64 #------------------------------------------------------------------------------ # remove object files and profile output #------------------------------------------------------------------------------ CLEAN = *.o *.obj *.ln *.bb *.bbg *.da *.tcov *.gcov gmon.out *.bak *.d FreeFem-sources-4.9/3rdparty/umfpack/UMFPACK_Make.include000664 000000 000000 00000005762 14037356732 023152 0ustar00rootroot000000 000000 #------------------------------------------------------------------------------ # Include file for GNU make or original make (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # You can edit these definitions, or select and edit a specific # Make.(architecture) file, below. This same file is used for configuring # both AMD and UMFPACK. AMD is a stand-alone package. UMFPACK requires AMD, # and for simplicity of configuring both packages, UMFPACK and AMD share this # configuration file (and all files in the AMD/Make directory). To configure # AMD, or both AMD and UMFPACK, you only need to edit this one file (and # optionally, one of the ../Make/Make.(architecture) files below). # NOTE: -DNBLAS and other BLAS configurations are ignored by AMD, since AMD # does not use the BLAS. This flag is here because this file, and the # ../Make/Make.* files, are shared by UMFPACK (which requires AMD). If you # use AMD but not UMFPACK, then you can ignore any BLAS-related configuration # settings.
#CFLAGS = -O RANLIB = ranlib LIB = -lm RM = rm -f MV = mv -f #F77 = f77 #F77FLAGS = -O F77LIB = AR = ar #------------------------------------------------------------------------------ # for the AMD and UMFPACK mexFunctions (-DNBLAS and -DNUTIL for UMFPACK only) #------------------------------------------------------------------------------ # MATLAB 6.0 or later (default) MEX = mex -inline -O # MATLAB 6.0 or later (no divide-by-zero) # MEX = mex -inline -O -DNO_DIVIDE_BY_ZERO # MATLAB 5 (no BLAS, do not try to use utMalloc, utFree, and utRealloc) # MEX = mex -inline -O -DNBLAS -DNUTIL #------------------------------------------------------------------------------ # for UMFPACK only (BLAS configuration): #------------------------------------------------------------------------------ # The default is to NOT use the BLAS. UMFPACK will be slow, but this is more # portable. Try this option first, then use your architecture-specific # configuration, below, to add the BLAS library. AMD ignores the -DNBLAS flag. #CONFIG = -DNBLAS #------------------------------------------------------------------------------ # Architecture-specific configuration (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # Select your architecture by un-commenting the appropriate line. The include # file can redefine any of the above definitions, or other definitions. Use # CC = ... to redefine the name of your C compiler, for example. Without # any specific changes, this Makefile should work on nearly all systems.
# include ../Make/Make.linux # include ../Make/Make.sgi # include ../Make/Make.solaris # include ../Make/Make.alpha # include ../Make/Make.rs6000 #------------------------------------------------------------------------------ # remove object files and profile output #------------------------------------------------------------------------------ clean: - $(RM) *.o *.obj *.ln *.bb *.bbg *.da *.c.tcov *.c.gcov gmon.out *.bak FreeFem-sources-4.9/3rdparty/umfpack/UMFPACK_Make.m4000664 000000 000000 00000006230 14037356732 022036 0ustar00rootroot000000 000000 #------------------------------------------------------------------------------ # Include file for GNU make or original make (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # You can edit these definitions, or select and edit a specific # Make.(architecture) file, below. This same file is used for configuring # both AMD and UMFPACK. AMD is a stand-alone package. UMFPACK requires AMD, # and for simplicity of configuring both packages, UMFPACK and AMD share this # configuration file (and all files in the AMD/Make directory). To configure # AMD, or both AMD and UMFPACK, you only need to edit this one file (and # optionally, one of the ../Make/Make.(architecture) files below). # NOTE: -DNBLAS and other BLAS configurations are ignored by AMD, since AMD # does not use the BLAS. This flag is here because this file, and the # ../Make/Make.* files, are shared by UMFPACK (which requires AMD). If you # use AMD but not UMFPACK, then you can ignore any BLAS-related configuration # settings.
CC = FF_CC CFLAGS = FF_CFLAGS F77 = FF_F77 F77FLAGS =FF_F77FLAGS RANLIB = ranlib LIB = -lm RM = rm -f MV = mv -f F77LIB = AR = ar #------------------------------------------------------------------------------ # for the AMD and UMFPACK mexFunctions (-DNBLAS and -DNUTIL for UMFPACK only) #------------------------------------------------------------------------------ # MATLAB 6.0 or later (default) MEX = mex -inline -O # MATLAB 6.0 or later (no divide-by-zero) # MEX = mex -inline -O -DNO_DIVIDE_BY_ZERO # MATLAB 5 (no BLAS, do not try to use utMalloc, utFree, and utRealloc) # MEX = mex -inline -O -DNBLAS -DNUTIL #------------------------------------------------------------------------------ # for UMFPACK only (BLAS configuration): #------------------------------------------------------------------------------ CONFIG = FF_CONFIG LIB = FF_LIB # The default is to NOT use the BLAS. UMFPACK will be slow, but this is more # portable. Try this option first, then use your architecture-specific # configuration, below, to add the BLAS library. AMD ignores the -DNBLAS flag. # 2: with the ATLAS C-BLAS (http://www.netlib.org/atlas). # CONFIG = -DCBLAS -I../ATLAS/include # LIB = -lcblas -latlas -lm #------------------------------------------------------------------------------ # Architecture-specific configuration (for both AMD and UMFPACK) #------------------------------------------------------------------------------ # Select your architecture by un-commenting the appropriate line. The include # file can redefine any of the above definitions, or other definitions. Use # CC = ... to redefine the name of your C compiler, for example. Without # any specific changes, this Makefile should work on nearly all systems.
# include ../Make/Make.linux # include ../Make/Make.sgi # include ../Make/Make.solaris # include ../Make/Make.alpha # include ../Make/Make.rs6000 #------------------------------------------------------------------------------ # remove object files and profile output #------------------------------------------------------------------------------ clean: - $(RM) *.o *.obj *.ln *.bb *.bbg *.da *.c.tcov *.c.gcov gmon.out *.bak FreeFem-sources-4.9/3rdparty/yams/000775 000000 000000 00000000000 14037356732 017073 5ustar00rootroot000000 000000 FreeFem-sources-4.9/3rdparty/yams/Makefile000664 000000 000000 00000006262 14037356732 020541 0ustar00rootroot000000 000000 # Downloading and compiling extra libraries # ----------------------------------------- # $Id$ all-local: yams include ff-flags # Downloading and compiling yams # ------------------------------- # DIRPKG= ../pkg SRCDIR= ./freeyams$(yams_VERSION) #-$(yams_VERSION) PACKAGE=$(DIRPKG)/freeyams$(yams_VERSION).tgz SERVER=http://www.ann.jussieu.fr/~frey/ftp/archives/ INSTALL=../.. 
yams_VERSION=.2012.02.05 # ---------------------- # yamslib YAMS_DIR = $(abs_top_builddir)/3rdparty/yams/$(SRCDIR) YAMS_SRCDIRNOLIB = $(YAMS_DIR)/sources YAMS_SRCDIR = $(YAMS_DIR)/sourceslib YAMS_OBJDIR = $(YAMS_DIR)/objects yams: FAIRE # FFCS - make sure that PATCH is done sequentially otherwise its error messages are drowned into other meaningless # parallel compilation messages FAIRE: $(SRCDIR)/PATCH yamslib_internal.h yamslib.c yamslib.h ../Makefile $(MAKE) install WHERE touch FAIRE $(SRCDIR)/FAIT: $(SRCDIR)/PATCH yamslib_internal.h yamslib.c yamslib.h cp yamslib_internal.h yamslib.c yamslib.h $(YAMS_SRCDIR)/ cp makefile-yams.inc $(YAMS_DIR)/makefile cd $(YAMS_DIR); make touch $(SRCDIR)/FAIT install: $(SRCDIR)/FAIT sed s/defines.h/freeyams_defines.h/ <$(YAMS_SRCDIR)/yamslib.h >../include/freeyamslib.h cp $(YAMS_SRCDIR)/defines.h ../include/freeyams_defines.h -mkdir ../lib cp $(YAMS_OBJDIR)/libyams.a ../lib/libfreeyams.a # FFCS - WHERE is made to depend on FAIT otherwise it may be built in parallel and not be activated because FAIT is not # there yet WHERE: $(SRCDIR)/FAIT echo freeyams LD -L@DIR@/lib -lfreeyams >$(SRCDIR)/$(INSTALL)/lib/WHERE.freeyams ; echo freeyams INCLUDE -I@DIR@/include>> $(SRCDIR)/$(INSTALL)/lib/WHERE.freeyams ; $(SRCDIR)/PATCH: $(PACKAGE) -mkdir -p $(SRCDIR) cd $(SRCDIR); tar xzf ../$(PACKAGE) -mkdir $(YAMS_SRCDIR) cp $(YAMS_SRCDIRNOLIB)/*.c $(YAMS_SRCDIRNOLIB)/*.h $(YAMS_SRCDIR) rm $(YAMS_SRCDIR)/memory.c cp $(YAMS_SRCDIRNOLIB)/compil.date $(YAMS_SRCDIR) cd $(YAMS_SRCDIR) && \ patch -p2 < ../../freeyams$(yams_VERSION).patch && \ patch -p2 < ../../freeyams$(yams_VERSION)-return-values.patch mv $(YAMS_SRCDIR)/yams.c $(YAMS_SRCDIR)/.. touch $(SRCDIR)/PATCH $(PACKAGE): ../getall -o freeYams -a # FFCS: only run make clean if cd to SRCDIR worked, otherwise this is infinite loop. 
clean-local: -rm FAIRE FAIT $(SRCDIR)/FAIT -cd $(YAMS_DIR) && $(MAKE) -C $(YAMS_DIR) clean -rm $(YAMS_OBJDIR)/libyams.a # FFCS -simplifying all paths clean: clean-local -rm ff-flags -rm $(SRCDIR)/$(INSTALL)/lib/libfreeyams.a -rm $(SRCDIR)/$(INSTALL)/include/*freeyams*.h -rm $(SRCDIR)/$(INSTALL)/lib/WHERE.freeyams -rm ../lib/libfreeyams.a -rm ../include/*freeyams*.h -rm ../lib/WHERE.freeyams -rm -rf $(YAMS_DIR) -rm -rf $(SRCDIR) #FH -rm $(PACKAGE) -rm FAIT FAIRE ff-flags: ../Makefile Makefile grep 'abs_top_builddir *=' ../Makefile > ff-flags grep 'CC *=' ../Makefile >> ff-flags grep 'CFLAGS *=' ../Makefile >> ff-flags grep 'LDFLAGS *=' ../Makefile >> ff-flags grep 'AR *=' ../Makefile >> ff-flags grep 'ARFLAGS *=' ../Makefile >> ff-flags grep 'RANLIB *=' ../Makefile >> ff-flags grep 'yams_VERSION *=' ./Makefile >> ff-flags grep 'WGET *=' ../Makefile >> ff-flags grep 'LIBS *=' ../Makefile >> ff-flags .PHONY: $(SRCDIR)/$(INSTALL) FreeFem-sources-4.9/3rdparty/yams/freeyams.2012.02.05-return-values.patch000664 000000 000000 00000000346 14037356732 025572 0ustar00rootroot000000 000000 --- freeyams.2012.02.05/sourceslib/debug.c.orig 2013-01-27 14:24:38.489115910 +0000 +++ freeyams.2012.02.05/sourceslib/debug.c 2013-01-27 14:25:24.156118592 +0000 @@ -178,7 +178,7 @@ } } } - + return 0; } FreeFem-sources-4.9/3rdparty/yams/freeyams.2012.02.05.patch000664 000000 000000 00000033401 14037356732 022756 0ustar00rootroot000000 000000 diff -ru freeyams.2011.09.23/sourceslib/blinde.c freeyams.2011.09.23-orig/sourceslib/blinde.c --- freeyams.2011.09.23/sourceslib/blinde.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/blinde.c 2011-11-16 09:59:50.000000000 +0100 @@ -36,7 +36,7 @@ ida = id[0]*10 + id[1]; /*printf("date = %d %d %d\n",iy,im,ida); */ if ( YMD(iy,im,ida) > YMD(MAX_YEAR,MAX_MONTH,MAX_DAY) ) { - fprintf(stdout," -- YAMS (LJLL), Version %s (%s)\n",VERSION,RELEASE); + fprintf(stdout," -- YAMS (LJLL), Version %s (%s)\n",YAMS_VERSION,RELEASE); 
fprintf(stdout," Copyright (C) LJLL, 1999-2006.\n\n"); fprintf(stdout," ## Expiration date reached. Sorry.\n"); fprintf(stdout," Please contact the author.\n"); diff -ru freeyams.2011.09.23/sourceslib/defines.h freeyams.2011.09.23-orig/sourceslib/defines.h --- freeyams.2011.09.23/sourceslib/defines.h 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/defines.h 2011-11-16 09:59:50.000000000 +0100 @@ -18,7 +18,7 @@ /* current version */ #define DEFAULT_FILE "DEFAULT.yams" #define RELEASE "oct, 2006" -#define VERSION "2.4 b" +#define YAMS_VERSION "2.4 b" #define COPYRIGHT "Copyright (C) LJLL, 1999-2006" #define REL 1 diff -ru freeyams.2011.09.23/sourceslib/extern.h freeyams.2011.09.23-orig/sourceslib/extern.h --- freeyams.2011.09.23/sourceslib/extern.h 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/extern.h 2011-11-16 09:59:50.000000000 +0100 @@ -4,6 +4,7 @@ #ifndef __YAMS +#ifndef __YAMSLIB extern Error yerr; extern Info info; extern Options opts; @@ -14,7 +15,7 @@ extern short imprim; extern ubyte ddebug; #endif - +#endif #ifdef __cplusplus } diff -ru freeyams.2011.09.23/sourceslib/inout.c freeyams.2011.09.23-orig/sourceslib/inout.c --- freeyams.2011.09.23/sourceslib/inout.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/inout.c 2011-11-16 09:59:50.000000000 +0100 @@ -581,7 +581,7 @@ ppt->tag |= M_UNUSED; ppt->flag = ppt->color = 0; } - + printf("sm->connex %d\n",sm->connex); if ( sm->connex > 0 ) { for (k=1; k<=sm->ne; k++) { pt1 = &sm->tria[k]; @@ -679,8 +679,8 @@ natv++; if ( ppt->tag & M_CORNER ) tatv++; } - if ( !gs->new ) gs->new = ++nn; - if ( !gt->new ) gt->new = ++nt; + if ( !gs->newnum ) gs->newnum = ++nn; + if ( !gt->newnum ) gt->newnum = ++nt; if ( !pt1->edg[i] && pt1->tag[i] == M_NOTAG ) continue; else if ( pt1->adj[i] && (k > pt1->adj[i]) ) continue; nedge++; @@ -864,12 +864,12 @@ nn = nbl = 0; for (k=1; k<=sm->nvmax; k++) { gs = &sm->geom[k]; - if ( gs->new > 0 ) { + if 
( gs->newnum > 0 ) { iadr = nbl * 3; tabf[iadr+0] = gs->vn[0]; tabf[iadr+1] = gs->vn[1]; tabf[iadr+2] = gs->vn[2]; - gs->new = ++nn; + gs->newnum = ++nn; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_Normals, nbl, tabf); @@ -890,7 +890,7 @@ iadr = nbl * 2; gs = &sm->geom[pt1->vn[i]]; tabi[iadr+0] = ppt->tmp; - tabi[iadr+1] = gs->new; + tabi[iadr+1] = gs->newnum; ppt->flag = 1; ++nbl; if ( nbl == NMAX ) { @@ -916,7 +916,7 @@ iadr = nbl * 3; tabi[iadr+0] = nn; tabi[iadr+1] = i+1; - tabi[iadr+2] = gs->new; + tabi[iadr+2] = gs->newnum; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_NormalAtTriangleVertices, nbl, tabi); @@ -931,12 +931,12 @@ nt = nbl = 0; for (k=1; k<=sm->ntmax; k++) { gt = &sm->tgte[k]; - if ( gt->new > 0 ) { + if ( gt->newnum > 0 ) { iadr = nbl * 3; tabf[iadr+0] = gt->t[0]; tabf[iadr+1] = gt->t[1]; tabf[iadr+2] = gt->t[2]; - gt->new = ++nt; + gt->newnum = ++nt; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_Tangents, nbl, tabf); @@ -956,7 +956,7 @@ iadr = nbl * 2; gt = &sm->tgte[ppt->tge]; tabi[iadr+0] = ppt->tmp; - tabi[iadr+1] = gt->new; + tabi[iadr+1] = gt->newnum; ppt->flag = 1; ++nbl; if ( nbl == NMAX ) { @@ -1105,8 +1105,8 @@ natv++; if ( ppt->tag & M_CORNER ) tatv++; } - if ( !gs->new ) gs->new = ++nn; - if ( !gt->new ) gt->new = ++nq; + if ( !gs->newnum ) gs->newnum = ++nn; + if ( !gt->newnum ) gt->newnum = ++nq; if ( !pq1->edg[i] && pq1->tag[i] == M_NOTAG ) continue; else if ( pq1->adj[i] && (k > pq1->adj[i]) ) continue; nedge++; @@ -1263,12 +1263,12 @@ nn = nbl = 0; for (k=1; k<=sm->nvmax; k++) { gs = &sm->geom[k]; - if ( gs->new > 0 ) { + if ( gs->newnum > 0 ) { iadr = nbl * 3; tabf[iadr+0] = gs->vn[0]; tabf[iadr+1] = gs->vn[1]; tabf[iadr+2] = gs->vn[2]; - gs->new = ++nn; + gs->newnum = ++nn; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_Normals, nbl, tabf); @@ -1289,7 +1289,7 @@ iadr = nbl * 2; gs = &sm->geom[pq1->vn[i]]; tabi[iadr+0] = ppt->tmp; - tabi[iadr+1] = gs->new; + tabi[iadr+1] = gs->newnum; ppt->flag = 1; ++nbl; 
if ( nbl == NMAX ) { @@ -1315,7 +1315,7 @@ iadr = nbl * 3; tabi[iadr+0] = nn; tabi[iadr+1] = i+1; - tabi[iadr+2] = gs->new; + tabi[iadr+2] = gs->newnum; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_NormalAtQuadrilateralVertices, nbl, tabi); @@ -1330,12 +1330,12 @@ nq = nbl = 0; for (k=1; k<=sm->ntmax; k++) { gt = &sm->tgte[k]; - if ( gt->new > 0 ) { + if ( gt->newnum > 0 ) { iadr = nbl * 3; tabf[iadr+0] = gt->t[0]; tabf[iadr+1] = gt->t[1]; tabf[iadr+2] = gt->t[2]; - gt->new = ++nq; + gt->newnum = ++nq; ++nbl; if ( nbl == NMAX ) { LM_write_field(&ms, LM_Tangents, nbl, tabf); @@ -1355,7 +1355,7 @@ iadr = nbl * 2; gt = &sm->tgte[ppt->tge]; tabi[iadr+0] = ppt->tmp; - tabi[iadr+1] = gt->new; + tabi[iadr+1] = gt->newnum; ppt->flag = 1; ++nbl; if ( nbl == NMAX ) { @@ -1524,8 +1524,8 @@ /* adjust sizes */ if ( opts.hmin < 0.0 ) opts.hmin = max(opts.hmin,hmin); - if ( opts.hmax < 0.0 ) - opts.hmax = max(opts.hmax,hmax); + if ( opts.hmax < 0.0 ) + opts.hmax = max(opts.hmax,hmax); E_pop(); return(1); Only in freeyams.2011.09.23-orig/sourceslib: inout.c.orig diff -ru freeyams.2011.09.23/sourceslib/parsop.c freeyams.2011.09.23-orig/sourceslib/parsop.c --- freeyams.2011.09.23/sourceslib/parsop.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/parsop.c 2011-11-16 09:59:50.000000000 +0100 @@ -16,7 +16,7 @@ } /* create standard parameter file */ - fprintf(out,"# Generated by YAMS %s\n",VERSION); + fprintf(out,"# Generated by YAMS %s\n",YAMS_VERSION); fprintf(out,"# Uncomment next if absolute unit desired\n"); fprintf(out,"#Absolute\n"); Only in freeyams.2011.09.23-orig/sourceslib: parsop.c.orig diff -ru freeyams.2011.09.23/sourceslib/primsg.c freeyams.2011.09.23-orig/sourceslib/primsg.c --- freeyams.2011.09.23/sourceslib/primsg.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/primsg.c 2011-11-16 09:59:50.000000000 +0100 @@ -18,10 +18,10 @@ case 0000: if ( opts.ctrl & ISO ) fprintf(stdout,"\n %s\n MODULE YAMS-LJLL : %s\n 
%s\n", - STR,VERSION,STR); + STR,YAMS_VERSION,STR); else fprintf(stdout,"\n %s\n MODULE YAMS-LJLL (ANISO) : %s\n %s\n", - STR,VERSION,STR); + STR,YAMS_VERSION,STR); return; case 0001: fprintf(stdout,"\n %s\n END OF MODULE YAMS\n %s\n\n",STR,STR); diff -ru freeyams.2011.09.23/sourceslib/surf.h freeyams.2011.09.23-orig/sourceslib/surf.h --- freeyams.2011.09.23/sourceslib/surf.h 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/surf.h 2011-11-16 09:59:50.000000000 +0100 @@ -64,13 +64,13 @@ typedef struct geomsupp { float vn[3]; /* array of vertex normals */ float gap; /* local gap value */ - int new; /* pointer to new number */ + int newnum; /* pointer to new number */ } GeomSupp; typedef GeomSupp * pGeomSupp; typedef struct geomtge { float t[3]; - int new; + int newnum; } Geomtge; typedef Geomtge * pGeomtge; diff -ru freeyams.2011.09.23/sourceslib/yams.c freeyams.2011.09.23-orig/sourceslib/yams.c --- freeyams.2011.09.23/sourceslib/yams.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/yams.c 2011-11-16 09:59:50.000000000 +0100 @@ -24,6 +24,7 @@ ubyte ddebug; ubyte ecp; +long verbosity; static void excfun(int sigid) { fprintf(stdout,"\n Unexpected error:"); fflush(stdout); Only in freeyams.2011.09.23-orig/sourceslib: yams.c.orig diff -ru freeyams.2011.09.23/sourceslib/yams.h freeyams.2011.09.23-orig/sourceslib/yams.h --- freeyams.2011.09.23/sourceslib/yams.h 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/yams.h 2011-11-16 09:59:50.000000000 +0100 @@ -6,12 +6,17 @@ #include #include "chrono.h" -#include "edge.h" -#include "error.h" -#include "hash.h" -#include "stack.h" -#include "info.h" #include "memory.h" -#include "option.h" -#include "surf.h" + +//#include "edge.h" +//#include "error.h" +//#include "hash.h" +//#include "stack.h" +//#include "info.h" +//#include "memory.h" +//#include "option.h" +//#include "surf.h" + +#include "yamslib.h" +#include "yamslib_internal.h" #include 
"global.h" diff -ru freeyams.2011.09.23/sourceslib/yams0.c freeyams.2011.09.23-orig/sourceslib/yams0.c --- freeyams.2011.09.23/sourceslib/yams0.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/yams0.c 2011-11-16 09:59:50.000000000 +0100 @@ -24,7 +24,7 @@ if ( imprim ) { if ( ecp ) fprintf(stdout," ## Version: Ecole Centrale de Paris ##\n\n"); - fprintf(stdout," -- YAMS (LJLL) Version %s (%s)\n",VERSION,RELEASE); + fprintf(stdout," -- YAMS (LJLL) Version %s (%s)\n",YAMS_VERSION,RELEASE); fprintf(stdout," %s.\n",COPYRIGHT); fprintf(stdout," compiled: %s.\n\n",COMPIL); } diff -ru freeyams.2011.09.23/sourceslib/zaldy1.c freeyams.2011.09.23-orig/sourceslib/zaldy1.c --- freeyams.2011.09.23/sourceslib/zaldy1.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/zaldy1.c 2011-11-16 09:59:50.000000000 +0100 @@ -45,13 +45,20 @@ } } else { + int bytes = sizeof(Point) \ + + sizeof(Metric) \ + + 2 * sizeof(Triangle) \ + + 1.5 * sizeof(GeomSupp) \ + + 0.2 * sizeof(Geomtge) \ + + 3.7 * sizeof(Hashtable); + /* int bytes = sizeof(struct spoint) \ + sizeof(struct metric) \ + 2 * sizeof(struct striangle) \ + 1.5 * sizeof(struct geomsupp) \ + 0.2 * sizeof(struct geomtge) \ + 3.7 * sizeof(struct shashtab); - + */ sm->npmax = (int)((double)memory / bytes * million); sm->npmax = max(sm->npmax,npmax); sm->nemax = max(2 * sm->npmax,nemax); diff -ru freeyams.2011.09.23/sourceslib/zaldy3.c freeyams.2011.09.23-orig/sourceslib/zaldy3.c --- freeyams.2011.09.23/sourceslib/zaldy3.c 2011-11-16 11:06:08.000000000 +0100 +++ freeyams.2011.09.23-orig/sourceslib/zaldy3.c 2011-11-16 09:59:50.000000000 +0100 @@ -34,7 +34,7 @@ for (j=sm->nvmax; jgeom[j].vn[0] = sm->geom[j].vn[1] = sm->geom[j].vn[2] = 0.; sm->geom[j].gap = 1.; - sm->geom[j].new = 0; + sm->geom[j].newnum = 0; } sm->nvmax = nvsize; yerr.inderr[0] = sm->nvmax; diff -u freeyams.2012.02.05/sourceslib/outqua_a.c freeyams.2011.09.23-orig/sourceslib/outqua_a.c --- 
freeyams.2012.02.05/sourceslib/outqua_a.c 2009-05-13 21:00:03.000000000 +0200 +++ freeyams.2011.09.23-orig/sourceslib/outqua_a.c 2012-09-04 08:51:04.000000000 +0200 @@ -219,7 +219,7 @@ pt = &mesh->tria[iel]; fprintf(stdout," WORST ELEMENT %d (%d) %d %d %d\n",iel,ielr,pt->v[0],pt->v[1],pt->v[2]); - if ( abs(imprim) < 5 ) return; + if ( abs(imprim) < 5 ) return (1) ; fprintf(stdout,"\n HISTOGRAMM\n"); imax = min(9,(int)(10.*rapmax)); --- freeyams.2012.02.05/sourceslib/eigenv.c 2016-01-28 11:47:05.000000000 +0100 +++ freeyams.2012.02.05-orig/sources/eigenv.c 2012-02-04 12:58:11.000000000 +0100 @@ -3,7 +3,7 @@ #include /* seeking 1.e-05 accuracy */ +#define EPSD 1.e-12 -#define EPSD 1.e-15 #define EPSD2 1.e-200 #define EPS6 5.e-06 #define EPS 1.e-06 --- freeyams.2012.02.05/sourceslib/sproto.h 2021-02-17 11:09:34.000000000 +0100 +++ freeyams.2012.02.05-orig/sourceslib/sproto.h 2009-05-13 20:56:39.000000000 +0200 @@ -161,3 +161,19 @@ int outmsh_q(pSurfMesh ,char *); int updtop_q(pSurfMesh ); int qualfa_q(float *,float *,float *,float *,float *,float *); + +int hippop(pTriangle tria); +int updqua_a(pSurfMesh sm); +int loptia3(pSurfMesh sm,ubyte fechk,float declic) ; +int angdef(pSurfMesh sm); +int boulep2(pSurfMesh sm,int depart,int i,Ball *b); +int simred_a(double m1[6],double m2[6],double m[6]); +int typelt(pSurfMesh sm,int k); +int taspoi(pSurfMesh sm); +int yams6(pSurfMesh sm); +int yamsq(pSurfMesh sm); +int yams22(pSurfMesh sm); +int optedg(pSurfMesh sm) ; +void primem(int np); +#include "eigenv.h" +#include --- freeyams.2012.02.05/sourceslib/error.c 2006-07-12 18:32:35.000000000 +0200 +++ freeyams.2012.02.05-orig/sourceslib/error.c 2021-02-17 10:58:12.000000000 +0100 @@ -10,7 +10,7 @@ #endif #include -/*#include */ +#include #include "string.h" #define MAXLEV 25 FreeFem-sources-4.9/3rdparty/yams/makefile-yams.inc000664 000000 000000 00000002522 14037356732 022313 0ustar00rootroot000000 000000 include ../ff-flags # working dirs YAMS2DIR 
=$(abs_top_builddir)/3rdparty/yams/freeyams$(yams_VERSION) EXEDIR = $(abs_top_builddir)/3rdparty/bin SRCDIR = $(YAMS2DIR)/sourceslib OBJDIR = $(YAMS2DIR)/objects ARCDIR = $(YAMS2DIR)/archives DIRDIR = $(EXEDIR) $(OBJDIR) $(ARCDIR) INCDIR = -I$(YAMS2DIR)/sourcesnew -I$(abs_top_builddir)/src/libMesh/ LDLDIR = -L$(abs_top_builddir)/3rdparty/lib -lMesh VPATH = $(SRCDIR) # objects list src = $(wildcard $(SRCDIR)/*.c) header = $(wildcard $(SRCDIR)/*.h) objs = $(patsubst $(SRCDIR)%,$(OBJDIR)%,$(src:.c=.o)) prog = yams2 lib = $(OBJDIR)/libyams.a #.SILENT: $(OBJDIR)/%.o: $(SRCDIR)/%.c $(CC) $(OPT64) $(INCDIR) $(CFLAGS) -c $< -o $@ $(EXEDIR)/$(prog):$(DIRDIR) $(objs) echo "#define COMPIL " '"' `date` '"' > $(SRCDIR)/compil.date $(CC) -c $(CFLAGS) $(INCDIR) $(SRCDIR)/yams0.c -o $(OBJDIR)/yams0.o -I../../../src/libMesh/ $(CC) -c $(CFLAGS) $(INCDIR) $(SRCDIR)/../yams.c -I$(SRCDIR) -o $(OBJDIR)/yams.o -I../../../src/libMesh/ $(AR) $(ARFLAGS) $(lib) $(objs) $(CC) $(LDFLAGS) $(OPT64) $(LDLDIR) $(OBJDIR)/yams.o -o $@ $(lib) -lm -L../../../src/libMesh/ -lMesh $(LIBS) $(RANLIB) $(lib) $(objs):$(header) $(DIRDIR): @[ -d $@ ] || mkdir $@ clean: -rm $(objs) $(EXEDIR)/$(prog) tar:$(DIRDIR) tar czf $(ARCDIR)/$(prog).`date +"%Y.%m.%d"`.tgz sources makefile target: $(EXEDIR)/$(prog) FreeFem-sources-4.9/3rdparty/yams/yamslib.c000664 000000 000000 00000035444 14037356732 020711 0ustar00rootroot000000 000000 #define __YAMSLIB #include #include #include #include #include #include "yams.h" #include "defines.h" #include "sproto.h" extern long verbosity; /* globals (see globals.h) */ Error yerr; Info info; Options opts; pHashtable hash; mytime ctim[TIMEMAX]; long nhmax,hnext,hsize; int out,idir[5] = {0,1,2,0,1},idirq[7] = {0,1,2,3,0,1,2}; short imprim; ubyte ddebug; ubyte ecp; static void yams_excfun(int sigid) { switch(sigid){ case SIGFPE: fprintf(stderr," ## FP EXCEPTION. STOP\n"); break; case SIGILL: fprintf(stderr," ## ILLEGAL INSTRUCTION. 
STOP\n"); break; case SIGSEGV: fprintf(stderr," ## SEGMENTATION FAULT. STOP\n"); break; case SIGABRT: case SIGTERM: case SIGINT: fprintf(stderr," ## ABNORMAL END. STOP\n"); break; } out = 0; exit(1); } static void yams_endcod() { chrono(OFF,&ctim[0]); chrono(OFF,&ctim[1]); E_dump(); if ( out <= 0 ) { prierr(WAR,8002); fprintf(stdout,"\n ELAPSED TIME %.2f SEC.\n",gttime(ctim[0])); } } static void yams_inival(){ /* initialize data */ E_put("inival"); info.dmin = (double)FLT_MAX; info.dmax = (double)FLT_MIN; info.xmin = info.ymin = info.zmin = (double)FLT_MAX; info.xmax = info.ymax = info.zmax = (double)-FLT_MAX/2.; info.nedg = info.nrid = info.ndang = 0; info.ncoi = info.nreq = info.nvus = 0; info.cc = info.flip = 0; info.nulp = info.nulf = info.nuln = 0; info.qpire = 0; info.manifold = TRUE; /* set default values for options */ opts.hmin = -2.0; opts.hmax = -2.0; opts.shock = 1.3; /* default mesh gradation */ opts.eps = 0.01; /* geometric approximation */ opts.iso = 0.0; opts.declic = 1.0 / BETAC; opts.lambda = -1.0; opts.mu = -1.0; opts.ridge = cos(RIDG*M_PI/180.); opts.geom = cos(GEOM*M_PI/180.); opts.walton = COS45DEG; /* Walton limitation */ opts.bande = -2; /* default = 1 unit */ opts.degrad = QUALCOE; /* quality degradation */ opts.ctrl = REL | ISO; opts.iter = -1; opts.check = 1; opts.alpha = sqrt(opts.eps*(2.0-opts.eps)); opts.gap = 1 - opts.eps; opts.minnp = -1; opts.alpha = sqrt(opts.eps*(2.0-opts.eps)); opts.gap = 1.0 - opts.eps; E_pop(); } void yams_printval() { if(verbosity<1) return; /* set default values for options */ printf("-- freeyams options value \n"); printf(" - hmin %f\n",opts.hmin); printf(" - hmax %f\n",opts.hmax); printf(" - kmin %f\n",opts.kmin); printf(" - kmax %f\n",opts.kmax); printf(" - eps %f\n",opts.eps); printf(" - iso %f\n",opts.iso); printf(" - alpha %f\n", opts.alpha ); printf(" - gap %f\n", opts.gap ); printf(" - degrad %f\n", opts.degrad); printf(" - ridge %f\n", opts.ridge); printf(" - geom %f\n", opts.geom); printf(" - shock 
/*
 * yams_main: library entry point that runs the yams surface remeshing
 * pipeline on an already populated mesh.
 *
 * Parameters
 *   sm         mesh to process; its point/tria/geom arrays and the
 *              npfixe/nefixe/nvfixe counts are read here, and sm->type,
 *              sm->connex, sm->infile and sm->outfile are overwritten.
 *   intopt     integer options, indexed as described in the table inside
 *              the body.
 *   fopt       floating-point options, indexed as described in the table
 *              inside the body.
 *   infondang  NOTE(review): assigned from info.ndang just before returning,
 *   infocc     and from info.cc respectively, but both are passed BY VALUE,
 *              so the caller never sees these assignments.  Callers that
 *              need the values presumably have to read the global `info`
 *              themselves -- confirm.
 *
 * Returns 0 when the end of the function is reached.  Any stage that
 * reports failure terminates the whole process through exit(1).
 *
 * Side effects visible in this function: installs yams_excfun as handler
 * for six signals, resets the globals yerr/info/opts/out/imprim/ecp/ddebug,
 * and releases the global `hash` table before returning.
 */
int yams_main(pSurfMesh sm, int intopt[23], double fopt[14], int infondang, int infocc ) {
  hash=NULL;
  float declic;
  float ridge=RIDG;
  int option,absopt,ret,memory,corr;
  int choix;
  short phase;
  int k;

  /* trap exceptions */
  signal(SIGABRT,yams_excfun);
  signal(SIGFPE,yams_excfun);
  signal(SIGILL,yams_excfun);
  signal(SIGSEGV,yams_excfun);
  signal(SIGTERM,yams_excfun);
  signal(SIGINT,yams_excfun);
  //atexit(yams_endcod);

  /* init time and calls */
  tminit(ctim,TIMEMAX);
  chrono(ON,&ctim[0]);

  /* assign default values */
  yerr.lerror = FALSE;
  yerr.coderr = 0;
  phase = 0;
  ret = TRUE;
  out = -1;
  memory = -1;
  imprim = -99;
  option = -99;
  choix = option;
  ddebug = FALSE;
  declic = 0.009;
  ecp = 0;

  // assign options and surface mesh
  /* setting defaults */
  sm->infile = NULL;
  sm->outfile = NULL;
  sm->type = M_SMOOTH | M_QUERY | M_DETECT | M_BINARY | M_OUTPUT;
  yams_inival();

  /* vertices are stored 1-based: index 0 is not visited */
  for (k=1; k<=sm->npfixe; k++) {
    pPoint ppt = &sm->point[k];
    /* find extrema coordinates */
    if ( ppt->c[0] < info.xmin ) info.xmin = ppt->c[0];
    if ( ppt->c[0] > info.xmax ) info.xmax = ppt->c[0];
    if ( ppt->c[1] < info.ymin ) info.ymin = ppt->c[1];
    if ( ppt->c[1] > info.ymax ) info.ymax = ppt->c[1];
    if ( ppt->c[2] < info.zmin ) info.zmin = ppt->c[2];
    if ( ppt->c[2] > info.zmax ) info.zmax = ppt->c[2];
  }

  // info.nuln and info.nulp
  info.nuln = 0;
  for (k=1; k<=sm->nvfixe; k++) {
    pGeomSupp g0 = &sm->geom[ k ];
    double dd = g0->vn[0]*g0->vn[0] + g0->vn[1]*g0->vn[1] + g0->vn[2]*g0->vn[2];
    /* NOTE(review): dd is a sum of squares, so `dd < 0.0` cannot hold for
       finite inputs and info.nuln stays 0.  The intent was presumably to
       count null normals (dd == 0) -- confirm before changing. */
    if ( dd < 0.0 ) info.nuln++;
  }

  info.nulp = 0;
  /* mark used vertices */
  for (k=1; k<=sm->nefixe; k++) {
    pTriangle pt1 = &sm->tria[k];
    int i;
    /* triangles whose first vertex index is 0 are skipped */
    if ( pt1->v[0] )
      for (i=0; i<3; i++) {
        pPoint ppt = &sm->point[pt1->v[i]];
        ppt->tag &= ~M_UNUSED;
      }
  }

  /* count unused vertices */
  for (k=1; k<=sm->npfixe; k++) {
    pPoint ppt;
    ppt = &sm->point[k];
    if ( ppt->tag & M_UNUSED ) info.nulp++;
  }

  /* get decimation parameters */
  opts.noreff = 0;
  opts.ffem = 1;
  opts.ptmult = 0;

  /*
     intopt :
       0   anisotropy
       1   ecp
       2   extended output file
       3   FE correction
       4   formatted (ascii) output file
       5   save metric file
       6   msh2
       7   split multiple connected points
       8   memory
       9   connected component
       10  vrml
       11  imprim
       12  nm  : create point on straight edge (no mapping)
       13  nc  : no validity check during smoothing (opt. 9)
       14  np  : specify number of points desired
       15  nit : number of iterations
       16  nq  : output quads
       17  nr  : no ridge detection
       18  ns  : no point smoothing
       19  no  : no output file                (not read below)
       20  ref : ignore face references
       -- added for the yams parameter file --
       21  absolute : opts.ctrl &= ~REL;
       22  set optim option

     fopt :
       0   iso
       1   eps
       2   (no value)
       3   opts.lambda
       4   opts.mu
       5   (no value)
       6   hgrad :: opts.shock
       7   hmin  :: opts.hmin
       8   hmax  :: opts.hmax
       -- added for the yams parameter file --
       9   tolerance :: opts.bande
       10  degrad    :: opts.degrad
       11  declic    :: opts.declic            (not read below)
       12  walton    :: opts.walton = cos(dummy/180.0*M_PI);  (not read below)
       13  ridge     :: opts.ridge
  */
  /* `^=` toggles ISO, which yams_inival() left set */
  if( intopt[0] == 1) opts.ctrl ^= ISO;
  opts.iso = fopt[0];
  if( intopt[1] == 1 ) {
    ecp = 1;
    sm->type &= ~M_BINARY;
  }
  opts.eps = fopt[1];
  if( intopt[2] == 1 ) sm->type |= M_EXTEND;
  if( intopt[3] == 1 ) opts.ffem = 0;
  if( intopt[4] == 1 ) sm->type &= ~M_BINARY;
  if( intopt[5] == 1 ) sm->type |= M_METRIC;
  if( intopt[6] == 1 ){
    sm->type |= M_MSH2;
    sm->type &= ~M_BINARY;
    sm->type &= ~M_EXTEND;
  }
  if( intopt[7] == 1 ){
    opts.ptmult = 1;
  }
  memory = intopt[8];
  sm->connex = intopt[9]; // to be initialised to -1 by default
  if( intopt[10] == 1 ){
    sm->type |= M_VRML;
    sm->type &= ~M_BINARY;
    sm->type &= ~M_EXTEND;
  }
  imprim = intopt[11];

  // parsar -n
  if( intopt[12] == 1 ) sm->type &= ~M_QUERY;
  if( intopt[13] == 1 ) opts.check = 0;
  opts.minnp = intopt[14];
  opts.iter = intopt[15];
  if(verbosity>9) printf(" type = %d %d \n", sm->type,intopt[17]);
  if( intopt[16] == 1 ) sm->type |= M_QUADS;
  if( intopt[17] == 1 ) sm->type &= ~M_DETECT;
  if( intopt[18] == 1 ) sm->type &= ~M_SMOOTH;
  //if( intopt[19] == 1 ) sm->type &= ~M_OUTPUT;
  if(verbosity>9) printf(" type = %d %d \n", sm->type,intopt[17]);
  /* M_OUTPUT is cleared unconditionally, whatever intopt[19] says */
  sm->type &= ~M_OUTPUT;

  // parsar -r
  if( intopt[20] == 1 ) opts.noreff = 1;

  // parsar -l
  opts.lambda = fopt[3];
  opts.mu = fopt[4];

  // parsar -O
  option = intopt[22];
  choix = intopt[22];

  // parsar -h
  opts.shock = fopt[6];
  opts.hmin = fopt[7];
  opts.hmax = fopt[8];
  // end of parsar

  opts.choix = option;

  // yams0
  /* check option */
  if ( (option) > 0 )
    sm->type |= M_ENRICH;
  else
    memory = -1;
  /*
  if ( (abs(*choix) > 4) && !(sm->type & M_QUADS) )
    sm->type &= ~M_SMOOTH;
  */
  /* ISO is switched back on unless |option| is 1 or 6 */
  if ( !(opts.ctrl & ISO) && (abs(option) != 1) && (abs(option) != 6) )
    opts.ctrl ^= ISO;

  if ( imprim ) fprintf(stdout," -- INPUT DATA\n");
  chrono(ON,&ctim[5]);

  opts.bande = fopt[9];
  opts.degrad = fopt[10];
  if( intopt[21] == 1) opts.ctrl &= ~REL;

  // parsop check
  /* check parameters consistency */
  ridge = fopt[13];
  /* fopt[13] is an angle in degrees; a negative value or a cleared
     M_DETECT flag stores -1.0 instead of its cosine */
  if ( (ridge < 0.0) || !(sm->type & M_DETECT) )
    opts.ridge = -1.0;
  else
    opts.ridge = cos(ridge*M_PI / 180.0);

  /* clamp opts.degrad to [0.001, 1.0] */
  opts.degrad = min(opts.degrad,1.0);
  opts.degrad = max(opts.degrad,0.001);

  /* bound values */
  opts.alpha = sqrt(opts.eps * (2.-opts.eps));
  opts.gap = 1.0 - opts.eps;
  if ( opts.walton < COS45DEG ) opts.walton = COS45DEG;

  // end of mesh and option assignment
  //int bb = loadSol(sm,sm->infile);
  //sm->nmfixe = bb ? sm->npfixe : 0;

  absopt = abs(option);
  chrono(OFF,&ctim[5]);
  if ( imprim ) {
    fprintf(stdout," NUMBER OF GIVEN VERTICES %8d\n",sm->npfixe);
    fprintf(stdout," NUMBER OF GIVEN TRIANGLES %8d\n",sm->nefixe);
    fprintf(stdout," -- DATA READING COMPLETED. %.2f sec.\n",
            gttime(ctim[5]));
    if ( imprim < -4 ) priopt(choix);
  }
  if ( imprim ) yams_printval();

  /* set adjacencies */
  chrono(ON,&ctim[1]);
  chrono(ON,&ctim[2]);
  ret = tabvo2(sm,declic);
  chrono(OFF,&ctim[2]);
  if ( !ret ) {
    prierr(ERR,yerr.coderr);
    exit(1);
  }

  /* print surface quality */
  if ( imprim ) {
    if ( opts.ctrl & ISO )
      priqua(sm);
    else if ( sm->metric )
      priqua_a(sm);
    primsg(0000);
    if ( abs(imprim) > 1 ) {
      yerr.inderr[0] = sm->npmax;
      yerr.inderr[1] = sm->nemax;
      primsg(0002);
    }
  }

  /* pre-processing stage */
  yerr.inderr[0] = ++phase;
  out = 0;
  if ( abs(imprim) > 1 ) primsg(1000);

  chrono(ON,&ctim[2]);
  corr = sm->type & M_DETECT ? 1 : 0;
  if ( !setvoi(sm,corr) ) exit(1);
  if ( !ptmult(sm) ) exit(1);
  if ( absopt < 6 ) {
    /* NOTE(review): the 0.038 store is dead, it is overwritten on the
       next line before any use */
    declic = 0.038;
    declic = opts.ctrl & ISO ? 1e-6 : 1.e-8;
    if ( !sident(sm,corr) ) exit(1);
    if ( !delnul(sm,declic) ) exit(1);
    if ( !optedg(sm) ) exit(1);
  }
  if ( sm->type & M_DETECT && !sident(sm,1) ) exit(1);

  /* smoothing */
  if ( absopt == 9 ) {
    if ( !noshrk(sm,opts.check) ) exit(1);
    //if ( !hilbert(sm) ) exit(1);
    //if ( !denois(sm) ) exit(1);
  }
  else {
    /* a negative iteration count (the yams_inival default) becomes 5 */
    if ( opts.iter < 0 ) opts.iter = 5;
    if ( absopt < 5 ) {
      if ( !norpoi(sm,0,corr) ) exit(1);
      if ( !tgepoi(sm,0,corr) ) exit(1);
    }
  }
  chrono(OFF,&ctim[2]);
  yerr.inderr[0] = phase;
  yerr.cooerr[0] = gttime(ctim[2]);
  if ( abs(imprim) > 1 ) {
    primsg(1001);
    if ( imprim < -4 ) {
      bilan(sm);
      prigap(sm);
    }
  }

  if(verbosity>9) {
    printf("absopt= %d\n", absopt);
    printf("imprim= %d\n", imprim);
    printf("sm->np %d\n", sm->np);
    printf("sm->dim %d\n", sm->dim);
  }

  /* surface remeshing */
  yerr.inderr[0] = ++phase;
  if ( absopt && absopt <= 6 ) {
    if ( abs(imprim) > 1 ) primsg(1000);
    chrono(ON,&ctim[4]);

    /* geometry enrichment */
    if ( option > 0 ) {
      chrono(ON,&ctim[6]);
      if ( option == 4 )
        ret = yams4(sm);
      else if ( option == 6 )
        ret = yams6(sm);
      else
        ret = yams3(sm);
      chrono(OFF,&ctim[6]);
      if ( !ret ) exit(1);
    }

    /* surface simplification */
    if ( absopt == 1 )
      ret = yams1(sm);
    else if ( absopt == 2 ) {
      /* opts.minnp >= 0 selects the variant driven by a point count */
      if ( opts.minnp < 0 )
        ret = yams2(sm);
      else
        ret = yams22(sm);
    }
    else if ( absopt == 5 && sm->type & M_METRIC )
      ret = calmet(sm);
    chrono(OFF,&ctim[4]);
    if ( !ret ) exit(1);

    yerr.inderr[0] = phase;
    yerr.cooerr[0] = gttime(ctim[4]);
    if ( abs(imprim) > 1 ) {
      primsg(1001);
      if ( imprim < -4 ) {
        if ( opts.ctrl & ISO )
          priqua(sm);
        else
          priqua_a(sm);
        prilen(sm);
      }
    }
  }

  /* mesh optimization */
  yerr.inderr[0] = ++phase;
  if ( absopt < 4 && absopt != 2 && yerr.coderr != 4000 ) {
    if ( abs(imprim) > 1 ) primsg(1000);
    chrono(ON,&ctim[3]);
    if ( sm->type & M_SMOOTH && yerr.coderr != 4000 ) {
      ret = optra4(sm,option);
      if ( !ret ) exit(1);
    }
    if ( absopt < 2 && opts.ffem && !optfem(sm) ) exit(1);
    chrono(OFF,&ctim[3]);

    yerr.inderr[0] = phase;
    yerr.cooerr[0] = gttime(ctim[3]);
    if ( abs(imprim) > 1 ) primsg(1001);
  }

  /* convert to quads (09-2003) */
  if ( sm->type & M_QUADS ) {
    yerr.inderr[0] = ++phase;
    if ( abs(imprim) > 1 ) primsg(1000);
    /* NOTE(review): ctim[4] is started here but not stopped before
       gttime() is read, unlike the other stages -- confirm intent */
    chrono(ON,&ctim[4]);
    if ( !yamsq(sm) ) exit(1);
    yerr.inderr[0] = phase;
    yerr.cooerr[0] = gttime(ctim[4]);
    if ( abs(imprim) > 1 ) primsg(1001);
  }
  chrono(OFF,&ctim[1]);

  /* evaluation histograms */
  if ( abs(imprim) > 1 && absopt < 10 ) {
    if ( sm->type & M_QUADS )
      outqua_q(sm);
    else {
      if ( absopt == 1 ) prilen(sm);
      if ( opts.ctrl & ISO )
        outqua(sm);
      else {
        outqua_a(sm);
        outqua1_a(sm);
      }
      if ( sm->connex && info.cc > 1 ) rchsub(sm);
    }
  }
  if ( abs(imprim) > 1 ) primsg(0001);

  /* write resulting mesh */
  // relevance for freefem++ ??? J. Morice
  /* M_OUTPUT was cleared above, so unless a callee set it again the
     else branch runs and no file is written */
  if ( sm->type & M_OUTPUT ) {
    chrono(ON,&ctim[5]);
    out = yams8(sm,sm->outfile,absopt);
    chrono(OFF,&ctim[5]);
  }
  else {
    if ( imprim ) priout(sm);
    out=1;
  }
  if ( imprim ) yams_printval();

  /* print CPU requirements */
  chrono(OFF,&ctim[0]);
  if ( imprim ) {
    if ( imprim < 0 ) primem(sm->npmax);
    pritim(sm,option);
  }

  if ( imprim ) yams_endcod();

  M_free(hash);
  hash=NULL;

  /* check for mem leaks */
  if ( imprim < 0 && M_memLeak() ) M_memDump();

#ifdef DISTRIB
  /* free token */
  if ( !IsKeyCodeProtected(keycode) ) free_token(&token);
#endif

  /* NOTE(review): both targets are by-value parameters; these stores are
     lost when the function returns */
  infondang = info.ndang;
  infocc = info.cc;
  return(0);
}

// add FH 03/14 ...
/*
 * yams_free: release a surface mesh together with the arrays it holds.
 *
 *   sm  mesh to destroy; a NULL handle is accepted and ignored, in the
 *       same way free(NULL) is.
 *
 * The point, tria, geom and tgte arrays are handed to M_free()
 * unconditionally; metric and edge only when they are set.  The mesh
 * structure itself is released last, so `sm` must not be used afterwards.
 *
 * NOTE(review): sm->quad and sm->tetra are not released here -- confirm
 * they are freed elsewhere or never owned by the mesh.
 */
void yams_free(pSurfMesh sm) {
  /* previously a NULL handle was dereferenced on the first M_free call */
  if ( !sm )
    return;

  /* release allocated memory */
  M_free(sm->point);
  M_free(sm->tria);
  M_free(sm->geom);
  M_free(sm->tgte);
  if ( sm->metric )
    M_free(sm->metric);
  if ( sm->edge )
    M_free(sm->edge);

  /* the container goes last: every field above is read through it */
  M_free(sm);
}
*/ typedef struct yams_spoint { float c[3]; /* coordinates */ float size; /* calculated size */ int tge; /* tangent at ridge */ short color; int ref; int tmp; ubyte tag; /* vertex type */ ubyte geom; ubyte flag; } yams_Point; typedef yams_Point * yams_pPoint; /* Triangle: Structure that defines a triangle in a mesh. */ typedef struct yams_striangle { float n[3]; /* face normal */ float dish; /* distance to surface */ float qual; /* triangle quality */ int v[3]; /* array of vertex indices */ int adj[3]; /* array of adjacent trias */ int vn[3]; /* array of vertex normals */ int edg[3]; int nxt; int ref; short cc; ubyte voy[3]; /* array of voyeur vertices */ ubyte flag1; ubyte tag[3]; /* array of edge classes */ ubyte flag2; } yams_Triangle; typedef yams_Triangle * yams_pTriangle; typedef struct yams_squad { float qual; float n[3]; int v[4]; int adj[4]; int ref,edg[4],vn[4]; short cc; ubyte flag1,flag2; ubyte voy[4]; ubyte tag[4]; } yams_Quad; typedef yams_Quad * yams_pQuad; typedef struct { int v[4]; int ref; } yams_Tetra; typedef yams_Tetra * yams_pTetra; typedef struct yams_geomsupp { float vn[3]; /* array of vertex normals */ float gap; /* local gap value */ int newnum; /* pointer to new number */ } yams_GeomSupp; typedef yams_GeomSupp * yams_pGeomSupp; typedef struct yams_geomtge { float t[3]; int newnum; } yams_Geomtge; typedef yams_Geomtge * yams_pGeomtge; typedef struct yams_metric { float k1,k2; float m[6]; /* anisotropic metric */ } yams_Metric; typedef yams_Metric * yams_pMetric; /* SurfMesh: Structure that defines a mesh. */ typedef struct yams_smesh { int dim; /* mesh dimension (2,3) */ int type; int connex; /* # connected component */ int np,npfixe,npmax; /* number of vertices */ int ne,nefixe,nemax; /* number of triangles */ int nq,ntet; /* quads, ntets */ int nv,nvfixe,nvmax; /* number of vertex normals */ int nafixe,nmfixe; int nt,ntfixe,ntmax; /* vertex tgtes */ int mark; /* coloring... 
*/ int ipil; char *infile; char *outfile; yams_pPoint point; /* array of vertices */ yams_pTriangle tria; /* array of triangles */ yams_pTetra tetra; yams_pQuad quad; yams_pGeomSupp geom; /* pointer to geometric info */ yams_pGeomtge tgte; /* pointer to tge at ridge */ yams_pMetric metric; /* local metric at vertex */ yams_pEdge edge; } yams_SurfMesh; typedef yams_SurfMesh * yams_pSurfMesh; #ifdef __cplusplus namespace yams{ #endif int yams_main(yams_pSurfMesh sm, int intopt[23], double fopt[14], int infondang, int infocc ); int zaldy1(int nemax,int npmax,int nvmax,int memory,yams_pSurfMesh sm,int choix); int zaldy2(int npmax); int zaldy3(yams_pSurfMesh ,int code); void yams_free(yams_pSurfMesh sm); // Add FH 03/14 #ifdef __cplusplus } #endif #ifdef __cplusplus } #endif FreeFem-sources-4.9/3rdparty/yams/yamslib_internal.h000664 000000 000000 00000002266 14037356732 022606 0ustar00rootroot000000 000000 /* typedef struct yams_sedge sedge; typedef struct yams_error error; typedef struct yams_shashtable shashtable; typedef struct yams_sstack sstack; typedef struct yams_sinfo sinfo; typedef struct yams_soptions soptions; typedef struct yams_spoint spoint; typedef struct yams_striangle striangle; typedef struct yams_squad squad; typedef struct yams_geomtge geomtge; typedef struct yams_geomsupp geomsupp; typedef struct yams_metric metric; typedef struct yams_smesh smesh; */ typedef yams_Edge Edge; typedef yams_Error Error; typedef yams_Hashtable Hashtable; typedef yams_Stack Stack; typedef yams_Info Info; typedef yams_Options Options; typedef yams_Point Point; typedef yams_Triangle Triangle; typedef yams_Quad Quad; typedef yams_Tetra Tetra; typedef yams_Geomtge Geomtge; typedef yams_GeomSupp GeomSupp; typedef yams_Metric Metric; typedef yams_SurfMesh SurfMesh; typedef yams_pEdge pEdge; typedef yams_pHashtable pHashtable; typedef yams_pStack pStack; typedef yams_pPoint pPoint; typedef yams_pTriangle pTriangle; typedef yams_pQuad pQuad; typedef yams_pTetra pTetra; typedef 
yams_pGeomtge pGeomtge; typedef yams_pGeomSupp pGeomSupp; typedef yams_pMetric pMetric; typedef yams_pSurfMesh pSurfMesh; FreeFem-sources-4.9/AUTHORS000664 000000 000000 00000000362 14037356732 015423 0ustar00rootroot000000 000000 Frederic Hecht with the help of Olivier Pironneau Antoine Le Hyaric Axel Fourmont Pierre Jolivet Pierre-Henri Tournier Jacques Morice Simon Garnotel Cedric Doucet and other ... sorry if missing ..FreeFem-sources-4.9/AutoGeneratedFile.tar.gz000664 000000 000000 00001376525 14037356732 021053 0ustar00rootroot000000 000000  }`icH0|Eڸv8@Ӑ6sM@ng{M_Ŗ ,=K&=f:.rTթ6qx:OcA?#}{":G;OXOœ<l5pVп!''aNϼxa[Vxԭ=яRd"P(fDx'69EH̳as&1tU`]@k6q^ {/_^=Eq&n.1 t܇e0BC!2&N\H#d:b^ <Gvb,LR`NakEmkW|Tqa`a0&ìR@# ƀ4ņ|Tې^ U,KC" '>JJFxZmu5Z2@nvz[ ^o|*>`y\Xab#uBTȤ 8/̇Neb~'3}DEa4 7+ x J1PR u= VD X2p~dgig+]ҝK6j4Id HŸ\W0(YՏk1PJ"po7dL5OmMlRTmP<+ě-n'yjk(g>3/6C'& #̈8,_$瀉(\ΙX#^w=PF$i-0 I!L1UϚP9(CȪ hԩ;jm@A,KL$1͵f"*2Rڥ1P&QOWB7au@z TdA=Now܇4Wvڣ=-eU.!qxQDs}!K$2H>=*vg"H8yD5րbHDOxZ+"AE 5䍲 -Z4$ۙAPXX҄gDa4)t=dsV bR8BCt& {[/ۜN|70)(bI-ImfUU / **[g,p %!$8O8Cۉ&Х vO*c~JA͇6/s1˱N:V V<Ƒ?WMO*2xz3AKm;0B+D N i1SdZfq`Ph Q]:GS2ӱUEUvsCW@nu\dPA'*XJKKHh)UBHX(@##RVq5j!yq%UYÊ= FC0 "^OO!)ŃNVnws ϮcXu rIpht`Pɥ:?I,%b4@tS_˺`ih@UwȀ֐Uܧ7! RK]iUWD,VYKRlJ꽁Esu.9 \9/A7zsK:ЀV԰d,dYRS/5 Zl8.,+(2zzz&h&(E't6}_Ѽ'o.. 'EWD= ѽkQЖqKe`%{Fiq MhIW2)Бax C$ӌ/yGzr{g4ƼxMhͬ򤂱!?z. z+^{G@]-ZwV+6yyz2?Wk+dF Ak 9>QhN#˓V!J8<>,"a!sp]ϙBUtX(Vn"rY .aSx:v 3RAj$'T<4PT 1kT,l!#>y2|DYeWQYb+XVhn %:LP|ZED-?l8('s?OFƪZd#2Ѷ7X=YȠHdWVOv /W) d?%(\7 9kڱWO+ѱV (I^mZUe-8yen`s\A-[F$}Nʑj|>p-p9|?T q ZK7~C/Lې^+R:)'M|! ѸCݔː\[*gU`9)`UQ~_!#aO:E= +KeNE{&A+o_jPۯj9EfQ &(uIj|0p[?:!锘2V,g4P,hHA=¿ A&Ȥt8T@{^I2]qʮf㏹?JP 1Q{"4B\&d1rіO;NЕˡ5:gu7N@ ɍz{\ϵu\#˂(܁6 Z4wj:DTPi]وy]@&X FMLI|"g p?gTQ츢.~r:WSnÌu6H&jh*A+h~6tVLu,{\ 8O3Ct ax. 
IE`@e{9xߕQO$I]]y]P+/JZbk,du8\%nP3Z}q2#DHο mEc&b18iRMl9Z<P/@[ќj>j`q4+r/ֱJQŲs:ܬTV>[+g"JMiT g6pdYDuG,^zX[&a'*M U㺏Qѫ)_y3tHk V Ǖ7m/c.;AՄ0km]VS;9)ꭼiW{V:A)~oU5heGr +U5fi2hmo׍ȩ+ uߕ۪\pLmB{m U$[Wy|RrO!:"]Пd0 2\!(Lg>LoHl~~qy̺?:vk͗ݽ{px?:Zu*:pdQxfPJg?>OTv@p!rֺpUe{}|yuj.?KiI[lD)x.TS&}b3b;D%v; kay/%Z*wVxlxLHԘ;nlmm:Pjl+;I0D>sbwulϊqժ֐`G0Y}z9^4#_hq< ói<.k90 bp덐| {l]p ƼBD.kːr &G+A'lQh)Y3NSܑJSG?4w6fG!mKkƈdq$Q}n5tV Euɶ 5I(v+m=%.)D]T!8^`5-6Sq*Hg0$8"&ëy[o`[^/CNܕR %!-=PiK:AE[o}}aX:)+5YScrX_j| `ٙ-;m&[؂'-bE"a`+wWɽiGpjKC5.ryp%oP`jjkkOY$ɇ%UCL =bylv]rKjC[G?s w;] >S<.ND~*ꇿۅqx٥t;]`'eAo{ދ>-=+ă3v%җAO٢ؤ?/no߭ɛ j:w Eãݗ:p_Ɲ{h)`[ƛ>. *vz*5nF!4@ﻕcフ?[g!n !+`ckoNaL0gq)䇁[X{(_[C tŅhAb3ZZX`JE X BwInh)'.`xy 'h6 Wq13 o7Ga߳"^#9Ԅ^l"!2^we2E}qv}}ogFQ&*_x 힅# _W>wDLvU}5,;4C&o;yJL$2} U [*B;/ձv P/*Beug@+%l0Eؑ製=U%9zc E. d00# bi70}}; r|U!2V*/AX%zs{~AgF{{G1|c- J8s0`ٜ<0 dL"gQ6 GBPF{CP2?;Vw?MPrv_lY*cv_R8e H{q#;l`V$! 7 I|إK Is}]Á($q/LOePJb~EQScBlJT98Yzi ebV xv6f5 PV_nDǯ<΂±F1x.yAD%U<ó`Tt lF7Cކ VJC(P~p!H0R0crt6}j0; )t>qSTWΧS? 0d!A[S HLnM@Sa$iw5'ijկ5KƔW)V Q>5"Ṅt" M)R1gKȰ[9TUy"t"tSTn8թf2Zj L-vkrdBxwإpEsTC.(Y SKY8+.3|t| C.xY$L6k ?]&ztHb-s"ΔZbw#AG-ee,ְ>`"XJmI^xEf,RAKѵH֣k⮣-oycA Hd,/M6})xJN,̺K^)7DAOp_M[E"4hyc$ |;fڲMwh]s胡@&EEv(ν.9ó3)#,5r75%c;N"20ܣp@%2@9FmK͐`30`}6UJ-+YTbJ{<u;|?q6S- [Ct\x%0OxZdh0HtiB q:n]C&"Ȇ-yph]@2]b͠eYaQjHs[~Z5W&u!1Tc{2Ȥmc& 1fYv)Z?`'Ts v* ev;`sOXN n6Dzӆ6Ux >הkʥBJʼnb?csS6L5))5*$Ctv/]p(|!Jł< ->/giD㢴YRVg7~H<^#yoxuͽ#< A:䣳}o}DnnE9`e" ՃytȊ`NP0ARpzG"$&C%,Uu5pf'7jͅMP4ZP n$:-ta+ F~˦4dsG3&C*1䍺B9d1) bR9DY8_jWltoc # [1-St)K!&as8_G~͛ҍB J(Vzc%UFSyr >Vw1q>>@b$kǪ*xPAݕ_΁spĹ1'Ny;ULUr-A#ڃ-0j T*2y%}/_CUfpB$X0fV{hԖEM""]ĶŚHaUHg5% a:3"~Z41vzDUS+ʥJ+k.*[}K.ܷ| W~YSvaL>ts?/ F O^*"ZWG^O_H٩KAR?nȲ!QdA? Ẹ1k!:&gn3˸⦼t ~VX}"F,Yыmˆܷ]%G9_6Au? Eg\h8? 
DT@K.LRQ%q(k|ܷ|Y&C*懋~qXV>C* M\r6ARՀ&\MvѾ>{֊}V ߺ6]_َ/f-#L9{ 1!2~Aw`ȒbBL>"߃=w44 X.)0b )]$/Ì}͵'[kvF.0GOR(N&CL~]T"!.{$6+l: 6VcnQ &C Zo+2&C]R͟)-ƪyU܋5}^#Z|ąevt}#.E\8`wO_޼v#¥娿YZe?4+!p9"ŏYʒo% *e)N˦><#-v?-d8&C*8S9sg)\!&ǹY/ LB7Neس@`PjQr1+-I@KɔN11[z9>jaݽyWzZAW]i>}};.xS"V|%eBVvb'[ov '64>Dԃak%=_ϱ;}^6y`Y>eydƋeӍDӗ0RJ*g®KuV?rLEcqL(S1&S,zԻL.m69 ȿA)e_aʯ@xjzB̩ 2L7(Z̟hڧ<Ū(>őhgv}3#}2R?#)_X0:'RHr]T# myk’Jچ .ȌݷT [|O3=/^$P/3 feh=>nWwpV-߰LXtѰ j4 =3"4MY3wE:ӛו MXLȇiÇ̪Sj,{i~ D]ǨuDgby)NG'?tI 8r! vC᪕:յJ]>:Lb+܅& A#<ExQU1V-A KՌƒDr{y4HO;9D C_B}Xh{*H?PN&8tbAHbAoc.E_[YXw?zM"QŻ&t0NqQr[׈ +-_g_{kkou?=Cyiz:m-un5Re)I8O8 h$ -{Dc$(^fy U8Wi?p(ڗ1 S`VqLW 5#S P\uuQ<$O:(]gfJoyM&2"υO]$.%;x:¡y,u9򕟇spuҒ2b.*#^}ɜR29񒓽wV&92*V6<<*Qi&n-wH@> h.bTL-2o; '0>GAg6(y{:%51#lXc%>߈& hT-ԃUfL7I-oW]e\>/unR|]TҝMAs;|Dhd _[>{hAbTƝΕէa#m.3/W ѩrFM|AyeHpC- bKY 5(DϣTOM+v_Z~ @PT]9aҜNk;>F6C9 ə/~1IUJye;'n M e=h=X։yŘEfbE ^+X .,yN@._͟rf =?squ0יHiXniQKEEM$2\ y:zm99"SS6<:cQO. )]'ݮv %5esL)UzP~+vMtF97uJIE4R*0s+X~aT \.n{q=SI݂gtZޱOY\*c?e2\)z'_y;v=CR'%T:Td0RN|PiJd-)ŁР/E+1xkD%gS&v$5D];]TR|Ԩzv- )Url |$ɔדj5BцL6#0)gaT`epz 8?U7T'aOEౌ/%z/1ĊѪ9:R ,]v R$>WձmAbw2AY-Ez_>l|_&= ]yR]=. 
GablL 5-ʹ)+~wU/˨]v{"~p>y$ᘎg^uCk]@#%$#2Cل`rI}h#9H'ڬ3 A8 ^^rd ΀ě@-|԰+QP]1 4ĵ{C½;[lE1= p3'ЫxB/n9m"/}:sq;O\|oC1:lׯ͐N`^)x Xw/=w%ww\QÆ4w<-5AZ0ᩊB&D= Z FE7_Nڐ}pJZWK]ϭ)@Nwh!~ZFcWo/i]-GȤqTE-.Kr#klr:0e{mE": ԒX*A ' ֠#h1Q:) [8F/PJj$ ;B墨ȩ;P9UBh7!5R?)A|'׏=Uw ]׾jRrw a}7LwU#ԿB快n»t:}Tv3hEW%;P/W|XF|rw2Md |n$[B\7kOw݄׌"D+P7^n2wT!/r}Y:uje4E-k{wVn&T*m/Q-~uU_nc?F\+tԌmƯn f x7RMZG˝wSۍZw76B`}=ik=[ .ބ:hGjuM}gc\hGqV\c}<W\@5 0ՄeCp\ &w[Be7t%EuUW墨jwTIp33Ie7;{0Ɠye!=\rnyX_7 rQ3E H0\5!?0Zp~W3צ(c#?YkֺP6hBCBd\Q酎o:sk5(n-j qzc+"ecFɢFc gB6 ɓ,*gHYN#eu[juyr]>4'0S~uG4FƷMKw͋oHK 2ARsBPS@`nA{n!}ؾp e|)}|U{"4.VT־J¾%:= {|3]Ž%tq& 1ᐟ`(u&U˺BþB7GztNW"rGV[O:x;* ڡ0gx6T0xcђmynJ\* AydΣI8 XR-Rx('PK@eZ|6hu+[2 /`Or5Pgfv{ͦ'o)6m=q- Cg 5p28jyeGn4n7+NICb~濫Lq:HU; P0G&vJ]QKUJUg!qBovc~s/%J4D \ +Ǖc9~}f U>TH%ǀE*r 0Xyf{3Zk|ŭW6s?t$nBLϝ¤f051n:u*2j<+Kffs7v.N=}3rG}6 |ZF{{ >qB72 lLRоAOq{mZaZRQ_wy&g}7O ,J~./oˀ0Ƅ1O& }-rg{L0 v ޞ^+bξtiGKpdKG^ya\./X:}Ala 3W=XVJqLR,'w=X}u&EKՒD쵗;oyG􁇷j:ag aDa'%vU$av7ޭyvw4=)z$zWL|HSqX$40|k(y# ꟳ+1&-Pn֨A[ET,KWMҿfmw xw^^MO"Q/W֎IvVT)T  @E_=$mYȺ[z!j&1?^֝7A˲opeVkXQXl-,qp11!=+ VExurvlƭn$<"BK鹦1n>,*ĘwTQe&4d2 qbwJb'#@&?C+;繊a5bn|o;>u,?r#5oz Ͼ|ʿiZ5:?ݍTM\|7 0l;〗e ZxmCN@Q EE3#HV[ 'g撩y Mp/&OE%azN$ELN,(uQcv}ofۣ>ܳQ'Fq7*)Cv 왜3\i?%_[J輌 PM׻8wn8 Sľ,Nۜ9%14p (c4*!oWuN ?!:@6TS6vKov\߈N',-q? ȿb,4AuC %y&(`'bjY0O&Wb\,H*ȋ3|s$f37ũt LXHU~t%^ Oꪭ#A%}HNPo*0޺!@>*ㅲR]iUMH3T]C gyy@WxnY-[2ʢuEѠ[>SRc+&UJ )w>L1K@8x5rFT0gb!f0P!".':/vn|$ "H{`R}4P}znT;;vz[7EKI.*C 0L.i BY7dLjNUXZ$Vye)ͅ'䒱ˋۥĿzg>b11֖ǖ^< Jڮk_>77j"TR4I 3gJ34E&y<}=J>wk~XyQ} /oEv=7(!"Y(J)7Vnw |8>@)8%~zt9*آ,s$>զ_ :G2PS2PA߀Cj|KQ}QZ&Qp2?={&n`mZ!;^,{Q[jǹzF]&S CWEu= #'[:GM_fR,N32}]-1@3BLR9?{2}JG.'>7>DE$vfeRLz2v/4iQ[QBy}bǺ_ 2X;4,L*hf^. l9< }d)c^!Kݦ@$PETR 0g8JO&-$8&?tc7T9)σa aw;;`(1e`s߯$";Ew[f}tP4tP'mG(bLՄRp#?<62#oxGL\Д`a!95Va !lBX׈k,:Հ,"(^  xq2B]T,q)IR#J %1tLo7 G +,d$_O&?xehoMKYڭ~-Fu(n:=tdzR7w:b&7 a'9:h[7#cLNH}E?0y*0fsC* G ,GX!hH3,^0ٽi|A;^qTctUwmᎫWs* f-Ym,3 ,Zg&)k &Ls3p/|v_'H+^)"3bN{*p| p;\WZ\S^^FB :`<^J2YL/7TZZao9B>>TBU}]i)/7_  /B#cC| jO^ Rt"2UB官*0p1JWZ!" 
s`}Fٙ4en `)@t#9W0{nk f 3w)2㎫Uĕ?Q8&2&VF:\д[[W5k#Znnu8  _KI|;|5)|`P9uP9ɊC\C(,a2x\B}\_{o!q/XO (X驨N @ތί7hEo z:liF;c(? a(D?Z4 -Z#J3ϫĸEhc}0X֞JrÙmI=c.J1025432 Ge]?! Q=[U~rqi̇hN ƇG}8`P\VZ"!#ڔj:Y2@$R%1&PmItGP+@ JnH0]fjz1d"E/ONϗ Na|"1wП٧7O۾^> 2!'[UYt>}پ(|zQx{Q:N/)N/9X,8F*̤)/MqdJ8uY]yO=y'kO`tyj*G)%{ȭ ,4KD" ޢi9AlV b%.M @a^ ? =Wp8G}Ç4<`PlBex*}bS&֞8L[mrN8}y9"ԣt$lɂ^1ݘu=0ꙙ5ވK+\T6";) 0]T8pXye(?Ɛr kT g AoUCBT*kUat)ó.vJA޽r]Pg8p5l 地TYFR$иc X&cux̎cgg@+ $.` |UŧZUTM D ^@OhwҎAO2J떸.M@~zqTp$D04$'z5FmQp@ý *. r޵, wJV`f=$eeQ 9Zϵy< .CbdM<%`u]jph&H$:+s%Y"3v3I;?N-a}$Zzb=|,cvXA(Ԡ=l=|ÔτL(W6T֣DFoIˇ^g054EcE7A&wA] GX_I0t:Ȋs7ahٽbbpZs3Sӹ|tVoj5SMS75w|4ɟ0 f3䥋GFabZ@!U(G9[ib Na-H%̤&äꉃCRlxՒf^Q\U tkE``0[?Hkml 1 @RXS;i8Q{ħ%)ٳn!/hڑqT \49ڟ@}'= XAO3;;k;䧛v]Vze<}-U5_^>:N+ӷVBf>mscy&Gc39S}n@'#kuB: G|</ay iY.!l/NvsB!$e $R9"r1QZ]pv8КnG?7I(˟ ?`gWƮpo;U|׍31UV[+Ƿ&Ϟi kv,c]DcԀ5ycT\I QW[)8m\i3:!h.@o\6=MYZxv &qO;k;\忿sdq.ХmzΒĖF#i4(KCǶBKD،+,u…R|;Kpn7 tj= <^,@BAC"#2-@AvO78j(0%MUX%bgxɅcAPh|!%C_;^8օwgD-p( ɖ78.Z&zVGnøQLdF) ju^dKTb3^]Ǡa I,~JP GB~{ U|Mq?*.{` #IO2wIo6s=T8 /XH8 n]%?6O\/Iz~eX\@7qk~Zw2Q-!HgvX׎;- bQĦIitz h}׷(@uR] 8Hg7]>4(F[A?==!,bGnѱWRCTdCltuOVɓ?}\z=7֌BUUxS,9U%UiZ]^?--F&3t{N`}tcC3L|Hq>]R@S “ǫ *]R<}o-xf~.GUDŽzA&A%l:g[Tjg'] j " Ve:;J)~M"` D=>MhĔH냁 -a3%-d+љz=%U8":5i4;XfwK՜̇}B*7bꢉ;! FPz^'էYuK5Va5j xG5kdмC$5CBY3Pǡ4Xhtҏ47OSǧ?L0N&Ψ$LT_p9I~uvm/@*&%぀Qb sU}{|r<ǷN;4."A:2 #5.:H B \][〘,1dvl# ֻ9^*бaM٣VջRҙC읩ahBm9'×&lV-hÁ~*<.ÛxH\* } 'l YIw\;zP_a ( irBf1B" ta UҧKKe6%:-PtIh9~ct1P.cF+\z:Y @f7tEu*fp(hejOzKF&sR/ΊŀI0rEւJ& lrImS[V8l(0ʟx*f0^VA*/絥{~#ڇտuD(:04x3ɨ $}Xi:.G. 
\5*Qp;4fؑLґm I#\pB*zf^|mDAKgɫSx`l,|•g7[x [k6:RcL9&(% W*^C:CRJY^i2(1Hɉec!_E^Qj&ZXYRa^Vy$6\څvt="$ㆌ]*C#c0OvWJ&(@ uOY6B($my+,J^Ԩ'Pe]2*; Hni@ tzQG[EPX6( GdvA䧲Gy5?04~']`!Lw9PxCQNȖFwm..Fb;^I$-r51K[=w2`A}^an0 זkSJ)p%Nl*QD>"Rc\sf}MFrvq>'33Ao;i7!g "=wRU"d'tL{b@fv i;s.|[=X*)zF#F̡:yq)[i=:PRCRD@m.k@eP3|Tq124NMKMI%FxLDU;8ub')|tϤ=Ǫ5_nof~ TP:U9WШJ$E#T-{sx㒔NqZkB7ؖyJ*P??:@RWMnVX}8Ѽ"uy 4lE70H̺ٙ4V*g*sJ݋7=r\#Q,2B8_wPմ)n!0@5`<[$u2bЁS>"P !;+N왫,ƙhJN0 ZH֐(?IbS;݂zhhɚE觺x^d\C=;ϙqWADJ%g7w0  _ќlAm۱B cTw nSC줃.lIKyo)N2!8BttIAt9dAO132 :@SNº4 #YVI0uT*:` g4^*ܧx{X<]uъx[)::6KTNXRW鸚z4"AuPµ`XP ^ 8rؒcT-_.fTp@pPZr 1f!Ԝwzu [C\2 Z-'*rF༔i 45菱'3K=$rtKFHP4*逘 H? Y, ,vfeHdy8Mzڻm%O?mKgXi)o(lJSJVF$SP|^ʰ8A'/ZOB!Ap/$Ȼxe/ >TADO?[Ay*rr>3[ۜn-{J7W]e-ͻ(Hx"*SVF̈[fhGpf3BZdOr୫n,deSO$T i^ds( _ú[Di%n} UKuׄxLJצTyBjIKDZrdNʳaIкUJ慻Qkѽ,"C!Oy/ x7Y!,W^{arp>dB.Lp (76}ӓ66ڞVpG緃|𥏴9;..DM0亞d\F4 kT6Qj7>Ȑ: ɰL_hs$Ri\&P2 TJ*j̘̑i\MV(84Tc!:,=p1ґNVAxUpJe_c5Л__֌zQJp5PHy0V(<-`#LO"ئ'xU X0J(*QTɝ;(QQ & U/fA$: _y &% a6XXa _/~VO>(?*wڂ?[[.>kVTߖĸBfuj 'Buد^kBRIm [؃62s!Ѩ|FV޾+?4U+76*wKGp'FIbSa^nR+m28{I"'7ēy%2NSG+&̜K&i5}u,5g4(0DƌfX:{Ybޙw5ǫVJƕaE [lxAhݰș %C@5.X1D.&)i T.7;[1/GTyvW]z{"gfEFnwehԅ3c0Io\yd׬tQ2_e0b,)ui#2UVů9?)džPoN)d]/sSm eW1>cT J k((G$ABM헭G{[G+|dE;e*-8ka4("z * FU| ^CuvX]wKYIYɛ a[bnAk\G櫍L,ZfQW{,yzOi`l Nؘ@{dqSf{L lidꊸ+:_-Ȳh1`wT%_MHTaq⬎x":2ՕJRFmڮ[{ P+'ut+N,A-Rքɯ'YXz$>RjueXW~;/lܸKVuoېV+7Ln,#EzaLdowh04 2"eu@DvXï^nJ8c$8Lݷ;rů)imzYr؁qf98Y|smNhk)Jۦz̸m tLGp)BnQz.st< FL%J-WVxT>;jqyJ _' I5 YTw&ZqX,WG٤L`lTx@4|h_$dj5 l팃V:\n/k:D`!e]#m8!Ia@rE<.i('TvqFB:3!z_3ڭaְ(p+Q:9¡L6h'K^A|F8=iu,0RTs|ܞKt.~z"C[d~) i(m- fACĔ3}-y)OhjQ z< _/,eY|7;ؠi[%6~Z?~22?:PRM6 *bS)b.OH:{ pG' e͘/6PIDk}˿΄հAp=$ױ95!S3&ӫѡ-u'|˃RK²` |SOaa±/KF=G2d$@Ȱ/p͆᠉*C*f. 
Bf,|B&K .Pp0 хz2 0ߙV}p\spwv&r2@Ԩ10pU%(1hN;{ ,#3 %,ua-\*/mc@%{ɔ=S5bgjA >0jpvo) 0T3~UefS7FC<yD6ׇf(EMxu.v z ҖPV:Mθ62bjCxVlg"p?D_POsS`R"q53ψ&i2}#b|){DkN͖A/Q\T?:ȇL9*G3%I&U>7lؔ TU5s^ tOtqST_w,n'0f'E'"'#k05 0RFQː=w|2ѵ`C1,/Nq,fn\.`PѪ^a:wG•Eť4yQ12mKTQ2d):EoXc]beE7c-8u2wVDm~m5f&C7:Z2И v L~<+*[ò#̰7Żm8=ko_n˜9#!f˙m@A5zdR%52cJyw]k;Ԋ~P]94KW )Ĥx୍_2r 2iSїzVpVSjS?\Ц:`ҙd3VԕCЀ$FsbFjT罹v(f,8D,48QDj_+(1oPDc,4x}.%VZp7MnC2~QUY QCdEd/I]!(t,%ЊG#1&:wBrhj~GHa̋b=LOhL`2-P$|z\ H2OUUg1ոLAα*K%#P:N=Fzٿ3yQ^or`ΰf,98p.50z׸;)f;LX~@#y7X\ŰRu#t2C^Bz2890|Y$=N qX!Noo?x7q5-X2jO1|ph*źׁ}X8,4=m^&gC^*lrV|,?\3sr~>ہG% 'q %1C5붏kN\qRq6X0G7(+)JœcҔ#Yrj_u}s7DßK' '"09AH1pQr)[PS26& T#QZ ;yHQlJKbBXHlaa%('u ۞ {z4HbRIm5Ι&AġS'.6x<'@.iJ<9}A *x5Y"TS~0E֐{QRLrLL=uA'Tŷ Uc믘E P_J\*2Rb*sHL7 :e"$㣀"CZ%N_1v D5qj\tӏ'S99/.v$3.p AdSd8bb^V[[Hq$D N.`I59E"lSlK8IїũIn&Q]5l$.M Vv:SF))$;#%2UZl;!l 0KpNЄMlH OCC6/"x):ݽmӄe /< "{]J&Bs^F"{5pڵX:>.jOW 5GIBρgeSԘ14(,V4Ow֗@&Px=Zc^/9Rᮍ+}[4 9T˹GoP=㌛!륂3[KQ^x@+/ƉQI бURqbbA `>owt7'iRh:en-_]:CMͻ\y׳(%~;?ytOmO_+f?fJ' %8^}:ˡG6!%lS9rJՀ; N3~Ƶ>̼:0M#!mNӅ"V^Tk5:~A 9dAC:5_VuՊ*ؤpHAE7~'72iQ%ܙ-J=FXM"j̧?*֮qJ3A- m]+sʣjU8<^~w^kMTKbεfjO25VJZxHrk[&)S%+CLWfk/}(2E8`JBD1bq6 tcC4,>*>"cQ_YQda>.tpZ'a5Aب#i.s '4}{F!ՕѴ-9B0X[O}ee'"M)`W)"Til&P巤O u)?)2Mļ̝؄:%KX^9LܥzMÐL'JV,dʩ&YmH,nhr8&ާ8~L͑e1AH4pRƟ/B4;;xn}}*n} }:/hrx%p/kG/5^? 
e+DeJ,er,ʍ2X&fe v/6ְjVf}b>H!hƉp);gfN n.A #rm}O~8M ͖EۧJ7U~$64`0+4ED\l^lnrZi*U]#,VlR=oƌ3jamޤUh8&ui-sMn,LFEx Ȼ[.bGfDC2IGkl)56 Ei\/ b#0SW[|Ӱ_mO\Pa &Àt/ QS?8ZQ,} "ze])&J59jl3гMtM]11vA4YSef< 3 Rqh"c}!Ezᩯ%(sѰyiGp7lSvU%0VFJmVu10nzFG~T,U7DeV>oweH:Ȉْ<0F v>h%B3ʸrTU9a(JyϤЂh[(Hz7>I4~~e31<9rhFS +I3B;:iލ5$< ޳'yY L U79 `xMDkI8 Qoռ&(@=*Eem/?ؤ0AeZ|L~Fbi8)]_:Y`*rqb|RϢiF\Ӵ dd"t)8E˯pBg+v!$Q،a$|g bJH#a6S&ab0 {W-jdI N T\riS_V(jT]KHP: k`N[JlamQH?!LP Xu ZE plk0&~.mP5Gao ԽeQVh-Z}+ ,gHZQ]v)/U~#x@oVmQzh}QI4_'~M8s9V׽Dwn y*5 (fI<( 608 A `XF@2 G嫫F|?fãG 6x4BXڜVDT쇫[!~,_9:?5EdnmɈJ󤺗lrT â?Dm<1DQtI#?D0SkdQQ~50WekweTdk`0$>fUX)Ͻg7J%+.J䘬 ߒ dG]QF]߿:|g?>sv9;ΜgΎ3gǙq8sv9;ΜgΎtv~ƸGo17SqwPǑ5ACLqC|f#<=l߽ǣVӝPWt/W{Yk7 Qyՙ~8ddq -8u4gEq &*/"("Plx*ce-%ؒ[lAK) xlS\o}w8xi13%5^6lE/JVVixSJ{("^"zMPSIw)ҥDj\/+/4d8&OoPy2 w`CM `Ab1`Dr ^$頏>8?KǗd89k N`PͶ}5 z=!zN4@Df>X7  ƗQJ>'ə6'%^hD0S"Õx%zY1}0&9gtǚdbN 2=I'a0'aPT3(o: h܌AM0(I} J!ƠY> oobcVwEU-z\r=yCx; mq7ZOVgX43M`|;a BZ u'6`LD -{a2VCn9TDq.t(~JKDN'P UIK>L8FG$'L!Lh({¼ŠӑâR+yR)(`' VGx¸el4-@]0UK`*@f=``besŒЮ:Flm{_?wValF Pt.P?jhwj{o_Urfݷ ^F/ ßw9kh%TU}qj (J*= a] `\,!E iߥ Aʩ)>L\ e^TDQ_H(cw=*փRz04;*$򟚯d(I|nۭ_kԝx/p*3gx()<Ǩg}6}؀:6z>W|4m.8#] >`EJU/۽_yM j.3jO8ZEɳJ6B\NewT\-?.,o-Y%@PIqЎr附9PW4w)gy7 ag"Nњ15FZ t;@U8G6Pt B\/L>58ϲWuvSPTSU9; o@D@4;3r&Ծ CYJY\5!*<3ϛe!)Br3NwO)0?<RLE68)Wg)#%geBјEՒPhbB9~k(ąیka N#荍'_C:`G2 K0o= Mcq7@(lBh9a sP QzshHCe`4/5*.S%b8/J{H =dH0,cJay1j"E9cadRUU}TQbzcX'MwzXle&X%w~Ao:O'?62L*_΃l5)&hs@>(ՠe/| /-ElLlS:2=k?]a@H)dRW'{~@7,ha-nQ q0ݹ<>-KiI=>YKdf _`냝lo`g>LK3d u-؄Arett0A5ʹCa wϬ6fn0}wh 9bFv#k/CEG-Q`ژꓢ,5]x EH۞Ian9 <ֿL wfL7r䋖["tQ,e%DzA#VE^ )ek6!H,Wtư@@m̷h=``iLG˽iZ),3q)q*ڈ+L#l_1Ks^p; bz\1zVZ.>)Y_._={ۻoO*_RxEXa7O$C=Qփ \nb9~ƝIꯥ_T›4^<-_e"[fiC@:%"KMM'E y+y (CXKz}إSQo˓ɲZPD=/%[vap#qȄ煏7lKf>?"/Əa#Kh/T+*+ۡk}^Q&r?I0UUQS0Cʼ=+n,}/"' ;D/gqRuFe=ZF@YՃx f 2sEZ. 
H..F*I@`$E韓\an5 c3S:ss)CAIJ|NSND,rqêe82m2 m.IASQ4Y0:0{c*l{ԅL#nſ7ȹVS~d`v΢}^}Dfs6Ea-,L#u#fy]X|)ƿm8\[i@͓ K ,!ř!nC|8{K,nG^N' CJD C}tGZy͛Ϙ^.dDYƁ֋RypУ G7LmZ=GiV4sF P@4f86R'1b'V ÓB7*3vIsLsw{ __1*7C"ȦDVKp{g'tQ^o kFpk{(ԚޤxoT4$I@,)XT / Ri.?|1A˕-).sekP@;]ʒGx/iOx(>DG<`ó %ԥ(&Sxn9&ml*5PqľFԾX{C{' % uKP\%rBF]k^ U K &%.%Jk6o hh _=с-9hyp+b4pi|V.- "ȧ\y1ClnSDĨڼ (J(3QF 1n|`RCfG23kL]dgۡU,6 mp.*MڇX.zj /'a^Mfy!ꑆ_;;VIGj2W 1HoV(rC f. X([HϢM;+ي}3`)K1AkUowE})оrB`\4 X|v]]cCNA/c FR\0ޕ^S>>͐􄇱_DAC3vYrғd{/~3jPvg?m!n'4G&x"igBہjh%r'"A{B&<#+ȚB O|[YY^}eEtF}d5xf/8/]І*Kb g$G$^B3͑e<^GN_؜Ƣ)K=6T޷K̒hu<\ggBƸ'05'Au^H]B#m)L'TU2}++iwp״ς,E#]w &GD`:|=ͨ=Y 9X:_9|7[o3znؒ4[ɫ@i%ڼAS e-4y /j0^>ɉX*AfYX!%Z2e<98ThGlU{W{'N_I„^'bD*"ע͞K,Z?`5~zw&yˏT?79W""ķgLsD2;t.٣G]K{R+39#^+i\#^-9EMDZ|"Z))]JU.1\ ?<36Xu4Mb}9fZwD1chi&EIT#QO"R5E:&RiacdI)/'`<_Rb3Ũdf}s?.K*qLM$Q{[]@K֛E$5!F`wq1?/jaOk+u?tʺ?ǿwA Ѹ?ć} 4g.<<;.fjfAP C#u3 IzE?ء M4UjnC OZX\/ŗ[F-;N &/nZeeo/l7TpB"v<s0INE&VfqRa1Pq;-(bL'KS6a\;x{_B i FbүAe8˕;.ɷus߶RFr*IeXMqR4f5d,jEA&Mi"/W SfE#Ȼo{^naUĠVE;}/akDg`2!="#6;.MdGxCkpϜ_ :C^Z|ftjUO_[ZZ{]YU_sUMi/^SSi0rwޢC9W.y}m\oƒ\gs6׺j,q5=]p|4̋mqf䴼^2rR Ʒ-xxzx]>ĸclg[DwI0Tѹջ%ѮV "$ 79OIf(d탭íCζ`>X Do3d8D#}ԥh!HTVRYK- Eu!o3|^HZBZxWR niۄw,G<І4HmRPWӃCQUF7Msc\ @z+moDo\/>?~luϠlDɵȚa(3<`Vz?@=F : h'($(Ljۯŕ<938Y*5Kw*``c%҉SjE ܀wd.-B1L #bCK)cǏP1I%VZp7MnC2~QU QCdM_8:~፶JCC4%T鼌Hh$ zȜj8X|)%[Kgڒ)f*)ID[*1tt'37Dja{0j=h3`+F >3ufO6X[%& L¼myq{bыJ g9j4?cRӑE{>wF>esl(`noWHhQX!9 \Pr̅BKg9IvI|S8ANQr"Kػ&ڷgO'C.g>u¦k`7/PCC;Y&'J"+W Jš l`@N O.XY+b"ZhY]JxmWSkS>CPPS>jA0޾uqׂɃ~ j:a%}EV53,Պ<ħ7ޔ @%h1E%/IDqM$jO)ob{Tr . 
KRxD5zˏ>  ǩch,B)dsp2?'_6lB4Q}<~>~u a 4X_J<[8ŗ|2e"2% 2h fi,,sn,s,sm:,s+̵1Xfr(0G`k- wkG0u|X~hwQ?ιvߜs~979׾3ι6\s݂sr59c)YprAGBEEƻc&׉l+x8vku.S μ h.ɕ42J^;۪_~Q G𴽝/[WfMr-XwB2d]9AK{E@1|rc][k6(c@8Щ9(/Ub ˩HɛO,WY۲-ܲ"V7 g ,IdGh4Sqo_k)(e|Uf6 &-._tY`gChǓ ӊŘGj%bc3IdLJ;]ղ KLɾ^b'6 [>n 2XUCak"C~(]8"CmhL:0 W2-2mBܩY+2lUEr|v^~6vA8֯E^3i䨱{:CT`cQ0xrɠ;h.@$lad4Fj5v۰K' heuKP V}ޯ &~xqQN6jw V}}K ǘj gZJK6ȈR//[ F v>h%΋׽rQQUz ^4,+Ζ@ 5ĮջLQo#-#W@C1%ζAkE ܞB$vh %t>1DkϭWAѱ{$l]t*`ZHQr@pMDkMe Qoռ&(OqB^#~1̄ & .Ek7M~Ffbae8)]/,|ooqdq1W>Y)wgQ*Kb9H7렱ծ4-KW22e n|D-~~b~[L ݗeF$A. nlFa7@" z=.5#)Xh;hh[~+F j^kb9Y, 1J vkJgx2riS_V(jT]/cHPN)bÜd8%X~,HJtp}Χ(ê_mQ)rm7Ĺ҆x]\%QX<O3#:yǭ{0ZY/gE*PDlY5&(8kJ\ȑs]]Y0$8/'K|CTT癕'k?7y}ȁg=DQ8b=O\0_WC}R3, +8abV@ 0IN{{QDF V^ۊΡ6m |V(Vyj!ځW`uC-aH$"Gi%6lw uP,H9Y1z&"uU8P|4$U'q刿!K&V)|IR\pw)d Z;0NL6 Y*uPeo_ܷvCuP;'HZWl.)Gu2NpQ.g'6F|= ,|B-"6g{snslqOF^ B?9ݢ^ѓ4[z̩8fFqE Zuc%$'ŵ^2;̟LESV?_@u)T:C}l* >%N71ӕsG"Bxu.Me |Qv1YN)T $KO"B~ZRݢb_xA4*TB N"[Fgՙn2t$R49'@B; h!xtPg+W~'EXG@hCx Xu ZE p.H0\p)ˤ;2$7BA6n$P w퐤Iν#N~@.)Y5\wfF/+jk{ľ+HI3h$ʫ6-CpuwTA;w^/CT]| qwd|75&Y1`tZSi|͜;Z}9>]'m ܮzx04lgW&bH Yo`;BԞWM7;GVgX3~~m@(:RH"\]b%TeW/\(R(wpZ)VsX aˋfVmx/߄Q(K B4+BJK/Jbi7Fknf MfxVǽ=w rBh b*%%z, N\1AæMDOEM+I3t dŽFn'5Ab8rGg<Ͼcou ϱw32PR~{'S)Ptۚ:6Rd9{Yg /<@[yyYY} ^Uzs~hߏ,>^ו7CV;0!/gm*'=&G;p.w%|wHaߍ`ɀN;(^JJLPOIt2FX B:! 
;Wœ[8 M@u6^0I9(֚1!vtNA{VGR4Dѡ\bz}7ǸG˱`>%!GώߟOJ6zhE%Xsyp&޾:'1,˧UC %4M@^D/ R9RZI 1q|PA J|Jkh?3 -reI!rͰ0)Wp;/B|} * ̕ 4X,k\3s/DcM|~X}"[Z5Jk291~l2m%y[i~2qKMs}^8q"c7zhbM<#WyeZ1W̊wׁ-UWXnQ[Gܣf^m$CXfdf^!:αb=h!?S~s@;QMv J6"'|ډ3 sS`g.Ǹ.A;CnswDKY'ʨ;FbGIuT֏O*y}vR;>>-(&4աV'g0(򊴸gG3JzR;/d$8:byy^ {8TCDK)^{5Ve1a_6Bkuy4.S0P .Q*>KyL*.ng0[4?#VUo ڜUY R͇1"b+b|J.Ϭ)xҚGjo>l>;~xLH{R':`}^և~adw2)8]_e݃eRWŶB#Y[tSZ-< EL7 \oDhJڽ7`Mmt>̼{ov*WJ>jp@݂ ׹!L=mטp_ns5NޯMUw4ӰN2 eCa^fga.3+Wي$}mzqR'[P194GPuהXI[_Ffoȁ 72Ф \mN}mDU` G3s3E[޷Z O͕[M:)phi]Vo% \|Kl.]T"Q\t>a*?tI@^1x[c(,_+aQē*R$Ih5\P”ȃTO VN41:YLӊ)g`!8alV5O"5D)7uhҪ)=ޙ' kVV4%7PL.mz(:0횝ϲ$ӣ yuV ƒ)MԈM)6+v(.RRK JڻS׵KI6, 9'd%V-THRG9ӶBoYNt_]zEnWGy-0my#cمneA˿zYY@.j9 TJ:5/}gBZiHkŠ Fw41xck9c$1^u^axĚf/ hbKtd+$4o:S!)ϔ&rM p)+Zr/9ͱo{=?4C= sA;qyWnRNS TiR9xvR9XƇP;??%૆}-?M -NTJOA7͏0HU?իRP(aYshGk0obuL(/y[~*5k6Oi:}I#J l` dХ-X)Ks,穛K> h3D_Q7^t <`q#Z@><]4|C +e/ڼ > G9X/l^l\ln>-;/.?#)y+#6Bc)a7<_N ud_WvOsj=yRtz1 C-E+vxC}\ &_j}RL8Aq.РEç\ßF>[a4{ dƄ) ]uATAp+҃Q# iA0sm-Bz띃j>J59\ %YƃzC8E'xy2=.%\nE\r'$d!μDOzHp7N4ԱOFJuVMsΆ*}4N?MbˮGM 1j_E0\5JJW5A\)Eg'77'd-smܨݠbM&\/Z-"V4XC5"6&bt\>B~qbGXM/gBm؍W/tЌ>jUCLqQ بDFTMz|M$= #"'=]Gɑvi٤J rE:= UZRL8\ 6I2p|jN9DqQ 9NZTM66n6ozIIٿ;7SpK&O.9jJt ;o ت2J=}1_pW^;J/MDv'})_G.ifܙFD_vv+KҬX:SHh7ηMaEh}?CY3b=VJQd"t&dwXJ.ɇ8Үv+-߾p9hZ 2G``v M,b0,s,]M<,O '|Ҥr"sS_бtU~JnqFfu%*=S>GU-XSTnVxꄂ!ff҈X҈ [fh o\-q+nc❿n.Hf:l??ܯ~&k3~UTNVm"&ie!Ag 2]=vu|i0^Y QWvizvq֫ļ?~~_@XcoJ*& LX _ڰ+]ڰ⢱~XA8O,3K@ oUVOqt\wq ZE6|߲>VPqA8c|r ŧϊW}K3 gKIV*'΂,PѸT 4KT]$!R.MTR.D5phiJnZJXb,mXKce7a <,[ߢ X+]4r^L0M`.ʏɒ8ZXLhȞ`:,|/]7G|ƽ1 (l{`:0}#h7}/RNe-[WmDӧbcXXӻH|1]Ʃ2 #;qtپ3m'elyzΉuEG-}5 *\­VEh0I0{pn?2c'6psHd(6׊%Xmtu"3̚đoe;O.NQ*;/2w& ӿ79+g, n?S^qp6FXNz;͏GYZm6U) g_/]&|mF hC\SxR䘮Xԗ$X2+nGNq麗 [1%*;X+# eF*#41_y|t׽|!BBMb>Pv-Lh|5eBf~N?9#]N'#ю )0( jt?p{._`9N\$O%VY=*QDΟĸƒAՋn NJk԰߼`H͋쮦 ~5Uy-lqㆯ{7(Sߔv Ylݏu?)T3UoR۟G5XP/~z!媶^_$88gM) ~;9~pATvqA $cvQO0qx'\iLe$JLS@/.l1ƓE˳XDoc+t)44,4 oXKɇ2/L6H{;ﱲ _jęA77U*o~+o6ڭv@ˬ:J܈ߩ#!OzX.jl j#gVp`%_Ӡܯu=W.s FngdWl_YjaE\n~n _wlotӁG] G&k{"kR Ώ:7Xp)tYˢM 
ZIo7'ӨA*w3^*pKTRID)nR,W->եarDH$T(7s*vuvx.uX:7MrmAq|*w[U=Et}ml1́rq݁COV5sGm Ne4AF3tF[Y 0 J/UQ*$!L-R--TS-0GrY~i _\Xeґ F"b"ϾM鱲ӥM)U;>,n|6Ѐy<>-6^FKq&gv9@KP [aߍr?K ?iHKܦ!hJCRR|ebiZ+YhI}Z=Vge_Y Vb@+Lr~3,-?1o7R, ?T֤* "-6Z| >Ku giy\&e!1B维|#|iy|uiY}EI`yo5Ň/m>Q6No7JR,>S| 4!3g)&B1,m?|Q#-qrץXwO?*w[0%9{=!LB"Ȝǝ&M6IoZ xN# ;(p1 6(AVcHj-;Iuhﶡl`_ChhK^ X 78mDJa.v"YOrI-U vjrӭ\. aOE=j4iYU~DfןTr 'gM[PQjv?KFLv:[q N7P{ sGώߟm{5{V69UE*,XX&8<+Ҙj|H*s8wnچ8E9MeF"#SQt>U9ķHT Anˁ , "2&M ->jLM`Er;m"2W˰;L?+&Lwoꕓ_H5Gb-|ė6T@w.ʕӊN?oFl=}vG7c]n@8vPZv);7CU_BVUk %̑Ѱ}5U=DUZ`Y1;:7 y`nkX{髒,T!N.q`١ \>K `@>Y;hG|I;Cs| $!N\-ͤf fP,WDJ}*c*k7*l/w e_;u5b!> { j:CH#!` ?l"ה ={Q{8amq ajqZvu/hD2=l޻ބ>;|ȗ+[NS`=TϤ`iF U/w:@ 2xqj{9l7vٻ5w^T@1WyhʂRPX666jV#_ein։%ś2icÅox~@?=$WաV'I0އƮ5rI-c͡CG-`zǸ z7woGCgcNpG_yހ|R n`6裙wɁ C՟ g x/*#ܛ=n k| w\]Ю\s^Or߽Ƶ\n 5Q{t?>C;8 >ki txٻ K{5r7EMOܬ,hр B XF;҂s-3_k`[i1kkxV*w“eDéj邏i|thfu!A,j!|D=Ǯa j|c:LҚKݙ-}Gocg\|xT@ %g6%3[^Ze̠ ?72sk ]@ٜ~x7:R|L&pGGNg `30_w߉ -٘31{W~sC;* F5<*2Z9c´O) dU-w&C^P:{hE8KE'n˲3nkA,ϏxD;P.X*zݺ-Gys)h=7&Jq;,׆AK j )@74g&j2cg|+( R\b ܰtM׫ 﫬f\d.8.ܡUڤK";Z=B$P։!LpPYGF}Z.T檣%zXaiBmA%E g~SB#Dƿ!&B.|qBi& Y02ϳMdZLwkeLYDșroCD`i"Lk0& a)2;3e#:+Egfb4ޟoF/U8LgB7t'~$lf[-l\LE!c7vF¦KVidYGZD!+ڧ53|k:6Ypo!㵔rw~aw$HoiLF ߶h) d&;!JqB`W$?3i `^{7Q(&:." b\0*9ͬi ߝ#8h3\Nt0_Q~`m9zSVs\z ه ͎Nм45*L_gG#唲ٌG@G!"H HA,)D00pff[]1D:Un&nTdDlP*B s0kU}\9;_0 = s'[<"nry ػZZn}u H< zU12xr vn挰<f= t< 8_3H#2ylxvYf079!0mG fx R>5ےZOӪ$x @jm%n o{86C^^JskEz4#&A-5$x;"nko ~A.~knx-~B3{yn“̣kQYLNщu֩n?{ϟm!&yyV~) bpgEA;z7^=k Kzw|P9g݁кcx2\hql1 \J ?cn~Ɨb^g[|Jf"OB fVB.X8eXUf q$F/!e6|FBA>fs[N:I/iL1>d޸)qNdfڍ:6=(&T)*0w5b_(?#ǫOIP3O}HXR@pDizPx0EiP!rQ[V.FT$S%Wj=y"@]Iʽ3pH Nʷ,FYpgXpTdʲf2Y?QwCBMX 1rk&T" GE_}TB^9򉏒qM+3N588$N("ӪIZ;K#Wsl6s,UYRimI̚= ~i`<-iaIp0+l8,{`XGko|QE+7fc Ϲ˕Uq$':"C`lM'`bZ17fߖHj.MM {睤54}(&IC1&Q[.'_9:?:? 
YҮjv5N ڽL7jjo>?67"OaDlkRLyx{n4B5|1x/-x9;{g2 #Kݺ*<`d628p"3(K53 x=7]+/"!UlJUMPn 65@|=0*[${Te6yQᥲYUO3P6?}$#cj utñN$ ':"h%?:*p} B'Is3Yv[Kk5&~x$1@+P }ېEuHBQHZ>2oEMkwGJXeHة/[ fuZv}C~Z9;;;:7zMZ(8k8Tw|uK܆@R*yнlQZ)TLFr6rL87#pQVx \:iAǔ2Jz-šhyg#cOhK,ڽ=\R}na{ _'z}u8R"C<@z"]&#.J \[i*NjV=hv0=(l&OxcJ.#wd͹H33pڊ5 n2޷9f8ď1 dD rA@ZZQ<:~vw[Fso~NcR"2_¥{d9qe*WѤg,g tx"V6# 3fY۰3A^1wtH%D%eJGw{-,]ൗN\/\GjFM'pf|-gAcYf*禀A.Ц)*SɓՈd)?B4ٮd-' x^_:"B!ІM( @wt:Û%ٌT3'gJm" !U ga0^jL1| Z5*҄ -E(5 K|#n|ĸ}>`RF!10FYH}ՏN8c?۱l q(ݽ|1qv|06|?±/B +1.;QqC[n@.,LcI(W(xXb_)e/oo;HT8@@;6#а;j{A9MAua=a ČA+a[4jAs-؀BЕdZWtQ^䏥l9s~k xd&*/iBlEdں]~$tٿx[n;b?UЀTbJT?8W-{P__ WrRe-S!UE(rcKj,G6q7dx'bLÌ?]˾K[D17Po9mtRPsgʤAkhdqL?*xfhEɐE}g08\6 0^fwEqMK̦Y搪h[6M ύmo4}[ɂb")컔Ȋwe|ɍ\&v"L-ytnlEPPۦ̳?^wq;h2"-D(H0aB`d]7Tԗ渢'HTxf7>[=n+].rrzfKbQCD:P]JBf}6uL?5Bd`U u%䞁S* "rϐ*_{s krϘ~Jq!Hsw|e H!t(кcBcy\K=6Ђ-֑Hw L'xfIx=㶮ƍ3d( ~}zZ!@ۻF/ YuJ7>:0(P飰4a\v޸m,";gYIo}5?9{5o_^Ye{/N2(6`t)- tbˍ ՠAtΤкB&3xfV]zW-/_^> yknE+tp[; \ c\߯TH '2,Gn?-Ebw[1 8CF˪XRi4{.6ЈS||0AW{iOBNg8."H{Z>a@<&FG?Rz'#OlV Lb;+󦌃CJfd}-Z+a?vfw?~ŷۏ2ſ ^ܭUsn֪{5]ԾNHmm#mLvi8)#6i<ڣa1[4;4l*JZ>-XN, ,g#?<ˬO^Ļߖ.(?/um4)OciihV4FPpLZ"OѐpWX_hmp9~{=a#`DK#]OL^[׃΀jR616R@Y lMTz3@%K)So䘥JmR=qWŢ3ʈ=KNۺoa6m&W# ¯ uRFS ?%@oHL8RׄtXģ儃ؐYGDfwMJg`̱ 6ZA=JG> ۗzm'rjk,nlz-'(7!UA5tSuY(bŏ@NźRaźb]Vb]WJ.+ZRXy@ʨ"2L”*&#ZZ/lBy1Zvg2f "' AtH' 3yc`S >_M %n6*^¢P- -"0UDѷ2JE6r iB!H?Zqo/WZKmm-byc5g- yMo<5^0_j kqG?KQK./=,C䜼?Վ v\?biUJhVy4xD.v\ШܛCnFiwT\Ņ#ݬU.?eWU~xJ ,~ >? 
RꞇxPl}_z㡿Q^$S|qq;٫a{nZHlwƣV'Y5wyc;:|]~>ў@pX*,W|PxО,sG>7ո` C Mo,A׀5SCzj #]\ hFUZCmr~V99;e9}O3J,zG4.AP,WeX"N |JYhZ JQ9kkiafF4-;ق-":Uw9$QD,s2Ä u.ջ!4%פyͤwl| o0TJМ N& jOYqLдiEZR4>;:: AEοrtDɻY픟vPQlt~۔-uz*n5 aUpJ;ao&sE26 [KŰ4Eų3 dJX:a6KWTRb ip1E.බ{ضɷrT¾ӳfXr`ڭo0GmTɷrzO5r&Mf!VVrmpim娿;&[J{+gmV_@}r|ضrg707QO̼ߢ0Gk7Ke7[IyYi%&bؘ0;23YR31Qa3Bv S׺}F`7 4Af&"HcML#z$H1&6D6.]Z,$lH'7[ OLnxϘ bttʒ^h]tfu^m.nfoaҲ@wt)ۇ,sג}%ԠjMl*8/m*.LtlAK_2Ew?5˵yݥLRn.~JTԭ/н, =+FHoٳmYC )7DXyFLqjqp-0ϮC4Z`8[ϱ#S;͖ ?d&cM|;ieu'>snɩUNmVl#s _.ªѲc4fiC➫ ']}hY\ Ik3T d&&3MM&507095f' hzR5D|[P1eR3Gٱ o$ *ԢjOL5苴 J6`4J4_gfx(~9+% 4$1 Ac{Z5:׳n&<-Fl Nj)Hc!7TuʜnR+SO w'rE.x6.ΈS-Đ ^4 kxb\B 3%t'c\WI<ªhb!T Lꗘ4KYcrުYx_$:O ΪE"^H9Y'6ч 6^McS2` S-mMڰti83D 1+O*4=k"6~$ЪM24 0I1#7X@de p :} }v)Ȇg!mUqƤBɷVY % wǮ۟^A-OOpFtytJka]:NG&V-&V-xABL1K4R40*`Bx#*]5C1y+z@õns #.5[0n{RSl=J::a:Y@Y:n)6RFT碔)KҥM˜'|ZpNj>/3{ ̊w:&Vab9H!#^#-c"zHXU5뿎cP>0#ȨA2+M _A4$'ބ k^5:qo,>5ue$-O_Y)Cp]=e#|(k[Vff[_M/#DB\xݺ`o|*x_U;$EvGF;{q?MypK9]%>wI?8]LWml\B*cį" &!+BD5&Ԙ\Ucb]ψ W7t_%&:bXSz*Ŀﶜqg<}$ҟu$ VCoiH 4JPUCX J"Pd; >,SzzM+3_ӟ/W7Im} H 9 SnZ&Q6n4'Zs56]_w%A57 VˡS|P<1| o-/j:\rbu\vn罷䫚imX84ǏϞ =t?{N]݁p` s fegR &P!|Ʊ3ۇ0%w$:Gxz,5s,/+9rVZ7JP_sCv'zaߋ]l,nȬ b2 FC{ޢcjVۤ=hv0=(l&OxcJ.CD0\ x8ml7Ytf\eMcЙAɈ(W&-(  ;rr|?sa)yrH[]ҽ SMߜ2h x,g txqLf$3@~]ag6*bqOxߏ,)TWZGxE ^{$xς7 HCAYFM'pfmޘAKO1oI3Yҵ禀A.Ц(d)I|jD} \3hn &ɰQd8=V΋-R+d>B*!71+* "F.vۅL%:zo2m" 1B \p^?Ƅ{hx&04`/_dՎeVviBABPCp|ťxE7{>l9L( dC4(7 7IbóI?}gyg;͑i>14|?5ذ/&6Ύֆ輪Y8yYP~c2e'20t(^umțÖs,S @"Ө{*E+1 Kkv:xR]"8pTWDD|(x/6- ]S"N bI4R2mfI7G'ŷJŪERw)9NVMpembǎ,ԒG=]ymʘ<ˑANo.#b۞#] J(קL;ʬhu,M_qEOTn0} z05VW()w%] 4+Ţ tXZ l/( ![ݳ 'ˡ4X7uo}B2`,@/K*9_6K N=ǽq&ƍ3dƟ<7J__o >C{zPS]h:0(P飰4a\v9ƮS=t79 KƠ>TȲb&d}; |aO遧3cWJrA^i`rP4o_^Ye{/N2F70úܧ!Vk1:F[oj J`[3au5{L0)BÌz냊\ðb ;5-t̬7cYD ;oSà[i b_F{ÃĶpA"q (m_mztwI++Usz< OEޠRP+5` X1߃%9j8CeU,B= h)s> t7r9mP)[n>ynW*gIkCyMCF=-ԎJV+Ϲk, HMH?z=[dH Od;+#;42-Kڽ߰c|OC;N?ևSG/X֪97krY.wj_tF6jx6ؑ6 S 44WѰ-fq6UUN$WtvˉE3lļgZguɋx7ҥnFSְ444iTu,- MՊƈi}wKyj^)FC]r>_aͣAzV/}½EkzP-Z&F ()Jod]`[ ex c$mS Xn),WZ23X [2zQ+kZ_T-p=SSh) KsJXc(t Do7|=O-e?jdTeYP 
j)nOQuDfQKhfx/v\ШܛCLFiB!tƮ=vh_~`ԯ$0~Z)MϢ^7cIYi%'b0C2YTYa3Bvs SۺEF`7L2A"fF",cM0#z$H1m&6D6.]Z,$H'7 O"?cFk oӽӽa #*Kn[`aщ0{m@A&EI=Il ZW緶]KvPs5%༈rq?}:Ǿ(`!3hZ&DBx*W>+n|s 7҅O2/{]N-"ΩET7b kYyf"ڌ{ɵ̵z~)Nr4)g_oqML. ;&ʟkvNr bXv֝6+O 'i-=`G1SX}B0,?~MB-@+o%S&5O0R-ԭE>H YyrdNZf̼,S+gk>8,_-l, I+ÄXV)měQ|R˶%cJ#5X/yDs3U2wԩE}Mb>+t1vl|qaA"[l;hH]x<|46A?|w27H+>e`&s?BMjuەT @if$gm͈kzu:U[j:@g*yVH;!1lkmBk.Z_|- %-G1;^0 "p5°zy8D2(`mgIUi\M`lFűXjaRt^&$$.Sf I+XC/+h> (%Z9*+:|zFA߭'WBK'6p#>B}r:H];I{:M|K='H/-T!@CDU.knc=؍eW.t޽k>8r$>@o\u[NDe E'h%fetS/#I*^4 S2Ґm^tc{d*O7.Y1 $0kj@Rs'$FD^+Bj2|ad=vE`vB? !)k( ؊N_Ga=у3W5Y4Eo"j!($ĕVhЗ;>K00 (uXm`.4=$1?'iɸ{go1\Frvv3E_bÀִ:N^յ^z;n*B1ڣQD*!+BD5&Ԙ\Ucb] Qkz`b-U5&d=2%g#"Ffw[θo<}$_NA*l$<å.⻻+ )[:"|8 VH6َO j-k7qi5a|W|@J! $w zI2vkžL Fq(hOy6]_kAe> (GLOf9NCI|5> 像1%{۽JGK)<d{"zyQ!ǚj_iihc"- fDTvqD9s0_aId6xT=y&VaJǏKG0  _ @^'xvrzm|2c|`VIy&u@k"pg;}C j1Xr7?Jʮs?W\kYX^hWr̭buo8S1.#3 Oa毿( Y\˱vY (/6*"e94L+@E7\w }'V{І"Pa,{PL $^ ]z9a͹H33pڊ5 n2޷9̸a0˅΅Ed!ouK&L6˘24*ha`99a5͢w݆٨[T eǜ|Eq.ZKe'a0? ^@..# E@07Z-=Z΂2#tdy҂@rX5^&ȓ'ER*JĠ=%@&V@o(M+Y{x^_:" B!, yĬxȁ$a!|sMz/c3?L Ǵh44*A_8~/_ߟ?cj4jQA0Ɨ/2aXtٸU]PЁ!_A|qݏas G>`RF!10FYH}Տ?{_F3+[ !8{ ͬc6b=n; 3oJ/-Lb6[IT*lt|zhE{.E$´#Nj7f6] }YCb}輭ed_h٭eƕNū* yg"OqLd{iEI{*t[B+1 A%Ax+%8{ `HpEEQ8THfp"~8z_`fZKz)W ˜m\Z7"Pk^u'#,(avI7?9Y+bM>ն6>[]i8ܵEWUx:izo ?DV77?=j=m6 -30S̒PA_xvR˫9 0GY&7?47tp__ׯ[H~ǖx"G6}=;3[ZM+ |z+K:k ѥ_(6 Ȑ-Y%?$ĺ&mY[/bA ;y2S3ehstg`7]Ig*=fdz@ݖ{|> J415qf˹h yC7*4}[Z1Uɥ5/(x֑q/cܲL]QU?4 ~U>''=WqJOѤ F z-$}~5fG6 +/ўy8p c@4' XȲr4~[j X2^?2&DA&pm!cнQ(N2&0ܧUvetb1ܜ~3xVuzÄ6TKkwJ`%9)٫OE(rMҚ.YNTkkB ?d;`yf'jE&\t<=ǛδGڥ@B#ٳ5v'񀗸ZGp+ I{Wtp{|8 $m4 ٠'&)yraW5x`xwa!pK9"8C%yU, a!Ya[Qym4Ԟuk5{T/7A|n6~?z}t&]ϣD H6.\S*(wmGTK?vE623*6 @-ãEQ%*J|9Zen4K͒ ڵi3yMdM||C~ZmS} Z)#U}MHW M׌FsVޢٞ 8{`?ȟX$AXVA:>P@k)K(=%_(+bDL(hXboRj}Sf"-ي؝)`MW8& 6%DMͿ(kK5܌}stI)eH]QooR_wM1n/f!5Q؀yRV[zػ )" \Eϵ j Ѫe.EKF!t KNFBd-umU.2~} &Ļ`Uwɦlw#w)}țq& 6w$t+n9H&(sh"ybDyъj%!8"ς=0D}rNB|+,!nmLEJ V.:p2& ǁEJUVI%Y3v0vBM*d*;K5PWƟc1KZ~~ztQ߰61d_B;(L3n0/ 뇡Vg(ǸX+ax`eZW)>,Mv=vk>TZQ}_kT\?ylm-F)6JwD)*v 
LuF`K_՟RMD7~GE>~2q4N嵡Z(o~/2E[zcָ7iqQJ  (r:MU| =Q{y BhbL OJE ; /]n9(3~1[o=·xy5B f{ԟF?.AZң3Y,,dk&TCwg958z:ۥKJ{wl%bSGu6=g{V-BDj6N "lC ! Y;!*<8Dx.k-dBLW89S̯Dc%JSi.0f3uE#.$yD6RS>PSH$;TNZ~]ݵ'in/ `=kSA=/y.*b΅CP,Z5Tbk;W %͆;EVb,+~,^K٠kr?dO2czY _|ثWӑ2nЯAvX+j}=KɄ#M-EX`s|!LnbhJhRb2ȚX%!`rK[R+smyݗ__EBy=e?r_SZ쯮af<`!JZx޷g٘vZV.!@k e2=B4$b->f|n)$[|n{=7̎MΫVi3;JrM"_2'R.2y`~Ywj8.W<43B_'h럓k0@$ v) d X{v acT9!Ā#cM"r>J26`nЩ]w4$aGQ8$cwna <e Sf;^T(-l/D]-hIׯS FnErzKD %fBՌF|]qiăh`oePtb MWp2BQnQQ%$THx̤w%0U6O5љwC`ܓGOjЈxvLҽ0Ӛۆ*\_Ӹ&Iqu;Ԙ9vfMQ >7+0\_uE˰K#X5xYz83^c|&$ F^}⣤DM6AP-^7{cLX<@10%}YsM8s~ey[_/Zm]}c a$URS;ƢY>=8yb[>Ke 2r)nݤ>{cOͬeMEay@\|RGD0kr,[_M.~a-r_65z{n)E+9ճm{6;$Ns;}Z-iN& Lkab%Tiŋ[zAV2Xc#Uh׌tʮzۙRuX Ock;;ݟDΧM ·LTq /SQx, ְh#~T7Y*;ه[5^MCTk Fzq@Hu>4jO=441bbKbC 8珱a|e(YVoj:T g!.sXXNtQ/!nşCX]'-[DQUJ#}I҅ E{mSƵ} ڑ6ex,ZM.-˧`Ac-5l+~}x)FܾϘLsn7_lD~'&v7V`y^'r3DXljl͒Mf&yWtc7eC+jXVP034/DJPx3 HKݴιNJt7AXSmX<9a0ZlK[L^;95hqԪĎҹoDq(u:dڌ!n\ިS1WE,+B7nl|QsN[MG_3Vm4[րjk!K]]3lj?FAt⛣!׏H2?o ́ BFbIq~#ԭQ/t]y#J}XUUUU ;Fg_ E !Utz!BeR'9+Vk?SbAh'drB5{pG,N>].\@f؝vEhmmŘ4A T :˨Ň֦hkPu֢Z[i gHɇatv ,p6j?xv>xumDX. znΠJŒ*q:1-/#ZA&PQ != *p6t#A@яrQ-ٶʉ#b-Y+PiIWE[\w FmddB0-DviM9:l7|xy蔾3-f [ygm Py f 2}x < ,=n fE 6ehF0x8< - 6)` '`@9U@Yp~@'t*EZ?(%g42-cAQ%FkE<^BWu(kcF&p|BpУVa~ x s!D6gmqpՄ.PT~?dŃȏʶ D21 8M*mNp ?W7yW'W/4Tj<~L4/B6+.A!aSN~-| EpMByl;;?؏#&/jXz)r$Ӄ3Lox~mR̴r d^,\3g-QoX.c |u18|A|ŗ)iw[laRqe[{Q@X󳣗Gl[^4VC&C\ M+TNz,x-q_ z{}o5`㲂4./0c\nI-;=Kܝ8cH#⦦zX^ QeONQ(t=u7'x~\œmoqXyJ gr^$nim!pk7:@ș%aKDCP-`!-w%Z$ 9% c- 'I;RU}??jϥP1OWb•(js^,đc82QF̷bĘA&%UqH|n7*O`fCHj1@ _o_OGnE[@~Onzp]%j7`Weѱ>C̴t‚ ,"kQ_rA.yf)t[x7*u_X1|.ajp 5m'[zcDoʶe|)3i5Zͫ;1\' }y%c}9Q~98mMbor`H{jl%1'pOcJ:.ⱰZR=lscc mouuD?^@'q(X2f1V{ؓzejzO\qڼqj~_G>qSJ(k~boɼ6&ϩ>{H$nJߐ-A༹^'C{E@Ȃe1 ;:Qk:4T23v2CU]d]U b>UrNE[QN7Z_Y5I7ڴ )X¾j '"x ((K*<7P.6lN k! 
ۙjqeI SeAoMz㚂eUh\GgdSR'U1@zb~B~7(TtL\Vs½C]RujSVT7 v{o.PG!On\07ksiJk?h^_7;Y?+V:>:n^|uT?6OΏš̂\Ʀ~;?@b>y"}%#4P؃D?/ ՜2A8$r %P,Z̈>|ރ]R=~e^ꏆ4A'tٚ][ p-) .|v,'u >պ t$R O m#tSAgZk nZ:6TkHq,M T dV;T-7Q05ִu*|X־[,2Uo5K3퓶JrԄQ4 o6t 57뷥"HUn1bSfNnxH5#@$U>.|v\;ޑIhx P䥸XqAi+bD}vZ";C񈢻 wΥl=I5MZnߨJ2ڔGrd4;SvyZIH(7N%jo(ě<-(p|}"~fN?}DIlOQ=T3_w89$:Kx??Uk?C6Hғ!fXdo c # O<ۉr\ta;B=[)$_> mǂA"mEc%4Ȗ*]tWC"]YLњeH3sG3uNGD2xreG r@4ef( ɱΈ 9G2DpeGI10!J&$aRFlq;Y o Eu ! ȐE|_H2S2LP)S\?52FY$7[lKXT?2XvSE &$g/H%|yȝ#SP9a˚ehctpM;(~yz|t L+$FoȣFI)??-{y8ͻ7W $6!\EO j &{(E8/Ëfk2 t5q2zSX~Q`.Dl&K9c]keknm)MgՂjsril d{܊;=)p]$ DprX!0n*j4q4*%'h$BV~? OV घEh|POsk\C|['ێǮr]zA L|g\yIr{M6eA}'6y{06԰3 iVnfXۋ_)/̸U`P_/"RޱzYbeǜkQɴ\ I\\ ^d)M[B+N{ѲAܚ[UMSET=wA,[TI3ba٪ஈ!g_~s+6/=zĽ`/2^]Nl^Swt^ pDk?MU}55 v}%ܝx>[>bLK31UdlIs}v=Ga j"F{%y~Z:iZ) 8;?~]6, _ѵ="LEVhk4 Zc|d|C}( NhҁIȦ>"7ӋlG [n~qYz;e`]sgeNt^&=L,8z#;fP0 %yup.V N݀ڀyeO:o>a9r4FYXfWvNG}f;hE`\p .к(7 ԉ<h }cLTYӣמ:DicB/ +"զȡ$_9g3NA:F;I Z?Av;`Oԁs=sv`'8=%e2YQc|@V ɎJޢ3eN{%of8,#|'83\-iƙ4[L?%%{f6'8kԳ9kj}5 ߛ<͓9fiӣIaH9Ktnl}4SҬ6jb0&y'sҼ0kJ$f\wZg*j8I~HoipkV~|8&ɿ)r|{?Ѹ5섃N`zΙóGnS*98'0Ht[=}ݑyfۥ6=;4,QkFi?LnFv ]*5 Ytg ڞeQ{t{ó Iœc\[F7)nk|UK6ҙ7Ldynjon2ܞNhWCO5=#\шOCVpY >ˤSطUL%%DJap$^bڙt0,0Ww1i!CtZv"|df\AB1je=qI/J/"Dm%)0gOfʺyʇgM C$Y{c$ k+Y+YJYn=LhHmɡ '/p7t|pqvvR|.kbZ(]$"s#o3q9z0꩗d(r ,Ask鰇حE g UAl)وODz放qހxF:SXX)xI&pLco bm&/^D)W`I sïE C~!߷zeߛ`o;p @4Ξ8>9b{" )ώ0Ls=01 D G%\} Q򉗶-i߮.v $- &Nd&]u~en\ݓ"~n-hWiJ>+ VؠEr|AЁiBPa6_ h1."~y>ObЃ&/1`fپ"o58_c$_нePEeE俙,Hf1A.^LCW`{pH\S*9o7qN?K贙"oK+-,Z&)]ԐK 2SY KlFsx&r`I2g:9+ỹ%sq$NxOρdq٢x6lmؽ{m\b?Q.yqr<m3YQrJ}x8pq ^k'Ȍ&nwPN;b7|E0ny5 ;I|~w7NG"vi"+j6(S5.4}c^bU-S@xd KmYA0;l802؇ ՟T- Fŝx3,9E% 7ȍ5Zx*Kda-UM=9qLFߤ$^Of6*/4eJ˟LKF4SiZr;N¤%M1u A'B=A0W+J7|ƕ y #rwj*(+^Xȇ\?pC/| z% ^>`D'$`{zw_7u _WֵaK*4_$Oݮ? ݑ0r]X0l-}q|DR!+R _VJ,AP )ՂJ>P3Ӂw PZ`:`+g_+-Xb-I2k8[C0d\i^,]34x i22uL0zmd~kDLe1P9yJϞ ݄iET͓$T'. 
dPx ¼JJ%gJpgo7F5/D+@¶_ Vhu_I+W 5^4{+$\ "u>6;7eI4]O&hVMU(A#P@ʨӭ]\BZ7 z|]C a0ȧ`8sE;/Q]Uҽ+aL]mc^X*u2E֤r~gw-7ܣǏ>-ITMʕnZbZ\^rV{XwѽW@zcxq0:t,=S5 @ K%V]fخF*,Nhˑï^DM_ok;/2&Q<=njb[{ ŧ!Cf }E[0HHc;Ϙ18~h EΌS\,˪44vW( rf[ eBg91}Mc]Waj'M~-5܃N:(eҍߏ^TDMS]e"ұ'ß!уNo۱)W"e8w?=`FcQZwvP *L' =}QR=p|iϚuYS"ys,$R3v`v;*fO>iX>yjch_`I_"vr5vga_!OXî>UrŨ A@DFHL /u'H ԙ3Ws5JfCF ʌ @OmL,h2 =Ef2'⹽o` ?bh_-語! NjO\ p+]53.x@cD%ZxK e#r.Cn{H\1<ԁ3ZWguO T'\TYtM*Oiژim!}5;^60A:m#]+‘,mPCUN(^>RrA)v/+uB1!ѶL@gě Z*r52{'ZXFB'|( $%T. $@9WxZT;1f<&l1*㠪=v5//?2sӞVRxK魫kkG9k'i*vcT[l<4kitȄ*߻w Eqo[hR!~m~|x.$Yqhrˣkw0eu7tvNǴ10GkX(!+qOߧȺ:\i/"XJFi&5T0׏oBr`mEx&&Xwք>Rʦ}̲NO jhaO)0*;E鰂g^$y&dƩb0˯ܥJʫ ME +@oz.^WmJΥF˒vߩF!i ,~hyA+TG.%\>QdoyJ|A$D8*"t:[~ aT{ _}Oյp,yк%?j"H ]zrĎ!mv[FHc84xUvBya* whX!iF62ց 赇Rb( o;TR MS~?nVNpĥLpbiP(>vw1 T.kx!Bi`9ͱ)6e. 1*] = %l* jɇc:!i1WSgAu`_|Ćf-cQ$ ØFd,73OӬP.7 ƅlAdw/D-ܦupih󳣗G4xg3$>?6|A^V1^+p92q098F.|fzfV_j669jo0(7۔^e:w`s-YYhS{~ߨy|+C1J^nogՀZcni>tgmvoۘHR{W-jQ>_SѽsbĂ<RcS o ]]Q K[jϔK44ӗ]x94 Χe0*%X7.6@ RA_ bfV:ʧh,YOap%ܫdr4,=]MqYu5hKFIL ])ڧ9jɑ!]C͔wJ<,ʵSe[c%lF┝WLW\K=UFm}[bڝ}њ$6VվͻoSfٛ7Y-d3avC͢|n9eK)[Fj*}v͡[8mh;ߖݺɇG&ҖD銌f`3&yVj莨!>-): >ZE::VVƘ5_>pyʿ5 Dz(R~T HͪDϠDM5f0te5Zt OS,VZWe ΥrTDWYH$E &(/ϝ^-GIU$ͦ A Rް7QlޚHUKҿ_WZNhI^ԑEMT3+SS3띕"seyjʿ5%BjQηۊӗߊ+qI™\2JC|B IF|%XAR빐ga>ou\ATM4leL3J v4H_yՁá\$EUZ/zudπ[ X7@=vխ9N1TV: 9 ۺCOɫ=yMQL^b+"ֽ7Lz @-*oS~v2qyقSsf/ڳwDV9U'͠ Ln*hu+ b,-At=&]H\@7 x#*vupTiOun7x|FzRnwd&ˑ*ܮGFr!Tm!BU$\CQtfU%ђr#ʁW4Y[-;C!HRȫ ^|z@̌^^}z7 GH[+"9 aoX;]^F+p7~ONY}c.a;srݚK;_ߜK+[|.OmQB'iQZtED3+-UfxVh(|WC5W$84&BW YFoM#P95Bi}]]\ PT"6Z꽇 qI.ʍ߅$| Oy7 =xiah sqvv<8{H咙~/诞Jb:lQ* |~S/++r8~-q@cR2)Cy{L&˜u+z!J- JrS퐶._KcsьL2IIfhWh3hxz |zr(:Fq^]4&~{?i=[ɺlX e!uL~? 
t(&!"̏xD#E\08я}xHЇ!e<UyGl,lj-sQ㔙ȗ]:f݀QوѠKl]:^pѠˬzV-0͹*j"ԅ >yѡ[Ɖ2^JVgD# #!s"D-,?x.c4 #â,5X~,\PӸX#iNe6I @AFߨ9 j&j EBOl/2ƼFKgl@1nD&?}&P)nJծldi <4_*liiì@3(Np.i3:2BG>?M2HaD$bw98ANOԛ `رa{ ,Z.jv; h@.j,aѤ|MȲh"MEgѤ~>JL1ȉr4YSe֤y+O6%69s6J j"ݶ3픉 .+'- +9kaL:]HsX9u̪grf Qsrbirm+kO&Gb+A;SFM$ ʙ.I=u1V8XYpE[9SW,?Xo>+gvSQ Cw!{CBER+-7ۺMCY3y3sfä(h懙d}tlY0߈E8޹m7zƎw9ÍPcn̘1ËXqQ>h9ѡ&Ї aѠ@PXPm#@gÊRj̋lF|O.Чbڦ>9'N3E '85 ELl#b=Ŵ5s@5A^ݶ%T3D-VQQF"] g\FR}QR΀]'5ɒmW[4seU3j;Ǥ{Z^ 3P SH K҇IeI6ǃ:!)'8*q%X<$f#%7(l!('-E ;\]'Y6~ӨRFܐ0iT ز>B5%\8MiݸeF ϐi  >(T%L1\s.0FR' ( `$c w}aU\m-_ÊRjjKwɸIC/MLۻd-KJ'tq<oGzD0_.07*N!egiL.}339s{fƢ阩痩w2vyecL3d]2|#Әs:dwcr3I]/&|X%`⎘YfJT|v6lTy'eAP"}*UY ]nuR:wg}n%|V^+_땅Jn+RTjRTE, 9׭i\nլ\V?+ןg,qx4: D(==`U/$g80>Ĝ،7S x 6g/legx#7ef%qs7y#NQ9Y=' :t;9nfkbs)5 ?3Y4o^{>k ,Cn3A!U 9aP=ُ^=o?o&>9l6E۳貙6l!0ϭ[W⿩;;pT.t8GwfeQ2 ZpTgQ2Gfy=QC 5vpTnTp4p4Ge-&g|pT U"7(J>캙B5 \0jdѼYzਡ3L:i !A(D9!(JKI*ĴZFTlp7k.ށb !L5on^RM(yr7$)Kd)8N'KGD !uzg"Ҙyac,4]Qg0}eW*N4[$P 71KÆ]5GO0A[.:1 cB+,V('Ui.4 FGd2 Cl!I I}O A'"ôZӆvTdЎ5-qa :5Wn֩OEٖ tP1c:YVLaf̳`,o.032ƈu -Lf(0F`̘1Z.0ƅ'0-Qw쌱X m+˪45`')i`aB1{G`+ 5*U^a:C$&SP_6'LA7a}5D{sd/<1AC&{N]&k.j:3w+l! 
\=T)s:InLwòl`6I<;-{ԼYlɿvr!x/=pˌ[yɁP?c)Td́ͷSH 7׶$PPJA[$7w4Y6Ue.,ܶ\ ʍUIHzu/Rd $Iټ`r (*`KlD0-ރ7v/~9X>Kc-)u㟱S.ifq,d鐛(QNUX:S./bE $ b/ bs_bfc\ R&5f| r&h ž];c$g`aPYhE IJ*ͅbuH?xnS)hI^oޞA|=wF&_9q$B7Md5ދg/OΞHH88gd>'g3, &e}0:8.db H?8;G@yzQR\9h?=aß//N,6TB-Z^ԽVڰ&׭ )qs|K@*QՔCO381I?)^.Q-gH9OUG+~`"XffŝRKRgISBOƭUPi#Z+>`&pn׏y}vvTVelŰn8@ut4hcI!p~] wdn{־֮AZZێj_JPԭ^%vyE~?~N?=k?}zH%,ZޱM+>mBJ8h;"шjoQQ-rQ^DK fd%u.{\0 cև0h PEл r[qr&5<3Y e,;~Ų^^JO'q@v[r,uϛG2ṇ鏻&ha8YAGFRb QC><6  ?:, AlXC8~0'-IM4  :9v1T)6T 9 ZcP*6CvVKsRlah TMt&M'~1Cǖ4 M CÆUnA}m^[00CFzTke.~j}l7{hwC}˾c߳{RZُN8n]'N6*=ALw=)ݍHpmeG&008^]Ӟt͈"2*DDy @rr|~Gͨ>h7@?{~{ jDP|qDYK%(kevK^ڢ>nnnBۣjW,r jK,\+(P?dWK jBr,mhERG|vB s4>==}F=!<(yZJV͕0GP9m٤-井39vaTF2?0;{RVs7D>}1lI eK+a2qV3+rGb}Nb ]O8lFvf Ki\4}1**Ήff 4 9q}r%Ac'|mH* a>pe㔼^0b0e0 ,ذuJi\mypovl#:Gʠ`c<i Lt%&M!ctAP蓌ica@Htd#`TUQI 2Ӑ& 37v0pg(߁Aֈ38(JcqxknUj4!ԔSÅR$u5FVt@H*GHˣA)c`1I_^Hjh\8%h]ZЪ2vJ@Ļjxl-x1 ?y8|O؇qk4 ~͐1o1PI貜A.+GvZ֬_>otE`si?BJ:c&SHzPrWg89;ؿ8@pqHF"t3baIU1.qۡö_=7 !^0F Tg dCh~ol+ua&GI OlSzcois[?Q6ěz=E0D(oQ\2mpv7`gt[0؆qÙ>Jcz %SDkm Ò~oh$]>ƒp8!ԠF^e急HaL㫑 u JTE7~h54O*jX0hUJh݉F(q1lPO4lm>{X k<~<|OxBa\AKzt-6 چ)P^x)h[^g{IPvyg30^M<Ղlr|Ag<|13)?oAk &b>]h#dQE@IP?b`HpBF abTzϷdFLJaT-y֒ UZJ<6"5AO^sEQ19dW. fOEiK4Z6 l}+Qmh;lFrpngoժ[nL? E3T^94W{9i}4c0쟄ː,h'wv0MƂ͟O~>o^|uT?6OΏ뛲,%7A (!t֡ /cxovVyX{`QULy\۩| LDX48L2~z1#)p#qI/-Ih؃~xɊw:CTi[ca?kif0$K4pç8*#K )hrƕU&?jY+,q(hU9j$FiVp#kΚ=Sem Qh㉚6AMJgGMjƬ1j ,u6[^X(f c@X.R\699ܵ}҄TJb\ @6ɪVˬ}VϧԆ\r-]o_SH@R!5'<J0I)hKJ&W@(S >a'6p.y1b K9_nVbo!qeG*iT FQ0K3,/$T5x`֍[@pzX Y9Åu }p@9 }M21LzkV045H,\KO nj\NXǏ;;<nevexU6 &QI$a"(z L7[1]"9RIoiyk3 N߱ي E h* LL>)Al O7HSc!@yO?`+a,*,!mɕ:%GJ#HvظVҧQ ~kJ&ڲ@1ZmRGŞD.v>m`օ}OUg z? 
J*ʢ~0ZU6w-KG[1]j P(w*& M4)XfĔ#\Fh2'S@WwmkϷIF`'d!@c@ 3#%4--cg][cﳟ͓X]UVZFSݲHUYp\)Ȫ|.b_DyWj0t׈]6q>hKKrQK;ł[f>˄[~ R,E?:e&`uTm!Rj*VŊiRԀr%F+ }bGҪ*|EZ}П麅kְ%.}҇K'5M/jz9h+4XBꚬ-@.blU^ @zAz𧐴&wJg3g2IWF-I|#_KG`Zz83zָ!I<j e@-gp5{g@(ݪuz9LcꌖY vM3srV;hF1l9*MchB'¾ ?uK6bBUdٵE}6SJ\E~_ʞ}#/ pCawӈgV<{d RF)%A8%/ځw&gwqARb(uB84%i< iѩEҹ<%O0qf>H}Kl~jBuBd NLdn\c[ڿ~"Zn=ESvŖ,|~`4">1zlgp>wmPn\ieޗ>#aG@V'Q |5^>BrHD:Rw@~:Gܹ PoQؽWvͦ HD2g4 @dt/f=T$ a!!57l>aE )#=l.x͆-ɻN:͉w.\7Э)yq7f;{w?R IڒIY7U_q`8@$OQ(4<̝? T0nǑwoq["<o';0׷O^.it` bFhNV%j/9 &U<_BFDqOET; Q 6 gf=ҍ0gpp+n^46#@Ҹu`u+]BYޠePBO-^)F~lyΗNiD7!,j $0}TF @߲,f3X6t zs2 t#Eu@ԏР4N2P빴1V)K֪" [L#nSk#҂Q) E3Yb/0#`߱Gž͊ヒiv5lk`iPKq\zX'zd>͟v郰p-vF G ٙ{2_~8GiQ;oS5 f oN-jF؈/ǜ֎!ɮUGΉgv8No;ԡ#ٗ!=Px&r"vzy<̱d"툝7ybZ`Vܶ@QިjK>|ْ|xXN`T1E!F&vf;tXϸ*1 'v܉Nqg;$Bhwx190pR5 =zf:nKHuV0| ergr`4ͱsI >qN[VtqH!О"@ab6Bwqc]ncfٰgmOɐ}ebX ϗ!)ƿ~E8ח)򟬋%~ed/^8V!X˓\Pio~J+;$֮f5XWWPMT ]"2MT7]&2TJfҺj%KciIw6iǎdV;D-T"D*4PLceK3@Y$UETcҬ5P݆q|+͓Dk!ϛ{B\%0;Q,Vܝ7 ݆٪ly8P޼ȵ5DIg7@sOL!iM~7G}E*Te(\nGn 7leK( T~"Ξ>QsɤEY^x1 s*U/$*J<={-tV3?yv٬7Q; o<aaZAzvX# +}k}:ПNƤGMXqiZL)]|7,/23&EEמݨEurRܡ,}X4#պ-wrk^uCw\@7@<&I<|JT+ .4?<5h*KS P"Q9Ԅ _߰J_q.;\Vc;}Kә4ֿE߿۶&[4F2:;_{7/ܻRz5ܡ?܆T-Y\VZ˧KDpeqZr∢>*b// >HY}g>*GC))qNo;lܷyJ# MM}ZR+nfD;vfSEIvp?lqV@І@ryqcYO,sK 'y_HnĨA*Kh/\-Nw^ U@ ݡ`&ar~Շ8nWB+l,+@U=U3OŨTUriGt8S[U9 V<&?h yloq)^F6;Got5@|(\êa05T@&QD#8\Hl|@]/A\ X~ };"芸2MΒya-N\_<`E4/ul?3Z΢5:NĶl1}<7l'3gQ⁣N-8)?Ѿ͎ ;x~X,v:=χ9vLTʜM_y"K©7L+wU@4V=GX oMW{a0Bw+G$ov<llXv:WxNZxɡ-Yʈ;uCĔ{"p\ (, نRlo@WCke^A pz2oߤ6 QJU$!yJ+|f 9kGpm-YNeFp,UJT2AAY^P$EQ6L Η69|+Q* R!hS,e,) mކKC7~}y$h]P2xL\svY]EU=BtCL)BOԪ'N,\&:Yٲ0!QD}C/$&yYr_xSJPܹ 3'Z eEsINFjZ;5^nCW KN>yܑDIKN6^d3) qAI=B8pufxyÛL[IxBqtO ($bcfO{O!+eOE8ˬJfVa j*(kV[h7ܑQm4$ZU) OqƗs-=h(冏>"wS1@I7ؓ@;yј/p°沩PGz #||*yq #ދ>. g1o}/Ǐ ;y;)d ӀG"Oݙ(.siQK@]#C_Fx4yxG&a*3h-#S86*폗Èڙi|Sw׌J\3}gk.”x]:zM:O&ujF;ə" Ns)pwMMj. 
n 9&7Rl65*k|6"*Jt,)Z3)ڂ<r}ڽ0br2|UzOa",ʊ󾜓̗0ltG~X3!;Cޡ fpSNw6+ghtм˦},˅}0~׿ږJ y(%JINp_fee:~Jy,D)i|RuLH/ԗ R7=8]woӞx>^N-DxwbY<@#o)GTS<<tH>+z0=9sRH)[]0R}<˲]s>'2BdǂVGΤ4x^AHg` |_sAswӮ+o^cۈU +뾠V9nBȨ~2"4 TeO3EC߷ =vΛ$oo}4)v~v2umkz̀q2ᾈ;n3_-5,pK2Ŀh/GC-d O0Cx/:V>E2NL F"HEZ R~$!Vδ56C:H@'"UbIѩ۝lDQ+GJmPxã?w@*t C 2i:CUqN{8_7.Z<Ь5  Y|ks3DKtZRLhr?ԅxU!Y 3+=b3QFZ8Lz39~[S?žj47ѧRߟD>]\c2,Z"Kԇ埥ȵϛ>/=uz sYѰj7i/-B5AEW\9Hp9-k["c' '(¼FeqS]:3bO0ƄRHR̕GjIDtyqmU^OuLHQ-(Ը# 7!N2!1ÜE-~{.qeɚMoT兘KRtd.U fFŦBSw&LH2s:}iAE7ӶPvl ݘ*teIB;I '8XLV <:M"<߳$Vd|u~P*  'ғ'g$<)9֠|xRAR^Vbb45<跰~^ou'. dY%FI*hɹrNcNRHLYMAs9#{6m P.Aޒy3/ Pgk01Fw$|w]|v$ҘuƳ%ǽ9/r.^;Fj:^(Bpvd eS9,t5CnR:SՔ\B? {za~ 8)xb†B]/tPƿ(HL/@ :EVVţn .<ƻ;_Gču1)$=~I)wϊźWFA҅ՅSf<<92Ϭvn}LzŃrWHȏ%& 0p]4 LD>G)ؚ_TӲ$} PnaCȦgkrn_qLhsRfN]OK{*p<#9;衛JU:pP0 ckNx> D",;M"}ފ]qxj{PTTeo>&Bc`5;9 5wrbG슝gb{c$kHwl=46GuI43/w,_[}eBR(IC7\;uE]Wi._fW ɵw]`lhMdk~C$=tɦ 6Fwpm*\#Ҙ; Ywd6HcDF&M^L*WmmM|޽v{Q;+"?JΚH]ԥY?jfHILZ*h:QYBcE;EȀBU;"c@zPKؿ6{- ǥMbf2G8 Ii0n_06j)^/q{35wsS# şѾESH [@;ų'hh<'ker}Pzub'+Z4w3roěM"/,`"P|ں fQ!ujL&a=\3vpŗv?A`HV'$ǜ-܂SĬX{j=yY{#)yc6|wM9-^T+P4H` i \-rCFfMao sE; mC&JT &y.Y5$=oD{83Xwl0bKELG-9|'qV=V-q(\wI;M. Ww7P @+ۼ;& Lx7B L^ʯ43浭 1b 7~ۣt>yZ'ξCZ`2XD%\$9kȿ̷Jy!ʹCgl2e&U,KCjRCLCTdEecCwCR8M5:CWN4X#}H8̋exj4g> x͗qhVKF!=ls8氨O(<6|e=-mBSɶS7Kʺl1$hSݿ~ٲ}:( tNVmhʀ `tF#[9bN,VxP.8,-yZ!X^Vߐxc2O+p2L ::?&&{UXRB^֠\+Dћ:Zߓs㕯iMa {0ױ}i9\ C,ΒG[C6)L~#"qiZHvRĘy!WBp[!=& A=?|>-asY95Ac,[0 gcjz-MZn htEkP7}$M;L,oC$`>t-gpᱺ66?M]iz$l$01-(m=w}Zy3bDqQ#scL{=__!ſRn cHhZxH7ܦP-M9Rm%AXz6iyOC?F]L3.w48b^N0/2@әΆ/a,eWC)m2L.ODEd +ɵ%_LӸT(WrT,~]dD4gl&`ݴԴR '0] 3d)-m[sbdtIb&ʁmFLKOZxRilBKklBŸy{ 27|I2iD":d ŃE饦e*%ZλJ21R?p?j)NC8܈[)L*QJ452.ekfv7AyND坻4ch.GK2d|Ӡ;3Q{/JaSщLĐ$q?W.{qaRBꃉQ($+bKσ~wuEi<tC_ d:ӇV=]V~;o({,h`ڳ}d"dAN/Kq~;øVVDY\s8$'8=&QxC`C3kO۽; وo+Lx!-(T,d! 
av W Qۅ<(}{bjF'P_zKcKU8.c5* IQ*8A1c&AX .7H(w0NN̦b'hcBLNj}DE[R*7Ggzå_JA{ |[$`qιC:>~5PRxoQ>SK&z4w?O&9oBrz!.F("䦕$vBvl1^"͌kl&BA'X#/j3Jy3yӽ/:2v+H,fJgl=Q|%S(\A͸,|-| VǤȫ/YqiYC]A&qUʨrӚ.|5!_tXǯ}ˑvQ-G3 !HMjk0~+ي|/׾z2mZaeZ-,Q.v6Kve-3fXؙtSOvmPQ z0wȽh^M7!ޅ9!b=/(I"`2-8l!_ymA-CgJs &7 o]c"kȖzSWd~P"bB=iX)`Lv$D\ɇHI 6 ^3RȆC(%.Q$Ɲ16H8ّgei56k<`z2(x&WDqsiv2nkuKb YV=SI3OF` P.lQNL14WNIS h=rGzJ&avj8n(K͐lvQ/x ~uh,4֫->ZF%B^%ۻR}BfiOi졒c4S)K@)iQ5Jfs'J)[}cB杜w=q ˹1p`*z~'g(+͢9M œ.8 BMabA۴ Ϧ)tB~=C(CH`Ķy~(G=܏Y m-g[FLnqCi;UZvv%8VZOyY Ys>Vm .h-},.:~d vFl@w9!Q^=`GSnʟ3 I+l9iݠP"{Wbݷk}xp0;EMWdװ #p@-HtaF~,{?URqBF7R.D8-Q8S<İA{mMоeIxqyT8HCm-d6g#=C;g+sQդM 1FBX2QB|Ct񇍦|'!vF,d0z0MMBsnENMo>R (`0]/~mf1ɋ!+jqy,iMKnRV cXIQC!EgKԜT;8;RZ4%U G*ɴWItcNJ0*D߮6nISB&ELo`F2}Ҡi]-@ŏ|FطFlZ '5ȳEwߑKq!ݒ2-HBK|z"j.ö4Bgr?2%V[O5 ~tD23Y>K&O͓G>Xj7uL7R0aM$[KňT? =L87{8BnJneDעXKdR} %ںd%%=x oN&Y1. >Ոև<|Jz<%\nwQ̥SԫtFHްfԎ/KI#9W$dV9<V<v=8l?h^33pFY2iv=N5hGhPɶ3/Ƒ˖-c?6*ϙ܀FX SxV-U\֏OZigcNSۂ2V?;;/tKjY{L<}hƨJ+nwp BBp<<}F$bURU^\-f݆%I'_`L!_ ͟ 5F+Gʟ,evg_{4UWeu[(<dƜH$ (0B9Cy1 hV5ErVk.o4 }<-(KyD9~χhQ(@L7kjd|*5:;G*#A̖ë;2]OrUrޥ/X`4{T<~L哸mx>nOOq]mJb7(C޼Nc4<) (6t]-N78  YJ"Fn|= [:y =vi{)?IPM@ɽm]XDgD6^&IQF7d 뉔K9%'5=A'! )%yKoӷ|G)}KcGk6(JHY;H;c c=.ZL=X ҙU.GX;[1'f\{S-%qiN |7 $s`|,g83 dd`1&ǧ[,EaBwވLLZ?sRwŠt ?GÀlJfS>*{oL ,gڐL;j5Ow{puga3 p yV]Q?u ®z3cm_Lk8IBNK<4x8?$'|G]U!c瑱;гG}'Q%_{wQp胊/r dz$dW#n <$ޒWR$cIi4ػJ2y$ c:Olэe<^LV$cI~AK3>㳳n8.! E7O&x "5# ?9<^Ue?h{f߮,X.~$Y$,Hwp6+ ,Y#C5L$֪˹L+[ 5c )16+ʍ>ȳg> <kucǁYER7E/Z$ZҫI<*10XW7wzpuvf|<[cjeӰ{k"Uqu|I$pO#B Dg|$1zߙpY6 | yUdARk5tJ"ҳ| (pZ{*9< ^IMHhUOʵҌ$YVC}uG+> IXNkU[a%Y>kĴ"gwuQ 0EyO Cyi]|<h<' 鼓4wrtDrl omH/J [„v8W,/&~歜9V뗋Z+&Kz󟼁WHA=]FA&)i Pt4,<` a LdA IqZQN'b0՘q*h㇩.[IIhlYNGMkT|g/Ok.=ph{%)+7ٰӛW^$ / hATACVq9eXrաqQ;O { ej)x:Kj۬U燝Մ؝ۻfrqҸE 0-ۨamf#3+i$` ?+7+k `XOqW"š1 8Q WVyQlby|ٸ >(%pRN\ʼn:8[: !]w .4蒾iW%@)jWG?&: ֨FI|fZ`q~z=4zv R .r\H1vѐ ]P xGV#i+giF8RkU2#Z 7VRت\. 
g`LsugɣUv٬759-]>`(93#ex㓋 f/Dq\gi#!F^oۂwcq^r^(o(eϐh} etB SC:Ap0 |UJ\ݰ&DQyڄCU077̰˵pACZ+J΀G`4M"{ʹY\o譃(I||kC7GVJGSH޺mܡ@y>.PD vO70 6;=ɡigq< jg&#t3*:z:-DM]<& {ߐz7_HԸj/G|Y9oXƊwt44d| :r}ߝ.'~Zo>k&j 3@^:\"qh֠rZ)?j5Q:cQ<}?DP| 00ƒ`4~mdʚqC!pR4~ pn! < Ӄ[ebKxCؓgX0.bF'TЗFϾgvR EsKϝ3Fj"!п q7(_Ч<=K(?]jE~4L &F?(hQyYyB,[lO_;ȧw qyL4B?OOdHsCz!6`9X & L)8h0]tpVBqO։( qǝ0@Kvh//?ȋV]~s,(=M+} x#Ck[f4XП/!M@ի\0|4*H+,( =ffo2E92aH򂜵N0'=YMv4*G4a6|a:W,Yl$Lc)FF( 3Zvvd-o򋑼gkar4v@6kMoGU2AnS0B1t=VņULy/'%+`W} UqS$PȒt%Yj^C7*v9OnW&#F-J*1yʉ ɉs$!z*z`QgG*g:#u.:wp"vT:mY@ŎTMV\;7@(VƤ$,Yc!3q"{oa\0V2D$BfJH#% ^<ѬDǵ\p3,3 :7"axvCՌ޻ &RmaS6XyFX*zςDL"81 9䉵|('wFGbCGA Xɇ90|>T]fBA^_׍Õ1xFh |C 8XbI:an,n$Ft>`Q+pĝ 6S8W~/"-zQ;Ioxޣ_tga|}P; vg ?!gύ!N‘53~nmT6t7@BH5mI \i-nT|? juQ^߹-%aU|z`f7(E*]Ԙ=z6 l C9CLƫ%L !1q׷V}<UB >߂pPs;9_;54eTeˡfqQ M(]gq%۰./~ۮɃ9+)h~NDoI),ark*h+\uY!Z3< uQ h\0тnm浆7hL=(Ú(no ڈY@;GS>ۨTx3S~ɡAc*6Tvje6UCpn'R׳ h"89_-lKxoE`x16DHL͒ƒ@q3`iDݒtm4 dt^3)e[]uf!r gv!To"a;IV1s]̾}{UvHQ V6+["TOUDeSR  g)Ih4jA/ ƅIet˪nV.'4uNqp~ѩ&`D<@ <5 K0s}AW[+"[F'ܓZe,x7RsKupS{ -lEa-]7~mKB w9Hkrڴ?lѬ <rx1X)1PgZy]h4񡖒Y1%?kP>1:X0Vj5OcA|P,l"EyQ9;kGP[?cXM; s*\^K3;9ot.+ӑ!H16)Vy^h-8а3RHn?Np:I H+,Ìu'ވ.GÛntWᰫuu߉y8 ’my4eٍ1+昨U2 ""S3QĢ`3Ïd̹,l|v @h5f1vP"͓&kQH6ٌNh]I2S/-;%u%XS%1ѪUt@:)j ׁ#n+Ӎ4o5AҀM80e=̰ډ's--Wgg{wSb9nqP#7`ӻI`3'1C\*%Q \M$(69TK3>;H =ް)#΃t01 d;boqq 5$d? 
JqX44H4՞)dY=M1He<RSpM4Nx  8ם 1xmO;A$rO IfrsJA0G`A_(FJ8(ʂIɱx \uOԍh \u a:Ƃ^#΂f gFf p]n嶗#Άzohac1qcm5l({]5#m8kM5PzԈM4(su,JA[i+ւ3f_cX Z_Gb*4Z+w'}BhlT1qxYkJ9 a `STG~I{tHZaNeiռ%y͸D ֱ|9*^޲뀎7-30&DANydӛ*݈bP$U,|H+#e뀥)Y @rPug(*~jdjwޤ7TEH@~r?F BT$A̻EX7FwҒ;PەڂcHE'%&?)n0 e|Q 2Os \0o[{Y5+"n#p]$S`9!oԄ8<ڗLBd zKOPAbjhd ` C9 9շFS b#ix${D~8A h<0d8Zt_iD!HAUxSTā֕9 -6<5(woau>-d9R6E8~`i:uL+ 's8A3"w7sǠ(>YY:8'@qly) eM LP @OH.Vԇ oFwSnEq{+ fЃmqZ7jǦ=.5*|ԨJ@$*Cn`Xo}hPyAgW%4ѧ n\iK)mhy.3}?!@U'"P@,3K[֝ k cX+b -v{m0NXAݮYw_޼hοOD, fD aefB:FGb~^#$Od[uZUW٭_3 Jd*[]F vL+ k>zC&%-)("(l rf+YR >ؼyr)KY\ <-;n3`PNRQMDiWEA+}M~nي«I+hil_ܣ]rc  `䣵u& yV(mhնQ$ sf& g$GC8K7b+2nDYaVc5H15s1%Jg39HfiYKz,@"5%*謄|s=4!BQ7TdRj mySw@, -WPZT<{ҫ(+Kƚ/DNU젙))ώS mo"F*ߐcjVc@rC@X6tIeJ+w 8fƜuaŹ*l&y^B93cgS--APըnzLy\#Wrp],b- hί26rKUeI.|k#5?P0m#D7tAd)+,3TqQDvn$I(&و j8#}zH/ˎ3a.YyPTJ5U [ 4` $Ë8J}~- P`/DlN*taL%\V1 :Wum +GDsi"uQRIJ&#Y .f,if*p]?aXQPD٨YK85sicp>adF8B/{9b@cv! b-#9~%<4Sxd,C"k(vg橘0N+%&%B4qLЫh eQa9^0F*?cylC TX =HC ё[92{Wv5161Ƅ+5ϐ(gYxW \"mQVY.BJX=4.Ғ2T*?v7 ]Anɣ)Gch(r$ӆ4LݲLVX+dUҚ]]}/>xhw a#&A'cE*žmڼm cd י@;ODɮez_/-(-(d@rTx{a{+@1BSS]E+sSd lm(6d+e$,6HG{Xœ%-JYY!%eǭ*7 ].[t*-0~b!VU-rAfA[I[_3 zq< ˒f(5%H/=eDأZx* O<}@|X"̒MiLہi([YĖ8߶Of JYq-EƋa-S!^f #%cBys J | (HwP6&*G:CZ֟J !A tV dTod[> dně Lk!䫄 &O]:e}\zӚj[/%ϝ[+0!kJ B^ b?涓|xN/mY6, ޠWB3$"Cin |hp덫YJ;!Ś+3|;)467-U"g-l(o%I;I 9 ͪS*<_q)B2W"ЈEQoPyVO%t)]{CWP>j>AB*\Bq!er4]H7ݎQnFXgO3GGQъ;.9yvKpB¼\)uoȂg1xr\.3={ qJ%>آ *moƂ(-΢'%v/:~ÈZ{1N84mT6#o-_EksoB|N>/0`NtAv;-+iWRI-Mo_ $SnMǼAqMDzr~1 rW\ڪ9ɷB4E̒:ӺEsΆg %Tʕ}O˓qSF s}3Q k[z5WL5"_S%9BuG)_R8E:@Ll~]!,ؓguQGhպ?ECqo=kc)V jSU{NgHzl3\oy\㪄Sz}?\$yvl[U7րLW 0+Y~eR !r/roi6%Rf\{pW-)l[m&>?o#ݬ)Ǻuc[U" K(1aE<{Hh!#A?R5 ={aOxYmf{7߰m]AgFH!|8fUJib^-r=a"`5{jX;w㩰sf1Pƍ9_Ӗ@!PG#,\/ /(U@{%֢.p{9,mB0xCiDakJ&q{n2*E3z̖* մ#h8Z[}f^khzMwsn+XVYÑr9eW?^t$Ք[tr<*hWS~j=%!mo'r/%9b򉛺X~:Kt{FIL& &Iv"_jJ9m<"~:˝6eLSK%,I1W dpt@4Fm{c;k,ae pV.fTH}ynArviv7CBoCr)2 4oPS.^엽 olj7qciDUz&YdbAKNJAI1C7QhXRdZTyr)w愖@LI'OAI|I#dLD 6"-Uv`p#zހc74qcz]~(7;#Q<=ZffH;=[m&n]Y?+K jNGHs8.P70X۰L) COR9nѥ5|&B71qwX2 {"6#+3{ExEC 
N**?4bZ4)za9Q?Xew.5R%(O7Z:*{;:D.l:> qDsA4j7L1Z/pc/1gധ]6Wc^O"ݲڛq Ps?<3jnyFx>4G3硜S"ĵZ=YsiF8¦B z)O&N\,GU?HKx{~;>ާK!F=6ոŽh|R,cdx#5]4M”CZUZ配"cf ve͇мm`x0(Gd|2MKo|]÷09E1ѡJD4I#|#"7Te R>|nlr J9Zt*ZL:y/ϊl3n3w\ TcGc0> QXUY|u#in"OUiG[r"{;<9sIy|]pİq'vSҸE,'3)[|I.W;T`3$Q1HjL+Ԥh D}m~D!蹜2$-v|@Ž-H\fο 奡WƂZ/ɰ]~{1<2,6EVqc u񩩅#a5;*SB;r)(,|zw(L=V=< گEQ )ZZeSY5>k\7 y,ɋ5HQ"GrY-[Xyn4. Ք3fwwSD1(Cr5j7 a[g5MѴ<>˦X^]fZo =5Ӌ $|%I&!>\Ȣ +nCخQ I$ /Ͽh-_˻ͣz!vZYhXJenٍ{Iq=+h`k~3I!L.L__]T}:~5N՘SØ3Q~JToY<ϧm3 ye3:DZ~E2GezxXAR@0X_Q*P_V>*TJ_z,FbFrBVp/U/BT.S(_'Uo4X`{̧ܴĥ^f|0r+;U<Oi\3'PUZoO#Vk#d̫*mp_W8[0__ds@\ W,yЬRWģߏAR6_" ,5~0nR+ B*`rX84jh#ЁQZ Ѯ%.mGH+-(RAv ʼn i ln]_Nܟ~fwl=w\X.juېS*~2PBFi G~T$wpB-G# \ҏʧ&\tny\gN7xu .  /vѧ@c_C:uw'B_E=oB^=xƳ/7y47%}2xR<gʘ)eFxu=P qgwQo2˯0g.L\bo VB"!\MM^9.Dbs_AL$M\x1B#; T8Y*ӯE2r9hY^z(VCbyi9 ?mЅ]ab}~*)8JGUqVڑL$x΅y/W1]BSF4%:lbKdkP?. }>:(l($ Y\4]wQ*M՚usGb5E 2bz!s_UԦM3VNveU`6+˒T0%ʛcZZk lUǟHm#*o'g>zR~L' 'o6 =S0(Әa1}6A͊wro:ЂqG'_Gן?}gO=SOןx'/ƳgA jO(@r[@*%x3GXgYiQx^~M rt9&Y^MZOۧi~QN.>R{N aTtĒK(\|D V$+'8HsX*e5pm#|7u2V`lQ}%z*ֲ,؛ ~;y<=Q*;|NJn3[):ǛN`3f{nQ^mW{{'һ{'cOmC:<=:l7r n1 21D$A G??C{ݪdGWĂ1:|nTWE|ָuG\P.Fs2 Q鴯"sgBqݤ@<*:I 쥒̦E)5ʑ#ϧEnLlCKd N#|cB&VAme/q/:(T;mCkE཈x4+@xazg@uVmJj%[֭`9X ׮SJڣJnky>4V'^g)tZ%ޟd,T<:;Je/$g4aF Iey WT"582R'(=5?pqn] D1n*$^"C0PjtrMY#nYjy$+дSl?VɝY 684E8G2r֑嵊!q>ʚחj1%u )qA3.Lei@cPB]s 2h"gU,|p¦|云\/~1j>z:g*n  ?Tq5jfČ7@i*hš/Bҝu-S7EąE[M) ?0Wc<,9);SM '0W ?"0I  q z8_4FE߾Soz!k<) x01T:p\uZBM|{MdbKO'p86D16D12G+[?j a/r= Z3B|iLeS NyH#-fHBɏD  w%0QGp-j4[?!r!'r=6_`ۯ!vw"qdU4R ve;{#U\ĘBcX*7۰?jnzQpP&$} Rqʎuh:&rhlDYdK`K?HI2M#JmsBBZ q]~HHE_۹]Bc fkћg,C:fgYKG٬E tgSz.cwd:lbe2J(#0s_aNk)ñg5"01cQFԢG}YRXp!jOQ/<״U$ᇾMb"H ul( WuYnX:? f:@:n8,LR\KZז@ 8C)XuIQbNdNl#7p hcgN6pvdJPU0/d c1,m$AQn@"hZz)#GZX]+ s,i>!:>%>6C R_o['.T~!^nXtxf=Lk[4 r8G2.N q&HH-_r`!@< CA~E1<:~`Lf_z or;<9Q1dG̏&|vyX|)^ Q? AF(bG-¢{H0p]t4kddmwTxXkNm6*b 4V2b%R#h ? 
*Ӹ~QBS9u{?Rۺ/q6Y#EaI&AӘ4"5b=tӘ06Ӹ$P0[IscR)FP49iOÍj"Fcpl3F'Y&B`(g,H'vcۘL 0y'dI".jO@ g dp^¾m܂@+\v '+Sp[c 0zQ|=ShJl!>aW<ܕ@ϓaR}\YEX-$ Qa!i ![xL,'OSpۗaefƓ$y8YI=)%>^JxP3%g$KazŁj>4NqAQVĎDbB{Ok{9j% a `0cLM4BȖ.W<.&M|hf%a40:Lʙo > Tp_M=-E bLiu3/@|<)PG Ⱥ36P "6@PP~.ۉru&sc}㩋ᰎ5e=(' čMOW ;}"|"ϑ,kR;ovM|3t> |Wj`A`!{k !>S OZ0oZ-&Rx 0ǦH㣅 .x-4ugWߌBMC'?jF?f'Ζ]HhF6:vcXk%LE ggM<;g "_qOk.1 di&8EhS# hE ?k*U/fY)aWV'iZgP2p|9Pkl\秪:p!гzR-@0lCIBG)}"Ʈ29>R,Y-/L> *13Q<4ͧU{0tO%JeJNQcCt=kO?ΏcvWva5QsY}"ޛz#+D+AeFQ""e XZ=-knSL ~pxzd@m(V׶H'[8!n~m:;E3$]'_AXMZRz.]gQ/hsk[4Iߤj6cBndų&XhhZ`nG^C+:h`J4Hk+Nf$P4:WR{ J!6 F:NO8kN֒Rd#cB[(DR!j[Zh 9PI=4]!L bZ$ ڂl0sQcS-hU:/KvIy\*p(1=: S~wa9ua5 @*_7B8Fv]5zF(Bn0cMBw$!9]+Qc#)"QPsUN Lr,g;-APըnz,]0rhH@}H /qjXVA'fcq떗 Bj k>m#D7tAk)+,3TqQDvn$I(&EM5rx=EeǙ0MG͇*oH*e|zgP0= G%Z\_j&D$0t3ZtʲM*pNʟEJs0ʼn A MY\h%*wj8BXA̖ڱLEnI-a:K>wo_2ȓ;)U SjjFKڒyR҂NK/TY{8-* ].[dJaĴl[6fJ 4UЋWQ9,~*BVEl-:DVUoIbլ]/EhCMEֈW5h_W-GfnEV&UomTYqqj8%V)qN# [HjEr^jYiGMWw'v [*ɽhbe< 2ɥgGXl\@^bEQTU@[Os֌i T)%9{2 nCi`y @߬n-\M3-+YO%d:mp|DII/bm2غ4#ERTö="MQDmj+F6~ps#(*NI9dfM݂lvIzi˱_ wE4wӲ+:4rfuJi:;6 JAVLg%dÍWIJ@T'ML +XgoRLťX D7rLR/ư @N>jnѳ!7~6\L>]s>١( $FŒGbHlaH8Tط+i"/ߐ8!O!=4V1>dїH;2`Vv]gŸFX54gTG4Aibr0qw4lReu.Q@ U0%bT~|8j)AN2ڪ)؋\;LGeKG G1u:;b>pt!D7otQs5sG>6Wosu#XxoW(cyJ鴌|Vop{NM1[R(uVuGeIF&N¶zʿ"\dBV2ݎ!{ Ne%9379ʃ+Ѷ5c5lγQޖ4P݋MX= AbB`xx q|Y)WÃ_:òuJE3\dExwڵ<%Jv.xRi&V;oyn(lZ wˤ$7먒7hiO-'SR1Ofa._ASa_Mb+XZjs{+xWڃJI*xҼDoL>|(Ʌ̠M+R^n$]ZG$o=)?RDuܘ Gc74ԠVu FOUrb\DA b!oPBYo: zZ -`W?gTX__ϟcoO^Y u@.!piC=v?AwJe># DQ5qb4nQx~>:>8yH596ĺto כu9//}TUnO0kܢ|8ۯ%Tzwq|> NGy܀ 7i5V~m-F+s.HL"ÊU?=^2 Fsifh4 0zMp~GpJR%`|rh\X(Z;z(M)X^fjr}}]I(}RyYJӷov#4g[4Vn!^qp܀5@W> }ػLU}>RifرۅSңc P.T(W:___QW9[0Jh1)6=hɌ+vSVQi$M/+)fL[L}uJ/__jҽ|E!}V[_ WB [e~[Iz<>I%~q*rdOiY͢L%*h6ܑ^ŵ/ژ0Ǖ7wD֊Z%q2V4WA[.񯠭 Rd˲2=jOvgnYS0L/)G1,n-/ۧY "ݠlɵglY3U8OdRY}L5{j ]=~s*z!,/'{G?erQh;5qMz#XFWx>'\_ .EhAdW*'. 
)kIEl˦:[kKh3˦ _Z+kc&\HZ0Oi.$ m8d!I~(yhU` ^ 1cU,=]sN<e5ڔ*U,FYpY:@gX^Ŋs 9ULY˫w G%xB}'BTo̬U#gtw" k`EUuO%sqFXh(ZN Q~ /Fͣ-50@9=HJA0bK8B{3!w ~!"gj&pj(`2:ҩ2| t@j^<| @YǾH*-xb.1v Yŧ.CX0G-; I\f1򮅳if1Ld/jڊ!'Ƙo#8WhZc9JlNcwtG['8u!]v8 up&#R:ZU UnBuTߜ$Cŋ Pp^A`(x S}pDf{:A\CB<@1^W#tW#5ݞ Y*gdBe낔A K{evpspuz=ֿ"X igZ< :S{ ajd ^LJcR{+(/b ں5dMn;(Dj35,6/3fmӱ&Cg--IVCOałk&Jk xn 6R5XFu(m*UMԒ,) ~fr 7I<)Ú74yGsG)$f9تuT4kX ܳJPOÐNHxβMU9): lN鎬7@0ؐ: ^8=iJpP;Q\c+t o=ԢWo_{^oNfU!v~lg? O T.uZ^g6W]jw65#P[MX=Ʀ`%H:"9A$ώdž(F݆(F0ReG@ g _ X0~II44vZ)P \3$AdݨxC0"g+0D5o-Ґ 9А 9ڙ+`z.q?5@j=:v/w^}zb 3\c,KfFS Gy f-_M9V>I0؆M Tvt n^@Ín G44n%Lmt¹OK?$HI2M#JmsBBZ q]~HHEg!trlB:v q)Eo,_ rg[408!R=ul~]ߔمt^ 862H%|p-r.5ia|&.֝IR cOj>D`HkE7>0 B&N[2@sMk[uA~$/ NOZG扎 t.- 2XG' @P^~3WBM=9%Ak@uHAg(1˰=i8 Ả׉A}`.]mΆS ʟ3 d`2͟;:= "c^ ٔq-Ạ?-.u ݕ]oi5X(8N4ZtlmkR~}:ywHuZ fuD:3+aB(^x8 ؚIh9=ڗtqbP@#OEBj4PҐ *Id +"~@@r!fAwxr$PeO; 8߃,ݢe$!`0_  q}WG]_!1RMK/a= -:NQP ZE`6 2,b[h6SOSx1y ۘTxXkN]dF\ їb +dA 5xMaxAepww!7>fN݃xt&|z̤b)OXkx^dtCxOiLAe)\tEeaalr lcd4h45kKYhSr#,!07cǨY\·>`3 wt 0ylIEN_)d3ɬg56 (jhWלjCvb4q@"b_ţxb8cMYlw5ɰx<pIzZ92Ez~Bk曠f: > +5IL 0ː  j)'m-7CeV0B;!\ |6(M44~=S=zgYمtfdc<6EFtZ=P|!X0}ģJP-GkE`BL6ً3Z>18&>Zb^<@ lfz2F(R$hB4󑬉֢ĉȵ=2 ; &x3f6T[bK2 fb& X?7󒜓v+qHwWb#Q]E x3zqwB(Z? 1 hID1'Kli2v/Pfeiw:]'2+KN6PG#`>W"[թ3][B]VY$,xGl-sm"{#rו:\t荮,8XX+N6ҝGvdV.'3I7\hr&;Jw fb&\,y";Is)0#k+i"bcjv ]JD9]ȬdV. ~'[NÌdl4\;Bwd-䆐L7dd >ɬD<^yAe"~wJM֛!~dq-\ȢF1'Y'KBF;aFH4s"~ҝعMRd%+d3]vЕ2x%'&w^Фd{>O6ljѿ3[x bÌ#Ik5XNN. fswH؇s*/OG(v[<'PIY|Q. [L0< s@Γv'1Kp-C(B9'9D`02WYPw9u9EINz)3 ˤ@.󻙊!zzy*9JrڏE<Rx=A(g\jף gA($x8zgiZ<*q7ےN0p7s9x"Cδ2.s(?<$8U ?9ddM%⡞Nzj$OY$Vefދ`Ϧ[ K=y{De:ddev˒C[,yS=a@3-84'Mvlgy=rH$M9۝$7Rrlv|+q>ˡ o:9B.2"$ rd0H> On#i^GhHU06QPU3Q ;['GZ$G9kJW#sT}-S-i`V^ҶFX~xVx Va4{St+(%-״2]c1%Ev26Ӗ%cb6*yHl6>T쿆bd>a5<'xY;O?u}ziCq}4#nER(_X=-׬ɩ5U<{}e9pO#(Nfj-숪ʱcMP7C]er\ ?ח3tO픩ߖobոo[n{]^ЕPO}^,P.lf&kdVK\ qn"Yg*(R,6S-TѻӗY3=ixnPնg*.  
,} d6`ƁSkUJ.ެ (OI "3V$Fa,|B'Ņj+%l& d)q#[ط\I*?cylC TX!KK0V4z֊,#ii\ih>֖1A-Ū|\E9+?o n`qT>;&k7tn+#V-*&\i*gPR3*%nhb/R?l&\ZޚD۞ ^v Y7oՖ` K,XsOK hxg"`1 u$M7ij81TIC}أi 5vLro>_xUK b6J8T vkPUFɠ,AƚA.dV*'j>HPx_jDOQZG5RBHB01%"CY6gk%e"M=^bKD '>n).U~ %cY04? H(2mdDi9A:# Msʛ ;&Į)5{ۡ.YL?d2JL,Oˌ0PfT"J 5kʓT2.7ArtbY ۙҊtI ɾAN_ +k%F<\Qke(ixL=w24i1 ~hǩ]^HÇ8Y.WNjW䋪Ԏ1RBqJqFWΘ%ka1uZX6I@,roC2.X@EYy$W 5!*7K'!ڛ>5-zkXJ1Er.}W=.h &PzVT>.KRyE!"Ti=/UR|[x_iu2Zm24r5Xa]g1eobZx) ";y`Zj6탌V]Uˢ282FX{p=A2Fc (rZ[34@]q7#9 wv^|-ݞvֆ\G;JjGkh(mk\G(/Hc 8e SxP&*'=AOc}Zo֐G4ڂSڳ;Fj6uML8huMpn ,4ARAqDN,*4; bs|M`OPb:h#>g鏰 >m{kо>ԁB/MǓʬ(ojAWu!2XUGx0ϭO by ]3KO`)$DICi'&J|1 ؁RAǧo~VC57 x 4U0!._[Sʣc P.(W:_y_y[QW9y['>И3Z[< @6žĦxdJõ{[_#(3^蹜4^}[U-m?E~ƾxʉZ&$7Tt(]\O9~zwLXS>iGhŒQy~iцG9yU;}7͡"kx-X:毠uVeY1G1$ǛP]t 1h=A"Z^oZ7M^Y "ݠlɵgl*'[ggR>NК=?h#,q9YaT=ȲR(|隊rԟF'voЀn r16 Yrº Nrd Z؜5leoAp_ZpP|Ab{`ks WKk{tW`ܠ" $n-'4m6dYBف$[zm ?InX^7njU$5. yIԪhjEV|/2t>=Ӆ7h0<˫.³I#/wJ<ָF{;8n@o̬U=\č;UGEzt0' =B#k՛¶A~˹FG ? 3iNҩRL*7[מȴ yVp rZT7vçQ@7:Pe۝8Ԭyyv} TX[䃪(Mҙ:.!#ryϏ3ЃK\f1~Ou3#i1PzQV9EmEy*%EjWV a5D4vOOƟ`=}3Q4j8J^VUe4mke"j\2DkoDLJyYfM1 4~Q4{A"8vҚۓm&Jv=8z<{CG7||> 8γa!0q9' 9FnoUgFm:ntUuWRHS$Uzc`WYR.ېhgg\S^jG'NL&-.tgDT%UQO0)"$Muirxd@w\gMٳ*l,(+cBqnBqchׇ4yBd(Gm$Й,,(L3vn Erd.~$W0OD@܍I'pZyyڠ#PNP.{0 ԓ!X6^:Q|*GQеցs=ؐgC1j˽aKH$#+-nXrSD+LF^Ie_qdƏ-'.&ІJ4öF*I*s4 L,V:Mwf3CnV/R f_m'*yĿ,xZن4 IΘ~.G/s1Ё a~G5$5ϙR=X}&?Ɓ'lCxvÚFsb9p9۴z44-a˵;r@2f[){tIç#9 !i]dCul՚ZUi2?#Q0h^(K7n0' *೤J~*ߎA.^ÒHsUx h$ 异ݰ xãp.r]?Uq*I#0K8GɱJ2,2(Z@JhN b; oH^C-eΣ +z,%2 |J9:9m* >I)'@3* #_@9K{p>=phJ{N'._RlMBde^\>- e3?)RʑB|RC-Ea| HTi||o HayB $2ک6'hxg B..Wly"s1-#I(yRQׄGz$Zf NP< Xa{&݅CWpe)=3rȥ8A )C aq\-r;@wTQYOe=5){fJ|?5|=P_--82'Cs%95OqcsH J\.YΞ)sݨPnG|z?[;^Jʒt^*=ҁvȆz 9qwwo^$O&='tOڝ œtt 4ٓu`C%P< uhT0D_Λs'tKε9|/?%84_mXCvUag0P™[/ϫ.їTӀIsCRONɦ*ƛAɝ,ɓIT=f)sG$y%zrQ=Io~r wPʑ#1;_(ǿ::ܯ`$ϫud!%G=ƿ29|ռJp) "w1npKoo}`"uJ_R_v2oi=_E@(1.{N!@ ;PqV-7xfKζ:XWhŭZj^K1@mN7I-҃)0TZJcBYwYҩAVl^9ѥ9irRJΖWj֍a3T y G|l3j㿪ږWSn ­5/~uqT#Sr=28[|>:?JJ |r 3ңk-YJɖEҍ #ÁX]ÉkNIx׳d;ݓ6hv'm: it.vv.J,/wbktcKzQ)4Y!T<vF3 ZUʨVF2Q RfRU&8!σ 
!dxƁgxƁgxƁgxƁb8uiWlxl6*Z,KBt:Edgi@D*RW2DK췋ͨi)i{3$pZˊ=NQwPtm>(<kP/ b|! ` ` ` 2@ׁ   /ᵜw@元*W{)_J!Nܓhq3}-'xB<EMܱĭp|c(c%x8BA%R:*ߧ`=ym7fB9൐õyYS肫^/orR΢9<NT ~wNKkp:nja0R"Iu@@ODAa/QXXNăt6E?K1@ë6>aa0FTR:MrW$ d=L Rp *w׷AkW,Rl|MpyJ>6န8pֽjx$EKkPGA=DM;?@e|ʮNˎ0A+ͨRJVd'Eo+SȊSVX֌rlA|@Y}Wi\67E7}-PxϲK ݤ~]n_=->U &GG8T2кich6J0snlD}m?!SǛ+(&{,D/Gۺ̠ ;"7R'T+>8澅3+;8[TW:KEl;S1|!d@;>tNFc L$[ Z\ХL<9KgIzEްleT9$)X0ot£ެRNDZZә>w? <{3%~|Q7hψhψhψhψ_#=ΞΞΞΞΞk gp gp gp gpxKEFw0=3=3=3=3=3=3=3=3}( pHx>:^B\B Z㾼JឬB[ŭy̜uի?]sOut"n1(޻v/Fg8xg8xg8xg8xg8xg8xg8xg8xg8x  g3%ˣB`Bz^[^|m?+wv^CCu(ZwqY6Ihw % 1mtŃ\lwo8J&q<ɦVx~%u 9,k+C)leT~RJ L&Qs'~ ;.g BzV*{\aR2EM 5hc#ŀE Xe!:ļ =JoR^ߑ ,ү"KJo olg[xOK> yt"=@>E?Tvo?T2n8-Xc/FW\fhdz|X(쉳U5f|c&AFOndE%3-0U_W)=  8Vpo(vՒ,92-3vf_ټq5@l~c!l;}Y/R yuk;r4ۆ5M `T֤IZ3:8>&_?8#׃)ڎJ'E/g;_qe[>TOUmjՠKfn<Ɗ'o܍Ee >gtJsx^"R{G5 p p p p_&HA0 y^E|g39~8k՗Y~L!Pz^In`5/']m_L\}#VSW2~D y;Ɖ.:ԛ C cuAu@U;N*Y_Jxr HeY\+-)WӉ,Y#y)p{Cnթ],o,9m*;)>MK%)K RbT^E\zSͥ?yU%2x'_"llέbnR9uS(nF:/d*5P*ͩ ]5%'kZĊ' K{/s1. g;noƝV_`QtP8Y iW'$# eq SLJe[*9pK%3+k/u9&E3]Qҙ6Dտ8nI雲4EZ C \]UD9TE-D֛}ۅ֒zy? ͚ ؎^Ap yO[y*hw39ը@(xЌ`ȦEtrj٣%sU1w)ApnF|`בX ay+Es;$7ZwMRVJ K.,/Jo5-ݹt*#d֝73UwқǶFF⋢GO^ 6n=)vA? ›Y$:NKԀ7%* RIMͿ&(]`:X~)rd^N"8g}wJ-R~1vyY?nǂH+뼃lP֫8}EQ -wK'Tw>073Y ijKBXzqa驗lB%cKذvcnl]<*gsao%hqF_ՕtF|xPKKYJvlKķ TkN'hNt: cOPK"R$dwѺMDBŃM(\:65"]9;d)sF>hny5Y8*s NG.PNڽUy^|}A|^98WN˚+SrJXyA8&jr5κ RyW?+-;،n_to6g‹ 0L0,2*1*"`8>  9_? P_r2/wN4p/WB^nr#d?+U sq]#"LX\)¥FUJi1'"i})[,<. jӳͧ[&?m=yO_OiOӵ&r0UB>Eoӹ{2TW6Ʊחts+.txLM3#|F-N#C=q6kl/$ɍlYⱈVb Sa|c;cTRa'X-9 c)CY2cgGYHz, w<?K5k M-!q+i KPI(͵fxup6},n&vt{{[Lu''E/g;_vG5N~ "J^Y.);@k&^ 4<`&8chxXTcqx@TA+uPP,+~wT*tNtNtNtN#cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx:cx]cx:cxODْ|C;2g-%a=jwԈ_~ܩ {ñ!.#(66t}Oqc8t{Ӊz\QvυK)d9Ƒc9Ƒc9Ƒc9Ƒc9Ƒc9Ƒqdi)ǘr))\{s/wbnQ|R.|s— r_™r\s\@9ض \)0rxd[ ?G1ld$i؟k*8Oa-r$ɏbVq4N-0Y#;Elsg={_11߿{Xz9=/AVZװ!a` ` ` ` s$d8@Wa=_?=3`_8'lxYdIɆ #/ _2UZ>H/􅔎_z2GIt֒UaxјЯCqо6Gہ䠵t?RK}kr5/zt!r٨VfZwhHR6'[skS?q7$ [z .}|ynm4sJuA+~lQOc<:¨b8#n*ɚ{.ou>v7*/? 
&w~;&N>WjCK+N^1Xq7*`W"D$h5+/N-e/JJ NVsXqqXV*՗\prD#}%V()VEX^J.ya`2M3:VkC-/U(JҨ;$˦)cy~MKmw0F=衊'8Yu'M$c&LfU坮tN-ۍQ"" COl]n}B%f[l 5Dkª\K!~iy89Ol!Nq[>D"7XYLoOy{|}A88>Nе˚'SrJdyAV{c R@d0z(Amm [e6a(vpx tf: ]ރBAZd'Bʇ~eg7@V@0FG|RzK_6JcPcOR`y/8B(wϾL=߾ďf)c7c7c7oooo&ϑF}FM7"[(Oģ< Ņ:ǟυ<1@tri )fFb8hP+&Ƹo?¨b8ƣn*`$;(ݿO;yJE姂X⿨rkhr'rT[ؓqwoRZ Ƹ]qUJ>IGALZbĕHM/Ft%鸅dxᡤtǥ1p?z$ƊDzRѺ]Ɨǀ}:@"Yf LG4]@ Uw;xL5`z^<ن 蘱`ev/T_C;¯ d1͋&^Ȼ_1߫}dBF/0z!2z!2z!2z!2z!2z!2z!2zaJ        _^h!w:Ҍ wQXj&ݡGq` khfXC5dXyȰc2z#om^$mE>"HѶ=oj=7xƙ-f dngBOΝU2&l֮^X^^6;dǷdZIC./($>[^F_<"˩ AOpheDyTtg4+(NWb]n ILEZЮ4t嶘n4RWRJFM%im z/.Ϳ&. @kbJKDw%ो!_d]^~_ď p:&9o-e~k"$s8kHQ68RdU S8<ꯏ_},1ueXWueXWueXWueXWu  (ՠˍrB|+.=Wb!W|{#6 B‚ub3`gb:q3`gai8\,X'Z,, .ab8,, @XG~{uϠo6m}n<{_i 2˷@hlNV-G 3H@"AEڒ0O DW1TNd Ƞ Ƞ Ƞ Ƞ_"mIIhAԘEXtEXtEXtEXtEXtEXtEXtEXtEe,:Ƣc,:Ƣc,:Ƣc,:Ƣc,:Ƣc,:Ƣc,:Ƣc,:ƢWaYK hA/F#/|IM>]Foi>7n<*?m!܉q^8ĩ>56oM9 NߢXAX! E8i}B(zݽJt҆u'$rܜ2I4&R89vЂ Σ,0&U1=)38,`~Aw8_;c3}uɈ91nK@ (Q5b4Fc8Fc8Fc8Fc8F!1DC}5qw,<7 6EwDD…o3lL[\&[q-_{ázwۦxӭM?c7c7c7ooo_nv>NI/`R+7gzm)R;Huw=T(jTʂd88onpurW{FfMzљљљљљљљљљς?<^]5?=Eg}O_O/b&F`b&F`b&F`b&F`h{L 5@0Վ^)؜mbA Sw!NCOrM' aA]7:s"p ߗGp+tw]4˫jnAt*D@.0LTiAuky(at!tF D?@k]Q# ɑo*s %SΆUuLD;c/1U=^y :r86hrq RNp_BJ@&ھ)+N~҄KdL^b}ə4Lt)YD4R1YLQ+iРaڊڳH3 Z JYIϫ #X Y)D+ Ì淟x*馫-WoJ*lAtYd*Zm|h,?Z7fQ`%)D7G7Vj?3w@#2(1DWQ_{\J_T2H%TT`Ib^-&UꎋҶc=8$b6%Kx񢠀|Os V7Q3!o\˫C+N+Qh͹ `Ee.{P:e^pj`u٘ZbHa[i 2֦D_F݊x\^*$RQii";x 8܎(f2oAEխ-~S¤J#~P@lN8⇕7; x_} ;T!"+55Ŷa=5{_ti=ÊuNB{PPH/*?k{ERkMVQRVą L!˅m-ֿ ap.QQtgz_/) ;ۆhjZZzɊBQh>I@O{Zc虣g;#g t;zrWLE5q}`{rՀ)KF _>2]$߈k*~p='v'(/Lf1Jm"yB'"Bp-2|Tt+a}B%ԦFd\p7-J Ŝwi5Aw}dZSZ4sM0`}F"qzO%L0- v]YgEx=;:.?;F8H c(+&P?m&/v,dJA;qo;nkUC/~+֯6t#5qN@H`m+Ѓr?l^Жb_$;ݟ;7]1AφK.cN12f¶bf6Bae[l(r^sd}9GފsdmPw8LrLsy*7gEׯ";~捔ƬڑMT,Ls"o>_ Bz :mdL~GdWg+ؚ6/~=7^kY *˘b ʗ g;CneMN|qWqv׬g6XveA$gAl냨<gƤM}:x:]N1s&( !-MC̹͆*-_Ag ZkfxeODOXX8?n=(6:{smٹ-;eܖ۲s[vnmٹ-;eܖ۲s[vnm]>;e綒mٹ-;eܖ۲s[vnmٹ-;e綮"vnmٹ-;evA綄Squ]l;KPqbY"!" 
jA6TmdMk$CBABt%t-re@SX)- Q`_)R1^"8ˊXԘ۠+VQƵ[Zš?+fsBOhXInJ\ь8%aR1~ eM1[ŹtPԵ y(%MdwMePvDWh24&FMdq~MvU#im|66VY{|-ln4ԹҐdT(gtӀSqB4䅁vcT8L)S#Pr9tH*)]7\]u!s o!᭓[mdn|+wƝMP ) ÓވzrItzpZq/[YI|{P-P970ǚ꧚MJq5lfc+Ah.N*e26[yzA?̘K-< 'ofpx5:|~6$|xބyWqU/h 8wfZ~0ڹ/6ɴˋq)%THvp@Mi8%Gc2动g@V@K˓U])S^$(\ ͂HܸwKК=@F!'Ţ~y@,P#Kn\8pF<''s9E }HT]s5!/VvLx/Ph)`{(8ԟo-2 :i#4~HYk(q_FifH[ƿEQ*CLܳб|lhѽH+G mq*>gp +k|%9m>|4.#,fU;夌jX_ꁑ9fVWi'RZL[g^4H!_{@;ѶΚ'bjz]ը~f㭴l 30<n It7?Eu17ѣOnT3~x os >/,['s<.Za[:PsӨ/H%G^ l;?y78i;v=`{< ^J!ݫ,{M ߺIc\ݏ~ 2g*A;jsbJRQZ9pm\>;~x_3yh |LJHGRHQSȂ|4|<̇SpLZyq q2Z4iY9LD) Jgqt-\E|J\yyS2_mǣ]8H} o>>D &Bw}ARyg@߫^ T^gkYH{q@gA%OBv W1XAts|l&3g|c|s;NA\g]+yF=xF&7ĂyU^-X; izݓÓ54+^UuQӻ{9EcsyYzv7txfE[*-|NB^?p* 8SQ~T4'ӽ!9p`0N<~oZ>(åCb?\"!>d,L1xDbLrER<<\2æi>&aH$.C ~Xys[\?އA0h([XTنΆZjum+BzЅ ~DPHu/*?k{Z5E () ݳKF[d[_AZoo vQo|jȨLQjk%~gG#sz ~+ Yy[| %>KdGz/ӗ~vDm-;GPY^Ŷ7ט EmkzPFx"՛x =st~\G3DWv&dnWӽIr;?tƵ׵mu.Xt˦趼L Ngy~MIܫ.yOphzC&KohuQKiK7t!QusZ⶘ntzWѥ R~RtO {W{J fYX?b C'КRJZQ.F3ճ>K5%L0- v]Y"es 0Iqom9;1%dס3& U'v&Kv22~qJfhol/ɝ(a{MGvIf&OUS, $U{ajZ =~F_Od[A6A!k '4Ii:q-S{t Մ%/Y;W7ѻT'p)''w#ȺO>q} $>nMoǷS\M9.P`Uo]/ 3A= c3%U@LL-#wVj:sJc2D2ĽԜ5jM_ ݇3j T-4 yW6x |L2)4$Q>Es pCȅΥy9>JELjwCni־0C!G-N XE"tLtw=M)3_efՓwI ڗRڽxe C`,OV;Q~=( VP"S3wR%%sd ,p7[:WfL\d}"*"tɏ Kk5#``%N06-:"#q/>#krӜ@;1>cVxhB1K~'d5JT&'MS JszATځ3t~5bfK^˜N\MݙR|Auc1_Pݴ//K̗솵=l1uzS7(_HlgJZEsrzL虂LIkA8nbT<9zvz~ǿ n]BƓ'O=}*ϾO6eɓ'[Hx 66?ēJ77m뾋'r Cy"?:i 7;ќN47ϊ[3G Gb:FIxe8wo`83mcq0`y84;7v7C`D"\E1(ZumpR4k../t?f`h?aHsX4Kf;lmŘ8Nbڶ 6rm:ism7Iܙ0# +~9xyB일읟\~E zMTk;:],OM~0yz^y ^Վ.^c.NZAʫ޹8{u~vZBYa3Oך4M/mzKU#~~Le"p8`|ei&~ MW(쉳U5f|%AFOndEW~-LWUJCB ;jilKƖ;Gl8EBw24B˶cX/'P7ibf۰ uʚ4\kQgݪp3K`ZK=?)=/z9)x@g|Վ*Փzµóڻ-\xR@k&^ 4`؟u1L<׸}, c*({]nҨՄ Gd(vP}ҀhЫxW[oX)VZsV1f/Jh m tEe2Ίs~tzRX1Yyu85ȓ'\1`hyJZ:(?~Tldh.RMF|ctKn1붗[)ˊ4$掫Aj] N  e8ibmec]Z.:}yZiUF[k]GsO]d-Tԛj#VrGr:}|?$K7!mUXJmc2)RLo댬uFqmyTWet+hIy7/B\^0.{kPY˓?wvP]obvTKrcϦp@2jO23՞f= 2j{Yj{|%"m{Un,/{**RSa+T,'L|U/qQ=e V8]A zFoFz.u{ PZ ri>:2*K P. 
KUWaewqz^Xqjk#w2ivw_<{ԕTw2]z"KHixYۯ&a& AHxjS0Ơُ ^vtv^*Ϋ0V@x>,8z120d @W:88eWQ\#+P"i*.7v._xNqb(!`2.C(;#C_Vk£Ԫ*&L؃1 ǢHvao\fIQ?%WFla`ko'sc37KlyѥM^\ceLp4eȤ_ gC٭A';l TR=^.Ђ\?_*g3 w<#_-K?G;y-Ld&r95s ѐOY9ھ G(;[ZPf H dV,ވA Rh+k[JXJ#(h%(eEۮXQ l#Vlo?*~UMW ZpTUقfS%ܮcK1CfRrsG-we;YyGP `wSisiAR㝵X̘'0b'Q6Ee*SH¼Q?+Df  IS%~F2` vsFSS֍TA+~l>y~F1~&bm0`T_~*,щ?1h"Ou.۝}<};7v\ -HU-@cW 9tTɺtW"i ظRʓP ~D`fPR_c$1 $ƊDzRXŗ_dpě?T tРӉ^%h}Wg콀qpPpxfGDrALc*ϾbCG{F~`-{?WꯍH&nXrSD;ޫ$h4 UTGZ!OK'6F$i[(J6IZ$L%Yy4eJrI) Yt'):H:W5Pf:ܬ_> Qgv;B^kngCcȆ$GgL?ɹb@g?QLNP)#(g8cx:68jodx*n4'hX*\'z44E-~E,$+c!}#ᢩy.T=?rr$']69x7TKAVZ&cC:2=q S٫b$9zQq톉 sr]1hB8_j >K $9,4WPQPAP+ +E~Q7< -ׅ) 0S$Z?2/ Dj%|ّ r!#r%-vD(5R J<*b.Q]aQXr!ʻP̧ ,+/r Sʑ${ܸx}Fr H6v zק^){ KIB̫ˇ92 ko!E\J9S *P@KrzQyvU$hs@$|TOj ِNe9A;8wXr9G'/.|6@'il7hIB͓$..*@&}=*#0l%G|gvQTŒ/3CWpe)DzlI!1;o田1~JhTK V纟 P*饥^ u3jڈHvhKPv$ij SrW#iq=^tP"Џ'$t-_ivV;Lnq$9GeAjd#- Ơ~D $GqvjF|FIr(Kijget'Bb(MZV1؆?[;HGTQ!;T *|*Ksu"H3uiuʠFSˁC#.cˑay1-d/LգÉCWA uKʧ2b`j =#ΑQӸ&t RYR&#:jODQ.U5Ĥ%Uy߈' Sr̐P黙P b\?f"䧃tGJz{Gy r]0]نGudC+<Ԓ;2᪪QȻS22=k0C\)'+>e^Iʉӄr$C>-Vz>EgA8oDp`$'$FXHlVUߪ{Zې =U뿉[gdL 1b-I!+Rj,`x[~I&Ж7T{=r03_䪂s8$MWǟSjfimpԘͩ1"BJ!E (Y=;&K֐,ң͂@ŰЬG{{{SxحwfRSMT-ST77{\k \=h;CL~IO7w|FoUYBh]4hR)bDgƽh٣b 6: *ʕե!W;ʑSbPWs>D$xH[ER] &Z2d ' wY4[! ipd|vmp>V":QXSʊ?0Bh5~Iu*u X;}9H[bi<[EA-U35"SvNA5Tqya^vijFVV0ڊL uiⲘb#kʃs}rw J胫k)7rC9P"Um9eeH=Ҵ޴Ë.j b1E(5bw 2`K% < Ct Xԣ3{{:x\딑;:9BHwlo*&'Kp 3Hl02oud岿"VUC6ɕrhmNLxUBk'&h@hv r܊h-'5)/b^d[U*qΙ˥5!wȭ"֊ddAϤ,[+鱗q@NiI*wF;QWaZp¼Ɖ~OwvJ&s\QTYVV*z_.4 #.|? 
;|q×n`96U-.1Uh4fOS9y @hLM@tuE=W?_@35$ԎPٔ)(: Uy*lޛN ͇{T~0BoO2EV>ו@x,эM<9mfJ3j7ؽrHU=;8/Ot-ny[Ca<WqESwB|(^E)*c?hochb{9NԔcK#K'"FGLU=VWVjfF[3qtpƜY@&,>lŊ"'8evrF(gԁ~2|,TVLÇfLeF&=fkXđkygi(ǒ1ts?yQ&/_^bLp8nhZuYTS\V^oG8.%l л/~-|%}zI<ش\(=l־%*_Z^s{(sqιua9YGX;%mc&S09a֪V,`|'ƽ6J, -a?'RDMRY[4y;qmf$y %uFvZr!,'ˌ5Rt $bS ;1ilLAGLjj 2,Wua}R2Gw2C97ڂ826BaX4ѳ ݪ= nK^4|t]J!M%L:XeX&2g eOji@(V0" 4+KBw]I*3),<"+ 'vlV9^J6l"h/Qi\@Π$v{1-cl3#{~31;c~Ju*lwktc/AW>4M#F ~ hw5D{$nFX*qJ[ķtFDlt~R N⡞4 Os}UٹBq1wfs&ênp9O3砽?Ưf2,h/bm,ƶB\sLs50kd'D>3{atp#ڇ#?dGR-Q8K`aJN6[K֏\y}-MXa,4lmj8es'9qyG,R7D$$Q`gz +IٔSvt Q֩^f]zTP?ߊiSƒ< K 089,$.FzSfM)М-U~6_6:=ʂ8V R_wM7o "Oź;zdIw/yb\{p;]͗tjML]$EW013f;_Z8=UO^O#9Ȉ!_6K_YzY@lW?6bK[/ %',]d\ׂjq Q{J'p9/6(՘ӭ-<~6dH|#ַ6Ƶڽ$7QAU~t;oT>`kg##c-im՝hLM2\ Xc"o0nDIՂXۭ8A{|ۀw".+,2Wc^[$SїA0D ځ_k; .;\]=!$*}Dt5ǢCPY% BbNapͮF"nD;kȠt &v)V*'] tJwB,~=;=ywc¶huۺ_6euznXFYǵsr&v!a8}uvzr^1g{ӽ&/R#"S˒?03UT,D4aAGDl. F ba#1_ h8HVaWNQ4O 1>?k]{ށ&fUI+8!avBGaI6L\K̦-vf7y(8%4 @0Ba'7ݖiE{f b I.<1 QfyQx<,ޖ`tڕg%f+_|{(]'TsE},#+ av|^GGo7vv@UW)qډG0JubQ+Ô W0SZv[+=<* M k@>bcm2OoVaz;4/Bzb j5[ 8P© XH;΍HIiEq>7iB!iX?vmK"P^A@Wz Iq\^,]]poyaՕLtLJ ?jGZ LBR(CK2MfPlQ<: 3N~#i]@e P?XS!W:>2pfe^{QV9LoN+g95TuY7m9<zt LpTvvys]^ְ9Qp<KΆAuɟ_._cnUsyxK.Kxm2wSraU\]ν6iJx&o:zN}[vwzuUjWdLH5!q )b2`Ra*S̭-,qö;3;O&ÂCX{4K Lf{9I(?n;3n(.϶K0=_bjUS w7GUHynK}YR9a%]ktN1;m+, ɛ8^!ϛ깰j1J_A]c::%|RA{B_]҇$]4[ɯF/[,[["̜@҅AXBdP\ o!r~k9:=aةǤ(%IұrJN^Yw3G'k+b!^}6j;z[4➿{ o"؉@e6&2ca t$rZ&0x2.lgod:!Z& q/Q{~6,g4Zדx<Jջ\ ڧvv4?'7;[R*`r*V<8bL =:(#2>`E[kpB.:~{Ze5ܞ3fafu!e,oĪ(=A}U8 \.#`4/וB;#Q|0|~0b~U_1V1- xWXB'Gc*\w('@3;#H‡Dw$x{?eU^/3 D]⥉Zس1Q&Rߤ"ԁ֟ j|E9qt_|9g|C$vVp!"[SOU|p*raZÙ< eNaԙ@+)t9F+L-4ceٍ1˹1JB::].DP6HhF*}:, w6HJ=(p Ic0OgrPˣvSD}X%h߈TP;؎5,uZUm\:+݄KG{ƨ~Q9҆lpםH 0'8s'#2\Q{tkfTF)\Xd9Y@neZ)&[eZl.y>g>]]OPW։'[mNJ㓂@ˎLn ,I+6| N<Әzk;[OnO cq|,-'.&1 ÃA:o5Fxa6E7=3n4oMO x\&.^~xtI`7xosiIUj'㩬;ٟCmcҠ'^rïƌ*/:ȗQ'\Yr7J^di(%/v <HjW;{~rWL zDRT`s'S8wxgI5*6$c4~?*ݗ\?nX0VuL**>JcUӰZ'*CT覻74X^ T{rZ;r)֣åx [Q^gRp}ݲڛ_{ TH!<0>㣓={x$S۫zO1wpl<"1$x+(pD!Dꋓ:M!(džr(rC8+P 0'J#l7["S%Rӏ&D0};[tG*-XlHEgCc1_V?~QxvgeZirQ)reO 
r+}ET.3`?43RSZsF\ 3qRǫe(*4EnǙ7B`|JDHi2 ĝ!t~TΆT"1bZcо ~BUffAǤCP6?Sƾbu!jL Xbx<\OE=,𰍵ddyXť0n0JCvR*lH#Dx"ZӫG5C:1=~EBEq>z0łL>WH4MP!BD=W: ⾢GNܡ*v~qn(1]eD |5b9B,ϫ#AGBJ Y8AשOrl(yO5ZR\&:}$>@k'ulncAwzqƢ c^D@ڮ1< p\5!ӫR=E]ag2U% *V 6Sv0Ѳ"-%/_}wߞog8KqR@󹺍M.l4׽t@n+z\7V+Q=u,RT%M8=nj(-׃fЋǝ$>t]ziVos&78qNz@['?wCGTs;UG&>~]JSCij3LO"$|`ܼI?i6LlJ?3$]yJ5GI =q<${^;VElAVlIOZ&{tcCq8~|;4<6Mvx 6_Cq8\h'w9sd.9\޴DovE\oV0VFs\ʉ.#S-Spu)T8Tt$'-D?ʗhϪ^c eqD .4a)Ѭ7꣜~6To[\!'PRUЧS%B,XU'! ~&jSYUNF]MGIM=3V$x"򄯨*D1-EmQxPI?KzT5 bO"-ӊ~&u;YlC*FMd<Мv`QȱUP2=V$P%2,DkR!AUz] (:d9v4D!;:Z=V4iC#!b^ga4o VĊD5>“0]7cjBH6w&f|R{/= : DNcRBw"Nt;7qK,I'F,77(DV ~Sl,skgBq$PHp: en"#dLxld;0%R)kxI&Rc9@chtWYcs8a`c/ѠZՁ kKH,iP >zf sq ՎjĘC$Rν@;~?r"=AAuni *' *2ՍW;>b$D ձX |ogR0ђO@{@H=<~1O3zi0Al4+ZLœ2$:3~d Bi{P2da)Z E螸svtOLQΩ+:k)+W'OA5樴Kne؀^-. ڌ..)}5n)<'wP>?b}+7c9 8+щk4s d-߿.a9!P Q`)`f䁐գ>ƪ2V3FAޠV1~ރ딑$2ZZyF[vsLh7NVJOVWV7KzK;ғm{ކx(j6/Y쯈e^hE6Zw:u?BN>MJA ,ʭJrRK|~[m=f-Zأ&65UZtd-eF[+鱗#1@e6Ԃ3v4 <6G#I۠͘ݐR #䂱' (i|cZhJD}и[.16%~b-ioWՒBZ V͊AIk0zT)~ElўD EstS$s kZ}ч:c;LYt> ̀Yfi p3l.sNr|(1mi,tR ZS]СQ<~ ZW`qgAu#}32݌K "vjYj5ҞȦy( ݽ;}¼ՄI͎GF)^}`.HqAeT5FoMuE`%TB#U[@' 0\+h첥Ǣ5ij|8ʄ玔|%8BKpv@P2d9u^>h~j^<~:?%E{Sbt`nYx׈0+pE+kg\I<hϗ2.f O7}/)5 9?m$ަr|j@7|(^I- ;q hLG@?3/g>׉rL54T$S(Bq]d82+1=TI*s{rY}+&{mg9YeɎu >z>V9d 8gxpY1!oHNJס`;\ @RLd%LGów`|nƽ6X9*[~Ot0Lښ0|͛ޠ%&_n߉?>mS6›Ǽ+QinJzǭ\s<DX0 $T jR@G쎉&T=Tͭ "D(I&Dty h'@C3 44 @C3 C3 14C C3 14C C3 1t'C C3 14C C 2 14C C3 14C C3 14C C3P5CI+zZSq0[()@Pt]6E+DjbIe"_ƨ8I{mzziL4uG dQҰQ3]rXA4K>?|h2x:&@es|h2D' Mv@- 840́&PeZPa۹B Ӎu%֟oeO_?}ӽ讓]+oaTtxkBw\FqFK%Ya\䊛ۘ򾊡s7C!YeHVUd!YeHVUdGrYNS} TGvB Q {A!2B<I%AP`PU2! ,T݌é;VH ma^x(wjt*"CWYWƯK&Y؊ZUH>VV3!V}j=~zO eUU<}ZL+b*XV4UCNpc(cetOVUe`UXUVUe`UXNVUe`UXUVUe`UXUVUe`UXU92kVUe`UXUVUe`UXUVUe`UXUVU=|xUjD)5-CfR uJ*5iqhy9oW2/ n7\? t%]@W恮(GR9yJ88ʎ.s.oa Ƹ\G_8n QL+$# bmWoT?kˀ/~~^G'6{r_gU/y~]WgyRH=]_ύ3\x^ĝ~6̜gN/s9ϜgN3'̉~D賏1>>uw7nC?߽Tt6n6c %~˚u/"(j?G÷C}߻~w{K],W6tg{{_>#?9׾Xӭ3_?o9g3?s8oG*5Rlu\S\?s%8¼ fxf7Qt'Y&vԁE@rhcX[ jpng责Ӈ. :My # t͸?j:1{{q›;#FH /4)dCxLjtFɍW?MxGjcx;ʅsϚcI!۝OwN| ¨3vzg&4F- ByAf:3HF9jCA'&|:PۃP3 :$ǮOłϬi[. 
4>Zi~#JT;B宖Am/>7m!n&Ci\Ȁ$҈y}S+7G(; FYWq[nܸ͂ɤ٥%fVLǥIqd4*MJd4*MJd4*MJd4*MJd4*MJҤ3T &CIf4*MJd4*MJd4*MJd4*ݓ`Q:c;S!Fc>c"1N l6E)v cҔMeZ崩<ʸ)bS[r4GS(Mߦ6hJ)JK):hSe49FShmলh*Mm`d^37E0(] ^Z$'$M!T\ަ{hj{C6qDQ*MAJMe ZmjKPzH6GhZ3z۔6Mmkd67RC Cpx#z# `;M ,q9֜T24B_Dۋ!y{dGS=>kwL=G/DQ&'45ݸK.lV- &p1E4c tW1fF%Ay$PL-X#P99`w+l*`1Wj:l@w6']8^iɚv! Q{\= &AIePR<()>`SV5zGV(vxѣ0&膕] @"[i3wK3%՞ͅ=70j9]7F{D6Aen;!:e8,jreyF[v|p;Y)=Y]Yo,UL>ߗ/whw߫6 },`dFQ{eE\i+P ?:?BuN>M>Tϵr+ҷԤyo8 T293y닜zކܡ"\[+.BZ e'nآ>^z7HWw՚);wF;]=<6wzn})Goz<;Wo}|fHA/cʅ>1g0U䂠 K7~lľf-V NZg<e Jrd#P#lM:NJv;=v5RfU /tXpYhʟu:+Jr*Wcc3~d-M&ci?yk Z.5 smY+qQ;(xZIx*s(3 Z+_~%}<@Ƀ}h5{yk9vY+>93?BrFtwFtJBH%;n /W mhMz%,_)-J1#%_"R$\?T| SɱAFi4l|tfM{ibpBKy"/1%'@'X ؎Lb6|ri[q'k^j.ЈkWS<"ycQkN.% XJtt"3r7͝)TEҷ"DOEA|޸U.h! Lx2~t5a;}*%" 1Xv NYIt=~1R|,Lf ݯ=} Xđkygi(ǒ/t!9lQ&!%/_JRcTb+y VD19fWo_W/ Z̃M˅sN+^Y|{yM>p KCosV9ad,Njz];ult0ޜ#,fruo-8geU}VVx"b,N^ւ [`pQ(v 2}(-vJZbL larj1ww V7PnӸFŁw uT0EOoքm-1jtNA\n& oH]ݨzǭ\s<Eԓپپ}= ݗu(zGtn>+ Dzz3njWӻL?Z9{w Ї+IPoJ8 r=!v y\EXyR@fc;p܆:G\KǏpQ'/:it9OIo?_xr,=~]8dI!9T',NT؁'SݒOɩ%3u aB]$<4zΘ|EiKV=v|nG6Fǂ;̓,ѳd<ſ^ZZڔȖw.uB)nDՂXۭƚű8ǷQ,>t;+,0^j(SԌ kD$px Hap~CU]j2E9i :mDɸ""db[0x28Ncm98y"!ҵ'] t{B,~=;=ySBXbjm]u|˲:a#xߡXY;w!h"4zv~.O.?=8:=NOk8=?kRK%!wXO˒?03UT,D4H6'͟;73%p44cDV{'0®8\u$wӬ[klɍYbEܡ>/6T73LJZ 8R *dc˔ayȵl2kgq3BHXz͟te[1)t?k~y4ޢf bI.<1 QfyQz滛xX^]-]'z+'JNWzrgQmGy GGo7{v@e鿏ī~,N<ǰ}:3W)`؇9V?W0M]xRaR.Nfɺ'C?ԕit[H%Q#]JRqr~%&R7%&#+Hū hsu)vr+,Z+`zRPJ2/6ƨo0Fʙ|_N!49U%`nG_1N ISm]xCжvaDt,9ա&ޓO./~% W%oWkK//NCwNW #(ru: *} }8No/C˲ֽT 9^t?%x)HԛuҧL;8PpVw(>lJ1#}oR/>9,^:gK $'$n'iٞ=y3톲lj{_ 3/FY5uygyC}ZŌwt~ڏ*VҵFw 6^A"P}y@>7A! 
:8ge|L[ģOCu7X |U_v#fXYMg=tksZWsV4V+QMŅ"'(ӳ!L*\3+0DE0vE*{UQK-OWj=7$5Abř[C tIwJ?|g)5r8t_]0`r&q b$nY ,D7b#NZRe$&J+h-DiiNEw_]8LkNkȢ:ye %.cUF:3.ч8ceٍ1˹1JB::].DP6HGrmfWF]$]9Gq()H9S'.l1&a_o.;wrŢPV[^7#ߢ,S{&m)u/샹ӏq Olo9 Z.boJW}p%\zs@i:pˤdG)/IUF?w"0ū D'zT+6$c4H~ꏄ ?zxJX0VuLUEGIcSsU%AnzK*.#zr<ɊRG.fH1!U7!)46'T"6ĉB'xp Y#Y^]7#x>lerBx/~t8âhE0yڂ8Xժ/N4䓢ʱ\PuZg> _`AX"S%RsfCH};U\G -UlHgCc1_V?~QxvX# B䨃w^bd}  p>?uH%NAax}q)1I)tVMgjH (U_ ~,qg5B_!cȆd~D/1B8_pգYP1TϔwumH04wֱ<KP3yٗz44ZakW+ՊKaaq툷åT")tؐ7FSZW/E>۵Wj$ᇞubz"JD٫Ʌ$9|^a|"hB(/5%OB'rwt2t R\]''& C-UEQb.x.Lijxt$ TDdW&|\ #9%[>ձ"Q5;F< 85yԣUjo9)B3ROgpį3KDR cP ϧXD}拙~x D˧O 8>b9B,ϫ#AGBeOpJ%S*P E0.8j>>$ho">@k'ulncAwzqƢ c^D@ڮ1< pH5!ӫR=E]ag2U% *V 6S s,GV~zrW=|}+<1^m$x:KsK 4+馹u3lҹ hU}!tw甶\ %fbz0d>P}70-͵ m|;N}Ioh RC$gcnhqp$կKij3MuIDr(`ǟ4S&I} ZX+R; =bdwk۪]-HT|Ê#-Idpsyl(Gݏo'Ǧ/r0k(+{.'|e4˛MSԮH`4CVª0s0h.+W9sd*ynW2Õ.9śG*n%G)ʼn.'kNt|PNGXR%RMȧU"FbD~<ѡ!txwt/:Pcv61CeUwK4BgUU󰌵6$c|xf<8|YoG9lu4]}6Cg{5YN̅9uAJk0$yȋ.MHSQ&}F@%ې(*R1R!B*J4Fb2ÉX~bFȪTԣ1 Hʧ zDr##+Q]$pYDm*ӊ~ɨH:G]wƊODQU(- J0gIU_S|x `L1W ;A^ I_bg^ǺQ Ar6bDϊ:i'1 VXձ U9!c)\IU"ZVODd}EA 7 aNB-U|Ylz$G(R$:ӄM%,X+-ג(n$.8.r͘P%Rzf.0]ƸTK|OmL.5ÈH7nXET`g j|a@bnZݮ&nG<Wq14&`h43=4 &cmy&zqo0Ƥ;dݖ:n!8o1LPTH;U&bi~ $8M^d%%na %h2HUua"7cih!@k?i. F|ɬvAMk0T*muNXn5Zi}cELiZڄ^Kk?n:q"zwh. 
"-*&So6*q]x3 '*cZ.vXi va`S6 PESaFhA0rh*.NN_ò37,D9cu)xz?]Xxo7id4p sh?=`5nycs7,tv9c|Iuѯ,AA*&+_ibx1_6 dIvp#ldg6!V~" cT6e2eq.+;f'jV́a1\ O?"#)Xz%cV;bjXY"6ErP[;i~g^۽lnraMO [ dP[29AsjVniLV>Hvhg˪E?\^EOSA\?,hX2?spa4s[m{qiP@S jwyCyy88>atrmwE?M'-q/Ô/1;_ ^hKgP`(!0 ̯!S[ Sc`#nSLoy3gUjs\tôM0h8eCp MOS|㶿PwP)8kXn}~|^="viλ%ң/J vJ=X3#i`4&aYe:Zu.b|A_#XxH*TdLdО0 el'{Q4,!`pxrWBQt@>>>035H;Y~ElU5$UT!D˸/k?2SLrtt9 {;JO;VesJd,1z[G/CjS*XT\M mQz{3cp4hƉ4M(cbHs0MW$!IQ!W_:Pb <ѡe:\zh) F9֧AwO|;T~a5KMA샾hN'yM5b8ͷ∏Lgv.WB?RΊա5 UxꄎXh=C+Z^ҕ`bO;n&DQjԋݕb1"("|JuG 2]Rcr4(k@*⵻B]s[=YPvn%F?]c4mvi@nڣ0vݚV QZ IG/DQl߬oJ~|keZҖhJ P)| ҁYMw%"-RMLF^JNg|abMS`*̮0o&@!($s[ܥYT^ rUv[*/_f F8J6ySRƸ{bFLD n>ujN :7}${bɎ WYwzLʡb1}, -SXTV[ ]jgO !->~f{9:r4Ӈ/y8xM@ C%hdpHtqK+˗'H_iJb< ߆X[zVgeߧePUTUUAUePUTUUAU}APU&è02*FY 5_Fqs2JP]ͳ~D[YK›okj's=z>#Xh~#&%JE8ۇNо4T1;䢵ӈ(lX΀&TD?хTSS©qH]Z)2Ul+S |Xf{,tV4I*яoaJ$;/!}"%]VcRh$FFAĠߌW̗d91BbR1,4x8Md矓NN~֒~~ ןᝩgh,5xSRosq7ŵ_͹5lq0jY oqtNyTˤY&\:WΓמ ~pR_bԷh̭v\\.[Uvm*z Z^Fȅ :v<Rؘaٙ-ؚ+ mO(i TCdzzBIIDGi Blue0!+~uTYctQUhP;ӫ %|6ƝŪgۖZP׎.ರV.W,,E2;NM? =;)e3ogelw,Mrr*Jܱȣ]}caanxhcw;S ^wm2TkZ^_6lJGrwRR!"ZQ7Cĭ)#Rm֖ZCYip.!zQbkp'YhŠzsdOey>*`2؟y?ERK\Ji vU4Ä5~-g} %KO:ߌ,F@4okiGGP Zҩ\tx Ug)ze#S]q/9Spi ۛNF2ma*E9b;u("D:KmImt{(f H@GhhQ ڍNw֝q:pP`Ydwq[)Pcxʋ#Y eA$v]]eD|%gĩ}RO C0i>'~?Q4Ҿ[Jr&Bl\H9{QDTGt!FOQl̚,-RUh$f|f60Љpw+g-͢6E[zP: Ap,f80lvMG=h4O%8p-#4#b)emzE;jZrbWY*H./:fXnT(h y0XΒM%WKpT3i8IL/ɼJ$v("&ɫNbo:wOri-ehg0t=rF1XŅQdOdE\V RuXm~5 HYuy#I;lEE Nl*x]B5x#E^PH`t5!,;lU 'g}.b`A_)QLMl꯸'_HS]`c/)=<0XđkyϞK/fA! 
88??T2T 3CP1b>`o7Lvha+mΕs?zМF(m'LۺJYJ Ba [ &]0_xyyg藗9H &2k7_oaRS龘lϷSZƾk$c^:q `'Y Պ+:BH~)<k6@LA[v8Ӱn o3 v[2;v!['2z!(\Dud'dT|ܙ` hh@rCI>(wy\Ve }\5DwUSJ/px%+j ZKtw^HB5PDYc6k}ufe_קEO< /9&˒J^03UT,uQ%=hܹA,LxDC3Kr<&8EtrՅ}Y7ī9Ŋ;Klbnf^bf'4pp *dc˔ayȵl2kgq3BHXuy͟<ɟ5?+95_tݳ=Qa`9:֎krtpzvsgT ёxIpډG0XubQ+ôƳs严֊/w g@b-f@b-~@ҋ(tM]|,F1fx_t_jxX/K7,\7Q̈́/Dz3 k}jcGt‰/K,ݽ=* `FjF>;xX>`jV ͤP?KCeK;KH!P{ kdjp(\ < C`3 qQ){ý@8fpt28 .28 .28 .28 .R]GI KZֳFdl0n\+ $jZRiJ/5g"q3 ^@Mlԡi`AL=M!n *5)s/s^?ЫC^9;o4#cao8hq_ǹ{aV&\Qu=7lΐ( qZ0[t) DAiɍGY?Ϩ_F6BU~^(#a$ン!^E~ >3 EMn`H׶8xex_5E~ͺnw?9I TB:~T((dUPJ˻TT}Y|Sڭ&xt H)ޥ*x[`ҭe \%%T080Rh\FpU P~Ӹ%[ɨ\w Z{kK[K_CQg pyH#p픙=A,`w<+3no^䳖vgt%#{~~)zBnV?ھPo so c@2 "0 :KDB#pc?h`vf㛠z46σFAKcU*7qDL3JJXl7|.d-jF)j9\]]K ݏקT hט$aRKsJkѶUKKo8Zl,Am!? \j/ @m⛊ NpFǜ!mxJc l7o" kmQբг>f7|ȴp5 N[`?!XyHUG?G)isᝋ/,qyz!GΟf0<,< t> ?ȯ"C] <+$}DtlDJ>DJ>DJ>D* >C9i=H긔L}fɅX׽`;T](BT6_S{5׾5V,?N` ug3AǗVZ?W{=%!( Lў .ع KU>AWSjpPkoMqfIc-̟ u '󲝿꩎ы8NX<8]ywNy|B TvElߘ\*7o{]!Th2Ieխ cVEs)ȋaz{U,tNJU(خh'U_:p@U+c!k?1${C_gHKbN aoZ:n*x>P,󉧏WGTz2~tkjL]nd2 FxyX.Ӄ }~Ja;r6k#GռOissKS)_jAGOaNyi:eЀ/b~Us_k+(/$dF1G|Л|xk *jkW>~c> YgXV%xQ5@n)d^pۨJ)S߀_E/WU~MaVw-wᗜÿV QOXwjhbA % KIKZ_Ȁ8o1țTc #2f\`ImhDxLC_ybxJ |]\f?9y¿xZ-q ѳ>y8OpLF1=e!7PZR &d~𰖼K.=wqAXO/Hdyܤ)6e$f(k&/- "s Z1>I05ʓh n}v\kzӡ_M$Ba£TX0 ="ÓꐫmG{YBr8L#N?-&+ |?  ~LbisIХ]O!-X`º]M;'s"IV(ȣ__ m[ H W/Yr/N/i eM Z!g`'{n#Q#x2*} @)B_~= _u X'K1"C.N6n=H׺8ڿŚ'5?PD)np*XDӖTs XvkUsQ,i> XPOA&]sybUzouazUl:9(Aȣ=tl̨?>/շtߙs\a^$DJ)a7:"у`0 y  LfSE2On(##JVw>ZSL닯od]i- q/ONp: ?X&8?ĉseH?>〺XEt[~]Bkl~;!Y=;:2Lglt3j:[3[Gl%c|% c9; }J{sRHJkE_psc,:F}_`oN4["(1ʳJW}'깅|+}ph"?*M1]rÈ[oZ>B3y)cQ|FÖa91`tCا)vTc+U RZɕWWj5YQEwB^nEW2y]u"^WFWQ7A^;cBji3joANրZ3^[k|sC/;_p+791h0+%Bnnacn=ȷYyVA:Q%]=6Ȁ|{9+,OYZ03+v'vh5F+hӪfV@toȏ(H}StMK׋uyθ囬JyA9o gv`JR7ဘM0ŧWJ78,#6Yw/ըy}iϕݞ 17G˭-m.4aRI^6-p? Gm\_A]8o50!CJQ0Kb|\CVc >BlK&*"JzL 92-HIzyI#Nn "b0DKs]ygil"1"M\2U,zۉg.; q  ~?~[nW@ $Ut}IGY)I:/a ]k_jJtJ@_{)6E. 
UԠJdL$OãCcg](N"n q_ =P~<2m7E2<̓*l8rR &}B,Ë$Xa4-4_u𓤜jʩ\hPKlW Ts"Li :FCECTU[=<|q|y Lc-rf|x.k9Vv>H4UM0*2Dנ4xH|8i`d懕0ˌTW` |F\d2<\EeX9Ё[\jܲPBؗ ,ÏGԘ ĵ!>fgB2FW qʘz-m1С a~yp_mUeR?4;ʳ ɘ7 X;ʂûەvĚndxRuץXrю-|l6rR1/2D3Z=n1?Q:g`./: e};vb&e>Md3SOq&>!pLT R\G:/^h)*pZwǹGf(1v.t]27q$~jH:t_AT~Uz0Od(9l~$ZT(a| +:r*c9N6iq_Sm򡡣Ea,B3[8=Nas[`O@'ϩ&[$2YŠ5Dçf Cb8`i!IulCY2EB{[YlnT'+$TS@J{b`\ p4| LPgȣ;GyvؑɄt !; 7H=bFSpAK8..ΏbucTD?ّSXͶ2NQTLf sEVl'ͥ$BBCIS|=2ìcR)QhiE,J2\0u3҈dh B|t8g=Z l4GJZfR1emV{ \AkǷKshBSeEj'sC3܌*rN(_*"?GiLNRifFl#"_Y^⏻ތc2)rKG L\&`4]qw3fr#Fns )hYeel~N[=X)=KLKt]jiuQ^Ir ͍cX(C!V-&1|Q{ƿ9Y Ȫ/HT>.[mre pt;DnʩgM(ܷMڂ `J/LTEbP[!F L-*v |i˧"!7 XS9⺓VPs^X]C\ή|dZEA UM.Sz-yd+[;AL-w3Qzf4*"</gjh $x"%Tі$(Y$:٣ ِճ9T[](kD>sc&KN5wVLV݂>WOD1'vMbzH::!qa@9:h[`N.T??y1"VBcMvIn!6f-Vv-"g'q)Ge)!Cݖ0ĪQ23i%c;.csPOơtW N@D~;˧KUw&NcxQ {E{i}L:0%PLEaaxC =GXwGIL+{mbWQ{$K`U2M>1$r gݻwUx3ȂpUH[ƂB*A/-U]]2v ,EBVsdWYV{siR(/DޤN͝R=F~uW{qmiXh#EH9EN()Ȼ1 ?(wc{{ *U{Ao R$<=MD?YoׯZ4_7k_|__?////////ῑ_BYx%\x%\x%|^ [Mɷ~ /7#;b`/V|3DjFJ ?F}K2Az$w>qٌV.?fS^m9JZKW*+}#\[IHĝ4#|M7T×j@: _ZδJ&ɩF0o vм©2&;:,ˇdOothGþw$jO<ކ©1p 3=/­ɔmH uo$H㮿rzr|8¤"%A8LVmkycQF|qt 1̢G`$sAp k]qǷUl¸[ $"Kw0a;vꋒӨYq+@<{QWCLEok*+#qL'YG{w D$|i뫷[@c?-)8uكtAe,Ў!Df#`xSAд \g9nW|! 
2DC.~l{K_j^frד16BuǫZW:*^ t"ʪ9ddmdD\OX_1玙EN9Qn6G vCL;ZzcC oZ6 X:onǒK%28_Zn66D^17̙7=.פ+ŀ>9*QRRMʚb1 [cΥUw              fL2g`~kO>ٟFSQ LvU!U2M}KOhZ^0'@7O7cռL pRȏPL Q__ j+¦W+7yҡ)mٽٛgW9il^xdȜ[1g-1/1/1?Y])WPXpMSP z,' qN.ݧx-:xo*]ovDtp7ugLSxsWRImg JUbvʯ$r*Tm;gVڍ5H ͳPlKl04<VUNAS*X(ljB0۠ә( _Q /½ U ?D,Wmu'ƽ혛4!Г}sUBWpJBwj;FɭncS ;;e3g%r4ޒ jۍO3ÃQBA+TB%םĩn x⛠8 RuZPuҊj?żBYW$$Fމ)S&TTT4mn0AuUG.ʢM$ǒF)R1EfHz)hk+NՇ2l2»k+RxçW&, K CQ˹V #la8_R#WWW8&͏u kg-SP54Ҙ$*Z#5̍c 7UFzS+NtB%^O<0+Y,J`%U9ַɚX=-jsZ=qTl{yBnͽwD^Nd#,,HiU1JqG᭯nq 鐇 kiQ-B6%v+:0 NEЉ }_ev}:#%\w$6 ,d`2yhCY!Ô=dM4lHD $S @'9EdQBNAYC(sxE96m&l\AO*IE92.K[洑?Y ӠFv\lYz43){ǂGb\jEB2pHWj"=FGMGf#]ǥ#]YC!]Y@!]\įyPH&*;"#AHOAFyH!Cᑞ>"q[_u$X?cw)4hϋȥ!a&>VVW;ȆPR^{)HR*ДTx|-ޗR*Q,J6ׯuƪtQ;ɲSV:yKv_͊M3\e?|Zm}""6H:7AitZ(ǢXq@T>֫ Gz5Z8Ur&$y0h쳐k+GJ**kNR7 ~}B|E]בMj|Y>;Ћ~o~|;Ȳ( (s (ȋ]k9t7XBQ]I[5@/yCQD0T lV<.mf=0ˣx`=26.[B3d%`C+=/^Ϗү`* SB>͐pO ĄO;bݩB0;2W*|5UYeq(׺<-\VDC#^GTKTܾk+sJ<^.K[\~WidQ/fs>6l3֭6B2fn9%)nsXe~J`C6bf4mTjߛGM)%}lri7  ^!C[I\Xz&mN}И42$ N05dL`҇hr"C-!~N#?lj<*KJQ1;?k$6 ȣ|"&ďaxBx ,xC"Y? (O" 29a%7_?P:Xȧ{ -OpaZy]jbf^\O3Vo`tS1pNn].)~v$Ȏy ؏脓HqjP_1O#\,GN[ J , |1G% %*%q_)e;;dN*@+sDCp:;=i^dz6dsyF:IU~u)"o23F^BچCr -.,If9vP.[P> ʭCI8lMC4"R$&W,^S:ھb,rAL%P^FuӸVG6ݴX&:Y zH'KJ>o&`u^IznӕJ6 D͔::A+O!3N״_ "ՖEZHBri嶲A;ruiZg|_ZϷ]irdji?m2\ՠß6PO,Զq!S'Fk+P:kiG6}7MUC|!xDccw!2#z?GaRkǭ[f~5_662 !M㊙@̻5'8;*p'.+pC$: GQޣ1; Nm|^2#xȇR4Gho-j'AX2+wABFfk(LX|+~?xW5V57CQhcLbLUA+ArlH]rlgcK@9&#pvHau)U#DSMAˣfG3ʤocPUʅVP(4YK[r%(Bc>"U?]4O8#u2$',`e{lJ򘀮k#D5O4}lD\`D]Mbg /ШX*‘m;V|{{W ml+-RDۤA J'lODv4ܞ*| ڴXM/Цh mz6@^M/Цh mz6@^MgXh mz6@^M/Цh mz6@6cS@~=#юMG1C9ԙ4c`LSv1ŊA>eV]KqSعAgUPv8"$%}Sx>:Xx:9Kiv"` ՉrYMSȤݕb$_\ifӫQ22-{b \6[*C]4w$Lj70IFR݀hoOW}̡.dӰ8avZMmSf():߻erHd֭W%um* f ZQq@QӖ'l>cfjH<]%]{fLg̡NTXN> b2h !@-O×ںgjR0&Bc'0tU+'04!`vЇd0CS > 'm}f+ xBn5 H9{3%*=FZkTLw5 fQ^&@[ɥPb,P=Va DF8ޕLI>V{ rud\.)ϸK|dUxSFmVx0=j2,ta2 a{ݡJ=P55Ⱈ躲ѯ,LrgfVXH[ ؟([`V sU-f8E5@xz:Ǫ[e(9f |67Ov&YR`_|`ۦw/)aXǞ(- ׈tW h!G"=y2\I? 
@fG#Ln3\$f..G~5NC݆AO]qXu\P6J.,Il_Dרc)ʨ8zeNKUքo\F1R>aѭ8&=Au%|df6q)|ݝ2]>/w8(>]_y檰 $U90wK޺p9Q ߢO_m[/A({aghk~K/-pPDkxɠav.ps7o$@\ E7AWumvNfB?b$R5y#~iGXM8@ 8H|((%1XrMVld2"fMKJOSuJ* F`q'Pl*F!5'vūΤHZRJ!Ж}'%IJGRy4n`_b--fMʵ@wNmz 4RMMp]úR.^ٶo-q!"rng\ ElgcV#"X3DsDW"y#5Z"=zl[xPda(uS[ 46X,;wQJxM_|*\iid)5dJ $vb{_2[ZR MOt]P׺ HTrs(4fV["jF7evЦfNe%+Zi*p h6y(s :v*^]ocOM!ᅫj`ccg`~40,8OC9̑O(? #VKړ elk%\vQXY5lcF#'c9ÏbևͶ(K9wS"Z\NjwqK7ԒĚHmvE*{Z[IZ(0jrvQ&C4xK߳Elv\ U0xF^t_4r=UZ6MY#F#5 4V4-4{5et-LY2L͖3d+E]99"us::1 s"*ʛ!-J!\UbeVM6:*i#WK5mfմUf6+'ƊriZSy窪VVJS# /rHZS9窦|8V\~JC",T\ՓX*JYM)VU.KysQRa$㒞jkV jLX&Mz֪Jf+jqi9T&j++'⹫lٛJS]m#r|MuԴUVVM>s5Ij$Q5吟rlՕoR^e9 QF[z_ˠˠ5w(U-FقlAʹsUri) #C8۳'ŦjlVO\UrvZJYQ)U.u+۔CʡvC~r=J]D,hGRރTX)I-ו2ǥ\VS1ͪri*UG~u橫ם_`MRVXI;mO++umj][>퍲8laGA6٪81C0IӞz,"dQ2"ڑV h81aV kIU59 VVN̸f(of+M˰tդZ=)ʳU#uФRYJ#Y4KU*E~2GU_|U}U5J<1V lUչ_(VlnFf(Fi%>gYqFK(":П4.Mե)u[]¬Y{sѦ)ťG=dȸpU%6vtMaٳ[ b\RdUEpjIarU%ޓ#vsUG^ިѳGYf)5Զ|mT_$$h̦H/xvNq{Sj,L_Ae?|ɈdDgRjc.&ji\=ld5%*o^vHՒK}jI`-uP9A©Cyj7w^K>#f吟¤{cITRKz9kόn,+"hdhU=8-ddVL\K˓v'!C]D[";f:fUlbc?ct^M΁Y,¬x\|gZw`-T!miU=0?>4r.Y\ȋyjZf쯥Q:uZeVYNsUY MzbˋוY PhR3Aq]HfE벹KZ*-30F:5T_W?V) m>l7l:'{7eQ^&z#QQw 1q7~&.3R"{_Gtu~I8ɕ·B Y($xM?d04 `x)- %~Z?<4"Oũ?pAhEc2p׋AaHJ}{yE(XETS?5 JY_x+jNFN߈eƏ.= 2/$5Q>վԘxxarEoэ)IWwJB $dj S7 Ȓ[!ӻ/q.&ȺDT'կ.8GaOL?މ_ߋ~.[`ȡ¬ Saշy=q}/mTDлk wн`UE8Gl{]><8m% ^hx%/[g&nݼ-J_@{FO6Oj~(w0LSTta1N[R Ͻ+ bUMc91cCC/zchfcT&pp(WouazUl:򘠈 Yb̨?>/շtߙs\a^$DI~J 6eCٞx0*gh1f7ޭ; > ɠ>0(ZݽhMWE &I&]m0hPrMkڗ''{ 5.s Gv_n 2q@y 0 H_OUqL7A%Vj86'{GBi?_ya/м#~>>n<>a.,:X%NJ`CU;DyVܪP-ի+qzb:޹ zZEW2!]ub\/cbxe>(iTōW1̈pD`"Ab3joANրZ3^[)u>/#{NI[\yH9&{sR r $cxpAEC )lWW*RA$ە$س,\gybbY\ۇk9zvO8Mٷgy;>+f2if뤈G_2/PVT)Z/SUY.>9m'_ZU4#N~1dwլ0nI^}=<( Kj ]!Fz"Ir_u f8h;N<彖X+Eg I^UWS%yqFn+:KW%mB5B/t8 8qw=)phR㴟=} W3Xxb<ï $(oKd sJZMܟ,.(SQ }6.[B i̅d ahQLe!CAr͐pO ĄO;bݩB0;2W*|5UYeq(׺<-\VhƑ! 
ZʜOWa_U٤dK\D ۡ5ue٪pNI\5VEt"^5{+ /V/6K ;?k$6 N|&&ďaxBxO-xC"Y3EXqb+:H1d=a%7_J}0B`uG;#aCRTn4w5d=șwJ*szQrc<,ZѩJL53t01:ͻ *WhIGm2xGtTTw =ˑ 4i@61QIwCN{KJUbK/)(Sw~wje0Qz\\iBIƵ>'#0Yf+tKyy 2:6teN%w݀"ӇOG&#eZi,OEI*"EXfL u}_YpcKx%uI& Plh]ՑMoƨu7mt4NV%%ɒϛd `un{&($"-' NK^*}7X>0ۥ"@sѭ}0b15d&LH.dQdmy\-sl񎃣w˽Ajk=v͒ <pU:;:ۜBUĭSc~4hU*գ_ճ= &0oZai(k>R1sFUFm%.:MNJ3jLAdsLM՗?(x^=;]74'p8,Fig2O $cUq!jgͣ*QסֿւѴ* (nÀܖUi@_"+JZ#EiKu& `'bSS4_6N#{OFdyjG]XJtgp%\[RP;/⬞0z &z:Bff0HC!t995NJڠUt~3 5hթCLήZ^tU9Mw!ň1R`}ssv\{}$"*?E2Tu$agS0F]r(!\N7( n.UǷx&ifDSX`X9qIB)YtQFJBՍ:;TSU;'~!>agsb昢+Q~qTȜԘj0u+\;6(Gn1TYLR[W InɌT`zk$jwFdy̞Et0JBXk5j.llNS%ϪhF~( SnLEj6;$!9y~(-DVLC{L3Pɳ0K"MUYuU{l 8C)X5r@vuLΪ' j%0c qщ,Lp~sE(';?{ k:sٜ$uPd@"kh]Y;A[ŵL*ƙDAmUħgcp(>Rs?ksQ63 VGP-hJ}p%&Y֩CMA#3DT:S irpL1ggz~%?ZU7i&vi $ ' 91 ptN 29R% ?UKX`"hʎ4p3sLF2/]̭i+=;tx%i0kK&=̆MӒI+x\׋ -PpI2.4SF= mld|X]wh>nO M"|`>t暎\OY|O>gr=ӊc]OmA4Ces#+1d碷V&dg(aW?.,N̴/҃AoAӼ'ކm(T+/hRx9Yg ?)f=5H1+MӣA/Vl!'m= p8oǦaW`pm.Jsl.oXJhk7НҮIא*/LA|a[rvU.cSr쩇;ܮ-Õ.M)Ʊ BORyX) |qPӳ3t|c_,꽠^v9/|, yyy\E9Bt\ qԪՒO02):35|ջJIeKӌH.1ʰpz)}Ѹzom2RK$,&``t1ϯʔL .5 H(ejpE`i:ҧ81i2_) ť$nk\8/YVҿ-+Nv6/FXW;ѓ{i[-$P{5'"STg 0dzZO%Ōlk\LF%ҏy(xh ?f;w|Qk%ʄvE$RZ[\8_$\ EHӾlL9k/P#P=6(C>y-)9 Fu(ĄPAmة p.mYS[?p "WTˆxb<:!UŴ@I tr޼c lo=vXl굢|zTv+ \/|dq^e^+؋Ap+C6~?h?n]0 +c͒X 0xKl*A@{Ch>"`t讏 P]VXMݺƳdq{h9T:)̧|u t\92: (H<콟z:K\9@(l)Q8Mjz97a>͗WÁ h"Gi-o:UP3bXVݒsrX.\c0ojuA6;[7IJqEPI;qcX{ iWE68\:;Q(ڵ,؏nNOD0u@ugϏ:v0\&A(sf]璠R*NUqSA1oіI&VQi;ISgCdSQ*}&}yGh1OgU,шk.>c_ [hST%F)xi2: !k6v9 :+F4dt)\5ͤT uߕ@)O:B_l _oֶs)gʴ@7AmwՆ3MW 4!30 Mdr]A|&y\mry&|=< 26 n6ExqmEh\EjlVtK"FܼlW5$z,PchV.BjYp+cKO l#3z룾Gu?K%ZTq^m[쩰]fb6}daA6(V3#INÕ  I}n C\[YUz"e?Ux68@6RYJjE_aqh!<Uy 퀹θ;7\٩VN] HOLi~E ?Z6`~ F'd>dM2C>|bW˃;Sr^? q?a2'b(u"3"Jo1Si&ʅS7jgT\&lT*aWוp/wW;'O8 /WӾ(as}Jt!Vs>"ʞ hWV{ Ox >7IbwUV #3Y:aN{ۜuq3ycJ^di ,gGhZzɜ=Jx¶eK?٥٣ԆbtxA|TUdmmWo2(]Qž5HPa2IgI(}yRZeTfNfHiR=CU=zúyV-V>dt HW9Ƞ k#T5!cы Z9VCUs>@2@֒DXI1:JUEڝD*Ə١U%ұE""C+Hzt^(Džj<'d%yT&`xQr0,ʀ\9\:Rar7*. 
w̤Ju>lMyjo[5"=Ty^XoJ@L+Ye)(m塪J@VeU])Gz-wΠ16gqUh>okg]bvfX No+'l_noeuK«%O|ɓ,DnoC{04AY8Ao}IXnUzH( 8ΑP}G<,/,nP/aiiowv~ؿ͇r\[83dʬ /-@ϋVZ0v2=j$]Yo)YlH{B 4_%F'\j݂.pJ0rC}_|IT%`NR1\b뙑hdm6,2hi2'Hv"\:)r"1wS=Х}gmrqAI3Tees1>q"p`՞sZ5y'qoVRaJǐSvrʮY3م'삂!C=}^irWsZ߹@] (SIOK%WL{Deo@[}qgp.2N)Ǝ)'!/ e2: 2E&!Ư˘"k oҘʑ C%ZkUT{0խa,~1bK%ⴠΕ D%<&8\3Ȝ#A֍]Gs #A- 0%w"7qXͤ05lz5i1PƕwFEۯڭ+'MȖѾi]ҡ3f5;\RfWnzJd]StPG7f@ =YM_ȻBW7ظjvB(, |1Q.Q9 5^Ò08nH遉CR K<CFJ35[ԕyѶ ̸d@Exe`D,'"×BjW&3[:('r[zjaGpO5\a<\Fc^}J6+̏ aP(LBo3mJC)ǗϗBJV)_k|T)c@؊ )0HΰZj?ZnM*B V`+{82yHX˝-JtՈ+X(cH@2ml-Sdau-<_-K^6&4m1 wwwk {R- v@4 q +t[͇ 4do]r*jP.hC+Ŀm?,g7u ֑XR8LʼnOU_ٞ>t(]nu*@PUCy[a_l`C #_20cxu֫9ݲݿr(!j o\Up]Gpgmw؞W7=xPl{$_ӓkltӨ! ϫi_Λ4.j:_A@{%-hi(LU"5+|d;cI0?0`5+迄"LA'8]8 Wx9a6 1j@zr}$89|Wuayu'8 .EU8VVʆn3;wK6[޹q}>ʺ|OؗCbxP$Sƹ w{OQb0XAFa=2dk}2DkjQ޾)f|9ڞpdS RC 5jNkv NO?j*OOË٘nx ]O@g5IxL0k zc2NY4Tyz-#0>qtUd->|ҧ!4ѧjĭqaJ3]xEt %~'F`CNutҺC}ğ?&zr=a,0-]CGgi=EιE.6+9/WVߥu)XB3EV5+*N&YL]-N1Qtt nP 5&_^ /~ Wlw%hswr9+^ #U'o.%UpOޚW}7^tg"Jw/}<]rP'Pn2ʨ9Y|~{ 'op^0WQtl^m1x7}ULg0Ka伓Ť7p,V%`EDS]vqd1h4[gӮ* Us0S;*sKys!:śk㸌9ư0检c@yqh2Y3Ɨ؟^lJן9yMwHއC[z0ܣdrJL>D&fhN\8"u;qYs%,w[_>jM2_=$Aigݟ$SģZzqqv٥{Bk RJjoIdZ\h_0ڽ[yW MN_tY ʠFM՛U=I8\کzDΟO&xID9ߨK%j~o{U:h&^l2%ܭ7Z&'XlmAx}:htz2mA8ZBj٫>r"HB`-I5e~[\ѬiU{E{%/b`WDKUMzmr03ԓZg9. *~kX~̣]sD4vc契# ?ݝ.bpςe: v!,8Ngv: ^7/t Oew@yu+-cm.,C#4O+myJ|Ҥj쵐;?R~ tE >@g;?LIj{H8]"i#'RQ1|й.$~ $ErWޯ׎Op瑹{+~ 94!&29 nzuEǐ!r?1s]g%VX/| 7 oQILKuB>I/g! 
#]l9>qEu[GӉ>믿ѓ>g6pЫ4Sg;0W9AQ^JǃTG+U:0_0nH9R^]HW`rIeM0'.)dKJI_U_?Ox3AJIGIs5qR]=HDO]`)<4?Syd5ǯ˸K^XUɕqd8񢷛ꞻ7ȏ-8ؘ]( se;W;&}}4,\qYdoLk (C@MunfH?i\?·~~~k|6kGJ-- ;Z]kv;%?6#&L] Z6WO/{"NxFϽUG0*: :IpWD-ȽW2-z[%Y_=U6f'Ծ~(|~3W:wـنм隂̾5FHX4chI6gxv1}ƈJ j?ۤMrlr0~ YW-*RO0ÿ¿kXG dFH@*l']5 8} LzNV`=s'xVjn%*90~Q;=-fUI@@0|9_enǸ w,&h/:WC1r:WĎ ne%%5V%_uN.S\ wmav*R̔.RL1e>,0WЮBs D'7CӓaVuC5s²Η4_ o Ғw#XL!zcH~VՖ.TAbC_*"^ 9ni?ZwSi V tbZy-è]m *~pyLk-Π[{nh(gfO$Ѓ׾)3B9ZfW{9OLtcRթ~;f^nzѱe^beDz}K<v~sy:?'$'h{ cC}M2eC0Gۗ09r AX>([TvOkQj5[mσmu͖ y)] :|UלxvޭmQ ƍ36:kkqj'$.HO 57}շ2m z6Y0P3,c?ROB`ռ>ɖ yBN텴"5>1fajqsU㫞@ŀz>éLj0-P>凊0Pq4n1nwfNcdBM=Bqy-O->f楝J\,P)7c-LC …zdM KdJx9.P݀V}ռ԰!ux ݞWo֜!˳Z\R:B?zYqI~+";<}cb!h_Jң8"|jTvZyz@&U%kTԛWZ7`]D!y@ \FsU8+ m}5O^4+4x>Ί&Шȶ_o]v]ymT٫2]߫\rDEx}h{]rFkW]G=0>Kgec_#*+?qX{>) kUE3GJFA訫FFD33 osxfrn.ZD uzQx!`NҌSSpß@1 Wώq : I;N{8EzZ=l ơB1ɘ`UtxYƀnuſo do4-Jʃ<5D0:?eUWxһ"nrD%4c&`6ĉ;ǯ ͗fޓYQW7փR<]!ZF %$xKA8( ^(I^?(٥)4 PH @NkҀ6h]e@#,&@=uuASzǧmUlNSgsH1bXܪ^u_0Ɂ紊jf3U|*IYDk) gW?ćۆx,?(4_ƽIQT~V|F\xRwaPǫe)]F4ѹҸPuc `2<,@k _ȵOvٜ.9tm1br8_տ*2'5f.-&L(z3ר%)[L5UԶUCR(1#4޽ɼY_5r!]d9?ZکlDӔnouKgUkzZ#c ?օ)EH m5E{OΈG_,ET!ki {E"QS)4 Rڇ 9`\8jt33= ȟgY jYC; rrx~qϏ`Kބ8:aR]u%0]lU4eGuQDď9a&#pԮOVb잝UYz|维󹦣4tz:{:9iV>zh Y$5)^$8eR5!?Ci q)fqgDxD| a_yO PR%2gK )]Rr(AǽW#fez:l=4=ZT|bűrx6ni p f=ylvx 6!A ]> ]v)\| Y TU`.kW%96U`i.ǞzR9\);)ޔb i,-{܎u8=;#AϷ1p:BQ eǢ@<'5Z/NONO%BGZ[-m)C/sQ 9Sת^ĜD\4 <_\ G_r:mP~7&cp!0 {E2m&O+MLiDRR& 3rNE|4>G{q/1(g5X[ d4>}<̗yEu1=dG*r>_ \e6=ɩ}{kRQ{dE6 8!+NWg"fth JM% W !0ˠtQ8}FEO&f߃9Gƫ~ne/n&FyNL^$ztЛQI>Џf(ۜ [洃Xd#zD%q\DN LH\`X88Eu'P(Dw_1SCœU2 0QtE fYȢ@D MHr=-(HYK]K{x HY@[6/c5(PFXyYF\J;7󒜓Z-u 4^LC|/nz Ao[)lx]5g^?.)rÉw;t*rpGA?r/]'kתbS;Y6P%2NyA]I>U(*N?:dnqO=Fb>]sҴL}*T0ȅ`QĐz?\Ǧ@i:\v!&d2HZ@nMğE.m˺nҞGkVzg# ,vTPK trζ[Su;,p?iZQ:=ʂbE*OVWqwx _?z/Tq%dž/¯/mOխ+& ^5p<}ᲾoY+L%(KxsFq:`OֵhɝƳdah=:gSNgs|u ٞva90V4M7sULJP]#Ex9KYBZ 85Fplm8EfL t}Aq]m8tpH#tQ M&o<~o&'쎑~ oai cBΫvnX_qEW/m4E|j-WcVD26_Ag67-x`¯!Aգ]-diuCoUomt_]]b`)[=+]<[*2ϴ"QhbT&fb|JSs TIo+BF♑$A'qu$R{NyqKȬh*=K|<Mlh,%dF[Տ0 54L͐Ag?*6wF\H'ĝTUTO+x'`)͏;[Gf~̏O֛oCN;w,`/KکYYo؂ =8(.205i@ڣ$B߶M 
)'Z§Q|.7 {:J8r+\Tr@f2+4 SK۪UVj59PUp%?}kmIL+KbBWiAJl P@]WezׄVk5P =\.ipN(EV%r?ҋr$d5F.#VɲzL֨CZĖ+VX50IaU=5iV >dBoceCB\!dɮݔVvZL֌DX91IUEf*dY9&k4!U-b.+F夰#VzRVhÍ<\,PUgw}.pRV͕+_9WڹQ$IJA, MET)hS^Z9 +G""lkYQht?TuW*`Tz22ILQԇFqv'VRCVO*'vU/fvZp*f$PUs)W ;6P՛uNX ;.7 j^o "%xCUQau俓X#M "jZ.gU@cuK)~PL;+$& 5t_oßKS8ABo PܼeM!6L{‹/rȔ!=K7jkc9&8oqEPŏ<G ۜOqr,Vm}1$"iWp /O(}\R^-ybЫ ^ wL WҘ u@3D$,7v*͊(h4N/u2Ty$p< KTc/b\C \~^k0yƛR=3B;1yJ V#jLLݣF,=NbpņdgƻYTG|l4{bO{;# #ڳy:*I*z{T!u=3MMۆEZ]3u-MZdɮUęS 9A#YE)grqAI(V¼ӌWV6]['V9Uq0'mf%o?!].(2D@UV w5G9_9S/\A0)0<{ xb::x6)b[$Ι`؛I]nukE&jp0qZsa0nT'uJc*Gf.J/ ChJHRET|GKJTr3j8W6 ̛D "RsE\ "sY7Ftm)3[ 4dœ܉`N\a5bzV԰ &w Ř~CWnޥ ljn4![Fnu1K$/|Iq)_+*JkwQDЍsCſ;C{Bt%d5  Q\ br#ڪ ,t7vG,F,`Ռz Kt㸕# &I5,a XpCPԌJoQW>E.4 0㮒pa<2:$~ayēEc~xxy5OƫϙA:-Ly(K#']Oa. x|.Sq7_ s%XCDžr(&~!7˙6A}[ˏz]x }!%Я5>sWU~u^8oB<69IP4ƶ~ߛANT(l#>۹BCҾM~ amWEZ^ ǰ-F ϬW؟u-Kkٳ7ݽovvg;O}ٳpgoOf%6 Tx؆;PcvBWK^݆jN7nyoS8 9CdzzEs|qv>,J{;{;!Z=nzYg:$jb41EzĕdQ5@s"6 $N6W=U(썌*f]}j3)>bzR oJHŗVN޳WC{qfWl3ӼSl3Kl;IL;;bKoBlGUf^sR3*T)ұX@99igOOѽ_aTŊyz >ޟ{LU y"(A:5U0vcHDknMNg/; _Ep;&7u eN% ]HճOsnXF)Q^88L;ERҭB9&9d0G 0Dɴ* /ʻ2ir@,]@m?5Ro, gI`J`L՛0 Zv}?5`1k}=KQ*7^{;r F@m&X{z~¦RYwwNoUS:?GKHM0{5YXS]v.r,4rߧ_ʏA{Cl4z[P?)J(>~-vF&Q?4)§hsnHq ROB\!ˏqKh iGMX>cp=bԭ}ZCz<{OPGu.XQ`@op|)褤Bs:EvVd!Kc$ۗFrQEmt񭫺>5vh6r(:{Yy[~7zq.8F[8 &JNM\1=ĥ]5XQ`[.W9tO_[?Ks ):  FYȄ"GknGcwĄut!U+]rtieQ uꈜ8ƙZ{dZJzl O}tQ2fLu" ۡy5}kitW!ђ m`Cc6 ct\I{<%#,}<;;-E21Hhѥpaװh+ol 7ȌhDqD`U؞OzqU$~/DqLZc9@xR_Al)9aLYy}*w6tr/c:|u{WDh` _o9v+bGW7l2X-1~af(p ')j  Zg"ʻ!9Jɥs+F:-)R̔.RL1e>,)x(ЮBs? %>'U ì@4%je1/oixp~g)Do#Kޫkhb1@~j"b[0 #(Sq-d2_$p #;ͱP T7#  ?$7KԱRi~Z6qh9\cZDkmA 654s5x^2z*Bz')\2yC7F.EDsqdle~[浉)V7l/>S.+˜$O1ROah8Չ9I67]ljyOۗ0۹}|`1>B>*pE5hgI}Qmu)vU[o`g[z]ike{i?d|f pWExУxvެmQ ƍ36:kkoq#.HO a>;CecAVʴ n'f[ e`]qIW'ٲq$OH08,p"J%l9z[**pD 0Q1>tSd@Elhz87z3Np4J&4/DO#d->fq/բ-uJTYk,t gJxq1!dg@/+Z&eZWG 7ӿ#:yz%U#DG^h`P)"XQ)1H _'?պjA~E!IUC ګN{|[b. ȹMx"q\pȅ6ѾE/qAQS4}j8#5x?`㍲NkuV~jtS%ztKzV#D\{JƁbo,%k `FkvipJ! 41HZ4 ZELr[Gi8 nB@%Q!ͿLʕfYXS8:wS\c\ ši/NJp~-Of)}$LKPcQzǧ=g%wj\+) G¢) H./ڋA`ųw#O2=t#jX\LCgq~'D. 
qJj9PnO[.jLw'i& slשOǙ[ eư {ZώR#yL~O ZOUxٟݣrܿ=jr>PL$qcB ĺ(cDzl!O`C g_O-(qWgSCޑ౑ݗ1H [/RRByoW{}Ovf&j{b3T"~2Q@hj&j5[h~{;XD ,b;b`2J zc5܉YQ__l<68҇G #C6A`Cd-Y &9z91|E?7-`Ya*Ą=EFI :rx᳈%FnYs[ڿq>pW?\QIhf$vCiY*0Ͻ:Qsm.E l=vXlO^J|ez2VfOwxۻ˘} QR+rAp+b6~"h`!Gխ+ N5pܜ}ᲾY+FYU%(HxsDD& xC}xz x>cG!>{U0`QZفkN1UOB ʶjñ+sF JpH&o29x K>k7$uϻ5 |EfF+WY\`Tm`CKe)Q$5ڪ~e>an R>Vߢ} BrcgܠrfzZ;;xHOLi~ a]:E//=Dgd$ɜ XB&Jmg0b۰@o]k!9;wo|XZ]U]]]}45ӣaU6=i'rOrz? }(z$~v`ϲJ)i\Xڴ,QXfNO:hv>ҷLXt:A-"bIoJARZhM}/T"-"ʸ@2Zk_8։`8W'N}0M&NZę8k|}Ӝu-ӹgJAuЩ2MT7tmmq;oRDu"tQ|n< ji7]\BAJ[7صx8mh=-TIřM5MW",ѵ$,ص$ZT1{kaSJZ_#Q*ŕҢ[T1fbYfYx-H6]7W!mDE _ hC| ,MmJjřтFn* MH%1TQS%{"Tf2f9fYh-{(6_i}٦NIr6W#j{b(m;k'%-/9# wj *~B͏W,Ȗ_E9=Ty LJMmacI>ڔBO7rކBM`>W~ ipͯ7zɯoϻmy7o6Oa=}uwN=^w6Isq+gz*]Q\L%Zpi=;4nqv?O1gXH53zQĢqa$ݷB IY_ZFlۑnꝰ`+ yCf ^$ ٯQQ>t^1C wR;LW$Nqt,akhJ' ˻9d]I1^$!doNzaO["3DLْTdr7CVUd.xMKrdg1OzI3I!O{YޖF%d=^u$ܪ- e ye}II}`\"6 X4hBj9OU2/ MY$Vm|]埕 #K5k,=$ D ֣V{Q29/uwD82b9,Y`6ޝ'm 7k>?z"Haꑗu+4r\VRAQsO"19VMEsNj_2l\V4`O0Xl'd9[HO -lN9|<cvJ)"H1FyLSH ]ddjP%X5Bӯf/?Z, MpQ'*L0(El`USԥ!7<!3+4Ll89lS'q&FZAM"!lt=[M+75_[4!XDnuk /FR!_R #e=ɡhJ4u{Za}Tg@ L` 6f+tfpFXW%dfvqG;`W0.DQ8I j W) 9kfUd .G鎈GhIlSua|Ҏ? 9Cĸ4\2召#>Ƃ<_/gloGzb VfWe[T/fRo>$T371sM$\t˻PfK]y~VC ӳvjFSTE>;UyhUAh6{o4AcP_UK{h2Nn@2]%ނYw<g4#弴{{{[NWb;E'g ΅F\jUڋVuxU/bHv)4*0\B[lAXpp^9 .mWUk0Qc}T |m G4|>u |dq2'~%?0ڏѤw'QN1[R -ѵcR$A9(~dž, ÍCn@+XvnG6Er,&"c |8 Fǰ!W_RomJg#+|>Z5ϧpza1/.b >&͟4e #Tn6ּL/ t1喎&v> DޑynUoK۷oފ~% ٪W۷v ́6j81nm]% [ $;̽iDōR}?wWp)cGÔ4ggddkkw[dK[@ɧnqI.5siGSeM})> S5pS1lRoA3@"Ijm#ieHcَCZ4cmMz25ڙhӪ2+`kސW#[)UX&cK}$s4UFTV>opm>g~h|m=]\(wW[MУ>g7Fm:vC'8yzFoH>O{}{XҮ*X^z00a)` M:n]ӈ"[{# L#ɋpQ<:2*f Hd˺˃`<ĞscvH,0ckpv41OE K{ɳ(}@/{'Dt{YJ:RlʓP~ z]dLş^(t@rMs7V/u8N8 5Btoj#NŢGI@Q<ٵI(k] N5ɢL#< 9|1x5^f l ܱ_z9^9Ӵ n7j&4ͣW8Ga,Qw|۽K2f=n[j<^AƯx<*2jnl>{<7ral7{mz 'C;) + `Qopo'Ç|?"7l].Ȑs 4ӭYhr{It68~{ pSU:|E/8IMVa_zu8`-lv˘$/ŁdlF˰ NTn,>Ů(>+ ~]X}мtv_)&O[ϫ/Hݽst?z0+#!@w{SIJmi,? 
Z C;Cu']gf/<#ڼ 0>R*QSpdtW@* 5&.U)+wYQ^y9KG6' rןKPC-'W{&m&'o~'"9GE2,=UvǓK!1[О,N\D _< FT==ܭݡi| 󌸐.kM(T\,%bF*s.pw.ps]ahW>b xPIQ]R;(GBk<}}x>b #֞ȑӗ<<`T+,\q?5 0ӠAiӡ4ؕVuʿ}Ә?(׿IZL㳞Rӷ!N=T^Je=sY9 *U##jhCQIomeSH6f W֭VB êpJIrd*C+jyDFVRq"l>M7yB J Yz,DOCq k$G&]rEc! vaV=?S iDP`hx'Im$+\38\t#90Ɠxx!;`W:{%>(/{VILwƨ/aEx*W3>aS屢Gd!okfXIOE!|#NA"JQYYgp3)<',s*M5N:/Č zO+zTHhibSxAG1&AUbYFD0->wY QC ;D;DL:A O8V?Ľh&<^sթW?5j orts 3VyR.KB{x-,Ifs(91h.b=nB tL*7/zNM&t (ȝHw$/d[*kb@}3 6祿 -.FӞ-6Ku.rY|k.F:g F c#E5n|wY q 黂^LlA[ QrwɱmoJxһoQ8~0Q0.Z rMVA >׬Pyu3[,G/y Ρg ?0lr!*%tj|0Lv2`[c\xNpP &|Orr @} &]c[Tt୳',RIyLDuR8oT :⿧Bi\T.a=P]\^+74:D;:D;IB`(GlbO)_iR5*'O=tTa=P@q%p L7H;Y4_ XӀkh%0蜞QEr#ZC퇿$!u ikH g/L~NcLp-mZ)tezzvVZńF8BffEU%*(6>6+a'? v^Up*(etĻ@kCyLz6D;J2%#*| 6٠e*c4NzX~1/ QckV/!ԪuL3!UdBQSw/Ne[˓:͟ gT 񧆟NԍqЫ0S oVO`+g@RߢDr(NBO5/_1Wށ)^'@VP:r*c'԰c x6]BQqe;s!4+u)\H9:c?šքʑ\/ X_f[X4tj=ռgB ef!hpV@Hc_gR@tBKB\ZǍKR?ͯ^@6N;#,6+q9??khO_B.0 GhWgF C9QR~eGNa-m%NQP 2/OO/\[؎Rb윟Wm,Ÿ'-|q7wq 5ʋ3S%*{TH##G9᳈>휟2CGQrGW5cwِN)<-EP}(=TaH5T4O\OPp iYGH4N#1^8Q*sBЧ>Z.4Jk͔a#BOfuH+}f4l bO , 3oXx1^ GCfUN{ Qu'akAG/CdPXC`߮f]LZ[kKa_ q(\k*O@ѹ̡ t2umމ R%d4 [BC| KZ0ի6Хst }jWZ0*У.8 hP 94lKPzz:tq_ }A5lrdHJU_j|rIpB'jKNM1|ƨP9z]E4kUH"3N o}0C0uzK)5:rZ4j~B a 'R-c5[(˷ ϲ|aA)PFSYr*hh~+rPz@@gV`8,*/db6[ZpL$W9WrV QB6Wꛠ=i{Z> 635Ih)AEeЖa&$pTLꛡ.<|'^t=>vS>=2` ]g2"W&0VO~я~OuS6! Y}KiVL֤erX(PLLk0}- VrK ƞDلU: r+F@ޣI͠`N@-(KDw6@0Eb$iB<]+Xג"Q\H92]'c CHmV2aMdIfu(w{$ёK"_F ӹx& ۜVjx҇s.%]C y *g3@ou4%1HoӟLZ3›Ǿ8''x +WHVWj>dO1@vy+N<&QoKf&f\A|P~9aC`;IX9/K%iS6n|9[ҝ-Ks+Mt.-`F=KA}n } 3T/׮@n+`P@@/e6&NFu4\fڋD3Q~&,T^mxN]}]ab Ew\s?~2B^xVx9zI4]]|*!fizZTK NGwY7=_EaR> YȽW\jad#EE<YJ;iro qݾj)0h:@B~Vz(F)|8H]2:2橹¤ JB=ը܌`hTPqx/T".ŲrD\۵׀SoZ2R#z9Z,~(MwܛONm]ܹ4_xoՂD^ZuU̯4/+N`K2iyIz15;TQ%x܅5?V0C-P#\G06Eg*C\~*z[ 39_| GX?MOz!mq1y[DЃC*0LfSqwJs*|@x/IJm(jvMoh?]ĆFtqZ P/pmy6J_E0D 0.f0༕2떀Jv#j{f2,J 2TM+3 [U;`Zj $:]B [R3BʏGZ^#7;T n[D5ܲt3RTtKAŵ* &sda?G.0էi&<Y]Ir;'KX)OW[%MّiDJ>%6;{JLe1ډ5|YZm 쉶#زg`'d9N E<ﵰg).wdyO:%vgge1HGf+&RdQ|wp*2\j[-E9^*DN-ߌ%'qzۨ_rYogLWXٴȱl)aHsnb1:fz[Ɨe||^W[Z8 >lW+ubc)z$MJ3K(km`:[M=Z!{uZF. 
1yLsi= ̗wX'稴"2/i,=ڦ2qvKlgH{2ϖg*(K Kw<%GrYq_QGNg:|^sllq?EܟuֽF9ѩlmȠv`8A@ka6z22Yn b?yrОD,wʓHq2L<۶'d9\a?6-G\\،$]Mг܂fS=SB1#ǻM* h%$SmZ\r?LfUR$/c ]N>QH ii3)etkH! }㎠{e4zi|bk-HjhmKb W~sZ1`璙eRUx|]ê Iɶ'>۞(9="<2Yj2Y`úͲY{Ykl8Yld`aÂ,ǂL#eP\[[UC?Gʐ(UK)Qk.VÆJ'J>>2]zU(4M9JxO?ce *TnX| Rj@#҉ض-VIwKBJ40zr=,u'=׾ З֖C?I4K0\Jk)2 ~FIXV(ъIOMg?XJ d XH؜UhWj% T`WBzzlGlzL W(?M~<\ßȉ?Tѓ zH?Qa|͉P`qd /dFMigY5l㡓Ôd91.RI`}.u1 JC8/ke6Ut)WYv)^ BK/K}Kð0Za[%b ʒb&vSh=zLa*{^!b1CE NF{*$#56VYX$j+rU a_@Vc!7UCJ`Y-K- i%$ s7ZT|!HYȅ*APKZid!0Cp`m,8"s@ZX)MXP.LK* J{`i&Q2Л*J*M_f[}b4×$kC 2q/T Kj_˰Q`+F8gESJb ,[ v i_Ck̲Ӆ; /tpDŽX tt A֤͍92Flai//" -/\ KjSEn.\"Lǥ.Ѿ>QN7*E7_k}F7_v%׬UJ~;FgS3+ z]MUa 5/ dPr@׋L2 (i TV!0Mk[]g_6dSEp,ӳNKlldX b!%\ە4ddp񥛨wN4: gޑs|7_hTxt!T{n*> .쐧7U\.<܉tg2Ҧ7U)*R_)+%zGMB8RюC/,MkࡈYX/bK]h;| _,) 5zu0r&ֲbSŚn;b.!ֻE"M]ܳ_Z=Ń,bKAhe6U|i%c0y)"Bb(}l tMi/Mi&RbSx!,'=>Lv1*GPgP"TgeagYx-H6Ui>g)68"TfN} 6Tk:E\OT!BvřTwYxu. oݬ[O%qv+}n2P*:  o]B=u qঊbۖeQP-tĦbYH20*E&y+^VNjY'TVS.'b ZYMHR Sq0g נ7UƖ~0.&GI:NEpB$t PIDiQDݦjVHqkciMa2a WgQ/Y#]QGa6Jvfv%xW!>tɦ6]/ft/t*̒h<, $Wf#} Uc]:X_5YadQVz⯡ Fܚ + 0Ukip_){ka6yd,\g tM ~MCDeޥ[+4M9O🷥h,ܦo2H<"KoBՇ4,S6!٦v@%45~ںNdq$󵔛*vu3κ "^Qpo-ݦG e"}6Um4.OJwƯ I.Mt6Vw{-i TQfvd;0CUv"t拿7[A+amJ=OUяIp_!Ƀblj1$rvyxٹg uuR3@<4KRƌT_UڝvZ&)m=fP @(\!<3|IW)t?ZYi5.Vy姪#~eBw7r89kO!/,zmwqz}|N|I_o_wϟws,gT| 1ڐ;JK f A59BebCџ J`0.sL?-'.j4cQv1gMAa'X?Au.BĢqa$ݷB IY_Z>ٶ#KQX8ID v$QHO$eT<?ҍz0^׉+% 1Tx@NHߜ% M@@vtCşɘ_ qO'X} >~ouںpnT%s?3-t$,鸙Af0V"-ῠl:_#j jBi$g$B&s9WwJQ7)XG=uiA8/v`!*&D - NdԉECŰDlj5m!]VS* x́VMQa3F5r|5C>Ƿ #e=f a DCjQCcP?P9?{]v g9XqMLk*kVv[jϙif["E &6I,!r[P Qu&u X5.[fE7O1y.gqyQ|2~kP,o$f+/bN.hp2 NKaT_L,'^/E-Nz.yCF=5 Ww*Z2tIu鎈G_!,0UƗ)c 1#l#弴{{{[NWb;E'g j]WF\AZoUvsFe@tD(^@UKxwq *-`P,!\-WjBF,5(豾DhO8_ă(rÖ~DSs3̏\ ǫh;vď>G/޷A$Ë9ƫ~%:#E\V"8[ ئ!v8![gY /1JXP$8.,W.AɜB_ѿjLyyM:N LtD8NBiNěwsP {*VСDa8@^ Y)"KDGПSdC/aHl`;]gϞ  ̄{O9b_<{'{_V0[@Vs6ŵ̤` *̞|#*4].y:]-gԕ@]&;nTͦp;|,c=q1^Cg:΋g{OΦbvIpB%&0 QB3>4V˹3a݁4 B͂h:g`Ivt _]`L"*|1.pM5,Ə?,˶5 X;oSmQTnU۴[hC F5yXtt`ҡFs$>Š:q=i\\Y8ytI|HX(Sj{Qպ?WF=uy|<9kE^3KY@vP^#hag>Hi_|ǡ'y>̭c^&0-0|9G8eh"y=4 
CHi#HMgNӘrKG[[zPU;[m"<`EA T%7oG>[eJ޾ %niNNW3wɧdwk*o8H aH#*n,Ѽ3j  K3Q0%k/>k!>4x<b򔘚9oAm 䣗[P;Cd*|*@:-“_3{&5Ȁ"Ijm#ieHcَCZ4cmMz25ڙhӪ2+`kސW~x_UX&/@Pv,&x>Aq7wY\^~w*/`8 RD&эGW?C@x^QtԪhsz}ӏ'vEfV M)OSmq좝FN-_V+'&Q(fBYqr^GFRȨ@Z.U7 #q.~,\uscleY.G$O.[''y|^=(Y.~qq Y=FlN-ˋgT2ؙ<> eogƻgy-f 拁HOBMs|pa2~P&&ѿN)rMw^^B;:^T'Y4LM#|WuaٵV222MAG|Q2.J: ϹQu: y΅.MSFX*IrYk;D{@EsX ٧nE_Tyi $ L1<9M rqh}W+MZ0U Ũ;%kф'ծ6sxd"irtE=Tyy}ٌ_姼~^}qVGM/y)SY"[!@w{SIJmil|Ow,BE{nϒ(0_x DGy`+|*>oSU_鮀VcU@jvqdZbT_Uܺw|0*)lTq"цЬ᪂ԺJ!aXUvN)IUc!̓<܊h"i#Z+8\6. lx.Fx|uGݫ1KX<^%UϢ|=+mcE d}BJ;^Yb'Obw7hߩHt$`r%5!e 2yJ!,@3p}NZ٨WmxW[l/ KDvѝpwIW3cηHz$}Ƭ78 %; ܼlDA}8*PeU7&1 3}*f6AP\B÷0ehړ&tE 2=aE|EPj0}(+}xC;NfbvJMO_ }Wы6i"HeCl6$.967>5/[4c"Ĉ+ bۃ9;0z);лH_z*m>=oGOI},K@_6[g?W;JEΟy@#-IT f|UZ{fDʂDXF6˞cHC.ԉfk5i'IsuCwDp4ͮ^R6mNpIxꗎq?&JII_>S$]u!Ӓ6K0Q?'O@#2bZpYutp#X:;Jfkq}X} kV(S~IGzJHT36gsX_3:5GxMGADU;{P-1. 8"ȩ8e ͛? ּK|4^ xxKYAȩdl̡(ڕqbNTxّSXxKg[AzT}#z1_{A0vv|4Kd}GWo[ <1^x̧wqX8I`>U2Gu=Cn(G3|2اr(3ʞZjsp6py=a/GI>Pz&Ðj2 it}2ġӮ& 4HFc:N?z"] B"4jB(5S&ҏ7>p-{!DxIѰ5&?;nt' $̼ bx2^U9Ѯ&ԲJG}ם7Vl@M=Cmc q(i|u=2vhm!.5ġpusR8m^ o @-妜P^뀂}kp 1 =;' 2n *gK@LlcS, .>@9Me̯ Uuw<&AWYf&g Ī2aliep3UUF B_}d[r4tF Yg)ofj1V@$@'A`A*C<3 ϻR1o>,x hZFVMEPȐ)t|J_MIOꛠX ???YMل$F6d-3CJ--tmW< U{ `/WVg8Ϯ^BxWR ݦE ʉF hj2OotaRmO~Wd9z^g+c=WmvR)zzI؎AGhx&_,y1-ҶvED?;ٳoxG ")I/=_2R#:8QYu5!1kJ ְqu,pŶUYlX-I_M'EkE\C>xJQwI ҫ_fDf6ꫧ -1(Lsy#Gi44]mrFEvCJ2%4O`/@G,d#K= `>G]9ǔa~@6=;2sd;Eڋ<@PAeX¾*yQ}R:{4D~}o~͏`5{arSѻ~`plHz2YqG/;4cn&=MZ3ҡqQY;yM9(alး:gI2"X-c˳*(b?*}P6eIc9gFI|6-.^&dy*)b y.d(B7R7`>e(wܒm-z [GJWJjq}l#h հGq_ 4g odnX$8t0VlQr22V:/"mURx9Y`C8選񀯧L m\YҭUZKtd8P3JjDzFiՏVߠcd(ʛF<~-ރ8i7(\ѮJУC50؜ u.1 I=P~H x?"Ӄ59ݗ'EA~ÂHjדo^[x}$ۀLHsͪaldX$tJuI<ͭvSӢ ki>5#pE>mK*Ze}p6>R*exvaõ"URLx?F;r*I[5 Iwq3RsSnSֈqe L eA,I5”Z~>+~SŹR u- v- 16"ڔVLQH_J`-~SřzbQ `7U_8dONK_׉{һ2,< Tq_ fe\& ס7Uk{c{7§|6ٽKu)rM#xf]yOތidlMZbm`ي%q`3[}Pa)_K ~W@d/D͢r *4;U+uI ő;g$'{TQ4ȝV((F`ߛNC&zWVIA˭]J?.;M$7IMME90(7v`2nKj2(#ZAS`i臛6$$> 01 u3J\~"2 j0z>JQ4)XG}=iA8/v`!*&D o '2m"N\!a0K"hiB6g)v]@rQrr%Pn؀eotI =wzpҼ=%Lj}B 3PW92 ?=.;3SȜ6P~&k_Xݎ 5v9-ێF5LTY 
cM3ڭ"liwϪ!nxeL(e$ql\ЃEVϋ ~d&0_h=mpV UVK 8w.3`\H7n~)j1,/ޠ_P}Q/x9#>Fw*Z2*zŇ ˔vo +n].GVv_wi>%_W1>N 3+Ц_-*JY_#`s,r$\t˻PfK]y~VC_O ӳvjFSTE>;UyhU-(?hnV1q򯪀zr3 #.n~x Ffl:{ 9H@2?\/mq8]g'?*H>`ֺ%.sOώVj/.j7j4_PfZM\NɁ/ x X@7;qI^B[lA YMph)ZZɁKʐѷBWaS"Nh>uVos.?d>}?Lx1#%:#EOc7WKhM*Y%Aomo6>_|ARtLb%yKE)eȤZ)CK5o咖U[緿se]r9jc3v ̟=.G )Ɯqh"%l2NUM'j<o$?)'xKcYc ]c^J_YG=BoWַ ;[j{wwwno߮ Vxqr- wRWJ_0ІRw.o[~eD-QunxFjYEs ނn[ق>^:ʑʽ;pat<%C?뭧8lrho:焼вM؛(OXri+aAzy>ࡊ>T;֕tR"gV;(b)c  !ϛG';xѷM6ChR"y>^if\V.sQp:]+f1K#a/5kjK? F7-.|x: W'4֒~>ofngK%1&Dq ZË"fRŅ|_ @+2& d3Lg%P0H'*,PѼ3IFw# g~ox4dC1|}Nu=x4jqiZ>h-MqRk=C%KOSN`K+0϶iɶŶiwgKz&^vjmJUofG.ci:v] "Neɛ8g.= ]tN7]XN7\6 L]YIǞBgf|^Ad0"yE|k`EUD}+)]XRh)"c]BF<.F6##z*Wfϣsq ea|05X:N `_ 8]z4W;-Ov )xN7'Px-f fHq(OCB`\S1-tKۋIY9ϋ3!|!GGls$}khF~MM~Ml(sa ,WҷāJ}K\j|)G_>Csn-S uWp+G.*w|;UYÊqxG9?W%ٕW0 Q,a/ixu~4D𦪫53RϻUFi@a)UPip/閠~I+ }/rXG9Z8rpN/Zp_9w}76q0^m[*G^ދ0`2hP{c㸣w`s-o@`Ctk:[mjvH_O?n~\ߩW?k?NZwU[064h@Z8DA[deI޴?(nÚ]X橤Z+}/ӊ}Wa¦ h:FLk>\(,Ɯju?Ee'nk/']ѩcKy &0RH#9&t\5٤Duz:#-% NpfUUc Z88x#^X<&70i;aD-A鵟SpyuFrHlSa2)g4/ qp4 Yh9oJh ~ C̑:Ihg:[d5NQF(bAG3ٌyh~J|"%;ϟ0lO٘eUh&/cjsHcIkVfB_t3xjiQ - _ _AޭN<~tozN2sfIܝЃO+1hfP u6b/MWxa蛰ԻFvڝ?əWZO;5%7j"4f`ʙ\k9{'%r3b`?Zxt6cנ^<}^JW"'фEڸ>r}c,4vM xDX*d rC@ jP5ʆ`Z"|̱p`t/,ʴ/A_A;n .Ɯ6*TaڛAǺNIdÂ!(Z CU4[ov ߊkUn ZdPc h8Uk\.{U47XF&Q-wU=Ϯz$~uO:V@ZŗTj}]\ƝE9B/}j " #:lCƧ Cg[X~MR5ޡ2gob#Ն΋z8V" QL%(:49#T0j=4R[v _ DD+xvO"6qr|I0cWt"*`Wf+& k|2!Ѣ *1_=|1V3I Tl7{/Msy#J@;@Q.| W`:[ xM{+xbkEL\{' 9i9*-8-ו{kU8Eڋ"@PHeX1DpJf }Mtfo#{yag9`79|ֹ˲_rgkGtX{0 =#{% ` _~Q)^U@G?1j8#TH@yqJ f5Cd]Q ]xɇ7h>K%e,҈{+e[ <9p^3RMkզyhXL>2TT·˫UœNKq:dDĐOk'9>AAM9F}u?<:CU,ؾ4ڱ8E^sX ۹j@Ea*U7̛ =;|#ȷ!F[? hOhv}~u$GgE鯔 BG(.BotJ˔Ѝh"5I%raAoa<^ /\ ‡J :4fJCy9>4kG'qhͧ,Yd-r$*ά,, 2 c0`P]bF|"0`e Xp+X|f VX+LHrK،%S *C|-BFlF\#JF\o+gĊv=Ǩ>w1%߻_3d<存B*^BYL! +l9/Kn ix'soRa,<fe\Eeb$HLjF~"?+,yɇjqO( H{ =۳,_ tŧK8"_ۆ߿US(s0Y\].sXD;3gc>w{wl=:oԓa8~i@zX{KahiZc1nhZd)>t9nD{R֖~Qf\P^'!IFm#2m\+%+h|Njgb=9! 
]>4tC1<ƯpmmA]Mf}zqNU{na#^B"sE}}#XAfhZfe-&P <8MP|M%Y8vT_Z'>G3)qي2]Dɠ&"vI,bwX!Fl($,3UᦟZdIU)?9430XxۢN7gӁ*ñ#}D{{(̒7Ӓ ]V - Ȯ$mdtd 삂 =mq 2AY{jcr*&Stsbp}Xp-vNZ%qeǗvxtoL|.ި;VV" i)X ܨM8Mt;Kf3 ^ASbi;*R;0{;b }* %NT"r j0z( T` "O[c""s6mW2^aKD'>mڅ0=0+*5Tz5i6Pƕ_?y^PB:-beVgWl)2܉=H }7{J"[$-J8# 焞4do&rϐX8µ^Q\Oov@e]ǿ] Ȗ8@q_%ávGӃ0x`|ym((cb |u} au>;OEw &m%;M yZ@bJoU ۀ*>,!Y};oP1 $ pnB`ZWsd{ +r^Ar=פ&"wwq{'d7SaRT?]ϊ|nn3̂E\ބ͋0Ϻ|I**&)dOx4M= Y/5WQFzM+!~:Uݷ緶fzǻ[jkݭoCmF›DДƣr- wRWJ!Na@/oTw.o[~#UWqF7 qXTnigkgKS`y݅M1ɥɴWg U.E'뵌YO`ρuF ]Ƌ2tjFp^-) >#l:_r`ԋIxgy  J)4gugj Z~!muP^4G*K~qz4(`|b@# Q- j3xcCVK7ʟ4'Muv6}W弲y}}]NWb9ee ҷx׭D6'zON>}_)5@9=US WF*^(V+:01 =lrZɩy8+0_ܤhxwY|dШOk?Nūhl[/sPg,yh"?rbytҼqğ*n#aDxhR"k>^d\V_sQ~]+q1K[/5k툇-~8 g-p*|)^ ׮JӘZK_yN~t;oceeq!*9PJ.nkd3L$P0H'*,PѼ3A#rɾ~ox4dC1oNuW? 0מ\LÜAkhv]J%-!_zxR psFm(燙E|4MN,8M38X-׎&?3yZUk; 7U¾#;*t)Nӱ/%nxO͵u*XtL|8CJ)>ogf<7tB8Мd5J:v Gm=3[.ɕZ/|GX+RO&]I ĒFK6 ≍(F֏3Ჽ;zBVfJtzX*`9~qr_uOμ>pgf1b7OOG*&Oj_n1E/{_d O,lw)}(ߑcura2W#&ѿξ)pNeB0s7QC>X;ѵȰBHB3(}UuuEVu0pOUVO!mTuATgmFğ& @HAlQ@ڴ*i߉΋Vuwsw 4:]/ Z lHF,WIҡ3PŽ.ه:l4`VPX_7Oh~PT?Xsj?M1}mKclL'/N h|:c8_zj2я2NEϾ. 5x,7aU|]PW#\-tƞ6 ds8m4٤~u^QӵguMr'3,oךsh~[>-tpxfKkaI"8 v= v!wp24ſH 1yz}:Q#~?4@y=n8 e` G)*JFuL2V~LCt13t1Sm?#kf,sffv)Hب>^>QA^6U!.'60ɑGm3`m|T}ZtP|`Lq Km `G8@t1]s ɟm:Evd!C}͋馂Q#: %ԡ̳SS:KO(:?y%͛?hw!pa{VN,GA(E+iD9͒ 2 ~5Z}X1(GTSY;I 0$=ƓvE4,%pٯԁ57Tu4}F\JyWZ(T)obCir I2DQYs*:ET?݄[}z&17n?^U0)$;\^r$oxD}ku!(}*^[[{ZyyV5w9W`$F\;JU6VG|ŽtQRzLq"'ՆЬ᫂̾JHh4ax|~Ǭ"#V+2<|ߺ&@HB 6A8Uo#jwrM>X?{Wi+oo{ { dI"?Jfc䪶ToR%E,]v$.[KRc&ٝ5Z'/c}MJ--2s.oИ{9F} 'U)T/['lC9vo+bGd!HX"[=~3:]--|Q,˻z]b%RhYթD_R-1c:0Ɩy7SQb =A;CK!ژ=^SAHpfQ՟7H+\WN GS;eL:|Db  Q%#"[&K[!C kJ4x6%H`#;ep؝d ;Vq:Oy: eiJzp$PǬ7݄' ]<9 t XߓU. 
ZB3sTs9^ibUfv&֖7̾1nzѱ6{W4>%;3:alrN^I4qAwZ'clv~`.R6хƀ/ "/&C޽( :0ñ_W ܢdqt:xrZ<;hu7|˘Գ5tyzi =d|f pW EExv6dg ::tF] ?b5 SErgB sMdi{xV[ e֗E<-ƨ)\va׼<捃 yD`cA)m0&Z31v T fS\:@E-GxPh1c.QupTn1f*AI K1Lha_~;E{q6xM{pBFͽ.rԱXl#3Y w18A.4뎓*ЛSHߕtrrú :zZ /7ӟھ}@ D6/UD*C!d}X:i4{3"'ȿ( hv3]*GEZF kpK u{5O4kÂ'4AgA} T*⥾Ax1<v+ȸwoWbȗ4N ?7Q1aY:ptl5Mֽ MV~jv}2GNV/5B0z3PK7i8PsQ,dc?RaXꌀ<8o7;uVT]1h h|ŏxc5zr) zR .byLl୓baTS!jg:p+C--=; GiX/aP: З/ %F6#KWOLM!J~<xQ?m9Y @v1R4/SF wQH_x)&T`t86Q|5ѡ) [H]Co< tuE岀#,H=uԂCL>6cyh33\1{Xg@DszbfUg|:I-G%\53׽aDfCyLzvj4"_qDA3Xy0˦whVfeDKSp2j;Zs @38,kkvܰs9]r1 3jcÑFsl"kpBz?vl~c޴4/+Rڢբt9KͪNZ1lʼ:cjr4֔CJD5֬Nsj  ]̻7=R@sR=2J”"Hi6m 9yr(-Lt`u :zƟ5aKd:Kt6O- B%X5V]0m\mda5ܺ2foI\y#)c9Iy] E7xm❞Hd-2"8=xtB%]K(Xh8zjTotu)|H5>k?e媑>/ ؚӧ" ::Si^(h Gs9l7@6#3+u`g:z /tj֣&P 6`6#9d^`;!h=t5g6&(qOR dE.MlQA$`!U2NS%j粒4ZD!/9UЀrͳg>xt9_^wp6I%R )ߤj2LSMi,2i18]Mς,N#1A8PλlD}ҚRE)Lka<=ٲwՈ>u lI_t' $xKNj*eU5Ю&g;Ij:~eh:9{R[G1LլG+uBZG^+>ṣ tkuWW S %4 Ua!!ЭSzU}:O XӧܣuyT)=S)xTvs &x@i) ]A=@HGQzŁ~/h"kqދVĉĞD^<Ѧ!wRstz_. AYk-!N021&W^!!g[J{;yG,D -W9*E[r&m|ܷmʤEѯ+rU&06$Gv(l:s3CVɅF_UɯS*10䪑;QAo*mFÁ@5B+Fp oP$hF3x0<@6kb'g"" %4!T>Vĩ䪻Fn0?I˘\5NHXyUF&4m%&Zu(mw}mK颢ŗ:e+ =8EVEo~2Kw59:Fٳi?&{ݥI_i;00I<ʣo-Egl0η |ib66 =R,349\ŋ+HղMz%= G~vvƊ( :1qB: T3ߢB 4/V}K;kWÞR% jH=2oU(Ө?fMCXk|xx}&{?H-PFv"J{h/*xBt'j (vD 3U3OoPLߊHA/qde}E 5L /%Ӵ?+]/*`f#|$*wڴ~_%ЯzGWc5uwo,>]KֽeYT+ ‡Eѝ"5z\r=kmU9} =$]Rh:Z"&(bg}P6"sś5#HVm .sOrVZ$\r2+AM#L&Tyn!"o}s'rYV9?V0#[ɔ0 @~/nr]py#p7LxB7.l[>nY؏@,q3f񒛱 #,XkXqc,ZF+5|.5$yZ~,Pbx[D4((~a?W@?0@?1aFfƿu/JP'蟱Oq&~/)dq\#*cPHIZN"mr5,m'=׶" e-TWFDKå߫t{>y?xG_󤲔(4Fen8`m \ny`U{{7(\>8L O \ ”ǩǮ=h* p1]OQ0j==jiOğ9;?FOi#Ĵl7CKy@ɸW|󚱗Ѱr9eR W䝋'/tdɺ1xp~HɠŎ@SNަQDڿ嵢57g}3^K /HR2Eo9oY݁9mɤɹK9Sb @rneo̩MwL ,1m6RŜ z{=yIħyaR]mEFu]|Iw q|}E$ x)>Wl~P~wڔO*jwã34^łMSK3_5T^ߒȼY?|buڭc+{$fg^gI{tV0R` 1/|\qd,G[AMkL (8T(^"vʕp5^|X; Cc4Nvt],5@VVXLX!YQ\Ed>xLEcrV,>sv`<`B*c19YU$"!, I?w _L@SȬ͔k1ټP\E# X8+4o%f؎iQP!,*EBYd!Ԋ,̳dyy ʀ~bv,$` P يjؼ;U4ֈ%c,r X9h8NB<݉x$suZaa俗KnW8TDAݖK(t-R7gqP(>]iN6}cG ^ٞOwpi`׍O '<!͵Xc9<̈!DJK'[ HK?оqu :w- 6\q`$M ߃6@䑍ȧBx0 U u`dȞ/X 7Z"dlGpCӠhIJ˛94cT.dW[ug&A-2gp9B:+/ 
k,0\P;'J;缧,Yoz;-ieІ,V H&M.(2Xj" N;'aݯ oվg1Q`/hQxQ @L!&Pxڇ%bW6>g(jGby+qr(Xd{_,xO>GᰚlP'qc;Z=6^\Βf&rG%>|7TSm@L>:8Dӡ90[-߫\񡲂GU9d xl}9LԞk{e|]Aztػ\Zx$)2Ljѐ)2œ еyWv mf<\ބ.fszy G zdT^spi:[w14ɡ)Zf;hdEةƝ4mE9`6.3uAx5M7T.U0:n4kől^?.G>.b?qتa88"+BQld%8fdOx4NM򓂝 /!W#FzM+oD.WC'䷬ckk]} npvzǻ[jkl-oʠ)GoZ:  tC0{QrFgm1pTBOp6YPčâӟvK;[;[ 5j.їW~N2Md<AJt)zpB'&+ ρoF ݣ[2΋F׹\"1ZRH/Uk2>)%#кuċ3hluWb^( /8Vcl*8o+<_=pW1\-uo{RP#- jѝ.oP%>5ǃ'')]Z :gAS73,kzbF'q.e0[7ܹ{>6f ?`@0y?8wu+ڳY<8]VȬKJt'Ԧ`ﯖyes<ʳps^~[*1%[ΎNkV N}Sj rzΧ#F aHAct5 |c2.SWX$Iўv".F_PE~JhA:2XjEۆ5%bH!yy/H.Fb5-CJk+.*XaQ_Vy/0rɒzc)r\֎xR7Ńc ?͠`y'B;d({DB'4Q\t/%"Ņ|_ @+Se=|2^?h@/#x@oFvDNh 7oaF .?@9hV׉C?~[C\1 s>5Ec'?J ~[B >u枫B"P3liƝlYl!qf|'q۱d[zݯM~.g/h7v@>Moh݉oOvTR<&cZٟ;TV蘼pR|܃otq ϥu~e:;4y&tp!zfV˷@}+_^_x7a2VT*MԷ҅ %;"2ߖmkQne.FFiDžDg͚2Tr>mWxр\ 0g/"ݓLz7b/vۛt{$̻H~}(ߑ|sa2W#&ѿξw)w{Q[+5~c0u?k=khF~!JM~Y/*J[h]w~ܪNaUVO!jmh h_j!4U?W4L ԁ t=٢1U 9}V ".2it^@P.Kؘ.K;U{>Dz;q Xt U f,֩r q_x#X"_c&0䟟Fu2M#kJr{ENVNa5]O%|@4(k`A:0:#-%:P;.)դO5zr (j=wI7YrL7a4f:j(&&H5%G(:<(?=ο:Z\O?M<hV D)]JGJIadm0 L5aC3Sich^:9Q+";T9>n( :Ga@E8Q|5>kthpJCRאwu/h Cn!RH=u bzl1F7Z 8CectT4kOk/;偈#+2X=t`C%h9,Z5͡1! 
F˾@MG8n41BRۦh!t&I$A)Yt̵ą:ΡV!/ /88Y0 i ;E.0FϾa];9ώtl"kpBz?vl~c޴4FyD):P櫑;Uuѵ*뎫haC4%Xkk9|H5b`r:=R@sR=2J”"Hi6m 9yr(-Lt`u :zƟ5aKd:Kt6O-Q #`X}rij'֕Ah.6p>EPN*%8NP3I5)k339oD:z+#ӃG'-P-Xٵ򌅆.ǙFFyP->D R)@3zXF`,S`љ]J(j4?lc@ ȟ8rs`ŝ99?9ih͏ul~Q a  @>hbh\b̬Iu@=-`"qZ4Ba3ȆC~NB{%0KGOa2Tn&Cedtq Dq (4 "֋6A5Ϟ룎8A=hH&-=BDmKS(4#֋FA()LPJATdA!4/>RE)Lka9]CecL:C4BζnW2`fm<y5m=,s:ɰlp$x@(UF^j4t.G [VM=mwZ5ic$z)*75T3.]L\?xK 0EǦ-DiZfu&Q&-"~5hI(MalIA[m.1z PA,U+{D'g9Vu*9`JVMY˲*lɪ.}1Uڌc(*`r1j XyUF&4m%&9K -֪D6w2uRkbW,dgKGR*䂫i)x.>ɱ-Vնd~*Tg/ c Z/z,4/J #d6ЛGzWc 7O_X;o~+j%[y>ZSG"ѧf^V~d_7c%&=eM+/.-?/+;G/!x=B~#HѺ|#n}.g|G1]ELw{\>Qz ]>t2:P>th<~qnoۏwn{o_/߾/o_}o__f.s|V~`My}rvV׮$uu?* ǰ?2w3=Fot_oF\|ILnL}| H1{9^?> nPCz`cpD=J%L&YXSKܯsE𔸎Qw|ݽIdF[||CsKP󙓬vnslGݏ9DZi3{=)у]..dN7);  o1zR;t_=YV~(Tt.LeSQިc/U[{^ _YX}[!p3 5:|;[.͚?ML08ϲ%X3Pg,L?Hqu*^znԬ> jSg oN`.6Zfl_XT''uy^ʹѴ'LW06z S>- ֡#rOxrۅ/q|^]s ɟm:Evd!C}͋馂Q#: %ͳSS:MOHRymOQ`kU64)^I(٦mnrŜ.]Ui67XW逎ݿV~~XѰ j8Y0&5 %x`4-kiOx6"h]+u`m#8(ψKI<R3p-Q3p\3?RW$]90,gb`k*SHI: sa=4k o M*93$W|~Ǭ"#V+2<|ߺ&@HB 6A8Uo#j㗙h9Y#0)K.}"R6`VAZ" i{'w?-UT`o3X.@ I<]j#&*9jgkł6Fg㤲R/f7@5B4^Q_‰dc` qH%ѣe!,ۓ> (u_hW(tq 2z\/8ͦuwޥKɥ9MO~tSZbta-nzv44Q/B[/* $8*vW?.+'llqCNlT`;JGDT!C kJ4x6%H`#;ONZ ~xRdԲا^ts2w.nꁧag ֡Ѷ0dB"-cXD^ӗhOodS}@5WQoBsYʤۅڂp8!IcrXPo]wԼndIoaX&ZQԾ3`NYcV!zh[/Gi nJn-DmP,Xƨ&: a+1msAay=1ۉP;T-^FӞiug=af<2 g FJpNJyDP 8RzSsz %w%]_[peCÅ6.73Z^i!k$yoHu#UGDS]`Q{]J_uai^s$dΎ~&_8{ZI2\qb98A; ˨roff ;#Dj6shO RE\0_Ό&DŽA(TK}v_J x۩ TjH߮HpCߩ3;-^}i< KͨY:I NӦƣk<8k<ƣk<5xT_Q}G5xT_Q}G5xT_Q}G5xT_Q}G5xT_Q}G5xTL<@ZGi0z5eմ. >m /Mo1k ݽcԨLŠ@xһqe|޽=lvrv'" 5Le1 FݎGP`5~=F1l?<Bp~Phjbzqpݞ{PO`-c'gG^w$f5 ZnnaI?*;~{<[Dؤڰ4z/=h{Q}Wn;gnf L%hǹogD!W8D=P.x@W/Д3"T:A]蔺_܂5M]{J(E;Cg/ `_F 4 IZ5\r)7"ͮZWZObuM4o4r*g}x47&zk]QQp!DM!э.q\(NlR. 
LCSs!yUݽM+q9=?"@ڃ Rni(R!B\SF39jٔaK> Zx5+U/c,Ȗa)쥃za#؄+4EqB^Df+ŦX/jp 'y8 ,r+'E"Pf1ps f)}[$u\; ,utҐEKm 3p0rE}ZPktRвE64" 51ᝐAw:7tgrSMO\$rwv[I'gIqjT$Po5܌If" f d9de7`5R5w<^]0aD4Re%s9j_=`bbÊXNof}Jtg/|ħѿBW/*Lja؟6)e'Ad jw($$ZJ"q|5,l=`׶"SM$[W- ]`1_C_SYJꍑEo01t̴f;?Xjg\u`/$ *n+0/C1qK"Z ?OW`|0L\OC{:'t}<>u gIP>xğ0?Y 02^Ȼu`˚qc]pv5ѰrЄE\ W佛'ot9uv~L񠼘ĉ@cN(LAs Z"ѸfpѼp+(A"w-b/h%~#\ș(m3ְyL&m9pPYzm;F5425 s34{sdN-ngYk; P9;c};yJا?x`RCmYfuCг]|I7g |}~i,"PXAD@O:'"xޝȃ_}rGߗH Pզ1 Ig{dY0Y|bum]G"fgR$+ ؅bc0$/Y@f + F92%_\pux#+w|Xw)W{ {a& Yp:H;0|ɇYFe+ II`9=Ve$I1cLrq+ h+iv@.ؕYY,2M٪̯*8 TO{{3P| ;P@|<҃yɷߖ1#\X[$z>ήFWp nIth辺Q*㣋E*NI8Qc ' xlT%^6O%;ߨqj( մx/&roy.hO)~ 2wCry?/fY2N]ӛC q$0MTtEO4ޜ<dz?tFWAԃ<.06E+g91G~0KlTR<@,O0qUwE90"Hq,+읝5Z$ 8lj`r *w-[y]s-( Qάe D7a|/pՌ&p]G!sv\E:^@pH.f5+~?^>U ̄[;~Gm?ajkvZpo)Gb% Сl)%]na.]-l;l In_vv .u 7]n†:ʴBv'I\*֥JB@a_؍bse|0Ht>Zl@Q wI F=q4_`t@ח>_tºE ;`9ƆX;y^Z{'7,~@7|ܷqj¯Ck{(ywEY~::>jNOgg ڃ{^KNg 蜈A|; ]:p;v˲~HL7"5+6Zy7s%*,`br a I=U8V1aMQlj0?o]lpۆ|. 0;s͐Dzlᨬ?rp-e[ڹ>VR'}~+F5lW :^U' fp!ĺ$DI}4@iGP=Z,f͛p,GǛ?"H;Tr\C87NpzOcXOG8@9 \^-TKwCB6Ct wsueP{GJ |Hh0㞢{^>0>n8}t\"ͳvqFI\,8:8jC%$DN%?Ζ!0:`R>Qw"ԓvyqrΧ0se1t/5k7d[ By0lq1LRb,_+w!t#jR5ΚbN=)b4C-}'o//t2]\N%z77i[>;u./c-#1T9̽Xœj}?up"rL7]tDa׹|3МlvI>C;c~w:Xi1Z<⢸k#P{T'Gd G\m{Y'L4%"LqgKj"o^f 1ks`4= ]焓t.^5ܺJd0 # tf.w~]?7q1uL٨u~LɄC>C~:ߖsE'vUhA9kXQz4Q?}*1 du::mg!OR0 Q ldˎ\ N{]P+iaIT|>5:<`.!xqe7FMG÷fAR;no#~T˓`~Ww5b2W/n/H#fuazy\@pҗ/c6+%"0Z)@*H"ZJ^v~ھ_wN;uYNb:y8;?n{H[7-dV 7w9 #u $!]`MZGA)LK )rDHg % ltI])n=;CM PR6aB:4>yfmtZ* Ev~<+Ss0keP2%R6 ,CI֏N.*TLnuW&i pOcL^|X#_a7Li.A̒A=ӹ t$1Bk<$|l{KLIP˷9T| S,_H{Gz}srwu|8ۭ`fw Op24ſ0Έ 81yIOXn?8]2u}*̭'sa ,WD]5Z>Of>.gRc?Oi(tkj t ReOBK=EZ,K8bѝ&z3242ݑbu*^7JԬ>UsOP<o=-Zm}N_ ?5#q6Ws ,AH(ѶֿvMŘ2$W^Ԗ;MN.| XGEM`*< 4(rH # Oo^L71,"UgLx->>f;cHfyV6/&sZ>ɻ@y3mb+H.fF*ckIDaKf+%ϕw0D&,f&_4ж~F'ߠxSVsFXY?KиT-*@?㗁d7dƤRC y2ԡr8֯fNLKG9!Qg)nrcgL[*G5`2h\P G|Q&uSüVnyvw;L MkN/o/U|旷a㗷}ΞEMcc$n-\-r2o:ip.RRhwGYɅV2#Ys8WMLah`9l F> #3#0'@f?eGK)g\r')ct8*D!+zFHTU0V˭P u~c׹ Ɲ!1 !dJ=K%^^+ۀ~WPWceHd6,/ 2 !!҈顒S8'9(%ڟ"#V*2|߹GLr 
&!̗pC(<%K$]FcmVVެmxVYC:$QI8|΢1~YU[waD`^Ӈn#Ƣ$.UPpRPg/E&w<;zvkzñ/p.oQg{F #V!jqGB$]KLgV7#ߨpʼnH_/<-hkaKkɻK|ɥ.9NQ-tS>Zd`Nsl36'hGCsyq8' p8Tߧ.;'h4p гUv M(mUlڂuIB MM(6>hfi '?'/'mk3NOGB$MX7fޝHǨ emc׹z#:p@A z4eu,K&ORT pQpߣUV4oѼDH6MD/c̽^tTk[\QO/>Q9i坓D'i\ 5qlv~`,]u1`o IG`erH0 S ]145xrߢqptqbuС(gFӃ-uk͚q-C <52O maɄFCZbK(ڛ3Ͽ_88tkPQ"&YI USaE-PI*[rXPo]wԼndIoaX&ZQԾ ulӋNCc؁Vc`_ES:@D-FxQh>ԅ2Z͢(BTDeb(>G}}.h5B6 %^)4zeuyx3)x9>0{)9=ɻ.^ -A)Qz꽙~3q\  <"&F7čV@F0}:;iﵞ7ڈïM<-!XEѦݜxn:"O:˨ro>I=ތI롣 `,6Y8TV~6I?|3pGfNX:flPfX/aeP>`}˥8tLi8`sQ,b>J#°㣟~e`v\?3ԊKq8^9 <3LQHCiMd_CaUc>>4N_6 [vkn~p@Hp3@yvr0:=ue3@_&plrdmrd 9I"I61u_)Oi:AD& itLgNsQGp;D`HWK̨hNDz>0 p)g~) 9Z7Hz6 &vluO2M t>t 奁3CO$tpiE D&W')sK Lܡ,:&RӖj۞;:+#YA`GV|'Cư` 'k@"i:(#'~}ܷC >tp$w&(͹p8@Î^H.es6yȂ9:gpX4=%&4dg$L3IdfavllIƏ YwT4'qu) wrYz!0E*3qbbۜTLEw ,ӍyRp>^f xC]N}mw-xbœeraKyl!`0 oQ7fck!/{J] <+`VbZJR֮J"A|I9 BTXHCV|*>'S,eyX)9S)xTts '-'$Ka&zxIA=LO8P=ͱS$m( Fbo"7P!utsp|_. A8Gf|1zf/RotDLs.2ykz๏T[0=օT=%DPRJp g@#D1S~u?.6βA0FV јhΗSkkUweR.`|1z}ع$Ӑkv!f0{/=!j&kQ mMG5KEE/1k ݽcԨLŠ9]AF=yWg?V0nvFݩDD")6LQ!mm ]n\Kn=䋰hG%;_4FԹsZ$AȤљk ^YD%noFSrAInDgYEBx]\DfN+ŦX/jp 'y8+fv9"!Sx Rx]iIھ-π:u}:ZEgQH8 l)c)nZ- ZUwh2༙F8&&܁kW ro'?&'䎥>*>+It,Nϒ+j:$Pf5܌If" f d]9dֹ7`5R5w<^]0E\Qe]s9j_`bbÊXN赏f}-vK%:Ƴ{*1)o884z)v)Ml`E{ W٥Za <_ qص4St5>J G$=$M<>}Bǃ{Srv-ӊGi35hEޭ3߮9@!Ga^3 +C8]ĕpqUQ;J޻yiqݽN'1YkwʋL4:}"]-1w 53_EGB­ d+C M]dE-K ;p!gX1t9gma\Yzm;F5425 s34{sdN-nڑ߳,\_5SŜ zw1\ߤIxSaao*oj|L+!㤁r|eJ\D9ذ >N!&E+|4И jj/31[1N G~jf+*@Ҋլ?ޠ& +ŞOpe+(1R DPъIY20%sFb sm<$ݳX}Uuj1teJ6ZE (]q_Վ/84RtV#|_!JBNK(ll.Q;5׫(Db|iI!+%b7,..Nf$,H4N u'ׯ #(fV}!/`Qp^Jv@%OƧpڇ-bW`uMF 2tYy`hnڜCEj(s -xE@G0/\z]:0YHA![{\ ^Ia屁#ʺىdea ]Lgfb~WŤ\%Ss\ '#NC=a$8CQ;)\Ώg@kcZ0o-E: \[[ $z>ήFWpnX$:Uw`FXQ_HZhPϞ\Y*_>:T֗uaw޻z.~mSU‹8и+"wQ jNPdkwwdTT?\O@OͿȕ˛QyyeQ(0/fJRfkQA6 z(Vowucn[0!{(MmnFp6^&p8Z {vX:".CHkAxO_~n|9mUO*߭Nommoomn v?Im^ D3hDuXh=t3[FIgR[]." ˻l,@T;p O\GynCk}ק-uJE F>(usCM{eTxI $5Rc%!1c8l~{QLvhU-C{MtT1“rтB=1d_i&Q4t`PAG!%n^-yvozlD+4)k$O vr \nmwFkNo6(Uܷqj&q1PUtq2e(tt!ӇGٙ]CAӽVhxN[ͳtNkJ4񲬾~ӍHMŊ| \2ϭO raseo>!p>cH9±:]^| lnh +ņ GW. 
0;s]H݅bXµAlڲk:ZE"OzSQp!ܦy=YPX~# eĺ$DI}4@ib̳bVܼ)r4n|ck+㺕hqr׻x*n=|JtaB1:Tp:(&~}\lr7Fѱi8)|[̸hh(wǍ[JUE:/gDcvZ,8x?oEmoIwu/#z+oaH*,PѬ3cB#ȼ΍hx4dCh>#p:x_.4͵0gC1Z4MqQUqе=T*m ғbׅ#X{6el@=,mqmfm&Ɏ8{ɎȎ%~`Kq5ovjJu\ag.sI:vuشYŢ#> g]s0_MY0<_uLYu~L;wnh^]ڨx@E ױ&~6SS%% HB2_me"5'O0?<\|6~rwkё{M(T/VJī Sj*M{q;s%S6x@,:Z6-of}tY* Eu$v~v*yw8 Zs}eTnrLj"5aVLnuW&i _pOcL^|X3{a k3C8jfI\\]`{M:Qfu<5i>]knPw|ӽ%%ۍO_1ozx#<~=9;C T>ypH Vuw{R0;CmC8 Lgtt^GN 0MOeTT^J3r̅2D3waIM+ty٤j@ͻzIO,?jFO8 J럄y&WY^9qĪ;g$eAje#T^oY} jSy&{6.ZjZ=GQ~j0ȑۧm3`T#^1-GEWd(!HV)z-w(]"k3TuWACbY|Rxb`ֈ,guwjJo]+6ޑG3k"yg5 `I8dg(X9\1ArnEUg6&oXV%s\DO* K6kO^.Iw, $\Zye166J\g53M}ś7pKq8wI5ץXE`N9~- O6|5MH550ŠN/Cuh,g@ kA:yk _"1:\DZ3y-#EXyn4Y(#㨣Q ˳\-oBtgE!Cu?ݰ)U|;=m3esu;o6l6>uٳȺsԭKEZM6%e^J <@~Ju$ x\Ή) m,=zM~z'Y|df$=ѳ zrǿh)D$et~v;bMRȱ\z(3ɹvi]}U7 tq"g) O Zbli+j}._sr zNcS-]T/s:[ VO +0hh5`GpN*N!!ƙMUư:>p9X D-t>wjJA.Pl! ۺ#$? E2ij@!-E`T <<֐\"[v8BCFXW|>4[:hQ9sV %c(50jF-h5&ᄔ{][1YkGd 3^1:2A-.TH+6_"yWD82!\[7#ۂ"oŭ}z#UGĸ/ԻЪ1"ҽH66|}:;وïM<-!XEݜxn>f-u~ 6sZ ;#Dj6shO RoD\0_Ό&DŽA(TK}v_J xŲԐp߱]"[S!;tiuҔ@ldVO>I/<ތI롻0!=;!l1M MR~jlxfNX:flPfX/aeP>`}%uLi8`sQ,ײ#°㣟~e`v\?3ԊKq8[^9* <3LQHCiMd_CaUcr}|:hbA-5fo?i:\ Pi NO]Y ׯ 5=oo;tYyNjl a_LW{J@N{nM 8&@) +wxB$pۯ< )"Xo`t8.at=١Orb!'Ҷ6@7N?`'t><%!=hR ϏF1+Υ0ǒT|{s<!t`n\#^MJYpzt:} Pg.݀H8P x8$A)D+eDKSp<J;Z@A~dxuDY$!8% qM;9v\sLs=xbC™GU"CGq8M]Jtb\Y:]Ff]?ZQ`˴uJXպe(3?Ʊ, \tY_ x Maz"H i5m%AsӾlLS.33Byi < 5yw~т#IDyH@w(˴5.9mٯ[F=` <288M\vzk8~"ȡ8d K :? x"6A$62"yzG } =CGrg"Ҝ S^[3RN n>齟e:R@lSaD:@Î^H.es6yȂ9:gpX4=%&4dg$L3IdfavllIƏ YwT4'qu) wrYz!0E*3qbbۜTLEw ,ӍyRp>^f x{.fU'pAf_5<}۝g 5Xy8i{R[1LÛeM+eBZ^rW}6$Ef$~ z0t.G1y]gyfr>6@x$@#A`AS3yu`2 k SxlI|1w334.3:(393Aoa4>M?ؖ,Tv)'y!GMb/q="x#.U'Luu&U:?y&4H.<}xb~tlOͬlO%bC4&TD$AZlZ|՝}Yć e@mhg#v.4]F$L`iKrOz Zu w[SQMsQF;F$4/aԓ. ezN_k6.90P1x1Ith t h}1mY|_uƯKRz/gU@~v^Fqo4ch[W00N#۩8rbI8,lRI*PuŊ(96^ b}GYEiT+4F+U/qBa,=SȖ &a6<'خ+aF(Gws0. 
B#-?vMo!yoss^3RDbSPIa58YixX< 0 I%)fYLdy)4Y mߖ@IN:>K펳ۃ _VbFNSƔ / ʔ@7q6-[Ī\as\pL# OPǵw n7ВrSMO\ژ$rwv[I'gIl5 Y˲n$^s3V`f|3c _ 2\IˆwS~oၚGOT;R ^/."ɲ^й mk0A?1aE,GFM?6ٰ<7Wh\EeIm=Ӕ&e ta|RA-0SKI药FMøVDvI~ uUK >FAڻ:N5i=DQcCLl6PE/Cf{!i{Û7Ȫ\;JS 뿜Ɣǥ^6h) p>]Q0r5>jIODԁ]˟'AAbÌfa(eYw@kzhK3U#ckF5a2URQ.*j'w_{7O8-.z餿#Q`NAy1Ɯ\Q1>Zxh\3UtP{x4/$ Bk8 EfX r߱Dr&Jی5lIw~sŜV)cTM Sb97CӼ7G&=e\l=S]wPȹL>s3j 7@U(8K4'DD1 x!>O7^k8=5~b㻫Mcd;:+.~;w((L+X"yB`6H봇7}E,NϪ^gI V!v "ͬVX˗!1*~ }&2X.wjߩdΥfGw;i3& C$Uоvq{@?nB򉪙$=KQW~KM{sJ@:$X1 hQ㠬yD L\4r\#e:V_ճDZ 5kM$VO͛|dPڤݎ(LT#~z_!JBGKll.Q;Z5׫(Db|iI;!+%_g;u.>d$*?H4N u'ӯ #(fVQlq1~[aa:1ю&GwZ*w"s y20N%9 nTP@溊Tj(4b#Oa_J@Vx([P HA$*8 TƏ3 ELLʧp9?jevUpNfpQlm7;^?aʓ<WтV:_Fhq{[r{gi-^D0^ݐg?y %~"ҵ*5TдOpH?j84"oꋤ ʕ̏39*o(jAUm}9[jv罫*׶9Uu-\GEUPphx xlT%^6O%;v*U~~bhE\ބ͋HC.3Dx9M7W**6[ )ӎw-[D'ltE dD*8 qž?+8ZӢHh8RP>i6^RS%KYֱw)=yAm~OwV[C[KqG4]x6C;!o;B1~^aĀ6+Y(ܞTJ=U DZv[[Uߨ> hv;:E׿D:{:ԬmKMT{M[lAt?2 >ixCbMn%F Br.PD.m0m;!'68 xPSr Q/_aȺ[7iU*Q\B:0)Th\uØPi]8쮺KWh[KWE {`9ƨGUNިW{IMl?q8H *0tbޝ.n@eJttHGٙHӽVhxN[ͳ u{Iwt 9o`/ca8hS]ABv鐭[аN C]-pܼ{%ax?Y 8!L.I&!,c dcfԥպ$g}R]U]]PAVûD6 (TQQŅzP C* ?1tcHw-ԍ"&! 8 ' &B9>Bm-Z\rA* U=e%|Y,ƏJrD!GH4$slnM4(i TLAPw 1KX ]dِsl2y1Kܑ=ruN/Q_`Ptr9*DZ=3dsN[DA(_20Bxzȷ*-=taeC2 qS g'L}$ )̛O4 # ApSK{̦Lqt)Jܝ*Xo-B6:HMxcFu &(Ux2ja>p _2>QZ9Q89UKl`ZEU. 
8T2Cn3{a'g?Tٔ8['zJ#Syp%ϠcIk[k-*`e+IbHWwj/ojG~灵j3Y>7j_$5C.H+]acRːZPfhgƝb@_fSdבfY~»63elŒ>c;FgNYNQH'dc=ODGŚGyyTlEtbiX*U_0 zrMΉ ]W䀰!OuZZ0Qc5+F}h -I/ PA?1Z}֚+jF+U8뎎Ǟ'?C_*72qA1( wt~W(+E؟[U%(!>ZV^H1ZW, oQi%wZKl\R;ƙ/w Z0M?DC`ሤjh|MAJ oh9||%Z9|̍!]^ؽ)fû#⪿Β$RݢGf$t(,X,%e R}*Vqp("GdVQw ia,d䀹s*:xl ȁW!8z߻%W}?ro;,2^(ɦY6fDVRiV79Mtj t5 ^?Y9@xsCʲ-k3 DSfj;LJ| | w(cAԆkSxcHO_LOF='ԑR*&kF.J/\ѪMheZ+[3{]$6%s!E4MIB\_-/.WvhV`QJc7 Q+OѰFJt%{5Y̓xS,lBX ڑ#t5ɨyBaOsX4Y!?&{΢B׍@t` Eg5CW,*dc+BoתDJCWr%9bZc +BΊ4FFOR :op)i4Щ\fS9a]naݢ7؊JF85JڗD=g&*!%FI,g_[0ʱG]d !V Đ'ʞ[d][б=$eP߁R$:X0hY}i)Df{2荱-eo|yw֫hlD w'(wТb?LNYtZ[\UjqlGDdԇAlursQmek 7i{O=*@&qPj@4iKquzBFYddEF}w3.f}M=/ c\TJ_24J* I6kk߲"Mh \[[MS'+㤎hM*MG)4 8G14.~$9`ȎQ1+؉ *[ڻOi0?TH:M+ ĽEe(1 C[}ďgR1aG$4ҁ6ugSn Sx?tLkF#8Ga1'CHvA!Z/!P% a8VF#*JK 1T' `=Fd#,֭2c̿h #a*ߢ3 u{rF|0lrmBMp%HAD7kЧ%M2J & [DC@줉xNȁj lx*usf/piƫP3jI$!\^Lϲ%6F_]lc9卑#L& i]4dG4P=?:P+V[O8j5`0T"Ԛ!7[(-n}fjFdx[ ְ\<ĸB AN>>:dDUe#Y7%Yc4>N5Vt2`ћV|⍺]E+g`=Q3^E5VL빫MK4/rc T\T:.fD'7yRcɘp< &Yt,Ѕ-vw3L4DhĽw± -#q҈1co W;-9gc 4&j3]\yj$눸v5MIߡ6oo.D Lc.o]|<=<˳ُ?v|4#j(Ջq+")NH@ڹW%+_fKH! ` [yR{f,hUv!Xatj5>rs}Tu=~3a5hH>iؖ -N.Ua]zѠ$|uل[|8s]j/Nh5жAG(%e$($G\Ⱦɮ5\vCk ҮI{[IHOluA ЃrWB '1o=J5F䣐 |9!'wC:@yA&Bi@r-^:,/}Ao?YkjAf Jrw⮰8*pSIdt~\>ml4f9Wsf՝ [) ʠ g"v1\XaXSsa}R9yT^ E {9XG֛g` /۾IZ~ح EyCb\'pDݬRd1'ز,|x}C88hP=t`dD4eQm"ex*%yT+տ\_|t|uuNO/{O~/}gp7tFc?-aTʥO'Wd^n3mhb'E&O؄-=W*t4u.cɯr<wV- n?v`3Lvkɓ1ng7C\7fB ?:a0|biwkNmM7"Va,﵏:N4wylE\4ѝ˓QӨ^@}Ѝl`TIipb~ vGABw;"4ezvzq0{un*Yը6! 
d6 4]q6'{͑)In;?C` zUkPql&IҒtvll܎gTxiű:]˅7/NVIi AH!(TN~FN4L7Pv2LQUfgg9hY/uiQvF.\rp:8=sS M */Tj|̃h2~'i-iL*z&XTkx8 Ig8,{mV'N,&C~|vsUgQs[k̉u$X+(moEhPxDSqmd v`nt{ `7/lPkoo0L>kyrE#w}SDt8::K)+K,h_f{mnbcR”q|ҖkzJѤ.b(wGB@&}%RlL gfxAF]a^RGaGǮ4\\kic&]7[ߦi4EE9YIgIӮ>*O"* 1{k|7OP흹lWՖx>W؏WݬֻU r&Q;)l]R ͨ9]q?~Vcz؅YvFK!M+8iZam7%JϏcCae;;Naoh&gJcAM]Mݳ+^=7=6l*.mJ2S'(S7Jhݎ10aОٛe+M8e :uJ@{lx`x6EᴫeFSX-cОqACV[ΙFTNMh\O[WݲwNV)i,}=UG eU~6ؾG@{_̠L%>,(u,rDQ/"HfCOѝki&~:'~\77FIr$j@_UDɀtT.yǸscCT ܤFd= ^$ױL쵱 C0ТL}֠iN3C-!~DǪ0yShԏ:# vϥVNB7;-.4iA{~jpXoa -:I}]Q]m T~oQ&}jmGE0~mЂSf`LP1%1~Q$fbGx|*OLIWl?+or"N{Bk7mA9 /X*av5*f)p0Ȁ ӳJ,VOǗsqHAuL&Jk^ lj;@6D,yU՘#^k_7eVBR1pۤ~J.$odH0~z]&s:r  ,7JnM %p(ʇGnQ`f{s^NW՜ v}Ynm yCU;jya\JjΡ1CwYw> X锦z]g5%>j.xYaiu,@b=FjLsbrNɭt5%=%I̙$w!n{n_uHK[Np;֦j6!2f-u a/(@h$fg4)IAgެ~lo>>빏<_4JFڤ"H !d 2䰳, R64e.MxYKq>n'Q顳)JGհfxYE$^Fd6K6]v36!Ũc elŘ%*Fhd`uI.#R$z83Xgċ`?Roj@jW3 ːk?@O46|p!) '>4M5tUBJw2wΖA0*doIqcM^#?t %ȴNv)ʕ_!II',K H73DywA zc2A:aFre'ϵCQm)fbI#0=WJӂ7Ǒ8t)X}"?α͑JSpL Vh 67V}^a;QC=wN[En wj?{N˷c[Nta==jp;$V(.+9ibӛLAC)HN< K&e(Ў)N4AK YHi" 24h3X'7 ޤ5Ja'r&@&N(aJ'Fs V &&ip}tmhB?T:5WAW@itFFC;@TD헭 T4*90H!1ұ0IyZ|0s ڮMdlGa3x1wԨt(-8& 4LrRMx1k}9- o-f%NKm LRV7 EKlA.l^tR=8 L;aȸ')UjO!G]r\zgTK yº&'X6.U-Ed]uՊ߬ݮZiΥ~l󈦍oQvtޭ}POSjo[z&sg]ucEx?@@y5*WL7@g *; VsɯnM$g$~0vݮ$tӱ_h0_wZ+$jW:QA}m5]b׎hYm둥h*V!Y!K NR t͘DÛ1-NQJq<.C Mc%fir `9WysT,Cg|t`\ ݶ+9T0UbY~AAf'h?,NPfܫ w3z uFصa٪3cѭi{JF9x $4#f/49FWEO43,AőrSt~jҨ|qOZ<8SAݥPs +@{씓Qޯ :7НZc'4y!4^Նv~< m!BVNzMsB}R갓ӂ'+z|t? 
zYUPt.^3[&7]T02o:p9ѻio6lv+Pfn̴ ~BCgfoCv`ZDV}}JV3Fcrk7m5tm b9eMD29X&J/4JT<_:9yc:WʫN.*膺:D 1(i㤽xfԡ%y*!燍Hb Go4):yRi5B|>4*(\dsDS$1в/ |^bT:i73%n~/w+ɏ9 R(pĎ($Vڜ<@oĊ9,\bJR/hnv:~h: 㓬u u: y`Pt_QtF+'XOW]E!WKg@A R`ѣGh W$=)]ii-SyO߆Jjв;Df(Q׻I&6U^\b*յ3X*/}u V5-L>AC)mύ4FV "ӡK#.-D tvnI_NكS9qXkW6"=t͕ k(|Ye 4LkE%Ha>Q_e퀡Z9cE^9c; s吪EywV%hTNNV~FS?[W[A|IN;ءq GV d_oNj l=!e ?l(MߩңYFd  kEdD!]aDgؑ~^2uXטx$/)2Bi/ag Ao^'~C#n?k ]JZQdbK1pyޮ;je?eD@׮',8S|T;񒁑=K6n*aoS?FՇv_lPDLv_eG7CK/ 5K&E$dSw¨*zfᝬ}Ȼ ; 4F˧ )B.?;oLSme.]gsm߄Nh`liu XWK:w:slBi׸TjuQ|;͂vew~N-++%դO/.*Ajk~#nou1_Y8*-i0w+59.<;Մ&{,д\uIs+%C|]e/֙O9:^7]қ\R8MC{y5Yщƅݓgd*e\P yH)HgJ.ϮN>J[a/` Šƻ qP뤇A3F7F%0ŀݥ'k+}j^Gu.vBcfZU:o>1脖79;ğ  bӿC'K.9F$C1Dit?fWi'Odqy"F5#zUŪ$T%AšxC(? hDŽ?ǜy&l(rX֒{ Hkq;QsvSLk3?G!.WEZ^n'7 ~ =ƈ,[V7>]$.'k?vG|ӊt#Okg˸8(uszL0B*؞?^hQ,ʂS&r,~"3<)!]*4M.~2;ǗfzGm& x3γPi?*Ϣ7\#>BN`#VǢsxXv7RY$X+]n$W}ᨰ]9WSydV:-5[2l f1ZiL2Dq⼛q?݋7ZV_ĭdnjtY}%_Y.6C&c:CGk6'a24}453f|R{Te>$y|7=>!~VF8.LlVz}Y>Aǯ |<Ā:מ4ݸ"CP'(d8 ~t3`ݟfNlX"|Zz)/a P/YaU+>Õ̷O"ǟwަJ}?JABIXr[1r,Kf7K;q%Kndnhw0A}FpebaQ?8wq',K Q-5'ճ1FԂa}'I~<|t>֝BjLs bcCk;m[da;0mvHC{OȴA:vPfY^>n(|d{z$6/RA6Me ;%aCjwn\5[xR;L|pzz߻ٗfV;%h:)B) L>T~(z _,v9:~,i s PQqc̀!>Aܘ 4PVb}m}ԹU}Y/'qp"'Z gLr[;ƈ F mROřp4ƂHl]-(C+4/3) 34x 3)۠=gvRj7o RdVi-Q*4ԬL,@A;ƆCs`&:&6He z9 @y4^oO?@1<Kv] zsz{>6Z;N{nFoF_W4W߃+ `:T~oy#a˛2,nax>j{ZU1$fgc2b:}Dc8۰k+›Y[ǥXjD+b>0^uM5Gw,E\B&ʱ"EOB?tϸ<6޻39hyr3=:;vơqqxyurq{\5Ȗsv@N L}uS\ hXh\0Icz5(w~xxP\4\C 8 83 $S[t1Fe1+.6XXCkKe//.½? HpE`d!̾a$NzG^r;W`-7sIHf*%/PwIBub*`7 u" *%(SD›vE& is^ѵB{߰=GhòdCˏ{ GSKLhsS5ȵof4C z&. $Kyjd-j0Sb!:SdIh79Z1k$F7</6Up׀%LdzX![D% _"7cfQ~x;O8-z=qtLE @n'2(ȣ$t*f,f4W`nEvy#vΡprrx/l60<ǥs 6=Nw.(x)iŌ۪%[[ȓu}# P}r{;vf1mϺjXUzbܠf\;4"*Ֆk u٢28?nvBؕctmݐOq$'$QI z+Ef% K댩ݏ' c=r١0yoz5d_ j{-r6ޫZ޻{D.'|{#t)Z C8Ц҉8aƁHLe'xޙsr[M<(z;V9j_MT1 p2yxC@L@-d]"SEsfɷQWy/WXYA p *UJcwOփ.CB `'y'1M~ ⢺,+HDI#Bj>@Av%uA`DK_hBi*'D|J4I]"Fq+1o(+y Y5%.֟2*ypyJ!T~yݩㆺW(oVs^W:yKO _s 4H+SOkd? )Y91L} )v@u=>-NG~̓{#kx7Giܨ7 Zr*+d,~Z~iГ\.Ӕ4Id=q"=qJ)qmxJ&wZ߫4tka-(\Нh`??qQ6F7-MF[n?ÎQ DgPMP%%JEYE(gO}̊bS)vCS> 'qiQ\d e16 q H2UQb0ئڐ覿)ŷ'4)ePlӡXK ? 
$]"e?a.ʂv0r`Y\I(S0Bj瀴N^ |yv(+P$P۲!QSY TGZ‡O$7_~s(%ObPAXBjNz^ɋ,r|)O&!>PdԛlkIȒ|$ iR}yӁJ!%HPX?B|&)SPLQW(rG,s*@Bԟ{4y3y3E Tt/=EПr?G35&H*U/dHHO;zld0L%LF`; NjH䱩=N[%[Ʋzs1;;N(PuQr0HL# F) G"ҝ8"gܥ)1u0b!*.CQɤv/K+V`fA @yt1J̷jEkX (d+{ ёFp;S/ *t"8PLg~ YԺ5,gz޲FMLζd^ɶGFgP߳ShGrˆe߃$8dRA, 'KnJY{$vjN?x_ eKh$'@ZMIL@;j7[nvHfRsR`ĥhK$Ec+_EmO XQC NhPzopp2&#=ġ5D93j$~<5WA܊-2~J2&2aIl @ P VJVZFd [QxeLzSN4FsǗ>2FO>cUFPf;9--?{i`$"Mo}mRQAXIEGH=)Kߏf!arv)iG刼 ы* <{fmDBEu7~}8.JlwdӘ20T/Y-pT0 ;tlV!x[c̝9,ʑ,&` $@\z0ˣ+aZOCaܿof)ܷsdBo]7gmv&˻ : =aMKɧ:t,Ow{x #Q# x-No}bQQTʊm鹽g\Ryz'/haT¡-?E+ CH^{ZRεN|%~v1mWSӈE嫌° 9 Hi 4w2#ghm%?w֧.jjD#3+]4j@YF&bE -oFf4w*2Tbeޒ2&+)tœ{}vedݍ'h4j5[u7 "u3Aع/I6Ng.Vetw˴BhTJ`i"Z^1d:wNn JUNݳ+skUk!\D[h8ApASQC V B y:=k/9\QR;͡Bh՞(6^ɴtDT>c.8! 㛌"J;HjI FlmVI:A}pVn{f)v+ɋ?$9Ӟz/@\—FaG0t!"$u޾8=-1U~G=x/QmCߋAe oyV=)Kt!Ϟ%;޹) : vޗe hjĀ23cNws]烑;Up28K&Um ,t4 <[LQpDF\Fו_8vP=,ȦbCqYhVAQ#"7sӟ6r3`<ȵ"=F&RB6_tG''p[Tv%Os3ytR*8>١{IlQ;_r=gؖSl&?D /n[qe*B[꒿@` ƩM͢UX|O/{N!hJFLNW@7%FeB2ǿTJ &bt`(1/毷-n)2-(W$E(Ѽm+!vrN!0J#LM|\~#L;]y #jN4|MZd/<4L j7 >`1] cdo۽E:#u#>FS14E.Sd5gd$va;)$["_i*>^9v猼ySaL DL͖+nΗ!St?_Ky-Ɇxh"h2.BJ%Tr4i b5/yCr.̉YmfUU|CK`Ͱgv>UKJ&ipE:/ `k ТȉÂ̪[Erqtnvq\NnDQސV@%Ykx[Je*ϭ1NA{y\3c-ڭY')!GFYrkk۝d#'j[ӽkNm|*CBnOY-F$ =U! _B O.B ճOSpel.8*I#l-ԴpI?h~EL?P$_)ckM6~C$!?S$0N폶Ap&yUyd2.u~9j4 2KEQ8JK%-2, iQa8Fן aV*~z<)?gTT*zh~(.yM.~&iMz.t$XyWFn@(V \u%P`OOJ5ݾ{Nıh 軃c J4ò}ù @Ig0rq@! dyөc~^ QіL1*hHe(k+ɍEfo˰*2r9oIY19TxI-*,zZ"&!85'ՏP>$vث?|T9\Kă"S; 3(HLJNfUSE0w2ߧ#ߑE X'¶ Ea]3yWEU_~HԵQi  R[ Q8K6AԃHeY9;k Np)UVj ۗŅo0c|=WRr A`\]qזDoʕ- Nr:xhU)1e2P8nCsq+(7HW+*>cX0q+T,26"Ȕq*ob GrQ'Ш&:z6a<"+7z@mF,#8PP<<<3u'\bm_DfJ͈=%Cy &EpHUk!)6D9#7:1͋&1~eRNqOrG`vs1*xxa |QOhK&gLCx YyޫXj'\| j#d')N B +h !t{9:1-V[ѩ =nY)hFUϢ1Sm/a*xZHkTLm܂W_Ţ΂+}ٯ;04Fq ]^ +h3_EES.S"]wZD"#l| !EQ/V:>jVyԈqRtƤfX+X%XM]mj'*l9]D /W/ѝ@T9'lYģVTd|J4ūjY<1Z ŅQek-Jhڂ;ݡ-a(>I%G#<& ؓ$}O(r|l Ё<D[b^_RBDNð͙Ӄ':KHA9Y2S}Åwrqr|WP2홂DNNG4u>ISyh fK d|{គ<'R#d%ie[K'2vK\?ǐN66q? 
֟2N6)!H𓥂G<5" )>t`y >V6F~<HSV(8(>?Rm=b\x9XS"6 %3̯ %u6PXڰb,ehdK:I' ZXӤBdYa/U=`5xiAn맓w bϯ;yrƎQ๨ ;8|}g,ۓnx{~iW'G^./λuG0x ڒ,P;zUg="PLKv&<."lL%H ܂=1qujxRûDTCm!w tbKpJ% 3Y*kL0[hp Wr$jzqa{ReJАLw"zC3l=ls<=xL;9,?]EH{j n%T" ֠10>tUh :^w[7N2kg g_+11x$`W8wߖ>D֝Q5gqb?^ׇ?voJXu Lmr^ݚD2K$xLM1oIFx4E}m_G}{^lE lsb p=4%vF2&ֿg$0=gXK3\l(F/T.%ёby;C@'AnĒ?hx~흃CiĴ1ֆ Qnu\?J! NcW8$LrSa+GZaf] 6w"blki)Y=h#֧i/DOa6AA*UPczîJ+k>VaYhĢ^yN.#Et\N&g3\"{EOXG۷34A#0o:_Ih3b*CRMN$ť҇1nrcy !tu/˕ufC1K\}.W]=F7whP?kn6xp/7^uRKf(/9*x_NPqiN"tZIh[PFԝWU3ReNSql lA\;"8)7{j.,+i=@Y(FJ|7`MK;Q7]f 4` F?-nY>Lv^ OߩWInNCwCQ˛߻*7O n9J6vr*GpB3y[Q0jmqIc S"NlQMC;k?.Qɭ1-΃R*uZT7FfDtj ei)U:-tD*w^nU(e]~ XUeUgZf>hŋRg=> oz:,Î%z#ladѠv>3 .U{_]7j- S3$Rq:/}/&mw=~?={+:TwpW9"ms{a[ەn K-^{OB)ї8)#Hy_۫1DaFRq'Mm1QLсfrˆj:F`{]&e4ia4$K1>!EVp͚0dݩRo`3]gO0̋q(ے>$eNNENW ' |rw(1IQ-\ T,~`&w7BD_"N4L#0'ʰWAu+܀@~$;ABe77HSN갠[09(1+S+ب8c!S̉) 2fxfr5*Qzdlr,񓕧PtFJfLky̛ qLT`ϰEtg2ЗZ^)fTh")ecqXyvrA }aT\Iۖ r(9͠ | 63ҡh0/Ǝ狆qjv߫'<mO E[!ՓLVBn/MsO4kJ1fbZ4҉]\c~$jIn"BJ*T 4ǬIL@+\L2ᗓWJ캾r}-RF_϶Qi5}}GO-`ẹܝ KDCgǼN F~^j[*V Ɔ}gcL6/F\Dt QD2s4;x(pPtL]7 b3+ǘp7|RǖmehzfWv٠ѭAL{'EН e},: K}G"m3MDvHϷCUww_KnH4m C|蜈65+[bA__EUU` }z"m`$1yr u@Wi)%GE]zZݻt숃wyܲh"m,ԗsHt"/m"RPX}#6p ԩg`"GIm萘q|"-.I@5O)f:p}$%T2fMs%̀Y8Y/nrt>$$3( 0Q+j6^QGMTӂYm43“NcO"+opelCn=שSt@C;;ls?W?ߏ*AMh>U<ͽQ͟l@eBtz&,GWtI[˭M GFr=Ja}>7y7A*עW+.\^1Nճ jB70s(Ť-&T/?~r`*D!yMك/瑱e^Q֒].C- .gzuQ0y!?sco\ $ca*wz)38|*1ɡ5NK4qYٕS#m +LPfy%1ge,1QMn6h&C`Qf"&  ^G 8՝U8kG^R 3Q7>xڣ-*N2 C~U5|dMY>Y$e=\4[OIHn#BTR}G/ew `_nXH6"@36JS]' +('ƣtzޖǀfֵz.7D3+cѕp*)BaqHba21TХ$rUW 4Y{@ Mex +*'ETMR!Exs5tgCW :ad%{Ɯ  7 0VKq/,K8dk)Ôq-$o%kIask9"֍M %g ӯ\%fD u]hvat0ai* /7 ]u]Uɺxv>V.%Q2[9fwV} *ۇY(S<1Օ..Di=x,M.+9%킕`1D5 u(E $<*Y&%6Х`ũkRopxdp !@b`P5N Ц+S6Mjky~?I1i7WVp'+ 5Koj nȮ5qι(r564D9:RlS67jw: 9 (r] ^fW4r($L5YlR@0&)qH 3-Ett'>5hRC8cȔ` ox:c׌H+01|7cpa|ϕVe^)-W# O)e Dl7 hXҲج f`+kY?G)NhWM*uG}y3c!1\}o9梢t:pt8r /~Y!+Փfu 7Xֿ"O ~5/eѯ22-92[~;.~x]Wőouc6=1'j,ߊR*&7zU{-{0QLJ$)'fsJ$aptr_@k8JEn-@7xXI$:S0rhQwHǗur"YOˊ7<ӤkdBw I/}>w2O12K>!Ypj]6mb>Zt}vCX|c9WAag9d["E•hJu.R m!B3Cu޴ quZUBh#1Ǽq{f#p1O qN 
sDuB+lT&ő3Q*Qz [թ߂1VYL\}"ѪQtm?sMZN ~"`2;_tܓX)e^z&Bw3u+~iwydTfWzG(}_ľ[%(2L f~"4O5#'<#?; [Ј;* "kbó ;s}0p  )Eyņ&q:qhܹѩ;g\".oB|tL|U.RՕ6'99i<.tm*\LxkS˯TP.e8=&@cL/?@M"Ҟ*u}ێgSȾS$챶Qz9t06 dP\Gcx4 .z !HI/e4 bGx^Y2]3.~Xp^Me24(܈j6yƱ"{@,$N-y}BDVvQy(oy} /{ ^j ӂ*޸5ZAbFx@l-=)W!{Zq/_/-ɚHk't.ϡU*慄Kvľ;kvkAy.WuƁ[ghrˉZ"+ gY"Ϣ9 僁;0dC.eoކ 2ѹfTE->*7rHͶKӚqsT1378x4鏦9:rŽ׵Q =tı δ\ڄ'foFD2މ) [*]V[CB|ˀ?JL*FQkEZӎղ)ː>B>/~}.wDGnӑE)a]=g`^^\ni*9 4 MP8+c Ad$D9Y\rdy ojYGnQ"Z|=::fZxX.7oqov.9lW<~V3r ix9Al{~|}c991e&<`+eBw:wyҚb)èceK8zLG r,~0zmItMO>ߙj)j}>ϓBI+~~27wqs!kW}_V},) `jN^Ud6ݐھb7zE)''6fY'?hHSiCK4!"4>/5&xHC^~siUq$ le%X7O\Ar(ήL1aԩxGW٦S|_09wYTq/q,UOneq3)Ɯ!XiKG`#{I}ډPEcX/^T-<ڪfO#BzQ0tWfFp//Tf*|$${WͩKb 5]BVB?b}^[A,]`#;%@vgOv;f6#] 6c~}]5;/ag[Oo>{l~ݽcjxvw\!bk~* a~!D3zhv~_ӧA}jqitswj76cHfr1yd>CgfsxwFO-*MU~{S5GXG֑j{U&%s~;~??F;B~a i ~3>X>ZQ袢}-"c_f'YSce0겥R70NTᔞ&_Xi X9zrSG`L[5S}YIa-o,v_xiUP^rRtڥn:?䔈%_JNѩy~z**ԹiZ)AU4E~v&JWvn=%T}t!󉓙Zi/vU톲IVįxS++Js)'L FJGG[%WGl4l֒!}s ;"Wg$4?RiT9 %h<@䷋j}de#ԗe/oĚږ$0JYcZ/PB;J,cuFu⾍Z{oXVLJ~a1+ r!c\%="* ׉qefu̫\''WqXVuqPcDθ ܹ';0ylfyqX8jcD#8 3ѕ=c1+˷ ]w7N,S F{|.r{T"Ʋ!Ղ೵dVeJ '+XJ6-&<"կt`Y^_?z]VDh};]qIV:{ٹXX߼:qC$@T6Me$*"!3QO[adr91a=) m<UddO [n+wtYZO+LNU[*=x޽^^bxB0UdI_]sͲTntU'$4{D*!4XH_낆:M\']<[\lٕHugp/|HgnajȗqnW#[R:;$avʤ:5bTKTڙdN'}(0J⚵`-W.f[rIЛ0QanOm* DfچV4iж|_M w>Rrf+VUEyu#KVm*HO ]ck4iF,g~uzm%7G#v}ʕxOIb@lը6s10B$,Y|Oa,JGp#Uܗrm\+]˽'fLa4䳼o2aq/<CdȻq7ٍ$?h3o!WMn֢ڋkEi/@]պNZOm@IJ-c WGDpdɊ~B[!|*#W=>:C}#8 _5WȰs(#XY{s[¼\f8VQ6{ҘY٬,1~Z+@C,EVp^Icp1IIJO BBoW" y#KăA?5!9w/?>,>Vc+Θ;WpMf:w^ :T %Rp.ê7|tToFџ$Bar[-S|A2>G,,Iqq| "xZUk{l;Atq(Rۮlָ}~^a/ ^ԜwzF9ˤ*o^vFb/J4`iJ|6/ςf/cq.y4I8i4{ϠpQ,s ,=WOfXwׁ:s7jO^e嵢 Er"$"F7{p, >[):.M|A҉JnXKޜgLsv{!pZyOq<֚Mp9? {cGsf24®dpLJvlڹ\WCziSa  4pC8i{$+i/9v޴92B㘺FzE0U5CgݙJjQ>^)l^ՍA _TtOI[LJM'V5%&<Y9O昭/u /9I2}W-0t7[kô6r⌏!xM‘),c*M @_d}#[8[<{4G`Z`Jw~~_?s>no[[ok{W7\,dĦ%9'znO"k\  `A}e 0`S!|fCuTq_`_슪j'^&]ݘ)e{ rguF0; $ʼ9# <\k5X2zces9ا:ͳ]IIJ4{MXwUB=2mL$hѹ3&䢞^ D9:8 p@.}Y sJ. 
R's SNDEtp[^FFp„SM]L&h\^^{i=Nz |6lrP|4 ꋋz.)tt!:d3҂Kڪ|l3gs:Q`6"э' ϝTwDT@zX3L|LBPYQ d/1;* RA%ߜ~Z\FLsȎsi$N064 BPY16C&L4 !8Ր_VVK#;ۚh3á:Ѐ؎ӊC^S;9]vpq]#3V8',7̫5{NG&.q7GEac=\ڐ@$1l;5諭׬ {*dJGڑ>@ag4`ig1\CggbUQA15weuls\1;Cc"WK ij7)HqV쳁'h`l{6W*)h>##T̯D y;LXe);-4m&r]Jrd`%{}\w.(R TLR*6LdJ,"KHn$ ٓشz&niS'e# $NV?Ix!G.+xoh98=jiG` HB(q@UPk竫n66]ӝdFw53B,@pf͸hv%'kyFNe[[Wse90#>1܅5L{2+3^.gR4(i> tDBݶF|~ekEmQgN *h"o)i<|W>?|wec.nULXC ;yeo8_&k;/Tj8~geTnHf ؏X r>Mx9V\S;KlnX,2*Y`l2NM"p)d&:,hd>- XUN>s>"GxxYL*Y*zn ъM"/6>{2"٧&Q=C>?rFڦg] 5ӏ/A\v-E*dt VBeR5&/rKcjw~Om@nQ&___Yys?k+fcԦ_Zse^ڽ{}=??gO4WW *|g|r09 ~Sć.oRͻY|1Ԙ7wKM5"E8f6Ff1'cQ}2a2Ȁz UvT;[3-4Lӑ8pd5{9,l-"l D9q\K,3^bB?ͪ3G-[fra%[+sZg_dxƿh?ۡWN9#.Hw|U X7M(_]' f/41B.¤[|3*;Gǵ݃cQ6$GKsf#ln[.-P-BqRʲ[?5afD֫R˧J5L(ah~d:7?+?KާSzJZ ݛmuP2떠Ƈ%-meuU6$."v""8^BłyHAn}g@-U]b5]F A!D*܎YNT5m{J5{|ʕ+O{ʮ es&e-k vo6} 'V},rK8ɥer:̥V:B>PBSަ "uF8lF[pxz:\rkt$"W+kaeKFx?&}S=~Ѵ/2ܳS}ίLoس{4w9G@d<5!y@9wqM^' W)SI0$(I,tNM1>nf&8j&2.&+}6l p ̢]ΊbX{Ё2BSh.nmP-iULpJbc>/6' @ #blH_U\\UOn>pC nڲ7yhs-,{ӱKUnYEc<ͻHw|jkjeQUxK2K$5{œ)07vrТT=uҼ7SAiAu(ZfD!a9cPTfMD.4iC~/(j+EϨ%v[/L% yV("m~1O?i.TPkF௟鄩>"FreeFem-sources-4.9/CHANGELOG.md000664 000000 000000 00000050606 14037356732 016172 0ustar00rootroot000000 000000 # Changelog All notable changes to this project will be documented in this file. 
## [4.9] ### Added - add P3 lagrange finite element on meshS and meshS - add new plugin `meshtool` to add a tool to compute the number of connected components of all kinds of mesh (mesh,mesh3,meshS,meshL) with 2 kinds of connected components: firstly on the interior part of the mesh (default) and secondly on the closure of the mesh (see `examples/hpddm/bConnectedComponents.edp`) add functions int[int] In=iminP1K(Th,u) or int[int] Ix=imaxP1K(Th,u) get the array min/max of value u[i] where i is vertex number on each element k, so we have u[In[k]] = min u[i]/ i in k; - add in plugin `bfstream` to read binary int (4 bytes) to read fortran file and try to pull tools to share the endianness in progress - add gluemesh of array of MeshL and MeshS type - interface to `PC_MG_GALERKIN_BOTH` - Kronecker product of two sparse matrices `matrix C = kron(A, B)` - add a lot of Discontinuous Galerkin finite elements on Mesh3, MeshS, MeshL in 3d : P1dc3d, P2dc3d, P3dc3d, P4dc3d , P0edge3d ,P0edgedc3d , P0face3d ,P0facedc3d , P0VF3d ,P0VFdc3d , on Surface : P1dcS, P2dcS, P3dcS, P4dcS , P0edgeS ,P0edgedcS , P0VFS ,P0VFdcS, on Curve : P1dcL, P2dcL, P3dcL, P4dcL , P0VFL ,P0VFdcL remark: the associated generic names P1dc, P2dc, P0edge, P0VF exist, and all dc finite elements correspond to no continuity across elements. - add code of intallfaces to do Discontinuous Galerkin formulation in 3d (in test FH.) ### Changed - Now the order to find MPI in configure is first if you have PETSC then take MPI from PETSc otherwise use previous method - on MeshL defined with buildmeshL now the default labels are 2*k-1 (resp. 2*k) for the beginning (resp. end) of the curve, where k is the order of the curve used in buildmeshL. So if you have one curve the labels are 1 and 2. And now the element labels are the region number, not the label. These elements are not really tested, so be careful.
- PETSc 3.15.0 ### Deprecated - ### Removed - ### Fixed - bug in Find triangle containing point in 2d (border case), `int Mesh::DataFindBoundary::Find(R2 PP,R *l,int & outside) const` the parameter l was not correctly returned due to a local variable. - set CFLAGS=-Wno-implicit-function-declaration to compile with Apple clang version 12.0.0 (clang-1200.0.32.29) to remove following error: implicit declaration of function correct `3dCurve/basicGlue.edp` and add missing test - bugs in SLEPc `SVDSolve()` with a rectangular `Mat` - bugs in nElementonB for DG 3d formulation. ### Security - ## [4.8] ### Added - Bilaplacian example using Morley FE with PETSc, see `examples/hpddm/bilaplacian-2d-PETSc.edp` - Oseen problem preconditioned by PCD, see `examples/hpddm/oseen-2d-PETSc.edp` - SLEPc polynomial eigenvalue solver `PEPSolve()` - add trivial example to check periodic boundary condition on meshS , meshL , mesh3 examples/3d/periodic3.edp examples/3dSurf/periodicS.edp examples/3dCurve/periodicL.edp ### Changed - PETSc version 3.14.2 - Mmg version 5.5.2 - link of ffglut so change in configure.ac and Makefile.am LIBS -> FF_LIBS and LIBS become empty to remove default libs - change number of save plot in ffglut from 10 to 20 for O. Pironneau ### Fixed - some memory leaks - the periodic boundary condition have wrong before first a semantic level of MeshS and MeshL case. the new syntax is for example: meshL Tl=segment(10); fespace Vl(Tl,P1,periodic=[[1],[2]]); meshS Th=square3(10,10,[x*2*pi,y*2*pi]); fespace Vh2(Th,P1,periodic=[[1,x],[3,x],[2,y],[4,y]]); - fixed '*' keyboard trick, to keep the viewpoint in ffglut or not.
## [4.7-1] ### Changed - change the language definition to use type as a construction function with named arguments for bem plugin - PETSc version 3.14.0 - ARPACK compiled by SLEPc - Mmg version 5.5.0 - -std=c++14 instead of -std=c++11 when possible ### Removed - plugins thresholdings, symmetrizeCSR, and fflapack and associed example ### Fixed - problem compilation with gfortran-10 of arpack and mumps (add -fallow-argument-mismatch flags) ## [4.7] ### Added - new way to build matrix beetween 2d Finite element 2d and Curve finite element to do mortar (Thank to Axel ) , see first example `examples/tutorial/mortar-DN-4-v4.5.edp` - add `Ns` normal vector in R^3 on meshS (normal of the surface) of current point (to day Ns of [x,y,0] plan is [0,0,-1]) no be compatible to exterior normal. - add `Tl` tangent vector in R^3 on meshL (tangent vector of the line/curve) of current point - compile ffmaster / ffslave example under windows (thanks to johann@ifado.de) - Boolean parameter `spiltpbedge` in `buildmesh` to split in to edge with two boundary vertices - interface to PETSc DMPlex, see `examples/hpddm/DMPlex-PETSc.edp` - function `MatDestroy` - function `MatPtAP` and `transferMat` for parallel interpolation between non-matching grids, see `examples/hpddm/PtAP-2d-PETSc.edp` or `examples/hpddm/diffusion-mg-2d-PETSc.edp` - preliminary interface to `SVDSolve` from SLEPc to compute singular value decompositions, see `examples/hpddm/mf-2d-SLEPc.edp` or `examples/hpddm/helmholtz-2d-SLEPc-complex.edp` - preliminary interface to `NEPSolve` from SLEPc to solve nonlinear eigenvalue problems, see `examples/hpddm/nonlinear-2d-SLEPc-complex.edp` - `transpose` parameter when constructing a `Mat` for defining a matrix-free transposed operation - interface to `PetscMemoryGetCurrentUsage` - add P2b, RT0, RT1 surface FE (P2bS, RT0S, RT1S)) - add operator interpolate (2d->3d surface) - add operator x = A'\*b; where x, b are array and A 2 dim array (full matrix) and generate an error in case 
of b'\*A or b'\*A expression - function `MatLoad` to load a PETSc `Mat` from disk, see `examples/hpddm/MatLoad-PETSc.edp` - possibility to assemble a symmetric `HMatrix` and to densify a `HMatrix` into a `Mat` ### Changed - moved Htool to its new GitHub location - ScaLAPACK and MUMPS are not compiled by PETSc anymore if there is no Fortran compiler - MPICH is compiled by PETSc if no MPI is detected during configure, see https://community.freefem.org/t/feature-request-use-download-mpich-on-ubuntu/407 - PETSc version 3.13.5 - force `--with-cudac=0` in `make petsc-slepc`, see https://github.com/FreeFem/FreeFem-sources/issues/141 - change DSL keyword P1dc3dL->P1dcL and P1dc3dS->P1dcS - rename `view`, `hasType`, `changeSchur` to respectively `ObjectView`, `HasType`, and `ChangeSchur` ### Deprecated - rename `changeNumbering`, `globalNumbering`, `originalNumbering`, `changeOperator`, `destroyRecycling`, and `attachCoarseOperator` to respectively `ChangeNumbering`, `GlobalNumbering`, `OriginalNumbering`, `ChangeOperator`, `DestroyRecycling`, and `AttachCoarseOperator` - `Nt` the normal vector of the current (wrong on meshL) use `Ns` or `Tl` ### Removed - `augmentation` routine from the PETSc plugin - `MPIF77` variable ### Fixed - a lot of mistakes in MeshL elements; add an example to check a lot of things `tutomesh1d.edp` - fixed problem of change of mesh when rebuilding a 2d mesh with buildmesh, .... (Thanks to P. Jovilet for pointing out this problem) - missing METIS library when using SuiteSparse compiled by PETSc - missing `-fno-stack-protector` when building PETSc on Windows, see https://community.freefem.org/t/error-loading-complex-petsc-slepc-library/370 - fixed ffglut for the plotting of FE array solution - fixed ffglut bug on MacOS Catalina, drawing in only half of the window (Apple Bug ???) - correct P0VF finite element - `abs` function of array ## [4.6] ### Added - new search algorithm for the element containing a point (safer) in mesh of type mesh3, meshS, or meshL.
- new function `hasType` to know if a PETSc component has been installed, e.g., `hasType("PC", "hypre")` - eigenvalue problems on linear elements, cf. `examples/eigen/LapEigen1DBeltrami.edp` or `examples/hpddm/laplace-beltrami-3d-line-SLEPc.edp` - `--download-cmake` in PETSc configure if there is no CMake available - flags `--with-[slepc|slepccomplex]-include` and `--with-[slepc|slepccomplex]-ldflags` for when SLEPc has been built outside of FreeFEM or PETSc - interface to `KSPSetResidualHistory` and `KSPGetIterationNumber` - interface to `mpiWaitAll` - new function extract, allows to build a curve mesh from a 2d mesh (can extract a labeled boundary, apply a geometric transformation) - ffglut can plot a vectorial FE function in surface 3d - distributed ParMmg interface, cf. `examples/hpddm/distributed-parmmg.edp` or `examples/hpddm/laplace-adapt-dist-3d-PETSc.edp` - new parallel interpolator on non-matching meshes, cf. `examples/hpddm/transfer.edp` - ability to solve problems in single precision or with 64 bit integers - tool to read data form vtk file only in 3d (cf. 
plugin iovtk a first example `examples/plugin/iovtk.edp`) - tool to read/write ply file of meshL, mesh3, MeshS : Polygon File Format / Stanford Triangle Format do `load "ioply"` see `examples/3dSurf/operatorsOnMeshS.edp` ### Changed - new `tgv` values: -10 => zero row, -20 => zero row/column - Windows binary now shipped with PETSc/SLEPc - BEM examples are now in `examples/mpi` - plot border type is now in 3d (border 2d and 3d) - PETSc version 3.13.0 ### Deprecated ### Fixed - `--enable-download_package` may now be used to download a single package, e.g., `--enable-download_metis` - compilation of PETSc under Windows - compilation of plugins when using static libraries - correct detection problem in FE type when using a vectorial FE - macro concatenation with spaces in arguments - correct bug in `plugin/seq/Schur-Complement.cpp` - correct ambiguity bug in `plugin/seq/bfstream.cpp` (reading real or integer) - compilation of plugin libff-mmap-semaphore.c under windows ## [4.5] ### Added - for windows version: rename under mpi `MUMPS` in `MUMPS_mpi` and in sequential in `MUMPS_seq` due to conflict between seq. and mpi version so all MUMPS load become `MUMPS_seq` or `MUMPS_mpi` in all examples - correct link edition with fortran mpi under windows just use the msmpi (just use `libmsmpi.dll`) - new `mmg` and `parmmg` (parallel mmg) plugins interfacing mmg5 and parmmg libraries, to replace `mmg3d-v4.0` and `freeyams` (Thanks to P-H Tournier) - a true 3d anisotropic mesh adaptation `examples/3d/Laplace-Adapt-aniso-3d.edp` - an example to extract surface mesh from isovalue in `examples/3dSurf/Pinochio.edp` - function `f.eatspace` to reach eof on istream file which returns false in case of EOF.
- function `f.length` to get the istream file length - Interface to `PetscLogStagePush()`/`PetscLogStagePop()` - Ability to directly assemble a `Mat` using a `varf` - New `bem` plugin for the Boundary Element Method (using htool and BemTool libraries) - New DSL for BEM (varfbem see examples/bem) - add int0d to apply Neumann BC (curve FE), differential operators (dx,dy,...), compute an 1d integral - add P1dc FE for Border FEM (possible to define a new FE with plugin) - PETSc as a subdomain solver for HPDDM ### Changed - correct ffglut (bug in case of changing number of nb isovalue) - PETSc version 3.12.4 - Change the point search triangle algorithm to be sure in any case (in test) - Sline operator renamed to segment - In square3, segment, movemesh functions: geometry transformation can now be [X] or [X,Y] or [X,Y,Z] according to the minimal shape element dim - PETSc now download OpenBLAS if there is no BLAS found by FreeFEM configure ### Deprecated - freeyams plugin - mmg3d-v4.0 plugin ### Fixed - fix plot for curve mesh ## [4.4-3] ### Added - Preliminary support for symmetric distributed PETSc matrices (MATMPISBAIJ instead of MATMPIAIJ) - Interface to AMS, Hiptmair--Xu preconditioner for problems in H(curl), see maxwell-3d-PETSc.edp - FEM on curve 3D (in test) - P0, P1, P2 curve 3D FE (scalar for the moment) - i/o medit and vtk format for curve FE - checkMesh() function, allow to remove multiple vertices, elements and border elements (argument: precisvertice(double),removeduplicate(bool)) - possible to build a curve mesh from a surface, ThS = buildBdMesh(ThS) and define this new mesh by meshL ThL= ThS.Gamma - can extract a border part of a meshL (meshL ThL = extract(ThL,label=llabs)) - Support for optimized boundary conditions with PETSc, see helmholtz-2d-PETSc-complex.edp - buildmeshL() function: build meshL from borders - `mpiCommSelf` keyword ### Changed - function buildSurface(...) renamed by buildBdMesh(...) - line3(...) renamed by SLine(...) 
### Removed - FFTW is not compiled by PETSc anymore - Spurious outputs in TetGen plugin - curve3 type -> border - hypre examples since it is not downloaded by FreeFEM for many months (use PETSc instead) - `dscalprod` routine from HPDDM and PETSc plugins, use `A(u, v)` with `A` a `Mat` or a `schwarz` object - `export` function for `macro_ddm.idp`, use `savevtk` as in the sequential iovtk plugin ### Fixed - plotMPI function for plotting 3D solutions, problem with serialize - variable mes in clean_mesh function - correct bug verflow in plugin iohdf5 - correct problem with buffer iostream function (buffer must be out of range ) - correct i/o vtk and by defaut write at binary format - fix an overflow in RT13d FE - problem with auto-build of border mesh ## [4.4-2] ### Added - add matrix and array tools (FH) ``` matrix A=eye(10); real[int,int] af = eye(10,10); real[int,int] a(10,10); int[int] I=[1,3,6]; real[int] d = a.diag ; // get the diag of full matrix (no copy) real[int] dI= d(I); // init a array from renumbering array real[int] c= a(:,1)(I); // init a array from renumbering array real[int] aa= a.asarray; // view full the matrice as an array (no copy) a(2:5,3:7).diag= 200; a.diag += 100; ``` - adding of a global variable `lockOrientation` to allows the building of mesh without checking the orientation elements (AF) - add plugin tool to build matrix edge/P1 with sign `mat_edgeP1` (FH) - new examples `diffusion-2d-mg.edp` and `helmholtz-2d-mg.edp` showing how to use user-defined coarse corrections - support for nonzero scalars in PETSc block matrices - simpler constructor for sequential HPDDM matrices (no need for the restriction array and the partition of unity) - array of `Mat` and `schwarz` types - add mpi meshS (serialize object) ### Changed - correct mistake in mpirank in case of broadcast with comm (thank tp PHT) - update fftw to v3.3.8 and openblas v0.3.6 - in movemesh23 correct the argument label -> region to change label - new implementation for the moving 
mesh functions, new arguments: boolean cleanmesh, removemultiple, rebuildborder - new PETSc version 3.12 - templatize movemesh, setMesh functions - add conditional tests in make check ### Fixed - spurious output in PARDISO - fix problem in ffglut (AF) - detect hdf5 and gsl if no enable-download ### Security ## [4.4] ### Added - interface to `TSSolve`, DAE/ODE solvers from PETSc - interface to `TaoSolve`, Toolkit for Advance Optimization from PETSc - simpler constructor for sequential PETSc matrices (no need for the restriction array and the partition of unity) - some unit tests ### Changed - PETSc version 3.11.3 - replaced custom implementations (`RNM::real`, `RNM::norm2`, and `Fem2D::norm`) by C++11 functions - API of the macro `plotMPI` - switched to inexact coarse operators in HPDDM by default - RHS and solution vectors permuted in `IterativeMethod` and `DDM` - `.mesh` are now saved using version 2 (which stores floating-point scalars in double precision) ### Removed - legacy linear solver interfaces using the old matrix type - dot products using CBLAS because of errors at link time - Newtow function (bad name) ### Fixed - assertion failure with some 3D meshes when doing `trunc(Th, true)` (thanks to F. Feppon) - compile error when plotting arrays of vectorial functions ## [4.2.1] ### Added - nested fieldsplit example `examples/hpddm/natural-convection-2d-PETSc-fieldsplit.edp` - `int[int][int] array;` is now supported (a size was previously needed, i.e., `array(0);`) - check selectivity during `make check`, depending on available 3rd party librairies - new CI/CD tools for `develop` branch - new gestion of mesh3 - meshS coupling - square3, buildSurface... 
operators for meshS ### Changed - SLEPc is now directly downloaded by PETSc with `--download-slepc` - HPDDM and PETSc API have been simplified, instead of an `int[int]` and an `int[int][int]`, only a single `int[int][int]` is now needed - `build` macros for HPDDM and PETSc have been simplified to follow the above API change, two parameters have been permuted as well to match the HPDDM and PETSc constructors - PETSc version 3.11.2 and HPDDM with multilevel GenEO ### Removed - old interfaces that were not maintained anymore (pARMS, PaStiX, hips) and that are available through PETSc - spurious outputs when destroying some meshes - old surface msh3 type, replaced by meshS ### Fixed - multiple segmentation faults when using unitialized values (thanks to G. Sadaka) - nested fieldsplits in the PETSc interface - memory leaks in `SNESSolve` (nonlinear PETSc solvers) - bug fix of `Cofactor` function - various bug fixes on surface mesh ## [4.1] ### Fixed - missing conj operation is some hermitian operation on complex sparse matrix like A+c*B', A*B' thanks to P-H Tournier - writing CheckAllEdp to be compatible with new tree - fix eps in trunc in case of very anisotrope mesh, Thank G. 
Sadaka ### Added - CMake, thanks to [https://github.com/cdoucet](https://github.com/cdoucet) - Surface finite element, thanks to [AFourmont](https://github.com/AFourmont) - AppImage generation, thanks to [Alexander Sashnov](https://github.com/asashnov) ### Changed - PETSc/SLEPc version 3.11 ## [4.0] - correct bug in RT1Ortho and RT2Ortho 2d in the computation of derivative (2018-01-30, Thanks to Bryan.Bosworth@colorado.edu) - uniformize 2d/3d in element, method EdgeOrientation(e) now returns +1/-1 - change all the sparse matrix structure - remove all map matrix jan 2019 ### Added - surface finite element (in progress) - nElementonB (version 2d and 3d of nTonEge) - area (same as lenEdge) in 3d - add function labels to get the array of label of a mesh - add function regions to get the array of label of a mesh - correct big bug in toRarray,toZarray, toCarray transform [ ... ] array to int[int], real[int], complex[int] ## [3.62] - 2018-08-31 ### Added - add x0=true/false, add veps=eps in solver parameters to initialize or not the CG, GMRES algorithms with 0 or previous value and veps is to get the absolute tolerance - A tool to solve adjoint matrix A with only one single LU decomposition with LU, UMFPACK, GMRES `u[]=A'^-1*b;` - Add plugin to save matrix in Harwell-Boeing format (see Harwell-Boeing format) ### Fixed - Fix bug in `trunc` (2d) in case of very fine mesh (eps too small) ## [3.61] - 2018-06-20 ### Added - Add name parameter `kerneln=`, `kernelt=`, `kerneldim=` for dissection solver - Add option in method `toClose` function in `fquatree` to get the nearest point (for intersect meshes) - Add missing file `curvature.edp` - Add `imax`, `jmax`, `imin`, `jmin` to get index of row or column of the min or max coefficient
We have: `A(A.imin,A.jmin) = A.min` - Add cosmetics in macro (macro name, macro line...) ### Changed - Pass to PETSc/SLEPc version 3.8.4/3.8.3 ### Fixed - Fix launchff.exe bug under windows 64 to choose a filescrip if no parameter - Fix the label definition in case of `intalledges` in 2d - Fix mpi_comm with MUMPS (very rare) ## 3.60 - 2018-04-13 ### Changed - The main distribution is now on Github [Unreleased]: https://github.com/FreeFem/FreeFem-sources/compare/v4.9..develop [4.9]: https://github.com/FreeFem/FreeFem-sources/compare/v4.8..v4.9 [4.8]: https://github.com/FreeFem/FreeFem-sources/compare/v4.7-1..v4.8 [4.7-1]: https://github.com/FreeFem/FreeFem-sources/compare/v4.7...v4.7-1 [4.7]: https://github.com/FreeFem/FreeFem-sources/compare/v4.6...v4.7 [4.6]: https://github.com/FreeFem/FreeFem-sources/compare/v4.5...v4.6 [4.5]: https://github.com/FreeFem/FreeFem-sources/compare/v4.4-3...v4.5 [4.4-3]: https://github.com/FreeFem/FreeFem-sources/compare/v4.4-2...v4.4-3 [4.4-2]: https://github.com/FreeFem/FreeFem-sources/compare/v4.4...v4.4-2 [4.4]: https://github.com/FreeFem/FreeFem-sources/compare/v4.2.1...v4.4 [4.2.1]: https://github.com/FreeFem/FreeFem-sources/compare/v4.1...v4.2.1 [4.1]: https://github.com/FreeFem/FreeFem-sources/compare/v4.0...v4.1 [4.0]: https://github.com/FreeFem/FreeFem-sources/compare/3.62...v4.0 [3.62]: https://github.com/FreeFem/FreeFem-sources/compare/3.61...3.62 [3.61]: https://github.com/FreeFem/FreeFem-sources/compare/v3.60...3.61 FreeFem-sources-4.9/CMakeLists.txt000664 000000 000000 00000001361 14037356732 017113 0ustar00rootroot000000 000000 cmake_minimum_required(VERSION 3.0) project(FreeFEM C CXX Fortran) set(FREEFEM_VERSION 4.0) # Add the path containing cmake modules # Note: this line cannot be put in the body of ff_configure_cmake # because ff_configure_cmake is searched in the right repository # thanks to this line set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake/modules) include(ff_configure_cmake) 
include(ff_configure_compilers) include(ff_configure_thirdparty) include(ff_write_config_file) ff_configure_cmake() ff_configure_compilers() ff_configure_thirdparty() ff_write_config_file() # creation of "make test" command # Note: this call MUST be performed in the main CMake script enable_testing() add_subdirectory(src) add_subdirectory(examples) FreeFem-sources-4.9/Makefile.am000664 000000 000000 00000045001 14037356732 016406 0ustar00rootroot000000 000000 ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . 
# ############################################################################ # SUMMARY : Makefile for FreeFem++, adapted to Automake # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : Antoine Le Hyaric, Simon Garnotel # E-MAIL : SUBDIRS=3rdparty src \ plugin \ examples EXTRA_DIST=README.md CHANGELOG.md AUTHORS\ bin/keys.awk \ bin/mpic++.in \ bin/Install-MacOS.command \ bin/FreeFem++-CoCoa.in \ bin/save-in-files \ bin/cleanregen.sh \ bin/copysharedlibs.sh \ bin/script/clean_all_installed_ffpp.sh \ bin/script/PostInstall.m4 \ bin/regtests.sh \ bin/uninstall-ff++ \ bin/build/cleancrlf \ bin/build/orgindex \ bin/build/download \ bin/build/ff-md5 \ bin/build/links2files \ bin/modpath.iss \ bin/Install-MacOS.command.in \ bin/Searchdylib \ bin/test-driver-ff \ etc/tools/srcHeader.txt \ etc/tools/.uncrustify \ etc/debian/freefem++-doc.doc-base \ etc/debian/freefem++.manpages \ etc/debian/apt-ftparchive.conf \ etc/debian/freefem++.docs \ etc/debian/compat \ etc/debian/freefem++.examples \ etc/debian/changelog \ etc/debian/freefem++-nw.install \ etc/debian/freefem++-nw.prerm \ etc/debian/freefem++-mpich.links \ etc/debian/freefem++-doc.docs \ etc/debian/freefem++-mpich.dirs \ etc/debian/FreeFem++.1 \ etc/debian/freefem++-mpich.install \ etc/debian/freefem++-mpich.postinst \ etc/debian/freefem++-nw.postinst \ etc/debian/shlibs.local \ etc/debian/rules \ etc/debian/freefem++-mpich.prerm \ etc/debian/copyright \ etc/debian/freefem++-nw.dirs \ etc/debian/CopyToServer.sh \ etc/debian/control \ etc/debian/freefem++-nw.links \ etc/config/FreeFem++.scrpt-txt.in \ etc/config/FreeFem++.app.in.tgz \ etc/config/m4/acoptim.m4 \ etc/config/m4/acmpi.m4 \ etc/config/m4/ax_lib_hdf5.m4 \ etc/config/m4/ax_lib_gsl.m4 \ etc/config/m4/regtests.m4 \ etc/config/m4/acmacros.m4 \ etc/config/m4/ax_openmp.m4 \ etc/config/m4/tags.rOZFHw \ etc/config/m4/WindowsPackage.m4 \ etc/FreeFem++.app/Contents \ etc/FreeFem++.app/Contents/Resources \ 
etc/FreeFem++.app/Contents/Resources/English.lproj \ etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib \ etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/classes.nib \ etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/NSAppleMenuImage.tiff \ etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/info.nib \ etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/objects.nib \ etc/FreeFem++.app/Contents/Resources/FreeFem++.icns \ etc/FreeFem++.app/Contents/Resources/script \ etc/FreeFem++.app/Contents/Resources/Credits.rtf \ etc/FreeFem++.app/Contents/Info.plist \ etc/FreeFem++.app/Contents/PkgInfo \ etc/logo/logo.ico \ etc/logo/logo.bmp \ etc/doxygen/mainpage.dox \ etc/doxygen/Doxyfile \ etc/jenkins \ etc/jenkins/install.sh \ etc/jenkins/deployDEB.sh \ etc/jenkins/build.sh \ etc/jenkins/check.sh \ etc/jenkins/configure.sh \ etc/jenkins/clean.sh \ etc/jenkins/build_PETSc.sh \ etc/ff/ff.xcodeproj \ etc/ff/ff.xcodeproj/project.pbxproj \ readme/BUGS readme/HISTORY_BEFORE_2005 readme/README_MAC.md readme/TODO \ readme/COPYRIGHT readme/INNOVATION readme/README_COMPILATION.md readme/README_WINDOWS.md \ readme/HISTORY readme/README readme/README_GIT.md readme/README_XCODE.md \ idp/ffddm_geneoCS.idp \ idp/func-max.idp \ idp/DDM-Schwarz-macro.idp \ idp/ExtractDofsonBorder.idp \ idp/Heat3d.idp \ idp/MPIGMRESmacro.idp \ idp/gsl.idp \ idp/ffddm_partitioning.idp \ idp/cube.idp \ idp/mortar-msh.idp \ idp/Makefile.am \ idp/movemeshsmooth.idp \ idp/DDM-funcs-v2.idp \ idp/cobrameshcavity.idp \ idp/MPIplot.idp \ idp/MeshSurface.idp \ idp/ffddm.idp \ idp/ffddm_coarsemeshCS.idp \ idp/ffddm_geneoCS_3rdlvl.idp \ idp/getARGV.idp \ idp/ffddm_functions.idp \ idp/macro_ddm_substructuring.idp \ idp/ffddm_parameters.idp \ idp/macro_ddm.idp \ idp/CC.idp FF_MAC_PREFIX=FreeFem++v$(VERSION)$(ADD_PACKAGE_NAME) FF_EXAMPLES_FILES=examples/tutorial/aile.msh \ examples/tutorial/xyf \ examples/3d/dodecaedre01.mesh \ examples/3d/lac-leman-v4.msh 
\ plugin/seq/load.link \ examples/plugin/cube.msh \ examples/plugin/fig.pgm \ examples/plugin/lg.pgm \ examples/mpi/regtests.sh \ examples/misc/speedtest.sh \ examples/*/*.edp \ examples/CheckAllEdp \ examples/plugin/ch.pts \ examples/plugin/g.gmesh clean-local:: -find . \( -name '*~' -or -name ListOfUnAllocPtr.bin \) | xargs rm -rm examples/*/*.eps # "dist" targets clean-local:: -rm freefem++-*.tar.gz freefem++-*.zip -rm Output/FreeFem++-*.exe # Reduced compilation # ------------------- unittests:: -cd unit && make quick: cd src/libMesh && $(MAKE) $(AM_MAKEFLAGS) cd src/lglib && $(MAKE) $(AM_MAKEFLAGS) cd src/fflib && $(MAKE) $(AM_MAKEFLAGS) cd src/std && $(MAKE) $(AM_MAKEFLAGS) nw: cd src/libMesh && $(MAKE) $(AM_MAKEFLAGS) cd src/lglib && $(MAKE) $(AM_MAKEFLAGS) cd src/fflib && $(MAKE) $(AM_MAKEFLAGS) cd src/nw && $(MAKE) $(AM_MAKEFLAGS) bamg: cd src/libMesh && $(MAKE) $(AM_MAKEFLAGS) cd src/lglib && $(MAKE) $(AM_MAKEFLAGS) cd src/fflib && $(MAKE) $(AM_MAKEFLAGS) cd src/bamg && $(MAKE) $(AM_MAKEFLAGS) ide: cd src/libMesh && $(MAKE) $(AM_MAKEFLAGS) cd 3rdparty && $(MAKE) $(AM_MAKEFLAGS) cd src/lglib && $(MAKE) $(AM_MAKEFLAGS) cd src/fflib && $(MAKE) $(AM_MAKEFLAGS) cd src/ide && $(MAKE) $(AM_MAKEFLAGS) FreeFem++-cs$(EXEEXT) # Cleaning generated files which are stored in the CVS repository, to # avoid technical CVS conflicts. clean-gen: ./cleanregen.sh # Testing # ------- # The standard automake goal "make check" is also valid. It just does # not run any test that could prevent the user from working on its # machine (because of unexpected windows opening right in the middle # of the workspace). visualcheck: all $(MAKE) $(AM_MAKEFLAGS) check VISUALCHECK=yes # Windows package # --------------- # Windows package script (for Inno Setup). We extract version # information from the Debian Changelog to get the package release # number as well. win32:WindowsPackage.iss FreeFEM-documentation.pdf test ! 
-d plugin/include-tmp || rm -r plugin/include-tmp cd plugin;mkdir include-tmp;cp -Lr seq/include/* */*.h include-tmp cp -r 3rdparty/include/. plugin/include-tmp/. /c/Program\ Files\ \(x86\)/Inno\ Setup\ 5/ISCC.exe "`cygpath.exe WindowsPackage.iss`" WindowsPackage.iss: etc/config/m4/WindowsPackage.m4 configure.ac Makefile.am m4 -DVERSION='$(VERSION)$(ADD_PACKAGE_NAME)' \ -DMPIPROG='$(MPIPROG)' -DSIZEOFPTR='$(SIZEOF_PTRINBIT)' \ -DHOSTOS='$(host_os)' etc/config/m4/WindowsPackage.m4 > WindowsPackage.iss echo loadpath += '"!\."' >freefem++.pref echo includepath += '"!\idp"' >>freefem++.pref # Debian package # -------------- deb: dpkg-buildpackage -rfakeroot @echo Now run CopyToServer.sh in debian subdirectory # Build all versions # ------------------ nativeX: $(FF_MAC_PREFIX)_MacOsX.tgz echo "done" MacOsX: FreeFem++.app.tgz -rm -rf OsXxx mkdir -p OsXxx/Applications/ make install DESTDIR="`pwd`/OsXxx" tar zxvf FreeFem++.app.tgz -C OsXxx/Applications/ cd OsXxx;tar cvfz ../$(FF_MAC_PREFIX)_MacOsX.tgz . rm -rf OsXxx clean-local:: -rm freefem++-$(VERSION).tar.gz -rm -rf FreeFem++v*_MacOS # Native MacOS packaging # ---------------------- install-exec-local:: bin/script/PostInstall.sh FreeFEM-documentation.pdf test `uname` != Darwin || $(MAKE) FreeFem++-CoCoa bin/script/PostInstall.sh $(mkinstalldirs) -m 755 $(DESTDIR)$(pkgdatadir)/$(VERSION) test `uname` != Darwin || $(mkinstalldirs) -m 755 $(DESTDIR)/etc/paths.d/ test `uname` != Darwin || $(mkinstalldirs) -m 755 $(DESTDIR)/usr/local/bin tar cvf - $(FF_EXAMPLES_FILES)| (cd $(DESTDIR)$(pkgdatadir)/$(VERSION); tar xvf -) $(INSTALL_SCRIPT) examples/CheckAll examples/CheckAllEdp $(DESTDIR)$(pkgdatadir)/$(VERSION) $(mkinstalldirs) -m 755 $(DESTDIR)${bindir} test ! 
-s FreeFEM-documentation.pdf || $(INSTALL_DATA) FreeFEM-documentation.pdf "$(DESTDIR)$(pkgdatadir)" test `uname` != Darwin || $(INSTALL_SCRIPT) FreeFem++-CoCoa $(DESTDIR)${bindir} test `uname` != Darwin || echo $(bindir) >$(DESTDIR)/etc/paths.d/FreeFem++ test `uname` != Darwin || ( rm $(DESTDIR)/usr/local/bin/FreeFem++-CoCoa ; $(INSTALL_SCRIPT) FreeFem++-CoCoa $(DESTDIR)/usr/local/bin ) test `uname` != Darwin || ( $(MAKE) FreeFem++.app.tgz ; test -d $(DESTDIR)/Applications || mkdir $(DESTDIR)/Applications ; tar zxf FreeFem++.app.tgz -C $(DESTDIR)/Applications ) $(mkinstalldirs) -m 755 $(DESTDIR)$(ff_prefix_dir)/idp cd idp; for i in *.idp; do \ if [ -f $$i ] ; then $(INSTALL) -m 555 $$i $(DESTDIR)$(ff_prefix_dir)/idp; fi; done bin/script/PostInstall.sh:./Makefile bin/script/PostInstall.m4 m4 "-DFF__FVER=$(PACKAGE_VERSION)" "-DFF_BINDIR=$(bindir)" "-DFF__DATADIR=$(pkgdatadir)" bin/script/PostInstall.m4 > bin/script/PostInstall.sh chmod a+x bin/script/PostInstall.sh FreeFEM-documentation.pdf:Makefile if DOWNLOAD -rm FreeFEM-documentation.pdf.md5 -$(WGET) https://doc.freefem.org/pdf/FreeFEM-documentation.pdf.md5 -md5sum -c FreeFEM-documentation.pdf.md5 || (rm FreeFEM-documentation.pdf ; $(WGET) https://doc.freefem.org/pdf/FreeFEM-documentation.pdf) else touch FreeFEM-documentation.pdf echo "no downloaded documentation" endif ListFiles-natives: .FORCE test `uname` != Darwin || $(MAKE) FreeFem++-CoCoa echo $(FF_EXAMPLES_FILES) >$@ echo ./3rdparty/fftw/Makefile ./3rdparty/fftw/Makefile.am >>$@ find . -name '*.[ei]dp' -o -name '*.h*' -o -name '*.cpp' -o -name '*.pgm' |egrep '[.]/examples/examples' >>$@ find . -name '*.h*' -o -name '*.cpp' |egrep '[.]/examples/examples' >>$@ List-agl-dylib: src/nw/FreeFem++ otool -L src/nw/FreeFem++|egrep -v '/System/Library/|/usr/lib/'|awk '/.dylib/ {print $$1}' >$@ CheckMacLib.sh: src/nw/FreeFem++ echo "for i in `otool -L src/nw/FreeFem++|egrep -v '/System/Library/|/usr/lib/'|awk '/.dylib/ {printf($$1.OFS) }'` ; do test ! 
-f $$i && exit 1; done; exit 0" >$@ chmod a+x $@ CheckMPIMacLib.sh: src/mpi/FreeFem++-mpi echo "for i in `otool -L src/mpi/FreeFem++-mpi|egrep -v '/System/Library/|/usr/lib/'|awk '/.dylib/ {printf($$1.OFS) }'` ; do test ! -f $$i && exit 1; done; exit 0" >$@ chmod a+x $@ $(FF_MAC_PREFIX)_Macos:documentation ListFiles-natives -mkdir $@ cat ListFiles-natives|xargs tar chf - | (cd $@ ; tar xf - ) /Developer/Tools/CpMac "FreeFem++(Carbon)" $@/FreeFem++ if DOWNLOAD cp FreeFEM-documentation.pdf $@ endif $(FF_MAC_PREFIX)_MacOsX: all documentation ListFiles-natives List-agl-dylib -mkdir $@ if DOWNLOAD cp FreeFEM-documentation.pdf $@ endif cat ListFiles-natives|xargs tar chf - | (cd $@ ; tar xf - ) cd $@ ; tar zxf ../src/agl/FreeFem++.app.tgz sed $@/FreeFem++.app/Contents/Info.plist \ -e "s/@VVERSION@/$(VERSION)$(ADD_PACKAGE_NAME)/g" \ -e "s/@DATE@/`date`/g" cp src/nw/FreeFem++ $@/FreeFem++.app/Contents/bin cp src/nw/ffglut $@/FreeFem++.app/Contents/bin cp src/medit/ffmedit $@/FreeFem++.app/Contents/bin cp examples/plugin/ff-get-dep $@/FreeFem++.app/Contents/bin cp examples/plugin/ff-pkg-download $@/FreeFem++.app/Contents/bin sed $@/FreeFem++.app/Contents/bin/ff-c++ -e 's;FFAPPLI_INC;$@/FreeFem++.app/Contents/include;' chmod a+x $@/FreeFem++.app/Contents/bin/ff-c++ -mkdir $@/FreeFem++.app/Contents/include -mkdir $@/FreeFem++.app/Contents/idp cp examples/plugin/include/* $@/FreeFem++.app/Contents/include cp examples/plugin/*.dylib $@/FreeFem++.app/Contents/lib cp idp/*.idp $@/FreeFem++.app/Contents/idp -if [ -s List-agl-dylib ]; then tar zchvf $@/OtherMacOsLib.tgz `cat List-agl-dylib`; fi; ./config.status --file=$@/Install-MacOS.command:Install-MacOS.command.in chmod a+rx $@/Install-MacOS.command -mkdir $@/FreeFem++.app/Contents/etc echo loadpath += \"./\" >$@/FreeFem++.app/Contents/etc/freefem++.pref echo loadpath += \"$(ff_prefix_dir)/lib\" >>$@/FreeFem++.app/Contents/etc/freefem++.pref echo includepath += \"$(ff_prefix_dir)/edp\" 
>>$@/FreeFem++.app/Contents/etc/freefem++.pref echo includepath += \"$(ff_prefix_dir)/idp\" >>$@/FreeFem++.app/Contents/etc/freefem++.pref $(FF_MAC_PREFIX)_MacOsX.tgz: $(FF_MAC_PREFIX)_MacOsX tar zcvf $(FF_MAC_PREFIX)_MacOsX.tgz $(FF_MAC_PREFIX)_MacOsX # Linux binary-only package # ------------------------- # Include kernel and libc version in static package name PACKAGE_NAME=FreeFem++v$(VERSION)_linux-$(KERNEL_VERSION)_$(LIBC_VERSION)$(OPTIM_TYPE) linux-package: $(PACKAGE_NAME).tgz # No direct dependency to "all" to be able to debug the packaging # procedure on its own. $(PACKAGE_NAME): ListFiles-natives cat ListFiles-natives|xargs tar cfh - | (cd $@ ; tar xf - ) -mkdir $@ cp src/std/FreeFem++ $@ ./copysharedlibs.sh src/std/FreeFem++ $@ cp src/nw/FreeFem++-nw $@ ./copysharedlibs.sh src/nw/FreeFem++-nw $@ cp src/ide/FreeFem++-cs $@ ./copysharedlibs.sh src/ide/FreeFem++-cs $@ cp src/ide/FreeFem++-server $@ ./copysharedlibs.sh src/ide/FreeFem++-server $@ cp src/ide/FreeFem++-client $@ ./copysharedlibs.sh src/ide/FreeFem++-client $@ -cp src/glx/FreeFem++-glx $@ -./copysharedlibs.sh src/glx/FreeFem++-glx $@ -cp src/mpi/FreeFem++-mpi $@ -./copysharedlibs.sh src/mpi/FreeFem++-mpi $@ $(PACKAGE_NAME).tgz: $(PACKAGE_NAME) tar cvzf $@ $< clean-local:: -rm -r $(PACKAGE_NAME) $(PACKAGE_NAME).tgz autofiles:AutoGeneratedFile.tar.gz LIST_REQUIRE_AM= compile config.guess config.sub depcomp install-sh missing test-driver LIST_AM_UNUSED=ar-lib mdate-sh ylwrap py-compile texinfo.tex LIST_GENERATE_FILE= configure config.h.in \ Makefile.in \ 3rdparty/Makefile.in \ 3rdparty/blas/Makefile.in \ 3rdparty/arpack/Makefile.in \ 3rdparty/umfpack/Makefile.in \ 3rdparty/fftw/Makefile.in \ src/Makefile.in \ src/bamglib/Makefile.in \ src/Graphics/Makefile.in \ src/ffgraphics/Makefile.in \ src/ffgraphics/client/Makefile.in \ src/ffgraphics/server/Makefile.in \ src/femlib/Makefile.in \ src/Algo/Makefile.in \ src/lglib/Makefile.in \ src/fflib/Makefile.in \ src/nw/Makefile.in \ src/mpi/Makefile.in \ 
src/bamg/Makefile.in \ src/libMesh/Makefile.in \ src/medit/Makefile.in \ src/bin-win32/Makefile.in \ plugin/Makefile.in \ plugin/seq/Makefile.in \ plugin/mpi/Makefile.in \ examples/Makefile.in \ examples/plugin/Makefile.in \ examples/tutorial/Makefile.in \ examples/misc/Makefile.in \ examples/mpi/Makefile.in \ examples/hpddm/Makefile.in \ examples/ffddm/Makefile.in \ examples/eigen/Makefile.in \ examples/examples/Makefile.in \ examples/bug/Makefile.in \ examples/3d/Makefile.in \ examples/3dSurf/Makefile.in \ examples/3dCurve/Makefile.in LIST_GENERATE_FILE_AM= \ Makefile.am \ 3rdparty/Makefile.am \ 3rdparty/blas/Makefile.am \ 3rdparty/arpack/Makefile.am \ 3rdparty/umfpack/Makefile.am \ 3rdparty/fftw/Makefile.am \ src/Makefile.am \ src/bamglib/Makefile.am \ src/Graphics/Makefile.am \ src/ffgraphics/Makefile.am \ src/ffgraphics/client/Makefile.am \ src/ffgraphics/server/Makefile.am \ src/femlib/Makefile.am \ src/Algo/Makefile.am \ src/lglib/Makefile.am \ src/fflib/Makefile.am \ src/nw/Makefile.am \ src/mpi/Makefile.am \ src/bamg/Makefile.am \ src/libMesh/Makefile.am \ src/medit/Makefile.am \ src/bin-win32/Makefile.am \ plugin/Makefile.am \ plugin/seq/Makefile.am \ plugin/mpi/Makefile.am \ examples/Makefile.am \ examples/plugin/Makefile.am \ examples/tutorial/Makefile.am \ examples/misc/Makefile.am \ examples/mpi/Makefile.am \ examples/hpddm/Makefile.am \ examples/ffddm/Makefile.am \ examples/eigen/Makefile.am \ examples/examples/Makefile.am \ examples/bug/Makefile.am \ examples/3d/Makefile.am \ examples/3dSurf/Makefile.am \ examples/3dCurve/Makefile.am #$(LIST_GENERATE_FILE):$(LIST_GENERATE_FILE_AM) # @echo "WARNING the configure file is older than configure build flies" # @echo "Rebuild configure: do one the three case" # @echo " if you have autoconf # make conf" # @echo " or without # make conf-without-autoconf" # @echo " or by pass this problem # make conf-touch" conf: autoreconf ./config.status --recheck conf-without-autoconf: tar zxvf AutoGeneratedFile.tar.gz 
./config.status --recheck conf-touch: touch $(LIST_GENERATE_FILE) AutoGeneratedFile.tar.gz:$(LIST_GENERATE_FILE) configure.ac $(LIST_REQUIRE_AM) tar cvfz $@ $(LIST_GENERATE_FILE) $(LIST_REQUIRE_AM) FreeFem++-CoCoa:bin/FreeFem++-CoCoa.in ./config.status ./config.status --file="FreeFem++-CoCoa:bin/FreeFem++-CoCoa.in" FreeFem++.app.tgz:./config.status etc/config/FreeFem++.app.in.tgz etc/config/FreeFem++.scrpt-txt.in tar zxf etc/config/FreeFem++.app.in.tgz ./config.status --file=FreeFem++.scrpt-txt:etc/config/FreeFem++.scrpt-txt.in -rm FreeFem++.app/Contents/Resources/Scripts/main.scpt osacompile Makefile-for-Checkam echo '@false'| tr '@' '\t' >> Makefile-for-Checkam if $(MAKE) -f Makefile-for-Checkam 2>&1 >/dev/null ; \ then ok=1 ; \ else \ $(MAKE) -f Makefile-for-Checkam ; \ echo "Need to rebuid configure and Makefile files " ;\ echo "Do autoreconf -i or tar zxvf tar zxvf AutoGeneratedFile.tar.gz" ;\ echo " and ./reconfigure" ;\ ok=0; \ fi ; \ test $$ok -eq 1 .FORCE: FreeFem-sources-4.9/README.md000664 000000 000000 00000031260 14037356732 015633 0ustar00rootroot000000 000000
CI / CD tools | Codacy | LGTM | Coverity | |:------:|:----:|:--------:| | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/710d25bb3c6040c19c3ff7c0f3201835)](https://www.codacy.com/app/sgarnotel/FreeFem-sources?utm_source=github.com&utm_medium=referral&utm_content=FreeFem/FreeFem-sources&utm_campaign=Badge_Grade) | [![Language grade: C/C++](https://img.shields.io/lgtm/grade/cpp/g/FreeFem/FreeFem-sources.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/FreeFem/FreeFem-sources/context:cpp) | Coverity Scan Build Status | Jenkins ([FreeFEM-dev](https://ci.inria.fr/freefem-dev/)): *Develop branch only* | | | macOS | | | Ubuntu | | Windows | |:-----:|:-----------:|:-----------:|:-----------:|:------------:|:------------:|:------------:|:---------:| |**Job**| **10.10** | **10.13** | **10.14** | **16.04** | **18.04** | **19.10** | **7** | | #⁠1 | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1010-job1)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1010-job1/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1013-job1)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1013-job1/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1014-job1)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1014-job1/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1604-job1)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2016.04/job/FreeFEM-sources-ubuntu1604-job1/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1804-job1)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2018.04/job/FreeFEM-sources-ubuntu1804-job1/) | [![Build Status](https://ci.inria.fr/freefem-dev-2/buildStatus/icon?job=FreeFEM-sources-ubuntu1910-job1)](https://ci.inria.fr/freefem-dev-2/view/Ubuntu%2019.10/job/FreeFEM-sources-ubuntu1910-job1/) | [![Build 
Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-windows7-job1)](https://ci.inria.fr/freefem-dev/view/Windows%207/job/FreeFEM-sources-windows7-job1/) | | #⁠2 | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1010-job2)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1010-job2/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1013-job2)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1013-job2/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1014-job2)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1014-job2/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1604-job2)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2016.04/job/FreeFEM-sources-ubuntu1604-job2/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1804-job2)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2018.04/job/FreeFEM-sources-ubuntu1804-job2/) | [![Build Status](https://ci.inria.fr/freefem-dev-2/buildStatus/icon?job=FreeFEM-sources-ubuntu1910-job2)](https://ci.inria.fr/freefem-dev-2/view/Ubuntu%2019.10/job/FreeFEM-sources-ubuntu1910-job2/) | | | #⁠3 | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1010-job3)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1010-job3/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1013-job3)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1013-job3/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1014-job3)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1014-job3/) | [![Build 
Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1604-job3)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2016.04/job/FreeFEM-sources-ubuntu1604-job3/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1804-job3)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2018.04/job/FreeFEM-sources-ubuntu1804-job3/) | [![Build Status](https://ci.inria.fr/freefem-dev-2/buildStatus/icon?job=FreeFEM-sources-ubuntu1910-job3)](https://ci.inria.fr/freefem-dev-2/view/Ubuntu%2019.10/job/FreeFEM-sources-ubuntu1910-job3/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-windows7-job3)](https://ci.inria.fr/freefem-dev/view/Windows%207/job/FreeFEM-sources-windows7-job3/) | | #⁠4 | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1010-job4_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1010-job4_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1013-job4_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1013-job4_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1014-job4_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1014-job4_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1604-job4_openmpi)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2016.04/job/FreeFEM-sources-ubuntu1604-job4_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1804-job4_openmpi)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2018.04/job/FreeFEM-sources-ubuntu1804-job4_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev-2/buildStatus/icon?job=FreeFEM-sources-ubuntu1910-job4_openmpi)](https://ci.inria.fr/freefem-dev-2/view/Ubuntu%2019.10/job/FreeFEM-sources-ubuntu1910-job4_openmpi/) | | | #⁠5 | 
[![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1010-job5_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1010-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1013-job5_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1013-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-macos1014-job5_openmpi)](https://ci.inria.fr/freefem-dev/job/FreeFEM-sources-macos1014-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1604-job5_openmpi)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2016.04/job/FreeFEM-sources-ubuntu1604-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-ubuntu1804-job5_openmpi)](https://ci.inria.fr/freefem-dev/view/Ubuntu%2018.04/job/FreeFEM-sources-ubuntu1804-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev-2/buildStatus/icon?job=FreeFEM-sources-ubuntu1910-job5_openmpi)](https://ci.inria.fr/freefem-dev-2/view/Ubuntu%2019.10/job/FreeFEM-sources-ubuntu1910-job5_openmpi/) | [![Build Status](https://ci.inria.fr/freefem-dev/buildStatus/icon?job=FreeFEM-sources-windows7-job5)](https://ci.inria.fr/freefem-dev/view/Windows%207/job/FreeFEM-sources-windows7-job5/) | Jenkins ([FreeFEM](https://ci.inria.fr/freefem/)): *Master branch* | Release | .pkg | AppImage | .deb | .exe | Docker | |:-------:|:----:|:--------:|:----:|:----:|:------:| | [![Build Status](https://ci.inria.fr/freefem/buildStatus/icon?job=FreeFEM-sources-createRelease)](https://ci.inria.fr/freefem/view/Master/job/FreeFEM-sources-createRelease/) | [![Build Status](https://ci.inria.fr/freefem/buildStatus/icon?job=FreeFEM-sources-deployPKG)](https://ci.inria.fr/freefem/view/Master/job/FreeFEM-sources-deployPKG/) | [![Build 
Status](https://ci.inria.fr/freefem/buildStatus/icon?job=FreeFEM-sources-deployAppImage)](https://ci.inria.fr/freefem/view/Master/job/FreeFEM-sources-deployAppImage/) | [![Build Status](https://ci.inria.fr/freefem/buildStatus/icon?job=FreeFEM-sources-deployDEB-withPETSc)](https://ci.inria.fr/freefem/view/Master/job/FreeFEM-sources-deployDEB-withPETSc/) | [![Build Status](https://ci.inria.fr/freefem/buildStatus/icon?job=deployEXE)](https://ci.inria.fr/freefem/view/Master/job/deployEXE/) | [![Build Status](https://ci.inria.fr/freefem/buildStatus/icon?job=FreeFEM-docker)](https://ci.inria.fr/freefem/view/Docker/job/FreeFEM-docker/) | See [CI/CD Tools](#cicd-tools)
# FreeFEM sources [FreeFEM](https://freefem.org) is a partial differential equation solver for non-linear multi-physics systems in 2D and 3D using the finite element method. Problems involving partial differential equations from several branches of physics, such as fluid-structure interactions, require interpolations of data on several meshes and their manipulation within one program. FreeFEM includes a fast interpolation algorithm and a language for the manipulation of data on multiple meshes. It is written in C++ and the FreeFEM language is a C++ idiom. ## For users The user documentation is available [here](https://github.com/FreeFem/FreeFem-doc). If you use FreeFEM for academic research, please cite it as follows: **BibTeX:** ``` @article{MR3043640, AUTHOR = {Hecht, F.}, TITLE = {New development in FreeFem++}, JOURNAL = {J. Numer. Math.}, FJOURNAL = {Journal of Numerical Mathematics}, VOLUME = {20}, YEAR = {2012}, NUMBER = {3-4}, PAGES = {251--265}, ISSN = {1570-2820}, MRCLASS = {65Y15}, MRNUMBER = {3043640}, URL = {https://freefem.org/} } ``` **APA:** ``` Hecht, F. (2012). New development in FreeFem++. Journal of numerical mathematics, 20(3-4), 251-266. ``` **ISO 690:** ``` HECHT, Frédéric. New development in FreeFem++. Journal of numerical mathematics, 2012, vol. 20, no 3-4, p. 251-266. ``` **MLA:** ``` Hecht, Frédéric. "New development in FreeFem++." Journal of numerical mathematics 20.3-4 (2012): 251-266. ``` ## For developers All development efforts take place in the _develop_ branch (or in feature branches: feature-cmake, geneo4PETSc, ... for specific projects). **Do not commit to the master branch!** Have a look at the [Wiki](https://github.com/FreeFem/FreeFem-sources/wiki)! ## CI/CD Tools ### FreeFEM-dev See [Jenkins configuration files](etc/jenkins) ### FreeFEM All: all dependency packages are installed (computer with root access).
No: dependency packages are not installed (computer without root access). 1: Ubuntu 18.04 x86 2: macOS 10.13 3: macOS 10.9 4: Windows 7 + MSYS2 + MS MPI 7 __Executed commands:__ Automatic configuration: ```bash autoreconf -i ``` Configuration: ```bash ./configure --enable-download --enable-optim ``` If you do not have administrator rights or do not want FreeFEM files scattered around on your machine, please use the `--prefix` option, e.g.: ```bash ./configure --enable-download --enable-optim --prefix=${HOME}/FreeFem-install ``` Download: ```bash ./3rdparty/getall -a ``` PETSc: ```bash cd 3rdparty/ff-petsc make petsc-slepc cd - ./reconfigure ``` Make: ```bash make -j2 make check ``` Install: ```bash (sudo) make install ``` See [CI/CD Tools Wiki](https://github.com/FreeFem/FreeFem-sources/wiki/CI-CD-Tools) for more information. FreeFem-sources-4.9/bin/000775 000000 000000 00000000000 14037356732 015122 5ustar00rootroot000000 000000 FreeFem-sources-4.9/bin/Build-MacOS-Distribution000775 000000 000000 00000005531 14037356732 021530 0ustar00rootroot000000 000000 #/usr/bin/env bash DESTDIR=no while [ $# -ne 0 ] ; do argsp=$args args="$args '$1'" case "$1" in -[h?]*) echo usage $0 "" exit 0; ;; DIRMPI=*) MPI=`echo $1|sed 's/[A-Z0-9]*=//'` ;; DESTDIR=*) DESTDIR=`echo $1|sed 's/[A-Z0-9]*=//'` ;; *);; esac; shift done prefix=$(grep '"prefix"' config.status|awk -F'"' '{print $4}') bindir=$(grep '"bindir"' config.status|awk -F'"' '{print $4}') ff_prefix_dir_lib=$(grep '"ff_prefix_dir_lib"' config.status|awk -F'"' '{print $4}') echo prefix=$prefix echo lib=$prefix/lib echo biblib=$bindir echo rlib=../lib execrlib=@executable_path/$rlib if [ test $(dirname $bindir) != "prefix" ] ; then echo wrong place to change dylib for install fi echo "DESTDIR=$DESTDIR ." if test "$DESTDIR" = no ; then echo "usage $0 DESTDIR=/tmp/ff++ MPI=MPIcompiledir" exit 1; fi if [ ! 
-f src/Graphics/sansrgraph.cpp ] ; then echo " Not in FreeFrem++ current dir :$PWD" exit 1; fi mkdir -p "$DESTDIR" DESTDIR=`realpath $DESTDIR` dylib_of() { # otool -L $*|egrep -v '/System/Library/Frameworks/|/usr/lib/'| awk -F '[ *:]' 'NR>2 {if($1) {print $1}}'|sed 's/ *//g' | sort -u | >&2 grep -v "$DESTDIR" otool -L $*|egrep -v '/System/Library/Frameworks/|/usr/lib/|@rpath'| awk -F '[ *:]' 'NR>2 {if($1) {print $1}}'|sed 's/ *//g' | sort -u | grep -v "$DESTDIR"; } if [ ! -d $DESTDIR ] ; then echo dir $DESTDIR is no created exit 1; fi MPIRUN=`awk '$1=="MPIRUN" {print $3}' Makefile` MPICC=`awk '$1=="MPIRUN" {print $3}' Makefile` FC=`awk '$1=="FC" {print $3}' Makefile` FF_prefix_petsc=`awk '$1=="FF_prefix_petsc" {print $3}' Makefile` # install lib fortran # install mpi # install petsc # install ggplugin libs if [ -n "$DIRMPI" -a -d "$DIRMPI" ] ; then ( cd $DIRMPI; make install DESTDIR=$DESTDIR ) if [ $? -ne 0 ] ; then echo error install MPI exit 1; fi fi make make # build the lis of external Get-list-dylib-use make install DESTDIR="$DESTDIR" // get relativ path of MKL=no ffcpf=/tmp/copy-dylib.$$.txt rm $ffcpf for i in $(cat etc/list-dylib|grep -v /intel/) ;do d=$(dirname $i) dl=$(dirname $i) bi=$(basename $i) case $i in */ff-petsc/*) echo mkdir -p $DESTDIR/$d; echo cp $i $DESTDIR/$d;; /usr/local/ff++/*) echo mkdir -p $DESTDIR/$lib; echo cp $i $DESTDIR/$lib echo $DESTDIR/$lib/$i $bi $execrlib >> $ffcpf ;; *libmkl_*) MKL=yes;; esac done if test $MKL = "yes" ; then for i in $(list-dylib-mkl) ;do bi=$(basename $i) echo cp $i $DESTDIR/$lib echo $DESTDIR/$lib/$i $bi $execrlib >> $ffcpf done fi # update lib in distrib .. 
bin/change-dylib $ffcpf $DESTDIR/$lib/*dylib bin/change-dylib $ffcpf $DESTDIR/$lib/*dylib bin/change-dylib $ffcpf $DESTDIR/$lib/*dylib bin/change-dylib $ffcpf $(find $DESTDIR/$ff_prefix_dir_lib -name '*dylib') for i in FreeFem++ FreeFem++-mpi FreeFem++-nw bamg cvmsh2 ffglut ffmedit;do bin/change-dylib $ffcpf $DESTDIR/$bindir/$i done FreeFem-sources-4.9/bin/FreeFem++-CoCoa.in000775 000000 000000 00000002673 14037356732 020106 0ustar00rootroot000000 000000 #!/bin/sh # # EXTENSIONS : ".edp" # Accepted file extentions # OSTYPES : "****" # Accepted file types # ROLE : None # Role (Editor, Viewer, None) # SERVICEMENU : FreeFem++ # Name of Service menu item # dir=`dirname $0`; prefix="@prefix@" exec_prefix="@exec_prefix@" bindir="@bindir@" ff="@bindir@/FreeFem++" fa="-glut '@bindir@/ffglut' " q="'" end="exit;" begin="cd $PWD;" # begin the cmd generation ---------- cmd="" # ----------------- for i in "$@"; do d=`dirname "$i"`; f=`basename "$i"`; if [ -f "$i" ] ; then np=`awk -v npo=$np '$1=="//" && $2=="NBPROC" { if( vv == ""){ print $3;}; vv=1}' "$i"` npa=`awk -v npo=$np '$1=="//" && $2=="PARAM" { if( vv == ""){ for(i=3;i<=NF;++i)print $i;}; vv=1}' "$i"` # echo --- $np -- $npa -------- if [ "0$np" -gt 0 ] ; then test -n "@TEST_FFPPMPI@" && ff="@bindir@/ff-mpirun -np $np" fi if [ -n "$npa" ] ; then fa="$fa $npa"; fi cmd="$cmd cd $q$d$q; $ff $q$f$q $fa ;" fi done # ------------------ if [ -z "$cmd" ]; then cmd="$ff;" fi # ------------------ cmd="$cmd" # ---- end of cmd generation ---- echo 'do script "'$cmd'"' # # # send the command $cmd to the apple terminal via osascript # # (echo $0;echo $*;echo "--$TERM--";printenv;set) >/tmp/tutu if [ -z "$TERM_PROGRAM" ] ; then echo ' tell application "Terminal" activate try do script "'$begin$cmd$end'" end try end tell ' | osascript else set -e eval $cmd fi; FreeFem-sources-4.9/bin/Get-list-dylib-use000775 000000 000000 00000001754 14037356732 020442 0ustar00rootroot000000 000000 #export DYLD_PRINT_LIBRARIES=1 ##((src/nw/FreeFem++ 
;test -f src/mpi/FreeFem++-mpi && src/mpi/FreeFem++-mpi ) 2>&1 | awk '/dyld: loaded:/ {print $3}'| sort -u; #otool -L plugin/seq/*.dylib plugin/mpi/*.dylib |\ # awk ' NF>2 {print $1} ' |\ # sort -u) | sed s,@rpath/,,|\ # egrep -v ^/usr/lib/|\ # grep -v ^/System/ |\ # egrep '.dylib$'| sort -u \ #|awk -F/ '{ if (f[$NF]!=1 ) print $0; f[$NF]=1;}' ( (echo export DYLD_PRINT_LIBRARIES=1;ls plugin/seq/*.dylib|grep -v fflapack| awk '{print "src/nw/FreeFem++ -check_plugin ",$0}' )|sh 2>&1|awk '/dyld: loaded:/ {print $3}'| sort -u (echo export DYLD_PRINT_LIBRARIES=1;ls plugin/mpi/*.dylib|grep -v fflapack| awk '{print "src/mpi/FreeFem++-mpi -check_plugin ",$0}' )|sh 2>&1|awk '/dyld: loaded:/ {print $3}'| sort -u )| sort -u | grep .dylib > etc/list-dylib egrep -v '/usr/lib/|/System/|./plugin/' etc/list-dylib >etc/list-dylib-no-os echo "list extern lib use by ff++" cat etc/list-dylib-no-os echo build file etc/list-dylib-no-os etc/list-dylibFreeFem-sources-4.9/bin/Install-MacOS.command000775 000000 000000 00000002717 14037356732 021042 0ustar00rootroot000000 000000 #!/bin/sh appl=/Applications cd `dirname $0` echo "Installtion of Freefem++ " if [ -f OtherMacOsLib.tgz ]; then for i in `tar ztf OtherMacOsLib.tgz`; do if [ ! -f "/$i" ]; then echo " the Libary '/$i' don't exist => install (need of admin password)" sudo tar zxvf OtherMacOsLib.tgz -C / $i else echo " the Libary '/$i' exist " fi done # verif .... for i in `tar ztf OtherMacOsLib.tgz`; do if [ ! 
-f "/$i" ]; then echo " the Libary '/$i' don't exist FreeFEM cannot run (call you adminisator sorry)" echo "Sorry" exit 1; fi done fi echo " copy FreeFem++.app in "$appl" " if [ -d FreeFem++.app ] ; then rsync -avHE --delete FreeFem++.app/ "$appl"/FreeFem++.app fi lbin=`cd $appl"/FreeFem++.app/Contents/bin/; echo *` echo " install $lbin commands in /usr/local/bin (need of admin password)" sudo mkdir -p /usr/local/bin sudo ln -s "$appl"/FreeFem++.app/Contents/bin/* /usr/local/bin sudo rm /usr/local/bin/ff-c++ sudo sed <"$appl"/FreeFem++.app/Contents/bin/ff-c++ >/usr/local/bin/ff-c++ \ -e 's;FFAPPLI_INC;$app/FreeFem++.app/Contents/include;' chmod a+rx /usr/local/bin/ff-c++ if [ -d "/Users/hecht/ff++-3.0-2" ] ;then echo Warning "/Users/hecht/ff++-3.0-2" is a dirctory keep this. else rm -f "/Users/hecht/ff++-3.0-2" ln -s /FreeFem++.app/Contents/ '/Users/hecht/ff++-3.0-2" fi echo "++ FreeFem++ is correctly install in $appl directory." echo " install in /usr/local/bin: $lbin " echo " Thanks for using FreeFem++ v3 " FreeFem-sources-4.9/bin/Install-MacOS.command.in000664 000000 000000 00000003055 14037356732 021440 0ustar00rootroot000000 000000 #!/bin/sh appl=/Applications cd `dirname $0` echo "Installtion of Freefem++ " if [ -f OtherMacOsLib.tgz ]; then for i in `tar ztf OtherMacOsLib.tgz`; do if [ ! -f "/$i" ]; then echo " the Libary '/$i' don't exist => install (need of admin password)" sudo tar zxvf OtherMacOsLib.tgz -C / $i else echo " the Libary '/$i' exist " fi done # verif .... for i in `tar ztf OtherMacOsLib.tgz`; do if [ ! 
-f "/$i" ]; then echo " the Libary '/$i' don't exist FreeFEM cannot run (call you adminisator sorry)" echo "Sorry" exit 1; fi done fi echo " copy FreeFem++.app in "$appl" " if [ -d FreeFem++.app ] ; then rsync -avHE --delete FreeFem++.app/ "$appl"/FreeFem++.app fi lbin=`cd "$appl"/FreeFem++.app/Contents/bin/; echo *` echo " install $lbin commands in /usr/local/bin (need of admin password)" sudo mkdir -p /usr/local/bin sudo mkdir -p `dirname @ff_prefix_dir@` sudo ln -s "$appl"/FreeFem++.app/Contents/bin/* /usr/local/bin sudo rm /usr/local/bin/ff-c++ sudo sed <"$appl"/FreeFem++.app/Contents/bin/ff-c++ >/usr/local/bin/ff-c++ \ -e 's;FFAPPLI_INC;$app/FreeFem++.app/Contents/include;' chmod a+rx /usr/local/bin/ff-c++ if [ -d "@ff_prefix_dir@" ] ; then echo Warning "@ff_prefix_dir@" is a dirctory keep this. else sudo rm -f "@ff_prefix_dir@" sudo ln -s "$appl/FreeFem++.app/Contents" "@ff_prefix_dir@" echo link "@ff_prefix_dir@ -> $appl/FreeFem++.app/Contents/ " fi echo "++ FreeFem++ is correctly install in $appl directory." 
echo " install in /usr/local/bin: $lbin " echo " Thanks for using FreeFem++ v3 "FreeFem-sources-4.9/bin/Searchdylib000775 000000 000000 00000001363 14037356732 017304 0ustar00rootroot000000 000000 otool -L ./src/mpi/FreeFem++-mpi src/nw/FreeFem++ ./src/nw/ffglut ./src/medit/ffmedit `find plugin -name '*.dylib'` | awk '/compatibility version/ { print $1}' | sort -u |egrep -v "^/usr/lib/|^/System/Library/" >/tmp/l-ff++0 for i in 0 1 2 3 4 5 ; do ((i1=i+1)) ( for f in `sed 's:@rpath/::' < /tmp/l-ff++$i`; do if [ -f $f ] ; then echo $f else IFSO="$IFS" IFS=: for d in $DYLD_LIBRARY_PATH ; do if [ -f $d/$f ] ; then echo $d/$f;fi done IFS="$IFSO" fi done ) >/tmp/l-ff++tt # cat /tmp/l-ff++tt cat /tmp/l-ff++tt| xargs otool -L | awk '/compatibility version/ { print $1}' | sort -u |egrep -v "^/usr/lib/|^/System/Library/" >/tmp/l-ff++$i1 done #echo '------' cat /tmp/l-ff++tt FreeFem-sources-4.9/bin/build/000775 000000 000000 00000000000 14037356732 016221 5ustar00rootroot000000 000000 FreeFem-sources-4.9/bin/build/cleancrlf000775 000000 000000 00000004243 14037356732 020103 0ustar00rootroot000000 000000 #!/usr/bin/perl ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . # ############################################################################ # SUMMARY : ... 
# LICENSE : LGPLv3
# ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
# AUTHORS : Antoine Le Hyaric
# E-MAIL : ...

# clean-up all CR/LF line endings (usually before patching to avoid failures after editing the same source files on
# different systems)
#
# Usage: cleancrlf [DIRECTORY]
#   Walks DIRECTORY (the current directory when omitted) and rewrites, in place, every regular file that contains
#   CR/LF line endings so that it uses LF only. Binary-looking extensions and Mercurial metadata are skipped.

use strict;
use File::Find;

# Root of the tree to clean.
my $root = @ARGV ? $ARGV[0] : '.';

# change files that have been recorded in FF using DOS line endings
# The tree is walked from Perl (instead of `find ...` / `cat ...` through a shell) so that file names containing
# spaces or shell metacharacters are handled correctly. Like `find -type f`, symbolic links are not reported.
my @files;
find({ no_chdir => 1,
       wanted   => sub { push @files, $_ if -f $_ && !-l $_; } },
     $root);

foreach my $file(@files){
  next if $file=~/\.(jpg|eps|mcp|pdf|pgm|o|a|so|png|gz|tgz)$/;
  next if $file=~/\.hg\//;

  # Read the raw bytes; an unreadable file is reported and skipped rather than aborting the whole run.
  my $in;
  unless(open($in, '<', $file)){
    warn "cleancrlf: cannot read $file: $!\n";
    next;
  }
  binmode $in;
  my $contents=do { local $/; <$in> };
  close $in;
  $contents='' unless defined $contents;
  my $oldcontents=$contents;

  # changing line-ending conventions. all ffcs patches work from unix-style (ie no CR) files
  $contents=~s/\r$//gm;
  next if $contents eq $oldcontents;

  print "cleancrlf: Unix line-ending for $file...\n";
  open(my $out, '>', $file) or die "cleancrlf: cannot write $file: $!\n";
  binmode $out;    # never let an I/O layer re-introduce CR characters
  print $out $contents;
  close $out or die "cleancrlf: error while writing $file: $!\n";
}

# Local Variables:
# mode:cperl
# ispell-local-dictionary:"british"
# coding:utf-8
# End:
FreeFem-sources-4.9/bin/build/download000775 000000 000000 00000005201 14037356732 017754 0ustar00rootroot000000 000000 #!/usr/bin/env bash # -xe ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. # # # # FreeFEM is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU Lesser General Public License for more details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with FreeFEM. If not, see . 
# ############################################################################ # SUMMARY : Download a file by whatever means available # LICENSE : LGPLv3 # ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE # AUTHORS : Antoine Le Hyaric # E-MAIL : ... # $1=url # $2=local name # $3=BAD_CERT if the SSL certificate of the web server is wrong if test -x /usr/bin/wget || test -x /usr/bin/wget.exe || test -x /opt/local/bin/wget then opts= if test "$3" = BAD_CERT then opts=--no-check-certificate fi # [[man:wget]] we could use no-verbose to avoid mixing several wget outputs together when called concurrently in # [[file:../3rdparty/getall]], but then nothing moves while the download goes on, and the user may think that it's # stuck. wget "$1" --timeout=30 --tries=2 --output-document="$2" $opts ret=$? elif test -x /usr/bin/curl then curl -L "$1" --output "$2" --connect-timeout 30 ret=$? elif test -x /usr/bin/GET then GET "$1" > "$2" ret=$? else echo FF download: No way to download files from the web echo FF download: Please install wget or curl or GET exit 1 fi if test "$ret" -eq 0 then case `file $2` in *zip*) exit 0 ;; *) echo " incorrect file type => removing " $2; rm $2; exit 1 ;; esac fi echo "Error download $2" exit $ret # Local Variables: # mode:shell-script # ispell-local-dictionary:"british" # coding:utf-8 # eval:(flyspell-prog-mode) # eval:(outline-minor-mode) # End: # LocalWords: emacs FreeFem-sources-4.9/bin/build/ff-md5000775 000000 000000 00000003234 14037356732 017227 0ustar00rootroot000000 000000 #!/usr/bin/perl ############################################################################ # This file is part of FreeFEM. # # # # FreeFEM is free software: you can redistribute it and/or modify # # it under the terms of the GNU Lesser General Public License as # # published by the Free Software Foundation, either version 3 of # # the License, or (at your option) any later version. 
#
#                                                                          #
# FreeFEM is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of           #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
# GNU Lesser General Public License for more details.                      #
#                                                                          #
# You should have received a copy of the GNU Lesser General Public License #
# along with FreeFEM. If not, see <http://www.gnu.org/licenses/>.          #
############################################################################
# SUMMARY : Check a file against an expected MD5 digest
# LICENSE : LGPLv3
# ORG     : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
# AUTHORS : Antoine Le Hyaric
# E-MAIL  : ...

# Usage: ff-md5 FILE EXPECTED_MD5
# Exit status: 0 = digest matches, 1 = mismatch (or FILE cannot be read), 2 = FILE does not exist.

use Digest::MD5 qw(md5_hex); # [[http://perldoc.perl.org/Digest/MD5.html]]

($ff,$mm) =@ARGV;
if( ! -e "$ff") {exit 2; }
# a directory has no digest: treat it as a mismatch instead of letting the read below die
if( -d "$ff") {exit 1; }

# Read the file through a Perl handle instead of `cat $ff`: the name is never seen by a shell, so names containing
# spaces or shell metacharacters work, and the bytes are hashed unmodified (binmode).
open(my $fh,'<',$ff) or exit 1;
binmode($fh);
$hh=Digest::MD5->new->addfile($fh)->hexdigest;
close($fh);
# print "ff=$ff, hh= $hh == $mm, \n";

# Digests are strings and must be compared with "eq". The numeric "==" first converts both operands to numbers
# (0 for every digest that starts with a letter), so two different digests could compare equal.
# lc() accepts an expected digest written in upper case.
if ( defined($mm) && lc($hh) eq lc($mm)) {exit 0;}
else {exit 1;}
FreeFem-sources-4.9/bin/build/links2files000775 000000 000000 00000005112 14037356732 020373 0ustar00rootroot000000 000000 
#!/usr/bin/perl
############################################################################
# This file is part of FreeFEM.                                            #
#                                                                          #
# FreeFEM is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU Lesser General Public License as           #
# published by the Free Software Foundation, either version 3 of           #
# the License, or (at your option) any later version.                      #
#                                                                          #
# FreeFEM is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of           #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
# GNU Lesser General Public License for more details.                      #
#                                                                          #
# You should have received a copy of the GNU Lesser General Public License #
# along with FreeFEM. If not, see <http://www.gnu.org/licenses/>.          #
############################################################################
# SUMMARY : ...
# LICENSE : LGPLv3
# ORG     : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
# AUTHORS : Antoine Le Hyaric
# E-MAIL  : ...
use strict;
use File::Basename; # dirname()
use File::Spec; # file_name_is_absolute(), catfile()

# if a file is a soft link, just convert it into a real file. I know this is dangerous if the file the link points to
# changes, but I don't have many choices to make the MinGW compilers work (they do not understand Cygwin softlinks).

traverse(<*>);

# Walks every path given in @_ : directories are traversed recursively, symbolic links are replaced in place by a
# copy of the file they finally point to, all other files are left untouched.
sub traverse{
  foreach my $arg(@_){

    # 4/12/10: under cygwin, "find" seems to have random problems (the file system lags behind when doing many file
    # moves in quick succession?), so just replace it with a local recursive subroutine.

    if(-d $arg){
      print "links2files: traversing $arg...\n";
      traverse(<$arg/*>);
      next;
    }

    # do not use readlink -f because it does not exist on Mac. Result is in $org.
    # The chain of links is followed with Perl's builtin readlink() so that no shell is involved. A relative target
    # is relative to the directory holding the link (not to the current directory), so it is re-anchored there
    # before the next hop.
    my $org=$arg;
    my $hops=0;
    while(-l $org){
      my $target=readlink($org);
      last unless defined $target;
      $target=File::Spec->catfile(dirname($org),$target) unless File::Spec->file_name_is_absolute($target);
      $org=$target;

      # a cycle of links would otherwise never terminate
      die "links2files: too many levels of symbolic links for $arg" if ++$hops>40;
    }

    if(-l $arg){
      if(-e $org){
	print "links2files: $arg -> $org\n";
	unlink $arg;

	# list form of system(): the two names are passed to cp as-is, whatever characters they contain
	system('cp',$org,$arg);

	# sometimes on Cygwin the resulting file is of size zero and not readable by anyone? And then if we try again it
	# works fine!

	die "$arg is of size 0" unless -s $arg;
      }
    }
  }
}

# Local Variables:
# mode:cperl
# ispell-local-dictionary:"british"
# coding:utf-8
# End:
FreeFem-sources-4.9/bin/build/orgindex000775 000000 000000 00000010467 14037356732 017776 0ustar00rootroot000000 000000 
#!/usr/bin/perl
############################################################################
# This file is part of FreeFEM.                                            #
#                                                                          #
# FreeFEM is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU Lesser General Public License as           #
# published by the Free Software Foundation, either version 3 of           #
# the License, or (at your option) any later version.                      #
#                                                                          #
# FreeFEM is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of           #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
# GNU Lesser General Public License for more details.                      #
#                                                                          #
# You should have received a copy of the GNU Lesser General Public License #
# along with FreeFEM.
If not, see <http://www.gnu.org/licenses/>.          #
############################################################################
# SUMMARY : Build an index of all Emacs org-mode hyperlinks
# LICENSE : LGPLv3
# ORG     : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
# AUTHORS : Antoine Le Hyaric
# E-MAIL  : ...

use strict;
use warnings;
use threads;
use threads::shared;
use Path::Class; # relative()
use File::Basename; # dirname()
use Cwd; # getcwd()

# Scan all files in the FF directory tree

my $pwd=getcwd();
my %anchors; # anchor name -> list of org hyperlinks pointing at it, filled by all worker threads
share %anchors;
my @q; # work queue: one entry per file still to be scanned
share @q;
my $dir='';
my @ignore=`cat .hgignore`;
foreach my $f(`find . -type f`){push @q,$f}

# core count inspired from [[file:../../../alh/perl/System.ph::nbcores]]

sub nbcores{
  my $n=1;
  if(-e "/proc/cpuinfo"){$n=`grep 'processor.*:' /proc/cpuinfo|wc -l`} # linux
  elsif(-x "/usr/sbin/sysctl"){$n=`sysctl -n hw.ncpu`} # macos
  elsif(defined $ENV{NUMBER_OF_PROCESSORS}){$n=$ENV{NUMBER_OF_PROCESSORS}} # windows
  return $n;
}

# start as many threads as possible: one worker per core, each one draining the shared queue @q.
# The core count is evaluated once, because nbcores() runs an external command.

my @threads;
my $nbthreads=nbcores();
for(my $i=0;$i<$nbthreads;$i++){push @threads,threads->create(\&scanfiles)}
foreach(@threads){$_->join()}

# Worker: takes file names from @q until it is empty, rewrites "~/" hyperlinks as relative paths and records every
# <<anchor>> found in %anchors.
sub scanfiles{
  while(defined(my $f=pop @q)){

    # relative path name is required for the index to be operational on any user machine

    $f=~s/^\.\///;
    chomp $f;
    my $frel=file($f)->relative($pwd);

    # files to skip because of .hgignore

    my $found=0;
    foreach my $r(@ignore){
      chomp $r;
      if($f=~/$r/){
	$found=1;
	last;
      }
    }
    next if $found;

    # files to skip for other reasons

    next if $f=~/\.(bmp|png|jpg|eps|pdf|tar|gz|zip|tgz)$/; # not text
    next if $f=~/\.hg\//; # not text
    next if $f=~/examples\+\+-load\/include\//; # duplicated text

    # make sure that we don't leave absolute paths in the hyperlinks because these would not work on other machines

    my $cref=`cat $f`;
    my $c=$cref;
    while($cref=~m/\[\[file:([^:\]]+)(.*)\]\]/g){

      # hyperlink potentially containing an absolute path

      my $labs=$1;
      if($labs=~/^~\//){

	# set HOME value

	my $labshome=$labs;
	$labshome=~s/^~/$ENV{HOME}/;

	# change absolute path to relative in file contents

	my
$lrel=file($labshome)->relative(dirname($f)); my $labsqm=quotemeta($labs); $c=~s/\[\[file:$labsqm/\[\[file:$lrel/g; } } # update file if some paths were changed if($c ne $cref){ print "Changed hyperlink paths to relative in $f\n"; open FILE,">$f" or die; print FILE $c; close FILE; } # find name anchors in file contents while($c=~m/<<([^<> ,{}]+)>>/g){$anchors{$1}.=" [[file:${frel}::$1][$frel]]"} } } # print out all existing hyperlink anchors print "\n"; open OUT,">index.org" or die; print OUT "# -*- mode:org;coding:utf-8 -*-\n"; print OUT "# Hyperlinks into the FreeFEM source, built with [[file:build/orgindex]]\n"; print OUT "# (do not modify this file manually: update the source files and run [[file:build/orgindex]] again)\n"; print OUT "\n"; foreach(sort {uc($a) cmp uc($b)} keys %anchors){ print OUT "$_"; my $padding=25; if(length($_)<$padding){print OUT " "x($padding-length($_))} print OUT "$anchors{$_}\n"; } close OUT; # Local Variables: # mode:cperl # ispell-local-dictionary:"british" # coding:utf-8 # End: FreeFem-sources-4.9/bin/cleanregen.sh000775 000000 000000 00000000716 14037356732 017570 0ustar00rootroot000000 000000 #!/bin/sh -x # Clean files that can be regenerated, to avoid CVS conflicts # Antoine Le Hyaric - LJLL Paris 6 - lehyaric@ann.jussieu.fr # $Id$ find . -name Makefile.in -exec rm {} \; rm HISTORY rm config.h.in rm configure rm examples/tutorial/all.edp rm examples/misc/all.edp rm examples/misc/regtests.edp rm examples/examples/all.edp rm src/fflib/strversionnumber.cpp rm src/ide/hl_lex.c++ rm src/ide/hl_yacc.c++ rm src/ide/hl_yacc.h rm src/lglib/lg.tab.?pp FreeFem-sources-4.9/bin/copysharedlibs.sh000775 000000 000000 00000000715 14037356732 020477 0ustar00rootroot000000 000000 #!/bin/sh # Find out which shared libs an executable needs and copy them # Antoine Le Hyaric - LJLL Paris 6 - lehyaric@ann.jussieu.fr - 22/11/04 # $Id$ # $1=executable to analyze if test ! 
-x $1 then echo $1 is not an executable exit 1 fi # $2= where to copy shared libs if test ! -d $2 then echo $2 is not a directory fi # List all shared libs libs=`ldd $1|awk '{print $3}'` if test "$libs" != "dynamic" -a "$libs" != "" then cp $libs $2 fi FreeFem-sources-4.9/bin/ff-md5000775 000000 000000 00000000472 14037356732 016131 0ustar00rootroot000000 000000 #!/usr/bin/perl use Digest::MD5 qw(md5_hex); $num_args = $#ARGV + 1; if ($num_args != 1) { print "Usage: ff-md5 filename\n"; exit 1; } $filename = $ARGV[0]; if (-e $filename) { print "# md5 $filename : ", md5_hex($filename), "\n"; } else { print " ff-md5 Error : $filename do not exist \n"; exit 1; }FreeFem-sources-4.9/bin/get_mkl_dylib000775 000000 000000 00000000560 14037356732 017656 0ustar00rootroot000000 000000 lmkl="libmkl_tbb_thread.dylib libmkl_mc.dylib libmkl_mc3.dylib libmkl_avx.dylib libmkl_avx2.dylib libmkl_avx512.dylib" dlp=$(echo $DYLD_LIBRARY_PATH| sed 's/:/ /g') (for i in $lmkl; do for d in $dlp;do test -f $d/$i && echo $d/$i done done grep /opt/intel/ etc/list-dylib ) |sort -u > etc/list-dylib-mkl echo cat etc/list-dylib-mkl cat etc/list-dylib-mkl FreeFem-sources-4.9/bin/installdylib000775 000000 000000 00000001561 14037356732 017545 0ustar00rootroot000000 000000 DESTDIR=$1 case "$DESTDIR" in */|'') ;; *) DESTDIR=$DESTDIR/;; esac echo DESTDIR=$DESTDIR cd `dirname $0` cd .. 
grep ' = ' Makefile| grep ff_prefix_dir| sed 's/ = /=/' >/tmp/prefix-list.sh source /tmp/prefix-list.sh pwd=`pwd` pwd3lib="$pwd/3rdparty/lib" #echo $pwd #echo $pwd3lib for lib in $DESTDIR/$ff_prefix_dir_lib/*.dylib $DESTDIR/$ff_prefix_dir_lib_mpi/*dylib ; do echo -- $lib dirlib=`dirname $lib` ooo=/tmp/otool-`basename $lib` # echo $lib otool -L $lib| awk '/compatibility version/ {print $1}'>$ooo grep $pwd3lib $ooo for i in `grep $pwd3lib $ooo` ; do j=`echo $i|sed "s;$pwd3lib;$ff_prefix_dir_lib;"` #echo -- $ff_prefix_dir_lib #echo -- $i #echo -- $pwd3lib chmod a+w $lib test -n "$j" -a $i != "$j" && install_name_tool -change $i $j $lib test -n "$j" -a $i != "$j" && echo install_name_tool -change $i $j $lib chmod a-w $lib done done FreeFem-sources-4.9/bin/keys.awk000664 000000 000000 00000000617 14037356732 016605 0ustar00rootroot000000 000000 # awk -f keys.awk examples/tutorial/lestables|sort -u >ff-name $2 > 256 && $2 < 1000 && NF == 3 && /^ / { {print $1,"keywords"}} /type :/ { if (nn !="") {print nn,"variable"}; nn= substr($1,1,length($1)-1); #print nn, "++"; if(!nn) {nn= substr($2,1,length($2)-1); } } /operator/ && nn {op=substr($0,12,2); print nn,op ; nn ="" } # operator. 
: $ END { if (nn !="") {print nn,"variable"} } FreeFem-sources-4.9/bin/modpath.iss000664 000000 000000 00000011461 14037356732 017301 0ustar00rootroot000000 000000 // ---------------------------------------------------------------------------- // // Inno Setup Ver: 5.2.1 // Script Version: 1.3.1 // Author: Jared Breland // Homepage: http://www.legroom.net/software // // Script Function: // Enable modification of system path directly from Inno Setup installers // // Instructions: // Copy modpath.iss to the same directory as your setup script // // Add this statement to your [Setup] section // ChangesEnvironment=yes // // Add this statement to your [Tasks] section // You can change the Description or Flags, but the Name must be modifypath // Name: modifypath; Description: &Add application directory to your system path; Flags: unchecked // // Add the following to the end of your [Code] section // setArrayLength must specify the total number of dirs to be added // Dir[0] contains first directory, Dir[1] contains second, etc. 
// function ModPathDir(): TArrayOfString; // var // Dir: TArrayOfString; // begin // setArrayLength(Dir, 1) // Dir[0] := ExpandConstant('{app}'); // Result := Dir; // end; // #include "modpath.iss" // ---------------------------------------------------------------------------- procedure ModPath(); var oldpath: String; newpath: String; pathArr: TArrayOfString; aExecFile: String; aExecArr: TArrayOfString; i, d: Integer; pathdir: TArrayOfString; begin // Get array of new directories and act on each individually pathdir := ModPathDir(); for d := 0 to GetArrayLength(pathdir)-1 do begin // Modify WinNT path if UsingWinNT() = true then begin // Get current path, split into an array RegQueryStringValue(HKEY_LOCAL_MACHINE, 'SYSTEM\CurrentControlSet\Control\Session Manager\Environment', 'Path', oldpath); oldpath := oldpath + ';'; i := 0; while (Pos(';', oldpath) > 0) do begin SetArrayLength(pathArr, i+1); pathArr[i] := Copy(oldpath, 0, Pos(';', oldpath)-1); oldpath := Copy(oldpath, Pos(';', oldpath)+1, Length(oldpath)); i := i + 1; // Check if current directory matches app dir if pathdir[d] = pathArr[i-1] then begin // if uninstalling, remove dir from path if IsUninstaller() = true then begin continue; // if installing, abort because dir was already in path end else begin abort; end; end; // Add current directory to new path if i = 1 then begin newpath := pathArr[i-1]; end else begin newpath := newpath + ';' + pathArr[i-1]; end; end; // Append app dir to path if not already included if IsUninstaller() = false then newpath := newpath + ';' + pathdir[d]; // Write new path RegWriteStringValue(HKEY_LOCAL_MACHINE, 'SYSTEM\CurrentControlSet\Control\Session Manager\Environment', 'Path', newpath); // Modify Win9x path end else begin // Convert to shortened dirname pathdir[d] := GetShortName(pathdir[d]); // If autoexec.bat exists, check if app dir already exists in path aExecFile := 'C:\AUTOEXEC.BAT'; if FileExists(aExecFile) then begin LoadStringsFromFile(aExecFile, aExecArr); for i 
:= 0 to GetArrayLength(aExecArr)-1 do begin if IsUninstaller() = false then begin // If app dir already exists while installing, abort add if (Pos(pathdir[d], aExecArr[i]) > 0) then abort; end else begin // If app dir exists and = what we originally set, then delete at uninstall if aExecArr[i] = 'SET PATH=%PATH%;' + pathdir[d] then aExecArr[i] := ''; end; end; end; // If app dir not found, or autoexec.bat didn't exist, then (create and) append to current path if IsUninstaller() = false then begin SaveStringToFile(aExecFile, #13#10 + 'SET PATH=%PATH%;' + pathdir[d], True); // If uninstalling, write the full autoexec out end else begin SaveStringsToFile(aExecFile, aExecArr, False); end; end; // Write file to flag modifypath was selected // Workaround since IsTaskSelected() cannot be called at uninstall and AppName and AppId cannot be "read" in Code section if IsUninstaller() = false then SaveStringToFile(ExpandConstant('{app}') + '\uninsTasks.txt', WizardSelectedTasks(False), False); end; end; procedure CurStepChanged(CurStep: TSetupStep); begin if CurStep = ssPostInstall then if IsTaskSelected('modifypath') then ModPath(); end; procedure CurUninstallStepChanged(CurUninstallStep: TUninstallStep); var appdir: String; selectedTasks: String; begin appdir := ExpandConstant('{app}') if CurUninstallStep = usUninstall then begin if LoadStringFromFile(appdir + '\uninsTasks.txt', selectedTasks) then if Pos('modifypath', selectedTasks) > 0 then ModPath(); DeleteFile(appdir + '\uninsTasks.txt') end; end; function NeedRestart(): Boolean; begin if IsTaskSelected('modifypath') and not UsingWinNT() then begin Result := True; end else begin Result := False; end; end; FreeFem-sources-4.9/bin/mpic++.in000775 000000 000000 00000001537 14037356732 016541 0ustar00rootroot000000 000000 #!/bin/sh # mpi mingw driver F. Hecht.. 
21/01/2011
# ------------------------------------------
#set -x
# The same script serves as C++, C, Fortran 77 and Fortran 90 driver: the compiler is chosen from the name the
# script was called with ($0). The @...@ placeholders are substituted when this .in file is processed.
MPIDIR='@MPIDIR@'
MPIINC="$MPIDIR/include"
LDIR="-L$MPIDIR/lib"
MPILIB="-lmpicxx -lfmpich2g -lmpi"
CXX=@CXX@
CC=@CC@
F77=@F77@
F90=@FC@
INC=-I"$MPIINC"
LIB="$MPILIB"

# compile-only run: nothing is linked, so no MPI libraries on the command line
for i in "$@"; do if [ "$i" = "-c" ] ; then LIB=""; fi; done;

# -show: print the command instead of running it. Every other argument is appended, quoted, to $ss
# (the previous ss="ss '$i'" kept only the last argument, prefixed by the literal word "ss").
ss=""
for i in "$@"; do if [ "$i" = "-show" ] ; then echo="echo" ;else ss="$ss '$i'";fi; done;

if [ "$echo" = echo ]; then
    case "$0" in
	*c++*) echo "$CXX" "'$INC'" $ss "'$LDIR'" $LIB;;
	*77*) echo "$F77" "'$INC'" $ss "'$LDIR'" $LIB;;
	*90*) echo "$F90" "'$INC'" $ss "'$LDIR'" $LIB;;
	*cc*) echo "$CC" "'$INC'" $ss "'$LDIR'" $LIB;;
    esac
else
    case "$0" in
	*c++*) "$CXX" "$INC" "$@" "$LDIR" $LIB;;
	*77*) "$F77" "$INC" "$@" "$LDIR" $LIB;;
	*90*) "$F90" "$INC" "$@" "$LDIR" $LIB;;
	*cc*) "$CC" "$INC" "$@" "$LDIR" $LIB;;
    esac
fi
FreeFem-sources-4.9/bin/pkgbuild-macos000775 000000 000000 00000003253 14037356732 017754 0ustar00rootroot000000 000000 
#!/usr/bin/env bash
## first attempt at creating a Mac installer
# first
# create dir for install

# turn the "ff_prefix_dir... = value" lines of the Makefile into shell assignments and load them
grep ' = ' Makefile| grep ff_prefix_dir| sed 's/ = /=/' >/tmp/prefix-list.sh
source /tmp/prefix-list.sh
dst=/tmp/ff-dst
dstlib="$dst/$ff_prefix_dir_lib"
pkg=
id=fr.upmc.math.ljll.FreeFem-v4.pkg.app

# refuse to reuse an existing staging area. -e (instead of -f) so that an existing DIRECTORY is detected too,
# which is the case the message is about.
if [ -e "$dst" ] ; then
    echo fatal error the dst dir exist , please remeo of change
    exit 1
fi
mkdir $dst
#make install "DESTDIR=$dst"
echo copy dynamic libs ....
rm /tmp/BaseSearchdylib /tmp/Searchdylib ./bin/Searchdylib >/tmp/Searchdylib for f in /tmp/Searchdylib ; do lib=$dst/`basename $f` echo $lib >>/tmp/BaseSearchdylib dlib=`dirname $f` ff=$dstlib/$ff_prefix_dir_lib/$lib echo cp $f $ff cp $f $ff done echo change dynamic libs in $dst for f in $dst/$ff_prefix_dir_lib/*.dylib $dst/$ff_prefix_dir_lib_mpi/*.dylib ; do lib=$dst/`basename $f` dlib=`dirname $f` dirlib=`dirname $lib` ooo=/tmp/otool-`basename $lib .dylib`.txt # echo $lib otool -L $lib| grep -v '/usr/lib/'|awk '/compatibility version/ {print $1}'>$ooo for l in `cat /tmp/BaseSearchdylib`; do for i in `grep $pwd3lib $ooo` ; do l=`basename $i` li=`egrep "/$l$" /tmp/Searchdylib|wc` if [ "$li" -ge 1 ]; then $fl=$ff_prefix_dir_lib/$l echo $i "->" $fl chmod a+w $lib fi chmod a+w $lib test -n "$j" -a $i != "$j" && install_name_tool -change $i $j $lib test -n "$j" -a $i != "$j" && echo install_name_tool -change $i $j $lib chmod a-w $lib done done done for lib in $dstlib/*.dylib ; do done #awk -F / -v l="$dstlib" '{print "cp",$0,l "/" $NF}' /tmp/Searchdylib | sh # copy lib ... # buildpkg ... # pkgbuild --identifier $id --root "$dst" $pkg # productbuild FreeFem-sources-4.9/bin/regtests.sh000775 000000 000000 00000004333 14037356732 017324 0ustar00rootroot000000 000000 #!/usr/bin/env bash # Runs all regression tests on all compiled FreeFem++ versions # ------------------------------------------------------------ MPIRUN=`awk '$1 =="MPIRUN" {print $3}' Makefile` # Antoine Le Hyaric - LJLL Paris 6 - lehyaric@ann.jussieu.fr - 19/5/04 # $Id$ # To run one set of tests on one executable # $1=program to run # $2=tag for trace file # $3=EDP script to run function dotest(){ # Running FreeFem++ on regtests.edp (specific to regression # tests), otherwise on all.edp. echo regtests.sh: running $1 $3, result in regtests-$2.log $1 $3|tee regtests-$2.log if test $PIPESTATUS != 0 then exit 1 fi } # For the example++-load tests export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:." 
# In visual checks, we can run even the most invasive programs script=$REGEDP if test "$VISUALCHECK" = yes -a "$REGEDP" = regtests.edp then script=all.edp fi # Number of processors in parallel mode if test "$NPROCS" != "" then nprocs=$NPROCS else nprocs=1 fi # Do not test windowed programs by default, because their windows are # too invasive. if test "$VISUALCHECK" = yes then export PATH="${PROGLOC}/nw/:$PATH"; dotest FreeFem++${EXEEXT} std $script fi if test $nprocs = 1 then dotest ${PROGLOC}/nw/FreeFem++-nw${EXEEXT} nw $script fi if test "${X11PROG}" != "" -a "${VISUALCHECK}" = "yes" then dotest ${PROGLOC}/x11/FreeFem++-x11${EXEEXT} x11 $script fi if test "${GLXPROG}" != "" -a "${VISUALCHECK}" = "yes" then dotest ${PROGLOC}/glx/FreeFem++-glx${EXEEXT} glx $script fi if test "${AGLPROG}" != "" -a "${VISUALCHECK}" = "yes" then dotest ${PROGLOC}/agl/FreeFem++-agl${EXEEXT} agl $script fi if test "${MPIPROG}" != "" then mpich=`${MPIRUN=mpirun} -h 2>&1 |grep mpich |wc -l` host=`hostname` echo $host>machinefile echo $host>>machinefile if [ $mpich -ne 0 ] ; then dotest "${MPIRUN} -np $nprocs -machinefile machinefile ${PROGLOC}/mpi/FreeFem++-mpi${EXEEXT}" mpi $script else [[ -f "$(which lamboot 2>/dev/null)" ]] && lamboot dotest "${MPIRUN} -np $nprocs ${PROGLOC}/mpi/FreeFem++-mpi${EXEEXT}" mpi $script fi fi if test "${IDEPROG}" != "" -a "${VISUALCHECK}" = "yes" then dotest ${PROGLOC}/ide/FreeFem++-cs${EXEEXT} ide $script fi FreeFem-sources-4.9/bin/save-in-files000775 000000 000000 00000000121 14037356732 017504 0ustar00rootroot000000 000000 tar cvf missing-in-file.tar `find . 
-name '*.in' | awk -F / ' NF <= 4'` configureFreeFem-sources-4.9/bin/script/000775 000000 000000 00000000000 14037356732 016426 5ustar00rootroot000000 000000 FreeFem-sources-4.9/bin/script/PostInstall.m4000664 000000 000000 00000002013 14037356732 021140 0ustar00rootroot000000 000000 #!/bin/sh # "-DFF__FVER=$(PACKAGE_VERSION)" # "-DFF_BINDIR=$(bindir)" # "-DFF__DATADIR=$(pkgdatadir) # "FFBIN="@prefix@"/bin if [ "$(uname)" = "Darwin" ]; then ff_desktop="$HOME/Desktop/FreeFem++-""FF__FVER" mkdir -p -m 0755 /etc/paths.d ln -sf "FF__DATADIR"/"FreeFEM-documentation.pdf" "$HOME/Desktop" test -e "$ff_desktop" || ln -sf "FF__DATADIR"/"FF__FVER" "$ff_desktop" echo Install /etc/paths.d/FreeFem++ file: "FF_BINDIR" echo "FF_BINDIR" > /etc/paths.d/FreeFem++ chmod a+r /etc/paths.d/FreeFem++ fi echo " Try to Clean old file version " if [ -d /usr/local/bin ] ; then cd /usr/local/bin for i in FreeFem++ FreeFem++-CoCoa FreeFem++-mpi FreeFem++-nw bamg cvmsh2 ff-c++ ff-get-dep ff-mpirun ff-pkg-download ffglut ffmedit; do if [ -f "$i" ] ; then echo " clean $i " rm "$i"; fi done if [ "$(uname)" = "Darwin" ]; then echo ln -s FF_BINDIR/FreeFem++-CoCoa /usr/local/bin/ ln -s FF_BINDIR/FreeFem++-CoCoa /usr/local/bin/ fi fi # bluid new link to new FreeFem-sources-4.9/bin/script/clean_all_installed_ffpp.sh000775 000000 000000 00000001352 14037356732 023752 0ustar00rootroot000000 000000 #!/bin/sh # clean all freefem++ version ... 
rm="rm" cd / if [ -d Applications/FreeFem++.app ] ; then $rm -rf Applications/FreeFem++.app fi if [ -d usr/local/share/freefem++ ] ; then $rm -rf usr/local/share/freefem++ fi if [ -d usr/local/lib/ff++/ ]; then $rm -rf usr/local/lib/ff++ fi ffexe="FreeFem++* ff-c++ ff-pkg-download bamg ff-get-dep ffglut cvmsh2 ff-mpirun ffmedit" ( cd usr/local/bin &&echo cd "usr/local/bin" && $rm $ffexe; ) if [ -f etc/paths.d/FreeFem++ ]; then ffbin=`cat etc/paths.d/FreeFem++` ( cd "$ffbin" &&echo cd "$ffbin" && $rm $ffexe; ) fi if [ -d usr/local/ff++ ] ; then echo " Warning dir usr/local/ff++ exist" ls usr/local/ff++ echo " Warning the directory /usr/local/ff++ no remove ? " fi exit 0 FreeFem-sources-4.9/bin/test-driver-ff000775 000000 000000 00000012201 14037356732 017705 0ustar00rootroot000000 000000 #!/bin/sh # test-driver-ff from test-driver - basic testsuite driver script. # modif F. Hecht UPMC. scriptversion=2013-06-27.18; # UTC # Copyright (C) 2011-2013 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . # Make unconditional expansion of undefined variables an error. 
This # helps a lot in preventing typo-related bugs. set -u usage_error () { echo "$0: $*" >&2 print_usage >&2 exit 2 } print_usage () { cat <$log_file 2>&1 estatus=77 else do_exit='rm -f $log_file $trs_file; (exit $st); exit $st' trap "st=129; $do_exit" 1 trap "st=130; $do_exit" 2 trap "st=141; $do_exit" 13 trap "st=143; $do_exit" 15 # Test script is run here. FFnp=`awk '$1=="//" && $2=="NBPROC" { if( vv == ""){ print $3;}; vv=1}' "$@"` FLAGS_FFPP_A=`awk '$1=="//" && $2=="PARAM" { if( vv == ""){ for(i=3;i<=NF;++i)print $i;}; vv=1}' "$@"` if [ "0$FFnp" -gt 0 ] ; then FLAGS_FFPP_B="-np $FFnp -nw" else FLAGS_FFPP_B="${FLAGS_FFPP}" fi export MPIEXEC_TIMEOUT=300 test -z "${COMMONPROGRAMFILES+x}" -a -z "${WINDIR+x}" && ulimit -t 60 ${TEST_FFPP} ${FLAGS_FFPP_B} "$@" ${FLAGS_FFPP_A}>$log_file 2>&1 estatus=$? FAIL_CPU_LIMIT=`tail -100 $log_file|egrep -i 'Cputime limit exceeded|MPIEXEC_TIMEOUT|Timeout after'|wc -l` fi ## Add an error suffix in order to extract errored tests only if [ $estatus -ne 0 ] && [ $estatus -ne 77 ] then cp $log_file "${log_file}.err" fi if test $enable_hard_errors = no && test $estatus -eq 99; then estatus=1 fi if test "${FAIL_CPU_LIMIT:-0}" -ge 1 ; then expect_failure=cpu; fi case $estatus:$expect_failure in 0:yes) col=$grn res=PASS recheck=yes gcopy=yes;; 0:*) col=$grn res=PASS recheck=no gcopy=no;; 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; *:cpu) col=$ora res=CPU recheck=no gcopy=yes;; *:*) col=$red res=FAIL recheck=yes gcopy=yes;; esac # Report outcome to console. echo "${col}${res}${std}: $test_name" # Register the test result, and other relevant metadata. 
echo ":test-result: $res" > $trs_file echo ":global-test-result: $res" >> $trs_file echo ":recheck: $recheck" >> $trs_file echo ":copy-in-global-log: $gcopy" >> $trs_file # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: FreeFem-sources-4.9/bin/uninstall-ff++000775 000000 000000 00000002341 14037356732 017600 0ustar00rootroot000000 000000 echo rm /usr/local/bin/FreeFem++ /usr/local/bin/bamg /usr/local/bin/ff-mpirun test `uname` != Darwin || echo rm /usr/local/bin/FreeFem++-CoCoa echo rm /usr/local/bin/cvmsh2 /usr/local/bin/ff-pkg-download echo rm /usr/local/bin/FreeFem++-mpi /usr/local/bin/ff-c++ /usr/local/bin/ffglut echo rm /usr/local/bin/FreeFem++-nw /usr/local/bin/ff-get-dep /usr/local/bin/ffmedit echo rm -rf /usr/local/share/freefem++ echo rm -rf /usr/local/share/FreeFEM echo rm -rf /usr/local/lib/ff++/ echo rm -rf /usr/local/ff++ echo rm /etc/paths.d/FreeFem++ if [ -d /Applications/FreeFem++.app ]; then echo rm -rf /Applications/FreeFem++.app fi test `uname` != Darwin || rm /usr/local/bin/FreeFem++-CoCoa rm /usr/local/bin/FreeFem++ /usr/local/bin/bamg /usr/local/bin/ff-mpirun rm /usr/local/bin/cvmsh2 /usr/local/bin/ff-pkg-download rm /usr/local/bin/FreeFem++-mpi /usr/local/bin/ff-c++ /usr/local/bin/ffglut rm /usr/local/bin/FreeFem++-nw /usr/local/bin/ff-get-dep /usr/local/bin/ffmedit rm -rf /usr/local/ff++ rm -rf /usr/local/ff-petsc echo rm /etc/paths.d/FreeFem++ rm -rf /usr/local/share/freefem++ rm -rf /usr/local/share/FreeFEM rm -rf /usr/local/lib/ff++/ if [ -d /Applications/FreeFem++.app ]; then rm -rf /Applications/FreeFem++.app fi FreeFem-sources-4.9/cmake/000775 000000 000000 00000000000 14037356732 015432 5ustar00rootroot000000 000000 FreeFem-sources-4.9/cmake/modules/000775 000000 000000 00000000000 14037356732 017102 5ustar00rootroot000000 000000 
FreeFem-sources-4.9/cmake/modules/FindAMD.cmake000664 000000 000000 00000000755 14037356732 021315 0ustar00rootroot000000 000000 
# Find module for AMD.
# Result variables: AMD_INCLUDES (directory of amd.h), AMD_LIBRARIES (the amd library), AMD_FOUND.
include(FindPackageHandleStandardArgs)
include(PackageManagerPaths)

find_path(AMD_INCLUDES
          NAMES amd.h
          PATHS ${PACKMAN_INCLUDE_PATHS}
          PATH_SUFFIXES suitesparse)

find_library(AMD_LIBRARIES
             NAMES amd
             PATHS ${PACKMAN_LIBRARIES_PATHS})

# found only when both the header and the library were located
if(AMD_INCLUDES AND AMD_LIBRARIES)
  set(AMD_FOUND True)
endif()

find_package_handle_standard_args(AMD
                                  DEFAULT_MSG
                                  AMD_INCLUDES
                                  AMD_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindARPACK.cmake000664 000000 000000 00000000500 14037356732 021641 0ustar00rootroot000000 000000 
# Find module for ARPACK (library only, no header is searched).
# Result variables: ARPACK_LIBRARIES, ARPACK_FOUND.
include(FindPackageHandleStandardArgs)
include(PackageManagerPaths)

find_library(ARPACK_LIBRARIES
             NAMES arpack
             PATHS ${PACKMAN_LIBRARIES_PATHS})

if(ARPACK_LIBRARIES)
  set(ARPACK_FOUND True)
endif()

find_package_handle_standard_args(ARPACK
                                  DEFAULT_MSG
                                  ARPACK_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindCAMD.cmake000664 000000 000000 00000000771 14037356732 021416 0ustar00rootroot000000 000000 
# Find module for CAMD.
# Result variables: CAMD_INCLUDES (directory of camd.h), CAMD_LIBRARIES (the camd library), CAMD_FOUND.
include(FindPackageHandleStandardArgs)
include(PackageManagerPaths)

find_path(CAMD_INCLUDES
          NAMES camd.h
          PATHS ${PACKMAN_INCLUDE_PATHS}
          PATH_SUFFIXES suitesparse)

find_library(CAMD_LIBRARIES
             NAMES camd
             PATHS ${PACKMAN_LIBRARIES_PATHS})

# found only when both the header and the library were located
if(CAMD_INCLUDES AND CAMD_LIBRARIES)
  set(CAMD_FOUND True)
endif()

find_package_handle_standard_args(CAMD
                                  DEFAULT_MSG
                                  CAMD_INCLUDES
                                  CAMD_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindCBLAS.cmake000664 000000 000000 00000001024 14037356732 021526 0ustar00rootroot000000 000000 
# Find module for CBLAS.
# Result variables: CBLAS_INCLUDES (directory of cblas.h), CBLAS_LIBRARIES, CBLAS_FOUND.
include(FindPackageHandleStandardArgs)
include(PackageManagerPaths)

find_path(CBLAS_INCLUDES
          NAMES cblas.h
          PATHS ${PACKMAN_INCLUDE_PATHS})

# the library is also looked for under its versioned file name and in the atlas-base sub-directory
find_library(CBLAS_LIBRARIES
             NAMES cblas libcblas.so.3
             PATHS ${PACKMAN_LIBRARIES_PATHS}
             PATH_SUFFIXES atlas-base)

# found only when both the header and the library were located
if(CBLAS_INCLUDES AND CBLAS_LIBRARIES)
  set(CBLAS_FOUND True)
endif()
FIND_PACKAGE_HANDLE_STANDARD_ARGS(CBLAS DEFAULT_MSG CBLAS_INCLUDES CBLAS_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindCCOLAMD.cmake000664 000000 000000 00000001035 14037356732 021746 0ustar00rootroot000000 000000 
# Find module for CCOLAMD.
# Result variables: CCOLAMD_INCLUDES (directory of ccolamd.h), CCOLAMD_LIBRARIES, CCOLAMD_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(CCOLAMD_INCLUDES NAMES ccolamd.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES suitesparse)
FIND_LIBRARY(CCOLAMD_LIBRARIES NAMES ccolamd PATHS ${PACKMAN_LIBRARIES_PATHS})

# found only when both the header and the library were located
IF(CCOLAMD_INCLUDES AND CCOLAMD_LIBRARIES)
  SET(CCOLAMD_FOUND True)
ENDIF(CCOLAMD_INCLUDES AND CCOLAMD_LIBRARIES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(CCOLAMD DEFAULT_MSG CCOLAMD_INCLUDES CCOLAMD_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindCHOLMOD.cmake000664 000000 000000 00000001041 14037356732 021766 0ustar00rootroot000000 000000 
# Find module for CHOLMOD.
# Result variables: CHOLMOD_INCLUDES (directory of cholmod.h), CHOLMOD_LIBRARIES, CHOLMOD_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(CHOLMOD_INCLUDES NAMES cholmod.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES suitesparse)
FIND_LIBRARY(CHOLMOD_LIBRARIES NAMES cholmod PATHS ${PACKMAN_LIBRARIES_PATHS})

# found only when both the header and the library were located
IF(CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES)
  SET(CHOLMOD_FOUND True)
ENDIF(CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES)

# The first argument must be the package name, CHOLMOD: it is the prefix of the <name>_FOUND variable that is set
# and the name used in the status message. It was misspelt "CHOLDMOD".
FIND_PACKAGE_HANDLE_STANDARD_ARGS(CHOLMOD DEFAULT_MSG CHOLMOD_INCLUDES CHOLMOD_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindCOLAMD.cmake000664 000000 000000 00000001021 14037356732 021636 0ustar00rootroot000000 000000 
# Find module for COLAMD.
# Result variables: COLAMD_INCLUDES (directory of colamd.h), COLAMD_LIBRARIES, COLAMD_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(COLAMD_INCLUDES NAMES colamd.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES suitesparse)
FIND_LIBRARY(COLAMD_LIBRARIES NAMES colamd PATHS ${PACKMAN_LIBRARIES_PATHS})

# found only when both the header and the library were located
IF(COLAMD_INCLUDES AND COLAMD_LIBRARIES)
  SET(COLAMD_FOUND True)
ENDIF(COLAMD_INCLUDES AND COLAMD_LIBRARIES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(COLAMD DEFAULT_MSG COLAMD_INCLUDES COLAMD_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindDLOPEN.cmake000664 000000 000000 00000000470 14037356732 021667 0ustar00rootroot000000 000000 
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

# FindDLOPEN is header-only: the package counts as found once dlfcn.h is located.
FIND_PATH(DLOPEN_INCLUDES
          NAMES dlfcn.h
          PATHS ${PACKMAN_INCLUDE_PATHS})

IF(DLOPEN_INCLUDES)
  SET(DLOPEN_FOUND True)
ENDIF(DLOPEN_INCLUDES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(DLOPEN DEFAULT_MSG DLOPEN_INCLUDES)
FreeFem-sources-4.9/cmake/modules/FindFFTW.cmake000664 000000 000000 00000000707 14037356732 021457 0ustar00rootroot000000 000000
# FindFFTW: locate fftw3.h and the fftw3 library.
# Results: FFTW_INCLUDES, FFTW_LIBRARIES, FFTW_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(FFTW_INCLUDES
          NAMES fftw3.h
          PATHS ${PACKMAN_INCLUDE_PATHS})
FIND_LIBRARY(FFTW_LIBRARIES
             NAMES fftw3
             PATHS ${PACKMAN_LIBRARIES_PATHS})

IF(FFTW_INCLUDES AND FFTW_LIBRARIES)
  SET(FFTW_FOUND True)
ENDIF(FFTW_INCLUDES AND FFTW_LIBRARIES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(FFTW DEFAULT_MSG FFTW_INCLUDES FFTW_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindGMM.cmake000664 000000 000000 00000000512 14037356732 021323 0ustar00rootroot000000 000000
# FindGMM is header-only: only gmm.h is searched for (also under a gmm/ sub-directory).
# Results: GMM_INCLUDES, GMM_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(GMM_INCLUDES
          NAMES gmm.h
          PATHS ${PACKMAN_INCLUDE_PATHS}
          PATH_SUFFIXES gmm)

IF(GMM_INCLUDES)
  SET(GMM_FOUND True)
ENDIF(GMM_INCLUDES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(GMM DEFAULT_MSG GMM_INCLUDES)
FreeFem-sources-4.9/cmake/modules/FindIPOPT.cmake000664 000000 000000 00000001023 14037356732 021574 0ustar00rootroot000000 000000
# FindIPOPT: locate the Ipopt C++ headers (searched under a coin/ sub-directory)
# and the ipopt library.
# Results: IPOPT_INCLUDES, IPOPT_LIBRARIES, IPOPT_FOUND.
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

# Header names are case-sensitive on most file systems: the Ipopt header is
# IpTNLP.hpp, not ipTNLP.hpp.
FIND_PATH(IPOPT_INCLUDES
          NAMES IpTNLP.hpp IpIpoptApplication.hpp
          PATHS ${PACKMAN_INCLUDE_PATHS}
          PATH_SUFFIXES coin)
FIND_LIBRARY(IPOPT_LIBRARIES
             NAMES ipopt
             PATHS ${PACKMAN_LIBRARIES_PATHS})

# Require both the headers and the library, as the sibling Find modules do and
# as the FIND_PACKAGE_HANDLE_STANDARD_ARGS call below already demands.
IF(IPOPT_INCLUDES AND IPOPT_LIBRARIES)
  SET(IPOPT_FOUND True)
ENDIF(IPOPT_INCLUDES AND IPOPT_LIBRARIES)

FIND_PACKAGE_HANDLE_STANDARD_ARGS(IPOPT DEFAULT_MSG IPOPT_INCLUDES IPOPT_LIBRARIES)
FreeFem-sources-4.9/cmake/modules/FindMETIS.cmake000664 000000 000000 00000000722 14037356732 021567 0ustar00rootroot000000 000000
INCLUDE(FindPackageHandleStandardArgs)
INCLUDE(PackageManagerPaths)

FIND_PATH(METIS_INCLUDES
          NAMES metis.h
          PATHS ${PACKMAN_INCLUDE_PATHS})
FIND_LIBRARY(METIS_LIBRARIES
NAMES metis PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(METIS_INCLUDES AND METIS_LIBRARIES) SET(METIS_FOUND True) ENDIF(METIS_INCLUDES AND METIS_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(METIS DEFAULT_MSG METIS_INCLUDES METIS_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindMUMPS.cmake000664 000000 000000 00000001112 14037356732 021601 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(MUMPS_INCLUDES NAMES smumps_c.h dmumps_c.h cmumps_c.h zmumps_c.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES mumps) FIND_LIBRARY(MUMPS_LIBRARIES NAMES mumps_common smumps dmumps cmumps zmumps PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(MUMPS_INCLUDES AND MUMPS_LIBRARIES) SET(MUMPS_FOUND True) ENDIF(MUMPS_INCLUDES AND MUMPS_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(MUMPS DEFAULT_MSG MUMPS_INCLUDES MUMPS_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindNLOPT.cmake000664 000000 000000 00000000657 14037356732 021611 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(NLOPT_INCLUDES NAMES nlopt.hpp PATHS ${PACKMAN_INCLUDE_PATHS}) FIND_LIBRARY(NLOPT_LIBRARIES NAMES nlopt PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(NLOPT_LIBRARIES) SET(NLOPT_FOUND True) ENDIF(NLOPT_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(NLOPT DEFAULT_MSG NLOPT_INCLUDES NLOPT_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindPARMETIS.cmake000664 000000 000000 00000000777 14037356732 022144 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(PARMETIS_INCLUDES NAMES parmetis.h PATHS ${PACKMAN_INCLUDE_PATHS}) FIND_LIBRARY(PARMETIS_LIBRARIES NAMES parmetis PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(PARMETIS_INCLUDES AND PARMETIS_LIBRARIES) SET(PARMETIS_FOUND True) ENDIF(PARMETIS_INCLUDES AND PARMETIS_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(PARMETIS DEFAULT_MSG PARMETIS_INCLUDES PARMETIS_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindSCOTCH.cmake000664 000000 000000 
00000001017 14037356732 021667 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(SCOTCH_INCLUDES NAMES scotch.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES scotch) FIND_LIBRARY(SCOTCH_LIBRARIES NAMES scotch PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) SET(SCOTCH_FOUND True) ENDIF(SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(SCOTCH DEFAULT_MSG SCOTCH_INCLUDES SCOTCH_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindSUITESPARSE.cmake000664 000000 000000 00000001123 14037356732 022511 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(SUITESPARSE_INCLUDES NAMES umfpack.h PATHS ${PACKMAN_INCLUDE_PATHS} PATH_SUFFIXES suitesparse) FIND_LIBRARY(SUITESPARSE_LIBRARIES NAMES umfpack PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(SUITESPARSE_INCLUDES AND SUITESPARSE_LIBRARIES) SET(SUITESPARSE_FOUND True) ENDIF(SUITESPARSE_INCLUDES AND SUITESPARSE_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(SUITESPARSE DEFAULT_MSG SUITESPARSE_INCLUDES SUITESPARSE_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindSUITESPARSECONFIG.cmake000664 000000 000000 00000000614 14037356732 023403 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_LIBRARY(SUITESPARSECONFIG_LIBRARIES NAMES suitesparseconfig PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(SUITESPARSECONFIG_LIBRARIES) SET(SUITESPARSECONFIG_FOUND True) ENDIF(SUITESPARSECONFIG_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(SUITESPARSECONFIG DEFAULT_MSG SUITESPARSECONFIG_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindSUPERLU.cmake000664 000000 000000 00000001743 14037356732 022051 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) FIND_PATH(SUPERLU_INCLUDES NAMES colamd.h slu_cdefs.h slu_Cnames.h slu_dcomplex.h slu_ddefs.h slu_scomplex.h slu_sdefs.h slu_util.h slu_zdefs.h superlu_enum_consts.h supermatrix.h PATHS /usr/include 
PATH_SUFFIXES superlu) FIND_LIBRARY(SUPERLU_LIBRARIES NAMES superlu PATHS /usr/lib/x86_64-linux-gnu) IF(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) SET(SUPERLU_FOUND True) ENDIF(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(SUPERLU DEFAULT_MSG SUPERLU_INCLUDES SUPERLU_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindTETGEN.cmake000664 000000 000000 00000000734 14037356732 021677 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_PATH(TETGEN_INCLUDES NAMES tetgen.h PATHS ${PACKMAN_INCLUDE_PATHS}) FIND_LIBRARY(TETGEN_LIBRARIES NAMES tet PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(TETGEN_INCLUDES AND TETGEN_LIBRARIES) SET(TETGEN_FOUND True) ENDIF(TETGEN_INCLUDES AND TETGEN_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(TETGEN DEFAULT_MSG TETGEN_INCLUDES TETGEN_LIBRARIES) FreeFem-sources-4.9/cmake/modules/FindUMFPACK.cmake000664 000000 000000 00000000512 14037356732 021771 0ustar00rootroot000000 000000 INCLUDE(FindPackageHandleStandardArgs) INCLUDE(PackageManagerPaths) FIND_LIBRARY(UMFPACK_LIBRARIES NAMES umfpack PATHS ${PACKMAN_LIBRARIES_PATHS}) IF(UMFPACK_LIBRARIES) SET(UMFPACK_FOUND True) ENDIF(UMFPACK_LIBRARIES) FIND_PACKAGE_HANDLE_STANDARD_ARGS(UMFPACK DEFAULT_MSG UMFPACK_LIBRARIES) FreeFem-sources-4.9/cmake/modules/InstallARPACK.cmake000664 000000 000000 00000000211 14037356732 022366 0ustar00rootroot000000 000000 SET(LIBNAME ARPACK) SET(URL http://www.caam.rice.edu/software/ARPACK/SRC/arpack96.tar.gz) SET(URL_MD5 fffaa970198b285676f4156cebc8626e) FreeFem-sources-4.9/cmake/modules/InstallARPACK_PATCH.cmake000664 000000 000000 00000000214 14037356732 023310 0ustar00rootroot000000 000000 SET(LIBNAME ARPACK_PATCH) SET(URL http://www.caam.rice.edu/software/ARPACK/SRC/patch.tar.gz) SET(URL_MD5 14830d758f195f272b8594a493501fa2) FreeFem-sources-4.9/cmake/modules/InstallBLAS.cmake000664 000000 000000 00000000156 14037356732 022156 0ustar00rootroot000000 000000 SET(LIBNAME BLAS) SET(URL 
http://www.netlib.org/blas/blas.tgz) SET(URL_MD5 ca21ed426f347c6ec6b136a181e587e5) FreeFem-sources-4.9/cmake/modules/InstallCBLAS.cmake000664 000000 000000 00000000174 14037356732 022261 0ustar00rootroot000000 000000 SET(LIBNAME CBLAS) SET(URL http://www.netlib.org/blas/blast-forum/cblas.tgz) SET(URL_MD5 1e8830f622d2112239a4a8a83b84209a) FreeFem-sources-4.9/cmake/modules/InstallFFTW.cmake000664 000000 000000 00000001021 14037356732 022173 0ustar00rootroot000000 000000 SET(LIBNAME FFTW) SET(URL http://www.fftw.org/fftw-3.3.2.tar.gz) SET(URL_MD5 6977ee770ed68c85698c7168ffa6e178) SET(CONFIGURE_COMMAND /configure --disable-dependency-tracking --disable-fortran --prefix= CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} CFLAGS=${CMAKE_C_FLAGS} CXXFLAGS=${CMAKE_CXX_FLAGS}) SET(LIBRARIES libfftw3.a) FreeFem-sources-4.9/cmake/modules/InstallFREEYAMS.cmake000664 000000 000000 00000000224 14037356732 022644 0ustar00rootroot000000 000000 SET(LIBNAME FREEYAMS) SET(URL http://www.ann.jussieu.fr/~frey/ftp/archives/freeyams.2012.02.05.tgz) SET(URL_MD5 b7c82a256a3e59beeb2b578de93a4e0b) FreeFem-sources-4.9/cmake/modules/InstallGMM.cmake000664 000000 000000 00000000300 14037356732 022044 0ustar00rootroot000000 000000 SET(LIBNAME GMM++) SET(URL http://download.gna.org/getfem/stable/gmm-5.1.tar.gz) SET(URL_MD5 b16a65c09ea090fcd37cc3f47b692284) SET(CONFIGURE_COMMAND ../src/configure --prefix=) FreeFem-sources-4.9/cmake/modules/InstallHPDDM.cmake000664 000000 000000 00000000240 14037356732 022263 0ustar00rootroot000000 000000 SET(LIBANEM HPDDM) SET(URL https://github.com/hpddm/hpddm/archive/f42101545d47d0545553e840e061e95982992a45.zip) SET(URL_MD5 fa9b00d14190b42b1cab3ab0123250ec) FreeFem-sources-4.9/cmake/modules/InstallIPOPT.cmake000664 000000 000000 00000000522 14037356732 022325 0ustar00rootroot000000 000000 SET(LIBNAME IPOPT) SET(URL http://www.coin-or.org/download/source/Ipopt/Ipopt-3.12.4.tgz) SET(URL_MD5 12a8ecaff8dd90025ddea6c65b49cb03) SET(CONFIGURE_COMMAND ../src/configure 
--prefix=) SET(BUILD_COMMAND make) SET(INSTALL_COMMAND make install) SET(INCLUDE_PATHS ${FF_DOWNLOAD_DIR}/include/coin) SET(LIBRARIES libipopt.so) FreeFem-sources-4.9/cmake/modules/InstallMETIS.cmake000664 000000 000000 00000000622 14037356732 022314 0ustar00rootroot000000 000000 SET(LIBNAME METIS) SET(URL http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/metis-5.1.0.tar.gz) SET(URL_MD5 5465e67079419a69e0116de24fce58fe) SET(CMAKE_ARGS -D CMAKE_C_COMPILER=${CMAKE_C_COMPILER} -D CMAKE_C_FLAGS=${CMAKE_C_FLAGS} -D CMAKE_INSTALL_PREFIX= -D SHARED=true -D GKLIB_PATH=../src/GKlib) SET(LIBRARIES libmetis.so) FreeFem-sources-4.9/cmake/modules/InstallMMG3D.cmake000664 000000 000000 00000001317 14037356732 022244 0ustar00rootroot000000 000000 SET(LIBNAME MMG3D) SET(URL http://www.math.u-bordeaux1.fr/~dobrzyns/logiciels/download/mmg3d4.0.tgz) SET(CONFIGURE_COMMAND cmake -D CMAKE_INSTALL_PREFIX= -D CMAKE_C_COMPILER=${CMAKE_C_COMPILER} -D CMAKE_C_FLAGS=${CMAKE_C_FLAGS} -D INCLUDE_SCOTCH=/usr/include/scotch -D COMPIL_SHARED_LIBRARY=True ../src/build) SET(INSTALL_COMMAND mkdir -p /include && mkdir -p /lib && cp /build/sources/libmmg3d.h /include && cp libmmg3dlib4.0.so /lib/) FreeFem-sources-4.9/cmake/modules/InstallMSHMET.cmake000664 000000 000000 00000000220 14037356732 022422 0ustar00rootroot000000 000000 SET(LIBNAME MSHMET) SET(URL http://www.ann.jussieu.fr/~frey/ftp/archives/mshmet.2012.04.25.tgz) SET(URL_MD5 427f397e0602efc56aa897d38a8e9bfa) FreeFem-sources-4.9/cmake/modules/InstallMUMPS.cmake000664 000000 000000 00000000176 14037356732 022340 0ustar00rootroot000000 000000 SET(LIBNAME MUMPS) SET(URL http://graal.ens-lyon.fr/MUMPS/MUMPS_5.0.1.tar.gz) SET(URL_MD5 b477573fdcc87babe861f62316833db0) FreeFem-sources-4.9/cmake/modules/InstallNLOPT.cmake000664 000000 000000 00000000504 14037356732 022326 0ustar00rootroot000000 000000 SET(LIBNAME NLOPT) SET(URL http://ab-initio.mit.edu/nlopt/nlopt-2.2.4.tar.gz) SET(URL_MD5 9c60c6380a10c6d2a06895f0e8756d4f) SET(CONFIGURE_COMMAND 
../src/configure CC=${CMAKE_C_COMPILER} CFLAGS=${CMAKE_C_FLAGS} --prefix=) SET(BUILD_COMMAND make) SET(INSTALL_COMMAND make install) SET(LIBRARIES libnlopt.a) FreeFem-sources-4.9/cmake/modules/InstallOPENBLAS-WIN32.cmake000664 000000 000000 00000000251 14037356732 023434 0ustar00rootroot000000 000000 SET(LIBNAME OPENBLAS-WIN32) SET(URL http://downloads.sourceforge.net/project/openblas/v0.2.14/OpenBLAS-v0.2.14-Win32.zip) SET(URL_MD5 eefdf170439620d78fabb3139b7aeb2f) FreeFem-sources-4.9/cmake/modules/InstallOPENBLAS-WIN64.cmake000664 000000 000000 00000000257 14037356732 023447 0ustar00rootroot000000 000000 SET(LIBNAME OPENBLAS-WIN64) SET(URL http://downloads.sourceforge.net/project/openblas/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip) SET(URL_MD5 9f2d41076857a514b921bf0bf03b5d39) FreeFem-sources-4.9/cmake/modules/InstallOPENBLAS.cmake000664 000000 000000 00000000200 14037356732 022626 0ustar00rootroot000000 000000 SET(LIBNAME OPENBLAS) SET(URL http://github.com/xianyi/OpenBLAS/tarball/v0.2.12) SET(URL_MD5 dfc868e0c134855639f036d2723bf4be) FreeFem-sources-4.9/cmake/modules/InstallPARMETIS.cmake000664 000000 000000 00000000230 14037356732 022652 0ustar00rootroot000000 000000 SET(LIBNAME PARMETIS) SET(URL http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.3.tar.gz) SET(URL_MD5 f69c479586bf6bb7aff6a9bc0c739628) FreeFem-sources-4.9/cmake/modules/InstallPETSC.cmake000664 000000 000000 00000002076 14037356732 022316 0ustar00rootroot000000 000000 SET(LIBNAME PETSC) SET(URL http://ftp.mcs.anl.gov/pub/petsc/release-snapshots/petsc-lite-3.7.5.tar.gz) SET(URL_MD5 bfc7a5535d5c18c6ec81ab90f3ce5074) SET(CONFIGURE_COMMAND cd && ./configure --download-fftw --download-hypre --download-metis --download-ml --download-mpich --download-mumps --download-parmetis --download-pastix --download-ptscotch --download-scalapack --download-suitesparse --download-superlu PETSC-ARCH=arch-ff++ --prefix= CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} CFLAGS=${CMAKE_C_FLAGS} 
CXXFLAGS=${CMAKE_CXX_FLAGS}) SET(BUILD_COMMAND cd && make) SET(INSTALL_COMMAND cd && make install) FreeFem-sources-4.9/cmake/modules/InstallSCALAPACK.cmake000664 000000 000000 00000000203 14037356732 022710 0ustar00rootroot000000 000000 SET(LIBNAME SCALAPACK) SET(URL http://www.netlib.org/scalapack/scalapack-2.0.2.tgz) SET(URL_MD5 2f75e600a2ba155ed9ce974a1c4b536f) FreeFem-sources-4.9/cmake/modules/InstallSCOTCH.cmake000664 000000 000000 00000000402 14037356732 022412 0ustar00rootroot000000 000000 SET(LIBNAME SCOTCH) SET(URL https://gforge.inria.fr/frs/download.php/file/34618/scotch_6.0.4.tar.gz) SET(URL_MD5 d58b825eb95e1db77efe8c6ff42d329f) SET(CMAKE_ARGS -D CMAKE_INSTALL_PREFIX= -D CMAKE_C_COMPILER=${CMAKE_C_COMPILER}) FreeFem-sources-4.9/cmake/modules/InstallSUITESPARSE.cmake000664 000000 000000 00000000422 14037356732 023240 0ustar00rootroot000000 000000 SET(LIBNAME SUITESPARSE) SET(URL http://faculty.cse.tamu.edu/davis/SuiteSparse/SuiteSparse-5.4.0.tar.gz) SET(URL_MD5 4a6d4e74fc44c503f52996ae95cad03a) SET(PATCH_COMMAND cp ${CMAKE_SOURCE_DIR}/cmake/modules/suitesparse/CMakeLists.txt ) SET(INSTALL_COMMAND cd .) 
FreeFem-sources-4.9/cmake/modules/InstallSUPERLU.cmake000664 000000 000000 00000000562 14037356732 022575 0ustar00rootroot000000 000000
# Download recipe for SuperLU (CMake-based build), installed under the
# ExternalProject install directory (<INSTALL_DIR> placeholder).
# NOTE(review): BUILD_SHARED_LIBS=true is requested while LIBRARIES names the
# static archive libsuperlu.a; confirm which artefact is really produced.
SET(LIBNAME SUPERLU)
SET(URL http://crd-legacy.lbl.gov/~xiaoye/SuperLU/superlu_5.2.1.tar.gz)
SET(URL_MD5 3a1a9bff20cb06b7d97c46d337504447)
SET(CMAKE_ARGS -D CMAKE_C_COMPILER=${CMAKE_C_COMPILER}
               -D CMAKE_C_FLAGS=${CMAKE_C_FLAGS}
               -D CMAKE_INSTALL_PREFIX=<INSTALL_DIR>
               -D BUILD_SHARED_LIBS=true)
SET(LIBRARIES libsuperlu.a)
FreeFem-sources-4.9/cmake/modules/InstallTETGEN.cmake000664 000000 000000 00000000752 14037356732 022425 0ustar00rootroot000000 000000
# Download recipe for TetGen. The install step is manual: it copies libtet.a
# (from the build directory) and tetgen.h (from the sources) into the
# ExternalProject install tree, never into /lib or /include.
SET(LIBNAME TETGEN)
SET(URL http://www.tetgen.org/1.5/src/tetgen1.5.1-beta1.tar.gz)
SET(URL_MD5 3d55c197bcbfc611b7ced6f343643756)
SET(CMAKE_ARGS -D CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
               -D CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
               -D CMAKE_INSTALL_PREFIX=<INSTALL_DIR>)
SET(INSTALL_COMMAND mkdir -p <INSTALL_DIR>/lib
                    && cp libtet.a <INSTALL_DIR>/lib
                    && mkdir -p <INSTALL_DIR>/include
                    && cp <SOURCE_DIR>/tetgen.h <INSTALL_DIR>/include)
SET(LIBRARIES libtet.a)
FreeFem-sources-4.9/cmake/modules/PackageManagerPaths.cmake000664 000000 000000 00000000401 14037356732 023725 0ustar00rootroot000000 000000
# Default search locations used by the Find*.cmake modules.
LIST(APPEND PACKMAN_INCLUDE_PATHS /usr/include /opt/local/include)
# The Find modules read PACKMAN_LIBRARIES_PATHS, so that is the name that must
# be populated. PACKMAN_LIBRARY_PATHS is kept as well for any existing user.
LIST(APPEND PACKMAN_LIBRARIES_PATHS /usr/lib /usr/lib/x86_64-linux-gnu /opt/local/lib)
LIST(APPEND PACKMAN_LIBRARY_PATHS /usr/lib /usr/lib/x86_64-linux-gnu /opt/local/lib)
# This file is included once per Find module; drop the repeated entries.
LIST(REMOVE_DUPLICATES PACKMAN_INCLUDE_PATHS)
LIST(REMOVE_DUPLICATES PACKMAN_LIBRARIES_PATHS)
LIST(REMOVE_DUPLICATES PACKMAN_LIBRARY_PATHS)
FreeFem-sources-4.9/cmake/modules/ff_configure_cmake.cmake000664 000000 000000 00000000520 14037356732 023675 0ustar00rootroot000000 000000
# ff_configure_cmake()
#   Sets CMAKE_BUILD_FLAGS to "-j <n>" when the processor count is known and
#   defaults ENABLE_DOWNLOAD to False when it is unset or false.
#   Both variables are written with PARENT_SCOPE: a plain set() inside a
#   function is discarded when the function returns.
function(ff_configure_cmake)

  # make sure that "make" will run in parallel
  include(ProcessorCount)
  ProcessorCount(numprocs)
  if(NOT numprocs EQUAL 0)
    set(CMAKE_BUILD_FLAGS -j ${numprocs} PARENT_SCOPE)
  endif()

  if(NOT ENABLE_DOWNLOAD)
    set(ENABLE_DOWNLOAD False PARENT_SCOPE)
  endif(NOT ENABLE_DOWNLOAD)

endfunction(ff_configure_cmake)
FreeFem-sources-4.9/cmake/modules/ff_configure_compilers.cmake000664 000000 000000 00000002304 14037356732 024614 0ustar00rootroot000000 000000
# ff_configure_compilers()
#   Appends FreeFEM's compiler flags to CMAKE_C_FLAGS / CMAKE_CXX_FLAGS:
#   common flags, platform-specific defines, then Debug or Release flags
#   depending on CMAKE_BUILD_TYPE. Implemented as a macro so that the flags are
#   set directly in the caller's scope.
macro(ff_configure_compilers)

  # Use C++11 standard
  set(CMAKE_CXX_STANDARD 11)

  # Set default C and C++ compiler flags
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -DBAMG_LONG_LONG -DCMAKE -DHAVE_GETENV")

  # Set a special flag for Mac OS
  if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_UNISTD_H")
  endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")

  # Set a special flag for MinGW
  if(MINGW)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPURE_WIN32")
  endif(MINGW)

  # Manage Debug and Release modes.
  # STREQUAL compares strings; EQUAL is a numeric comparison and is never true
  # for "Debug", which silently forced the Release branch.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    # Append to the flags accumulated above instead of replacing them, so that
    # -fPIC and the platform defines survive. FF_C_FLAGS / FF_CXX_FLAGS are not
    # set in this module; they are still honoured if the caller defines them.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FF_C_FLAGS} -DCHECK_KN -g" )
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FF_CXX_FLAGS} -DCHECK_KN -g" )

    if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-inline -fexceptions" )
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-inline -fexceptions" )
    endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")

  else() # Release mode
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNCHECKPTR -O3" )
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNCHECKPTR -O3" )
  endif(CMAKE_BUILD_TYPE STREQUAL "Debug")

endmacro(ff_configure_compilers)
FreeFem-sources-4.9/cmake/modules/ff_configure_thirdparty.cmake000664 000000 000000 00000000442 14037356732 025012 0ustar00rootroot000000 000000
# ff_configure_thirdparty()
#   Loads the helper modules, then runs the required and the optional
#   third-party package detection, in that order.
macro(ff_configure_thirdparty)

  include(ff_find_package)
  include(ff_install_package)
  include(ff_configure_thirdparty_required)
  include(ff_configure_thirdparty_optional)

  ff_configure_thirdparty_required()
  ff_configure_thirdparty_optional()

endmacro(ff_configure_thirdparty)
FreeFem-sources-4.9/cmake/modules/ff_configure_thirdparty_optional.cmake000664 000000 000000 00000001277 14037356732 026726 0ustar00rootroot000000 000000
# ff_configure_thirdparty_optional()
#   Calls ff_find_package() for every optional package; a missing package is
#   not an error here.
macro(ff_configure_thirdparty_optional)

  include(ff_find_package)

  set(FF_THIRDPARTY_OPTIONAL CBLAS
                             FFTW
                             IPOPT
                             LAPACK
                             METIS
                             MPI
                             MUMPS
                             NLOPT
                             SCOTCH
                             SUPERLU
                             TETGEN
                             UMFPACK
                             Threads)

  foreach(PACKAGE ${FF_THIRDPARTY_OPTIONAL})
    ff_find_package(${PACKAGE})
  endforeach(PACKAGE ${FF_THIRDPARTY_OPTIONAL})

endmacro(ff_configure_thirdparty_optional)
FreeFem-sources-4.9/cmake/modules/ff_configure_thirdparty_required.cmake000664 000000 000000 00000001576 14037356732 026723
0ustar00rootroot000000 000000 macro(ff_configure_thirdparty_required) include(ff_find_package) set(FF_THIRDPARTY_REQUIRED AMD ARPACK CAMD CCOLAMD CHOLMOD COLAMD DLOPEN FLEX #OpenGL #GLUT GSL HDF5 SUITESPARSE SUITESPARSECONFIG) foreach(PACKAGE ${FF_THIRDPARTY_REQUIRED}) ff_find_package(${PACKAGE} REQUIRED) if(NOT FF_${PACKAGE}_FOUND) message(SEND_ERROR "Required package ${PACKAGE} is missing") endif(NOT FF_${PACKAGE}_FOUND) endforeach(PACKAGE ${FF_THIRDPARTY_REQUIRED}) endmacro(ff_configure_thirdparty_required) FreeFem-sources-4.9/cmake/modules/ff_create_strversionnumber.cmake000664 000000 000000 00000002061 14037356732 025530 0ustar00rootroot000000 000000 function(ff_create_strversionnumber) execute_process(COMMAND "date" OUTPUT_VARIABLE VersionFreeFemDate) string(STRIP ${VersionFreeFemDate} VersionFreeFemDate) file(WRITE ${CMAKE_SOURCE_DIR}/src/fflib/strversionnumber.cpp "// TODO: remove this block as soon as autoconf is removed from FreeFEM\n #ifndef CMAKE\n #include \"config-wrapper.h\"\n #endif\n \n #include \"strversionnumber.hpp\" // [[file:strversionnumber.hpp]] \n #include \n using namespace std;\n #define TOSTRING1(i) #i \n #define TOSTRING(i) TOSTRING1(i) \n \n //#include \n #include \n using namespace std;\n \n double VersionNumber(){\n #define VersionFreeFem ${FREEFEM_VERSION} // Generated by CMake \n return VersionFreeFem;\n }\n \n string StrVersionNumber(){\n // std::ostringstream buffer;\n // buffer.precision(8);\n // buffer< defines X in 'gcc -I X') # 3. define associated libraries which are defined in other CMake scripts (target_link_libraries -> defines X in 'gcc -l X') # 4. 
define what to do when running 'make install' (install) add_executable(FreeFem++ ${CMAKE_SOURCE_DIR}/src/Graphics/sansrgraph.cpp ${CMAKE_SOURCE_DIR}/src/mpi/parallelempi-empty.cpp ${CMAKE_SOURCE_DIR}/src/fflib/ffapi.cpp) include_directories(${CMAKE_SOURCE_DIR}/src/lglib ${CMAKE_SOURCE_DIR}/src/fflib ${CMAKE_SOURCE_DIR}/src/Graphics) if(MINGW) target_link_libraries(FreeFem++ Comdlg32 lglib libff) else() target_link_libraries(FreeFem++ dl lglib libff) endif() install(TARGETS FreeFem++ RUNTIME DESTINATION bin) endmacro() FreeFem-sources-4.9/cmake/modules/ff_define_freefem_mpi_executable.cmake000664 000000 000000 00000001515 14037356732 026552 0ustar00rootroot000000 000000 macro(ff_define_freefem_mpi_executable) # Steps are the same as those required for FreeFem executable # (see ff_define_freefem_executable for an explanation) if(MPI_FOUND) add_executable(FreeFem++-mpi ${CMAKE_SOURCE_DIR}/src/mpi/parallelempi.cpp ${CMAKE_SOURCE_DIR}/src/Graphics/sansrgraph.cpp ${CMAKE_SOURCE_DIR}/src/fflib/ffapi.cpp) target_compile_definitions(FreeFem++-mpi PRIVATE PARALLELE) include_directories(${MPI_CXX_INCLUDE_PATH} ${CMAKE_SOURCE_DIR}/src/femlib) target_link_libraries(FreeFem++-mpi lglib libff ${MPI_CXX_LIBRARIES}) install(TARGETS FreeFem++-mpi RUNTIME DESTINATION bin) endif() endmacro() FreeFem-sources-4.9/cmake/modules/ff_define_libff_library.cmake000664 000000 000000 00000007726 14037356732 024713 0ustar00rootroot000000 000000 macro(ff_define_libff_library) include(ff_define_strversionnumber_library) ff_define_strversionnumber_library() # Get paths to cpp files in femlib and fflib and put them in LIBFF_SRC file(GLOB FEMLIB_SRC ${CMAKE_SOURCE_DIR}/src/femlib/*.cpp) file(GLOB_RECURSE FFLIB_SRC ${CMAKE_SOURCE_DIR}/src/fflib/*.cpp) set(LIBFF_SRC ${FEMLIB_SRC} ${FFLIB_SRC}) # Remove spurious cpp files from LIBFF_SRC list(REMOVE_ITEM LIBFF_SRC ${CMAKE_SOURCE_DIR}/src/bamglib/Meshgibbs.cpp ${CMAKE_SOURCE_DIR}/src/femlib/ConjuguedGradrientNL.cpp 
${CMAKE_SOURCE_DIR}/src/femlib/FESpace-v0.cpp ${CMAKE_SOURCE_DIR}/src/femlib/glutdraw.cpp ${CMAKE_SOURCE_DIR}/src/femlib/InvIntFunc.cpp ${CMAKE_SOURCE_DIR}/src/femlib/mortar.cpp ${CMAKE_SOURCE_DIR}/src/femlib/Pkorder.cpp ${CMAKE_SOURCE_DIR}/src/femlib/P3korder.cpp ${CMAKE_SOURCE_DIR}/src/fflib/ffapi.cpp ${CMAKE_SOURCE_DIR}/src/fflib/strversionnumber.cpp) # Add other required cpp files to LIBFF_SRC list(APPEND LIBFF_SRC ${CMAKE_SOURCE_DIR}/src/Algo/lgalgo.cpp ${CMAKE_SOURCE_DIR}/src/Eigen/eigenvalue.cpp ${CMAKE_SOURCE_DIR}/src/femlib/libmesh5.c ${CMAKE_SOURCE_DIR}/src/Graphics/DefColor.cpp) # Set a required definition for C++ compilers on systems other than Windows/MINGW if(DLOPEN_FOUND AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT MINGW) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D HAVE_DLFCN_H") endif() # Directories containing required headers include_directories(${CMAKE_SOURCE_DIR}/src/bamglib ${CMAKE_SOURCE_DIR}/src/fflib/ ${CMAKE_SOURCE_DIR}/src/Graphics/ ${CMAKE_SOURCE_DIR}/src/lglib/ ${CMAKE_SOURCE_DIR}/src/femlib) # Definition of libff library add_library(libff ${LIBFF_SRC}) if(FF_SUITESPARSE_FOUND AND FF_CHOLMOD_FOUND) # Compilation definitions if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") TARGET_COMPILE_DEFINITIONS(libff PRIVATE HAVE_LIBUMFPACK PRIVATE HAVE_SUITESPARSE_UMFPACK_H) else() TARGET_COMPILE_DEFINITIONS(libff PRIVATE HAVE_LIBUMFPACK PRIVATE HAVE_UMFPACK_H) endif() # Required libraries for linking libff if(MINGW) find_package(OpenMP) string(REPLACE "C:/msys64" "" OpenMP_C_LIBRARIES "${OpenMP_C_LIBRARIES}") target_link_libraries(libff bamglib Comdlg32 strversionnumber FREEFEM::SUITESPARSE FREEFEM::CHOLMOD FREEFEM::AMD FREEFEM::CAMD FREEFEM::CCOLAMD FREEFEM::COLAMD FREEFEM::METIS FREEFEM::SUITESPARSECONFIG ${FF_LAPACK_LIBRARIES} ${OpenMP_C_LIBRARIES}) else() target_link_libraries(libff bamglib dl strversionnumber FREEFEM::SUITESPARSE FREEFEM::CHOLMOD) endif(MINGW) else() if(MINGW) target_link_libraries(libff bamglib Comdlg32 strversionnumber) 
else()
      target_link_libraries(libff bamglib dl strversionnumber)
    endif(MINGW)

  endif()

  # Remove lib prefix from the name of the library
  # (libff instead of liblibff)
  set_target_properties(libff PROPERTIES PREFIX "")

endmacro()
FreeFem-sources-4.9/cmake/modules/ff_define_strversionnumber_library.cmake000664 000000 000000 00000000422 14037356732 027242 0ustar00rootroot000000 000000
# ff_define_strversionnumber_library()
#   Generates strversionnumber.cpp and builds it as the static library
#   `strversionnumber`, with VersionFreeFem defined to ${FREEFEM_VERSION}.
macro(ff_define_strversionnumber_library)

  include(ff_create_strversionnumber)
  ff_create_strversionnumber()

  # ff_create_strversionnumber() writes the file into
  # ${CMAKE_SOURCE_DIR}/src/fflib; name it by absolute path so the target does
  # not depend on which directory's CMakeLists.txt invokes this macro.
  add_library(strversionnumber STATIC ${CMAKE_SOURCE_DIR}/src/fflib/strversionnumber.cpp)
  target_compile_definitions(strversionnumber PRIVATE VersionFreeFem=${FREEFEM_VERSION})

endmacro()
FreeFem-sources-4.9/cmake/modules/ff_find_package.cmake000664 000000 000000 00000003221 14037356732 023150 0ustar00rootroot000000 000000
# ff_find_package(PACKAGE [args...])
#   Wrapper around find_package(); extra arguments (e.g. REQUIRED) are passed
#   through. When the package is found it
#     * gathers the include directories and libraries from the various result
#       variable spellings used by Find modules,
#     * exports FF_<PACKAGE>_FOUND, FF_<PACKAGE>_INCLUDE_PATHS and
#       FF_<PACKAGE>_LIBRARIES to the caller's scope,
#     * defines the imported target FREEFEM::<PACKAGE>.
function(ff_find_package PACKAGE)

  find_package(${PACKAGE} ${ARGN})

  if(${PACKAGE}_FOUND)

    # A function scope starts with copies of the caller's variables: begin from
    # empty lists so unrelated INCLUDE_PATHS / LIBRARIES values cannot leak in.
    set(INCLUDE_PATHS "")
    set(LIBRARIES "")

    # Unifying variables names for include directory paths
    # (_INCLUDE_DIR / _INCLUDE_DIRS are the spellings of CMake's own modules)
    set(POSSIBLE_INCLUDE_PATH_VARIABLES "${PACKAGE}_INCLUDE"
                                        "${PACKAGE}_INCLUDES"
                                        "${PACKAGE}_INCLUDE_DIR"
                                        "${PACKAGE}_INCLUDE_DIRS"
                                        "${PACKAGE}_INCLUDE_PATH"
                                        "${PACKAGE}_INCLUDE_PATHS"
                                        "${PACKAGE}_C_INCLUDE_PATHS")

    # "IN LISTS <names>" iterates over the *contents* of each named variable
    foreach(INCLUDE_PATH_VARIABLE IN LISTS ${POSSIBLE_INCLUDE_PATH_VARIABLES})
      list(APPEND INCLUDE_PATHS ${INCLUDE_PATH_VARIABLE})
    endforeach(INCLUDE_PATH_VARIABLE)
    if(INCLUDE_PATHS)
      list(REMOVE_DUPLICATES INCLUDE_PATHS)
    endif()

    # Unifying variables names for library directory paths
    set(POSSIBLE_LIBRARIES_VARIABLES "${PACKAGE}_LIBRARY"
                                     "${PACKAGE}_LIBRARIES"
                                     "${PACKAGE}_C_LIBRARY"
                                     "${PACKAGE}_C_LIBRARIES")

    foreach(LIBRARIES_VARIABLE IN LISTS ${POSSIBLE_LIBRARIES_VARIABLES})
      list(APPEND LIBRARIES ${LIBRARIES_VARIABLE})
    endforeach(LIBRARIES_VARIABLE)
    if(LIBRARIES)
      list(REMOVE_DUPLICATES LIBRARIES)
    endif()

    # Return unified names
    set(FF_${PACKAGE}_FOUND TRUE PARENT_SCOPE)
    set(FF_${PACKAGE}_INCLUDE_PATHS ${INCLUDE_PATHS} PARENT_SCOPE)
    set(FF_${PACKAGE}_LIBRARIES ${LIBRARIES} PARENT_SCOPE)

    # Imported target; skipped when an earlier call already created it, since
    # add_library() fails on a duplicate target name.
    if(NOT TARGET FREEFEM::${PACKAGE})
      if(LIBRARIES)
        add_library(FREEFEM::${PACKAGE} UNKNOWN IMPORTED)

        # IMPORTED_LOCATION holds a single file: the first library goes there
        # and the remaining ones become link dependencies, so none is dropped.
        list(GET LIBRARIES 0 MAIN_LIBRARY)
        set(EXTRA_LIBRARIES ${LIBRARIES})
        list(REMOVE_AT EXTRA_LIBRARIES 0)

        set_target_properties(FREEFEM::${PACKAGE} PROPERTIES
                              IMPORTED_LOCATION "${MAIN_LIBRARY}")
        if(EXTRA_LIBRARIES)
          set_target_properties(FREEFEM::${PACKAGE} PROPERTIES
                                INTERFACE_LINK_LIBRARIES "${EXTRA_LIBRARIES}")
        endif()
      else()
        # Header-only package: nothing to link, only usage requirements
        add_library(FREEFEM::${PACKAGE} INTERFACE IMPORTED)
      endif()

      set_target_properties(FREEFEM::${PACKAGE} PROPERTIES
                            INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_PATHS}")
    endif()

  endif(${PACKAGE}_FOUND)

endfunction(ff_find_package)
FreeFem-sources-4.9/cmake/modules/ff_install_package.cmake000664 000000 000000 00000006464 14037356732 023712 0ustar00rootroot000000 000000
FUNCTION(ff_install_package LIBRARY_NAME)

  IF(ENABLE_DOWNLOAD)

    INCLUDE(ExternalProject)

    IF(NOT FF_DOWNLOAD_DIR)
      SET(FF_DOWNLOAD_DIR ${CMAKE_SOURCE_DIR}/download)
    ENDIF(NOT FF_DOWNLOAD_DIR)

    FILE(MAKE_DIRECTORY ${FF_DOWNLOAD_DIR}/include)
    FILE(MAKE_DIRECTORY ${FF_DOWNLOAD_DIR}/include/coin)

    INCLUDE(Install${LIBRARY_NAME})

    STRING(TOLOWER ${LIBRARY_NAME} LIBRARY_LOWERNAME)
    STRING(TOUPPER ${LIBRARY_NAME} LIBRARY_UPPERNAME)

    SET(${LIBRARY_UPPERNAME}_PREFIX ${FF_DOWNLOAD_DIR}/${LIBRARY_LOWERNAME} )
    SET(${LIBRARY_UPPERNAME}_DOWNLOAD_DIR ${FF_DOWNLOAD_DIR}/pkg)
    SET(${LIBRARY_UPPERNAME}_SOURCE_DIR ${${LIBRARY_UPPERNAME}_PREFIX}/src)
    SET(${LIBRARY_UPPERNAME}_BINARY_DIR ${${LIBRARY_UPPERNAME}_PREFIX}/build)
    SET(${LIBRARY_UPPERNAME}_TMP_DIR ${${LIBRARY_UPPERNAME}_PREFIX}/tmp)
    SET(${LIBRARY_UPPERNAME}_STAMP_DIR ${${LIBRARY_UPPERNAME}_PREFIX}/stamp)
    SET(${LIBRARY_UPPERNAME}_INSTALL_DIR ${FF_DOWNLOAD_DIR})

    EXTERNALPROJECT_ADD(freefem_${LIBRARY_LOWERNAME}
                        PREFIX ${${LIBRARY_UPPERNAME}_PREFIX}
                        DOWNLOAD_DIR ${${LIBRARY_UPPERNAME}_DOWNLOAD_DIR}
                        SOURCE_DIR ${${LIBRARY_UPPERNAME}_SOURCE_DIR}
                        BINARY_DIR ${${LIBRARY_UPPERNAME}_BINARY_DIR}
                        TMP_DIR ${${LIBRARY_UPPERNAME}_TMP_DIR}
                        STAMP_DIR ${${LIBRARY_UPPERNAME}_STAMP_DIR}
                        INSTALL_DIR ${${LIBRARY_UPPERNAME}_INSTALL_DIR}
                        URL ${URL}
                        URL_MD5 ${URL_MD5}
                        CMAKE_ARGS ${CMAKE_ARGS}
                        CMAKE_COMMAND ${CMAKE_COMMAND}
                        PATCH_COMMAND ${PATCH_COMMAND}
                        CONFIGURE_COMMAND ${CONFIGURE_COMMAND}
                        BUILD_COMMAND ${BUILD_COMMAND}
                        INSTALL_COMMAND ${INSTALL_COMMAND}
                        LOG_UPDATE 1
                        LOG_TEST 1
                        )

    SET(${LIBRARY_UPPERNAME}_INSTALL_DIR ${${LIBRARY_UPPERNAME}_PREFIX}/install PARENT_SCOPE)

    # set default value to INCLUDE_PATHS if not defined
    IF (NOT INCLUDE_PATHS)
      SET(INCLUDE_PATHS ${FF_DOWNLOAD_DIR}/include)
    ENDIF(NOT INCLUDE_PATHS)

    # set
default value to LIBRARY_PATHS if not defined IF (NOT LIBRARY_PATHS) SET(LIBRARY_PATHS ${FF_DOWNLOAD_DIR}/lib) ENDIF(NOT LIBRARY_PATHS) # return useful variables to compile and link against the library SET(FREEFEM_${LIBRARY_UPPERNAME}_INSTALLED TRUE PARENT_SCOPE) SET(FREEFEM_${LIBRARY_UPPERNAME}_INCLUDE_PATHS ${INCLUDE_PATHS} PARENT_SCOPE) SET(FREEFEM_${LIBRARY_UPPERNAME}_LIBRARY_PATHS ${LIBRARY_PATHS} PARENT_SCOPE) # create an imported target ADD_LIBRARY(FREEFEM::${LIBRARY_UPPERNAME} UNKNOWN IMPORTED) STRING(CONCAT LIBRARY_PATHS ${LIBRARY_PATHS} "/" ${LIBRARIES}) SET_TARGET_PROPERTIES(FREEFEM::${LIBRARY_UPPERNAME} PROPERTIES IMPORTED_LOCATION ${LIBRARY_PATHS}) SET_TARGET_PROPERTIES(FREEFEM::${LIBRARY_UPPERNAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_PATHS}") ADD_DEPENDENCIES(FREEFEM::${LIBRARY_UPPERNAME} freefem_${LIBRARY_LOWERNAME}) IF(FREEFEM_VERBOSE) MESSAGE(STATUS "FREEFEM_INSTALL_PACKAGE: INCLUDE_PATHS=${INCLUDE_PATHS}") MESSAGE(STATUS "FREEFEM_INSTALL_PACKAGE: LIBRARY_PATHS=${LIBRARY_PATHS}") ENDIF(FREEFEM_VERBOSE) ENDIF(ENABLE_DOWNLOAD) ENDFUNCTION(ff_install_package) FreeFem-sources-4.9/cmake/modules/ff_write_config_file.cmake000664 000000 000000 00000001567 14037356732 024246 0ustar00rootroot000000 000000 function(ff_write_config_file) # Filename set(FILENAME "cmake-config.inc") # Header file(WRITE ${CMAKE_BINARY_DIR}/${FILENAME} "# Paths found by CMake\n") # Compilers file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} "CMAKE_C_COMPILER ${CMAKE_C_COMPILER}\n") file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} "CMAKE_C_FLAGS ${CMAKE_C_FLAGS}\n") file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} "CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER}\n") file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} "CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}\n") # Third-party libraries foreach(PACKAGE ${MODULE_LIST}) file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} "FREEFEM_${PACKAGE}_INCLUDE_PATHS ${FREEFEM_${PACKAGE}_INCLUDE_PATHS}\n") file(APPEND ${CMAKE_BINARY_DIR}/${FILENAME} 
"FREEFEM_${PACKAGE}_LIBRARIES ${FREEFEM_${PACKAGE}_LIBRARIES}\n") endforeach(PACKAGE) endfunction(ff_write_config_file) FreeFem-sources-4.9/cmake/modules/mumps/000775 000000 000000 00000000000 14037356732 020243 5ustar00rootroot000000 000000 FreeFem-sources-4.9/cmake/modules/mumps/CMakeLists.txt000664 000000 000000 00000001376 14037356732 023012 0ustar00rootroot000000 000000 CMAKE_MINIMUM_REQUIRED(VERSION 2.8) PROJECT(mumps C Fortran) FIND_PACKAGE(MPI REQUIRED) SET(C_FLAGS ${CMAKE_C_FLAGS}) FILE(GLOB C_FILES ${CMAKE_SOURCE_DIR}/src/*.c) FILE(GLOB FORTRAN_FILES ${CMAKE_SOURCE_DIR}/src/*.F) INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/src ${MPI_Fortran_INCLUDE_PATH}) SET(CMAKE_C_FLAGS "${C_FLAGS} -D MUMPS_ARITH=MUMPS_ARITH_s") ADD_LIBRARY(smumps ${C_FILES} ${FORTRAN_FILES}) SET(CMAKE_C_FLAGS "${C_FLAGS} -D MUMPS_ARITH=MUMPS_ARITH_d") ADD_LIBRARY(dmumps ${C_FILES} ${FORTRAN_FILES}) SET(CMAKE_C_FLAGS "${C_FLAGS} -D MUMPS_ARITH=MUMPS_ARITH_c") ADD_LIBRARY(cmumps ${C_FILES} ${FORTRAN_FILES}) SET(CMAKE_C_FLAGS "${C_FLAGS} -D MUMPS_ARITH=MUMPS_ARITH_z") ADD_LIBRARY(zmumps ${C_FILES} ${FORTRAN_FILES}) FreeFem-sources-4.9/cmake/modules/scotch/000775 000000 000000 00000000000 14037356732 020365 5ustar00rootroot000000 000000 FreeFem-sources-4.9/cmake/modules/scotch/CMakeLists.txt000664 000000 000000 00000025172 14037356732 023134 0ustar00rootroot000000 000000 CMAKE_MINIMUM_REQUIRED(VERSION 2.8) PROJECT(scotch C) FILE(GLOB CFILES ${CMAKE_SOURCE_DIR}/src/libscotch/*.c) LIST(REMOVE_ITEM CFILES ${CMAKE_SOURCE_DIR}/src/libscotch/bgraph_bipart_df_loop.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_bd.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_df.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_df_loop.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_ex.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_ml.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_sq.c 
${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_st.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_bipart_zr.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_check.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_gather_all.c ${CMAKE_SOURCE_DIR}/src/libscotch/bdgraph_store.c ${CMAKE_SOURCE_DIR}/src/libscotch/comm.c ${CMAKE_SOURCE_DIR}/src/libscotch/common_sort.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_allreduce.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_band.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_band_grow.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_build.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_build_grid3d.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_build_hcub.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_check.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_coarsen.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_fold.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_fold_comm.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_fold_dup.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_gather.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_gather_all.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_ghst.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_halo.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_halo_fill.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_io_load.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_io_save.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_induce.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_match.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_match_check.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_match_scan.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_match_sync_coll.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_match_sync_ptop.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_redist.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_scatter.c ${CMAKE_SOURCE_DIR}/src/libscotch/dgraph_view.c ${CMAKE_SOURCE_DIR}/src/libscotch/dmapping.c ${CMAKE_SOURCE_DIR}/src/libscotch/dmapping_io.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder_gather.c 
${CMAKE_SOURCE_DIR}/src/libscotch/dorder_io.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder_io_block.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder_io_tree.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder_perm.c ${CMAKE_SOURCE_DIR}/src/libscotch/dorder_tree_dist.c ${CMAKE_SOURCE_DIR}/src/libscotch/graph_coarsen.c ${CMAKE_SOURCE_DIR}/src/libscotch/graph_coarsen_edge.c ${CMAKE_SOURCE_DIR}/src/libscotch/graph_match_scan.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_check.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_fold.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_gather.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_order_io.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_order_nd.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_order_si.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_order_sq.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_order_st.c ${CMAKE_SOURCE_DIR}/src/libscotch/hdgraph_induce.c ${CMAKE_SOURCE_DIR}/src/libscotch/hgraph_induce.c ${CMAKE_SOURCE_DIR}/src/libscotch/hgraph_induce_edge.c ${CMAKE_SOURCE_DIR}/src/libscotch/kgraph_map_df_loop.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph_gather.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph_map_rb.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph_map_rb_map.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph_map_rb_part.c ${CMAKE_SOURCE_DIR}/src/libscotch/kdgraph_map_st.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_band.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_band_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_band_grow.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_build.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_build_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_build_grid3d.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_build_grid3d_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_check.c 
${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_check_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_coarsen.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_coarsen_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_gather.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_gather_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_grow.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_halo.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_halo_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_induce.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_induce_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_load.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_load_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_save.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_save_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_stat_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_io_map_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_map.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_map_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_map_view.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_map_view_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_stat_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_gather.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_gather_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_io.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_io_block.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_io_block_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_io_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_perm.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_perm_f.c ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_tree_dist.c 
${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_order_tree_dist_f.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_redist.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_redist_f.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_scatter.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_scatter_f.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dgraph_stat.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dmapping.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_dorder.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/library_error.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_check.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_gather_all.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_bd.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_df.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_ml.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_sq.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_st.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_separate_zr.c
  ${CMAKE_SOURCE_DIR}/src/libscotch/vdgraph_store.c)

# libscotch: every remaining globbed source
ADD_LIBRARY(scotch ${CFILES})

INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/src/libscotch)

# libscotcherr: the error handler, built on its own from library_error.c
# (which is removed from the libscotch source list above)
ADD_LIBRARY(scotcherr ${CMAKE_SOURCE_DIR}/src/libscotch/library_error.c)
FreeFem-sources-4.9/cmake/modules/suitesparse/000775 000000 000000 00000000000 14037356732 021451 5ustar00rootroot000000 000000
FreeFem-sources-4.9/cmake/modules/suitesparse/CMakeLists.txt000664 000000 000000 00000000723 14037356732 024213 0ustar00rootroot000000 000000
# Builds UMFPACK from the SuiteSparse sources as a shared library.
# TARGET_INCLUDE_DIRECTORIES, used below, first appeared in CMake 2.8.11, so
# that is the real minimum; declaring it makes an older CMake stop with a
# clear version error instead of an "unknown command" failure.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11)

PROJECT(suitesparse C)

# NOTE(review): -O3 is an optimisation flag, not a preprocessor definition;
# ADD_DEFINITIONS forwards it to the compiler unchanged. Kept as is.
ADD_DEFINITIONS(-O3)

FILE(GLOB UMFPACK_FILES UMFPACK/Source/*.c)

ADD_LIBRARY(umfpack SHARED ${UMFPACK_FILES})

TARGET_INCLUDE_DIRECTORIES(umfpack PRIVATE UMFPACK/Source UMFPACK/Include AMD/Include SuiteSparse_config CHOLMOD/Include)
FreeFem-sources-4.9/configure.ac000664 000000 000000 00000267354 14037356732 016661 0ustar00rootroot000000 000000
# ------------------------------------------------------------
# Antoine Le Hyaric - LJLL Paris 6 -
lehyaric@ann.jussieu.fr - 13/5/04 # $Id: configure.ac,v 1.338 2010/05/10 21:35:07 hecht Exp $ # Version numbering: x.xx-pp where "pp" is the package version (when # the same FreeFEM version is packaged several times). For # coherency, this should have the same value as the top-most package # number in debian/changelog. AC_INIT(FreeFEM,4.9,frederic.hecht@sorbonne-universite.fr,FreeFEM) dnl : ${CFLAGS=""} dnl : ${CXXFLAGS=""} dnl : ${FCFLAGS=""} dnl : ${FFLAGS=""} AC_CONFIG_MACRO_DIR([etc/config/m4]) # progg test exec for windows ff_TEST_FFPP="../../src/nw/FreeFem++" ff_TEST_FFPP_MPI="../../src/mpi/FreeFem++-mpi" ff_uncompile_plugin="" ff_unlib_plugin="" ff_download_lib="" AC_PREREQ(2.63) dnl for AC_LANG_CASE and check stuff dnl AC_CONFIG_SRCDIR(src/FreeFem++-CoCoa) # Automake 1.11 is too old for check ... # ALH - 10/9/13 - FFCS needs to skip this automake version requirement when compiling on older platforms (eg cygwin or # last Ubuntu LTS). So we fix a low hardcoded requirement and test for higher versions when not in FFCS. 
AM_INIT_AUTOMAKE(1.11 dist-zip foreign subdir-objects) if test "$enable_ffcs" != yes then if test "$am__api_version" \< 1.13 ; then AC_MSG_ERROR([Automake version $am__api_version needs to be 1.13 or later to enable "make check"]) fi fi AC_CONFIG_HEADERS(config.h) AC_PROG_MAKE_SET dnl AM_COLOR_TESTS=always AC_PROG_RANLIB inc_usr_include="" ff_where_lib_conf=plugin/seq/WHERE_LIBRARY-config dnl search of associad software m4_define([AC_FF_ADDWHERELIB], [ if test -z "$ff_where_lib_conf_$1" ; then echo "$1 LD $2" >>$ff_where_lib_conf test -n "$3" && echo "$1 INCLUDE $3 " >>$ff_where_lib_conf ff_where_lib_conf_$1=1 ff_where_lib_$1="$2" ff_where_inc_$1="$3" AC_MSG_NOTICE([ ++ add $1 : $2 ; $3 in $ff_where_lib_conf "]) else AC_MSG_NOTICE([ -- do not add $1 : $2 $3 in $ff_where_lib_conf "]) fi ]) dnl m4_define([AC_FF_WHERELIB], [ if test -z "$ff_where_lib_conf_$1" -a "$enable_download_$1" != "yes" ; then AC_MSG_CHECKING(check $1) ff_save_libs=$LIBS LIBS="$LIBS $2 $4" AC_LINK_IFELSE([AC_LANG_PROGRAM([m4_if($3,,,[#include <$3>])],[])], [ff_WHERE=yes],[ff_WHERE=no]) if test "$ff_WHERE" = "yes" ; then echo "$1 LD $2" >>$ff_where_lib_conf ff_WHERE_INC=`AS_DIRNAME(["$3"])` case "$ff_WHERE_INC" in /*) echo "$1 INCLUDE -I$ff_WHERE_INC" >>$ff_where_lib_conf ;; esac ff_where_lib_conf_$1=1 fi AC_MSG_RESULT($ff_WHERE) ff_$1_ok=$ff_WHERE; LIBS=$ff_save_libs fi ])dnl end m4_define AC_FF_WHERELIB define([AC_FF_PETSC_AWK],[awk -F'=' '"$1 "==[$]1 {print [$]2}' $2]) m4_define([AC_FF_PETSC_WHERELIB], [ ff_$2=`AC_FF_PETSC_AWK($2,$4)` ff_$3=`AC_FF_PETSC_AWK($3,$4)` if test -n "[$]ff_$2"; then ff_$1_ok=yes; m4_ifval($5,ff_$2="$ff_$2 $5"; echo " **** add $5 to ff_$2 ******", ) AC_FF_ADDWHERELIB($1,[$]ff_$2,[$]ff_$3) AC_SUBST([TOOL_COMPILE_$1],"") fi ] )dnl end m4_defineAC_FF_PETSC_WHERELIB m4_define([AC_FF_PETSC_WHERELIB_BASIC],# PETSC_WITH_EXTERNAL_LIB,PETSC_CC_INCLUDES [ ff_PETSC_EXTERNAL_LIB_BASIC=`AC_FF_PETSC_AWK(PETSC_EXTERNAL_LIB_BASIC,$2)` 
ff_PETSC_LIB_BASIC=`AC_FF_PETSC_AWK(PETSC_LIB_BASIC,$2)`
    ff_PETSC_PREFIXDIR=`awk -F' ' '"PREFIXDIR"==[$]1 {print [$]3}' $2`
    ff_PETSC_WITH_EXTERNAL_LIB="-Wl,-rpath,[$]ff_PETSC_PREFIXDIR/lib -L[$]ff_PETSC_PREFIXDIR/lib [$]ff_PETSC_LIB_BASIC [$]ff_PETSC_EXTERNAL_LIB_BASIC"
    ff_PETSC_CC_INCLUDES=`AC_FF_PETSC_AWK(PETSC_CC_INCLUDES,$2)`
    if test -n "[$]ff_PETSC_WITH_EXTERNAL_LIB"; then
	ff_$1_ok=yes;
	AC_FF_ADDWHERELIB($1,[$]ff_PETSC_WITH_EXTERNAL_LIB,[$]ff_PETSC_CC_INCLUDES)
	AC_SUBST([TOOL_COMPILE_$1],"")
    fi
]
)dnl end m4_defineAC_FF_PETSC_WHERELIB_BASIC

# ALH - this is required by FFCS that needs to deactivate some tools that do not work on all platforms. Some FF users
# may also find interesting to specify a local version of a tool instead of downloading it.

# m4 macro parameters: $1 = tool name, $2 = dynamic library name, $3 = download directory name if different from $1

dnl TOOL_PARAMETERS(tool, dylib, dir): declares --with-<tool>-include,
dnl --with-<tool>-ldflags and --enable-download-<tool>. When either --with
dnl value is given, it is registered through AC_FF_ADDWHERELIB and the tool
dnl is neither compiled locally nor downloaded.
m4_define([TOOL_PARAMETERS],
    [
	AC_ARG_WITH($1_include,AC_HELP_STRING([--with-$1-include=],[Include directives for $1 instead of automatic download]))
	AC_ARG_WITH($1_ldflags,AC_HELP_STRING([--with-$1-ldflags=],[Link-time directives for $1 instead of automatic download]))
	AC_ARG_ENABLE(download-$1,AC_HELP_STRING([--enable-download_$1],[force the download of $1]))
	if test "$with_$1_include" != "" || test "$with_$1_ldflags" != ""
	then
	    # some directives have been specified, use them instead of downloading
	    AC_FF_ADDWHERELIB($1,$with_$1_ldflags,$with_$1_include)
	    AC_SUBST([TOOL_COMPILE_$1],"")
	    enable_$1_download=no
	fi
    ])

dnl TOOL_DISABLE(tool, dylib, dir, nodownload): declares --disable-<tool> and
dnl decides whether the tool plugin is dropped, taken from an already detected
dnl installation, or scheduled for download and local compilation. Dropped
dnl plugins are recorded in ff_uncompile_plugin and ff_unlib_plugin.
m4_define([TOOL_DISABLE],
    [AC_ARG_ENABLE($1,AC_HELP_STRING([--disable-$1],[Do not use $1]))
	if test "$enable_$1" = "no"
	then
	    AC_SUBST([TOOL_COMPILE_$1],"")
	    AC_SUBST([TOOL_DYLIB_$1],"")
	    # Portable append: the += operator is a bash extension that a plain POSIX sh such as dash rejects
	    ff_uncompile_plugin="$ff_uncompile_plugin "$2
	    ff_unlib_plugin="$ff_unlib_plugin $1"
	elif test "$ff_$1_ok" = "yes" ; then
	    AC_SUBST([TOOL_DYLIB_$1],$2)
	    enable_$1_download=no
	else
	    if test "$enable_download" != "yes" -a "$enable_download_$1" != "yes" ; then
		AC_SUBST([TOOL_COMPILE_$1],"")
		# Portable append, same reason as above
		ff_uncompile_plugin="$ff_uncompile_plugin "$2
		ff_unlib_plugin="$ff_unlib_plugin $1"
		AC_SUBST([TOOL_DYLIB_$1],"")
	    else
		if test "$ff_wget" = yes -a "$enable_download_$1" = "yes" -a "$DOWNLOADCOMPILE" = "" ; then
		    DOWNLOADCOMPILE="compile-pkg install-other"
		fi
		AC_SUBST([TOOL_COMPILE_$1],ifelse($3,,$1,$3))
		AC_SUBST([TOOL_DYLIB_$1],$2)
		ifelse($4,,[ff_download_lib="$1 $ff_download_lib"])
		ifelse($4,,[enable_$1_download=yes])
	    fi
	fi

	# Also allow to disable the download of one tool if it is already locally installed
	TOOL_PARAMETERS($1,$2,$3)
    ])

dnl TOOL_DISABLE_NO(tool, dylib, dir, nodownload): same as TOOL_DISABLE, but
dnl the tool is off unless --enable-<tool> is given explicitly.
m4_define([TOOL_DISABLE_NO],
    [
	AC_ARG_ENABLE($1,AC_HELP_STRING([--disable-$1],[Do not use $1]))
	if test "$enable_$1" = "no" -o "$enable_$1" = ""
	then
	    enable_$1=no
	    AC_SUBST([TOOL_COMPILE_$1],"")
	    AC_SUBST([TOOL_DYLIB_$1],"")
	elif test "$ff_$1_ok" = "yes" ; then
	    AC_SUBST([TOOL_DYLIB_$1],$2)
	    enable_$1_download=no
	else
	    AC_SUBST([TOOL_COMPILE_$1],ifelse($3,,$1,$3))
	    AC_SUBST([TOOL_DYLIB_$1],$2)
	    ifelse($4,,[ff_download_lib="$1 $ff_download_lib"])
	    ifelse($4,,[enable_$1_download=yes])
	fi

	# Also allow to disable the download of one tool if it is already locally installed
	TOOL_PARAMETERS($1,$2,$3)
    ])

echo "# Build with FreeFEM with ./configure " `date` >$ff_where_lib_conf

# To allow anonymous CVS version to contain a "./configure" and
# Makefiles
AM_MAINTAINER_MODE

echo >config_LIB_INFO

# Necessary compilers
AC_PROG_CC
AC_PROG_CXX
AC_LANG(C++)

# Linker group options, set on every system except Darwin
if test `uname` != Darwin; then
    ff_ldeg="-Wl,--end-group"
    ff_ldbg="-Wl,--start-group"
fi

# suffix of dynamic lib ..
# ---------------------------
ff_uname=`uname`
# flag to build window 32 version ff_mingw = yes
# bof bof F. Hecht
case $ff_uname in
    CYGWIN*|MINGW*|MSYS_NT*)
	ff_suffix_dylib="dll"
	ff_suffix_dylib_a="dll.a";;
    Darwin)
	ff_suffix_dylib="dylib"
	ff_suffix_dylib_a="dylib";;
    *)
	ff_suffix_dylib="so";
	ff_suffix_dylib_a="so";
esac
# end suffix ...
AC_CHECK_PROG(ff_unzip,unzip,yes,no) AC_CHECK_PROG(ff_m4,m4,yes,no) AC_CHECK_PROG(ff_bison,bison,yes,no) AC_CHECK_PROG(ff_flex,flex,yes,no) AC_CHECK_PROG(ff_cmake,cmake,yes,no) AC_CHECK_PROG(ff_patch,patch,yes,no) case "$ff_m4 $ff_bison $ff_patch $ff_flex $ff_unzip" in *no*) AC_MSG_NOTICE([ to install missing package under debian or ubuntu, try ]) test "$ff_unzip" = no && AC_MSG_NOTICE([ sudo apt-get install unzip]) test "$ff_m4" = no && AC_MSG_NOTICE([ sudo apt-get install m4]) test "$ff_bison" = no && AC_MSG_NOTICE([ sudo apt-get install bison]) test "$ff_flex" = no && AC_MSG_NOTICE([ sudo apt-get install flex]) test "$ff_patch" = no && AC_MSG_NOTICE([ sudo apt-get install patch]) AC_MSG_ERROR([ Sorry missing unzip,m4,bison,flex,patch command !]);; esac if test -z "$CXX"; then AC_MSG_NOTICE( [ fatal error : c++ compiler ! ] ); AC_MSG_ERROR([ Sorry no c++ compiler !]) fi AC_COMPUTE_INT(ff_size_long,[sizeof(long)]) AC_COMPUTE_INT(ff_size_int,[sizeof(int)]) AC_COMPUTE_INT(ff_size_ptr,[sizeof(int*)]) AC_SUBST(SIZEOF_LONG,$ff_size_long) AC_SUBST(SIZEOF_INT,$ff_size_int) AC_SUBST(SIZEOF_PTR,$ff_size_ptr) if test "0$ff_size_ptr" -eq 4 ;then AC_SUBST(SIZEOF_PTRINBIT,32) ff_ptrbit=32 elif test "0$ff_size_ptr" -eq 8 ;then AC_SUBST(SIZEOF_PTRINBIT,64) ff_ptrbit=64 else AC_MSG_NOTICE( [ fatal error : sizeof pointer $ff_size_ptr ! or no c++ compiler: $CXX] ); AC_MSG_ERROR([ Sorry sizeof c++ pointer $ff_size_ptr are not 4 or 8 ]) fi # FFCS - build the code for FreeFem++-cs AC_ARG_ENABLE(ffcs,AC_HELP_STRING([--enable-ffcs],[build FreeFEM for use by FreeFem++-cs])) if test "$enable_ffcs" = yes then AC_DEFINE_UNQUOTED(ENABLE_FFCS,$enable_ffcs,[build FreeFEM for use by FreeFem++-cs]) else enable_ffcs=no fi AC_SUBST(ENABLE_FFCS,"$enable_ffcs") AM_CONDITIONAL([ENABLE_FFCS],[test $enable_ffcs = yes]) # dur dur car sous MacOsX le fortran n'est pas standard. 
ff_AR="ar" ff_ARFLAGS="rv" ff_RANLIB="ranlib" AC_ARG_ENABLE(fortran,AC_HELP_STRING([--disable-fortran],[No Fortran compiler available ( ARPACK need it)])) ff_g2c_lib=""; if test "$enable_fortran" != no then # ALH-FFCS-2/3/10: add gfortran-mp-4.4 for MacPorts on MacOS 10.6 # FH add iforr AC_PROG_FC(gfortran f90 xlf90 g95 ifort gfortran-mp-4.4) AC_PROG_F77(gfortran f90 xlf xlf90 g95 f77 fort77 "$FC" ifort gfortran-mp-4.4) # if test -n "$F77" # then ff_flibs="" # modif FH AC_F77_LIBRARY_LDFLAGS dnl AC_F77_WRAPPERS # correct pb of double def under macos case $F77 in *fort77) if test -z "$FLIBS" ; then # FLIBS=`fort77 -v a.out 2>&1|awk '/a.out/ && /fort77/ { print $(NF-2),$(NF-1)}'` # FH to remove " " FLIBS=`fort77 -v a.out 2>&1|awk '/a.out/ && /fort77/ { print "echo",$(NF-2),$(NF-1)}'|sh` AC_MSG_WARN([ fort77 FLIBS : $FLIBS ]) fi ;; *g77) for i in $FLIBS; do case $i in # save last directory of the list -L*) d=`expr $i : '-L\(.*\)'`; echo " try $d " if test -e "$d/libg2c.so" ; then ff_flibs="$d/libg2c.so" elif test -e "$d/libg2c.dylib" ; then ff_flibs="$d/libg2c.dylib" elif test -e "$d/libg2c.a" ; then ff_flibs="$d/libg2c.a" elif test -e $d/libg2c.so -o -e $d/libg2c.a ; then ff_flibs="$i" fi;; esac done if test -e "$ff_flibs" ;then FLIBS="$ff_flibs" else FLIBS="$ff_flibs -lg2c" fi AC_MSG_WARN([ get dir of -lg2c FLIBS : $FLIBS ]) ;; # add FH sep 2006 / modif 2009 # remove FH jan 2021 (Idea of P. 
Jolivet) *gfortran-obsolete) ff_okkk=0 for i in $FLIBS; do case $i in -L*) d=`expr $i : '-L\(.*\)'`; if test -e "$d/libgfortran.$ff_suffix_dylib_a" -a -e "$d/libquadmath.$ff_suffix_dylib_a" ; then ff_flibs="$d/libgfortran.$ff_suffix_dylib_a $d/libquadmath.$ff_suffix_dylib_a" ff_okkk=1 elif test -e "$d/libgfortran.a" -a -e "$d/libquadmath.a" ; then ff_flibs="$d/libgfortran.a $d/libquadmath.a" ff_okkk=1 elif test -e "$d/libgfortran.$ff_suffix_dylib_a" ; then ff_flibs="$d/libgfortran.$ff_suffix_dylib_a" ff_okkk=2 elif test -e "$d/libgfortran.a" ; then ff_flibs="$d/libgfortran.a" ff_okkk=2 fi;; esac done if test "$ff_okkk" -ge 1 ;then FLIBS="$ff_flibs" else FLIBS="$ff_flibs -lgfortran" fi AC_MSG_WARN([ get dir of -lgfortran FLIBS : $FLIBS ]) ;; esac AC_ARG_WITH(flib,[ --with-flib= the fortran library ]) # correct FH sep 2006 -o -> -a if test "$with_flib" != no -a -n "$with_flib" then ff_g2c_lib="$with_flib" FLIBS="$with_flib" fi # add FH oct 2007 for download f2c if test -z "$F77" ; then ff_f77=`pwd`/3rdparty/bin/fort77 ff_flibs=`pwd`/3rdparty/lib/libf2c.a if test -x $ff_f77 -a -f $ff_flibs ; then AC_MSG_WARN([ no fortran, but find download f2c/fort]); F77=$ff_f77 FLIBS=$ff_flibs else AC_MSG_NOTICE( [ fatal error : no fortran ] ); AC_MSG_NOTICE( [add --disable-fortran ] ); AC_MSG_NOTICE( [or try to compile f2c in directory 3rdparty/f2c ] ); AC_MSG_NOTICE( [ just do: make install ] ); AC_MSG_ERROR([ Fatal error No Fortran compiler . ],1); fi fi # check if the FLIBS is correct ff_libs="$LIBS" LIBS="$ff_libs $FLIBS" AC_TRY_LINK_FUNC(exit,ff_err=,ff_err=ok); if test "$ff_err" = "ok" ; then AC_MSG_ERROR([ Fatal FLIBS: $FLIBS is incorrect. 
],1); fi LIBS="$ff_libs" echo "F77 LD $ff_libs" >config_LIB_INFO AC_LANG_PUSH([Fortran 77]) AC_MSG_CHECKING([ Size of fortran 77 integer ]) AC_RUN_IFELSE([AC_LANG_SOURCE([[ program test integer p,i p=1024*1024 i= p*p open(7,file="conftest.out") if (i>0) then write(7,*) 8 else write(7,*) 4 endif close(7) end ]])] ,ff_f77intsize=`cat conftest.out`,ff_f77intsize=4,ff_f77intsize=4) ## AC_LANG(C++) AC_LANG_POP([Fortran 77]) AC_MSG_RESULT($ff_f77intsize) AC_F77_WRAPPERS ff_cfagsf77="" case $ac_cv_f77_mangling in # "lower case, no underscore, no extra underscore") # "lower case, no underscore, extra underscore") # ;; "lower case, underscore, no extra underscore") ff_cfagsf77="-DAdd_";; "lower case, underscore, extra underscore") ff_cfagsf77="-DAdd__ -Df77IsF2C";; "upper case, no underscore, no extra underscore") ff_cfagsf77="-DUPPER";; "upper case, no underscore, extra underscore") ff_cfagsf77="-DUPPER -DAdd__";; "upper case, underscore, no extra underscore") ff_cfagsf77="-DUPPER -DAdd_";; "upper case, underscore, extra underscore") ff_cfagsf77="-DUPPER -DAdd__";; *) ;; esac ff_cfagsf77="$ff_cfagsf77 $ff_allow_mismatch" AC_SUBST(CFLAGSF77,$ff_cfagsf77) if test $ff_f77intsize -ne 4 then AC_MSG_ERROR([ Fatal Error: Your fortran 77 compiler have by not 4 bytes integer ( $ff_f77intsize bytes) ],1); fi test -f /mingw/bin/libpthread-2.dll && ff_pthread_dll=/mingw/bin/libpthread-2.dll ; \ AC_FF_ADDWHERELIB(f77,$FLIBS,$ff_cfagsf77 $ff_pthread_dll) AC_FF_ADDWHERELIB(fc,$FLIBS,$ff_cfagsf77 $ff_pthread_dll) fi # fin test FORTRAN .......... # ---------------------------- AC_ARG_ENABLE(c,AC_HELP_STRING([--disable-c],[No C compiler available (C BLAS need it)])) if test "$enable_c" != no then AC_PROG_CC AM_PROG_CC_C_O else # We need to define this otherwise ./configure crashes with # the message "configure: error: conditional "am__fastdepCC" # was never defined". 
am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' fi AM_PROG_LEX AC_PROG_YACC AC_LANG(C++) # Some useful libraries AC_CHECK_LIB(pthread,pthread_create,ff_pthread="-lpthread",ff_pthread="") AC_CHECK_LIB(iomp5,omp_get_wtime,ff_iomp5="-liomp5",ff_iomp5="") AC_CHECK_LIB([rt],[clock_gettime]) if test -n "$ff_pthread" ; then AC_FF_ADDWHERELIB(pthread,$ff_pthread,) fi # Necessary absolute pathname for local directory when some libraries # are used from several different locations (for instance locally in # configure.ac and in a subdir). curdir=`pwd` # Configure options # ----------------- echo "$PATH" > config.path eval for i in $ac_configure_args\; do echo \$i\; done | sed -e "s/$/'/" -e "s/^/'/" > config.param # by default the suffix of the .so lib file is .so # it is .dll under windows # it is .dylib under macos X # suffix of dynamic lib # Checking wether we can produce a MacIntosh-specific version # ----------------------------------------------------------- AC_MSG_CHECKING(wether we are on a MacIntosh) ff_mac=no if test `uname` = Darwin; then ff_suffix_dylib="dylib" ff_suffix_dylib_a="dylib" ff_mac=yes AC_DEFINE_UNQUOTED(FF_PREFIX_DIR_APPLE,"/Applications/FreeFem++.app/Contents/",FreeFem prefix dir) fi AC_MSG_RESULT($ff_mac) AC_MSG_CHECKING(wether we are on SunOS) ff_sunos=no if test `uname -s` = SunOS; then ff_sunos=yes fi AC_MSG_RESULT($ff_sunos) # glut ------------ # ---------------- ff_glut="" ff_glut_ok="" AC_ARG_WITH(glut,[ --with-glut glutlib and include ]) if test "$with_glut" != no -a -n "$with_glut" ; then ff_libs_save="$LIBS" LIBS="$LIBS" dnl not works .... so no check .... 
FH ff_glut="$with_glut";ff_glut_ok=yes dnl AC_SEARCH_LIBS(abort,[$with_glut],[ff_glut="$with_glut";ff_glut_ok=yes],[ff_glut="";ff_glut_ok="no"]) LIBS="$ff_libs_save" dnl AC_MSG_ERROR([ Sorry bad --with-glut : $with_glut !]) elif test "$with_glut" = no ; then ff_glut_ok="no" fi # Checking wether we can produce a Microsoft Windows-specific version # ------------------------------------------------------------------- AC_ARG_ENABLE(cygwindll,[ --enable-cygwindll Forces the use of the Cygwin DLL (not recommended)]) AC_ARG_ENABLE(mingw64,[ --enable-mingw64 Uses mingw64 compilers on Cygwin]) AC_MSG_CHECKING(wether we are on Microsoft Windows) ff_uname=`uname` # flag to build window 32 version ff_mingw = yes ff_mingw=no ff_fpic=yes case $ff_uname in CYGWIN*) ff_fpic=no # ff_nocygwin=-mno-cygwin AC_SUBST(GCCNOCYGWIN,$ff_nocygwin);; MINGW*|MSYS_NT*) enable_cygwindll=no;; esac ff_PURE_WIN32=0 case $ff_uname in CYGWIN*|MINGW*|MSYS_NT*) ff_fpic=no ff_suffix_dylib="dll"; ff_suffix_dylib_a="dll.a"; ff_win32=yes; # echo "ff win 32 --------------------------------------------" # FFCS - 8/3/12 - remove -D_MSC_VER under MinGW64 because it forces system calls to be compiled into any object # (which creates thousands of duplicate definitions for sytem calls like time()). # We need Mingw to avoid Cygwin's extra DLLs if test "$enable_cygwindll" != yes then # CHECK_COMPILE_FLAG(C++,-mwindows,CXXFLAGS) # FFCS: on Windows, FF crashes when compiling GL/glut.h and the option "--disable-opengl" is not # operational because ff_glut_ok is forced to yes here. 
if test $enable_ffcs = yes
	    then
		ff_glut_ok=no
		ff_glut=""
		enable_opengl=no
	    fi

	    ff_mingw=yes
	    enable_cygwindll=no;
	    ff_pthread="-mthreads"

	    # FFCS does not use the Cygwin MinGW compilers any more
	    if test $enable_ffcs = no
	    then
		CXXFLAGS="$CXXFLAGS $ff_nocygwin -I/usr/include/mingw"
		FFLAGS="$FFLAGS $ff_nocygwin"
		CFLAGS="$CFLAGS $ff_nocygwin -I/usr/include/mingw"
		# Probe that the compiler still works with the flags just added;
		# on failure the no-cygwin option is blanked
		AC_COMPILE_IFELSE([AC_LANG_SOURCE([int a;])],[],
		    [ff_nocygwin="";
			AC_MSG_NOTICE([Sorry $ff_nocygwin optio is wrong try whitout , but try with gcc-3.3])
		    ])
		CXXFLAGS="$CXXFLAGS $ff_nocygwin -I/usr/include/mingw"
		FFLAGS="$FFLAGS $ff_nocygwin"
		CFLAGS="$CFLAGS $ff_nocygwin -I/usr/include/mingw"
		CNOFLAGS="$CNOFLAGS $ff_nocygwin -I/usr/include/mingw"
	    fi
	    LIBS="$LIBS $ff_nocygwin -mthreads -lws2_32 -lcomdlg32"
	    LIBSNOCONSOLE="-mwindows"

	    # FFCS uses a specific compiler, so we specify its libraries explicitly
	    if test $enable_ffcs = no
	    then
		# Make the MS-MPI binaries reachable, then pick its launcher unless
		# MPIRUN was already set. The launcher is named mpiexec.exe; the
		# previous lookup of "mpiexe.exe" could never succeed.
		export PATH="$PATH:`cygpath $MSMPI_BIN`"
		test -z "$MPIRUN" && MPIRUN=`which mpiexec.exe`
		if test "$enable_fortran" != no -a "$with_flib" != no ; then
		    case "$F77" in
			*gfortran) FLIBS="$ff_nocygwin -lgfortran -lquadmath";;
			*g77) FLIBS="$ff_nocygwin -lg2c";;
			*) AC_MSG_ERROR([ Sorry no known FLIBS with this $F77 !]) ;;
		    esac
		fi
	    fi

	    if test -z "$ff_glut" -a "$ff_glut_ok" != "no" ; then
		ff_glutname="glut32"
		# check abort, an existing function, just to find whether glut32.dll exists in the path
		# because glutInit is not the real symbol on win32 (painful) FH !!!!!!!!!
AC_CHECK_LIB(glut32,abort,ff_glut="-l$ff_glutname -mthreads -lglu32 -lopengl32",ff_glut="") if test -z "$ff_glut" ; then ff_glutname="freeglut" if test -x /usr/bin/pkg-config.exe ; then ff_glut="-lglu32 "`/usr/bin/pkg-config.exe --libs freeglut` else ff_glut="-l$ff_glutname -mthreads -lglu32 -lopengl32" fi AC_CHECK_LIB(freeglut,abort,ff_glut_ok=yes,ff_glut="") fi fi # Resources for FreeFem++-cs in Microsoft Windows format AC_SUBST(FFGLUTNAME,$ff_glutname) AC_SUBST(WINDRESOBJ,windres.o) AC_SUBST(LIBSNOCONSOLE,$LIBSNOCONSOLE) AC_SUBST(WIN32DLLTARGET,win32-dll-target) ff_TEST_FFPP="../../src/bin-win32/FreeFem++.exe" ff_TEST_FFPP_MPI="../../src/bin-win32/FreeFem++-mpi.exe" AC_DEFINE(PURE_WIN32,1,A pure windows applications no cygwin dll) ff_PURE_WIN32=1 ff_dynload=yes fi ;; *) ff_win32=no;; esac AC_MSG_RESULT($ff_win32) if test "$ff_win32" = no then enable_cygwindll=no fi # FreeFem++-specific version information # -------------------------------------- # Version numbering, converted to a floating point value ff_numver="`echo $VERSION|sed 's/-\(.*\)/+\1.0*0.000001/'`" AC_DEFINE_UNQUOTED(VersionFreeFem,$ff_numver,FreeFEM version as a float) # Since src/fflib/strversionnumber.cpp is recreated at each build, this # date is only useful for config-version.h test "$prefix" = NONE && prefix="$ac_default_prefix" ff_prefix_dir="${prefix}/lib/ff++/$VERSION" AC_MSG_CHECKING(prefix dir freefem++ ) AC_MSG_RESULT($ff_prefix_dir) FF_DATE=`date` AC_DEFINE_UNQUOTED(VersionFreeFemDate,"$FF_DATE",FreeFEM build date) AC_DEFINE_UNQUOTED(FF_PREFIX_DIR,"${ff_prefix_dir}",FreeFem prefix dir) AC_SUBST(ff_prefix_dir,$ff_prefix_dir) ff_prefix_dir_lib="$ff_prefix_dir/lib" ff_prefix_dir_data="$datadir/$PACKAGE_NAME" ff_prefix_dir_include="$ff_prefix_dir/include" ff_prefix_dir_etc="$ff_prefix_dir/etc" ff_prefix_dir_lib_mpi="$ff_prefix_dir/lib/mpi" ff_prefix_dir_example="$ff_prefix_dir_data/$PACKAGE_VERSION" AC_SUBST(ff_prefix_dir_lib,$ff_prefix_dir_lib) 
AC_SUBST(ff_prefix_dir_include,$ff_prefix_dir_include)
AC_SUBST(ff_prefix_dir_etc,$ff_prefix_dir_etc)
AC_SUBST(ff_prefix_dir_lib_mpi,$ff_prefix_dir_lib_mpi)
AC_SUBST(ff_prefix_dir_example,$ff_prefix_dir_example)

# Separate configuration header file for version information, included
# in config-macos9.h

cat << EOF > config-version.h
/* FreeFEM version information for MacOS 9 configuration */
/* File generated by configure.ac */

/* Define to the full name and version of this package. */
#define PACKAGE_STRING "$PACKAGE_STRING"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "$PACKAGE_TARNAME"

/* Define to the version of this package. */
#define PACKAGE_VERSION "$PACKAGE_VERSION"

/* Version number of package */
#define VERSION "$VERSION"

/* FreeFEM build date */
#define VersionFreeFemDate "$FF_DATE"

/* FreeFEM version as a float */
#define VersionFreeFem ($ff_numver)
EOF

# Checking whether to produce a debug version
# ------------------------------------------

# Generic or hardware-dependent optimization
m4_include(etc/config/m4/acmacros.m4)
m4_include(etc/config/m4/acoptim.m4)
# [[file:ax_lib_hdf5.m4]]
m4_include(etc/config/m4/ax_lib_hdf5.m4)
# [[file:ax_lib_gsl.m4]]
m4_include(etc/config/m4/ax_lib_gsl.m4)

# Checking c++11 for HashMatrix (mandatory now)
# F.Hecht:
# do not compile everything with the c++11 version, too many
# problems today (Feb 2015) with g++ 4.8.1
ff_cxx_save=$CXXFLAGS
## for bem.cpp need c++14 for intel compiler
# Try -std=c++14 first and fall back to -std=c++11; the accepted flag is
# recorded in CXX11FLAGS and registered as the cxx11 entry of the
# WHERE_LIBRARY configuration.
CHECK_COMPILE_FLAG(C++,[-std=c++14],CXXFLAGS,ff_cxx11)
if test "$ff_cxx11" = yes ; then
    CXX11FLAGS="-std=c++14";
    AC_FF_ADDWHERELIB(cxx11,$CXX11FLAGS,$CXX11FLAGS)
else
    CHECK_COMPILE_FLAG(C++,[-std=c++11],CXXFLAGS,ff_cxx11)
    if test "$ff_cxx11" = yes ; then
	CXX11FLAGS="-std=c++11";
	AC_FF_ADDWHERELIB(cxx11,$CXX11FLAGS,$CXX11FLAGS)
    else
	# A compiler exists at this point; it just accepts neither standard
	# flag. The former "Sorry no c++ compiler" error aborted first with a
	# misleading message and made the accurate error below unreachable.
	AC_MSG_NOTICE( [ fatal error : c++ compiler ! ] );
	AC_MSG_ERROR([ Fatal error no c++11 or c++14 ],1);
    fi
fi
##CXXFLAGS="$ff_cxx_save"
AC_SUBST(WITH_CXX11,$ff_cxx11)
AC_SUBST(CXX11FLAGS,$CXX11FLAGS)
#end Checking c++11

# ------------------------------
# compilation flags of FreeFem++
# ------------------------------
#  -DDRAWING  bamg active some drawing facility in bamg (inquire mesh)
#  -DBAMG_LONG_LONG  active the use of long long in bamg
#  -DDEBUG active the checking in bamg
#  -DNCHECKPTR remove check pointer facility
#  -DCHECK_KN active subscripting of some array
#  -DWITHCHECK of the dynamic type checking of the language (very slow)
#  -DEIGENVALUE  to compile the eigen value part
# ------------------------------------------------

# FreeFem++-specific optimizations -fpascal-strings
if test "$enable_debug" = yes;
then
    CXXFLAGS="$CXXFLAGS -DBAMG_LONG_LONG -DCHECK_KN"
    if test "$ff_mac" = yes;
    then
	CXXFLAGS="$CXXFLAGS -fno-inline -fexceptions"
    fi
else
    CXXFLAGS="$CXXFLAGS -DBAMG_LONG_LONG -DNCHECKPTR"
    if test "$ff_mac" = yes;
    then
	CXXFLAGS="$CXXFLAGS "
    fi
fi

#whether or not to add a suffix to the package name
#--------------------------------------------------
AC_ARG_WITH(suffix,[ --with-suffix=[G3/G4/G4,P4,..] or --without-suffix (default no suffix to package name)])
AC_MSG_CHECKING(suffix to add to package name)
ff_suffix=
if test "$with_suffix" = yes -o "$with_suffix" = no -o "$with_suffix" = ""
then
    AC_MSG_RESULT(none)
else
    ff_suffix="-$with_suffix"
    AC_MSG_RESULT($ff_suffix)
fi
AC_SUBST(ADD_PACKAGE_NAME,$ff_suffix)

# Checking that OpenGL is accessible
# ----------------------------------
ff_gl_ok=no
ff_glx_ok=no
ff_libs="$LIBS"
AC_ARG_WITH(flib,[ --with-flib= the fortran library ])
AC_ARG_ENABLE([opengl],AS_HELP_STRING([--enable-opengl], [Enable/disable support for OpenGL]))

# echo " .... LIBS = $LIBS ....
" if test x$enable_opengl != xno; then # GL libraries seem to depend on some dlopen capabilities if test "$enable_static" != yes -a "$ff_glut_ok" != yes then # If we cannot find gl.h, no OpenGL version can be produced AC_CHECK_HEADERS(OpenGL/gl.h, ff_gl_ok=yes , ff_gl_ok=no ) if test "$ff_gl_ok" = no then AC_CHECK_HEADERS(GL/gl.h, ff_gl_ok=yes , ff_gl_ok=no ) fi AC_CHECK_HEADERS(GLUT/glut.h, ff_glut_ok=yes, ff_glut_ok=no) if test "$ff_glut_ok" = no then AC_CHECK_HEADERS(GL/glut.h, ff_glut_ok=yes, ff_glut_ok=no) fi # GLUT ..... if test "$ff_glut_ok" = yes then for glut in \ "-framework GLUT -framework OpenGL -framework Cocoa" \ "-lglut -lGLU -lGL" do ff_glut="$glut" ff_libs1="$LIBS" LIBS="$LIBS $ff_glut" AC_TRY_LINK_FUNC(glutInit, ff_glut_ok=yes LIBS="$ff_libs1" break, ff_glut_ok=next); LIBS="$ff_libs1" done fi fi fi AC_SUBST(LIBSPTHREAD,$ff_pthread) # ALH - 16/9/13 - at the moment the requirements for medit (glut) are not compatible with FFCS so medit is # only compiled with FF. if test $enable_ffcs = no then if test "$ff_glut_ok" = yes then AC_SUBST(LIBSGLUT,$ff_glut) if test -n "$ff_pthread" ; then ff_ffglutprog="ffglut${EXEEXT}" AC_DEFINE_UNQUOTED(PROG_FFGLUT,"$ff_ffglutprog", the ffglut application for the new graphics ) fi ff_meditprog="ffmedit${EXEEXT}" fi fi AC_ARG_WITH(suffix,[ --with-suffix=[G3/G4/G4,P4,..] 
or --without-suffix (default no suffix package name)]) if test "$with_suffix" = yes then with_suffix="" elif test "$with_suffix" = no -o -z "$with_suffix" then with_suffix="" else with_suffix="-$with_suffix" fi AC_MSG_CHECKING(add suffix $with_suffix ) AC_SUBST(ADD_PACKAGE_NAME,$with_suffix) AC_MSG_RESULT(yes) # Checking wether we can produce a parallel version # ------------------------------------------------- if test $enable_ffcs = no then # FF case conf with PETSC or after dnl m4_include(etc/config/m4/acmpi.m4) ff_mpi=no else # FFCS - use the same MPI configuration choices as FFCS if test -z "$MPICXX" then ff_mpi=no else ff_mpi=yes AC_SUBST(MPICXX,$MPICXX) AC_SUBST(MPICC,$MPICC) AC_SUBST(MPIFC,$MPIFC) AC_SUBST(MPIPROG,"FreeFem++-mpi${EXEEXT}") AC_SUBST(MPI_INCLUDE,"-I $MPI_INC_DIR") AC_SUBST(MPI_INC_DIR,$MPI_INC_DIR) AC_SUBST(MPI_LIB_DIRS,"") AC_SUBST(MPI_LIB,$MPI_LIB) AC_SUBST(MPI_LIBC,"") AC_SUBST(MPI_LIBFC,"") # these values should not be empty otherwise # plugin/seq/ff-get-dep will think that they are not # defined AC_FF_ADDWHERELIB(mpi,-DDUMMY,-I$MPI_INC_DIR) # mpifc and mpif77 libraries should always be specified # because FF never calls the Fortran MPI compiler. It always # uses mpicxx in [[file:plugin/seq/ff-c++]]. The # resulting Fortran libraries (eg Mumps) would compile even # without the proper Fortran libs, but they would not load # properly. 
# under Win32, libmpi_f77.a is not the right name and FFCS # mingw/mpicxx adds the right libraries by itself # With mpich2 Debian wheezy 32 bits, libmpi_f77.a is not available (request from Cico, 14/10/13) if test -f /usr/lib/openmpi/lib/libmpi_f77.a then AC_FF_ADDWHERELIB(mpifc,-lmpi_f77,) AC_FF_ADDWHERELIB(mpif77,-lmpi_f77,) else AC_FF_ADDWHERELIB(mpifc,-DDUMMY,) AC_FF_ADDWHERELIB(mpif77,-DDUMMY,) fi # FFCS - MPI_DOUBLE_COMPLEX kept from original FF configure script AC_MSG_CHECKING( MPI_DOUBLE_COMPLEX) AC_COMPILE_IFELSE( [AC_LANG_SOURCE([ #include MPI_Datatype xxxx=MPI_DOUBLE_COMPLEX; ])], ff_mpi_double_complex=yes, ff_mpi_double_complex=no) AC_MSG_RESULT($ff_mpi_double_complex) if test "$ff_mpi_double_complex" = yes ; then AC_DEFINE(HAVE_MPI_DOUBLE_COMPLEX,1, mpi_double_complex) fi fi fi # Looking for useful configuration utilities # ------------------------------------------ AC_ARG_ENABLE(summary,AC_HELP_STRING([--enable-summary],[Display activated libraries list at the end of the configure process])) AC_ARG_ENABLE(download,AC_HELP_STRING([--enable-download],[Download missing libraries (BLAS,ARPACK,UMFPACK,...)])) ## PETSc AC_ARG_WITH(petsc,[ --with-petsc=/usr/local/petsc/conf/petscvariables --without-petsc]) AC_ARG_WITH(prefix_petsc,[ --prefix_petsc=directory where all PETSc/SLEPc will be installed if you download ]) if test -z "${prefix_petsc}"; then prefix_petsc="${prefix}/ff-petsc"; fi AC_SUBST([FF_prefix_petsc],"$prefix_petsc") AC_SUBST([FF_prefix_petsc_real],"$prefix_petsc_real") AC_SUBST([FF_prefix_petsc_complex],"$prefix_petsc_complex") AC_SUBST([FF_generic_petsc],"$enable_generic") # /usr/local/petsc/conf/petscvariables #/usr/local/ff++/petsc/lib/petsc/conf/petscvariables ff_petsc_ok=no ff_slepc_ok=no if test "$with_petsc" != no; then for d in "$with_petsc" "${prefix_petsc}/r/lib" /usr/local/ff++/petsc/lib /usr /usr/local /opt/usr /opt/local ;do for dd in "" "/petsc/conf/petscvariables" "/lib/petsc/conf/petscvariables" ; do ffconfpetsc="$d$dd" 
ffconfslepc="$d/slepc/conf/slepcvariables" # echo " petsc ... $ffconfpetsc" if test -f "$ffconfpetsc" ; then if test "$ff_petsc_ok" = no ; then PETSC_PREFIXDIR=`awk -F' *= *' '"PREFIXDIR"==$1 {print $2}' "$ffconfpetsc"` PETSC_SCALAR=`awk -F' *= *' '"PETSC_SCALAR"==$1 {print $2}' "$ffconfpetsc"` PETSC_MPIRUN=`awk -F' *= *' '"MPIEXEC"==$1 {print $2}' "$ffconfpetsc"` PETSC_MPIRUN=`echo "$PETSC_MPIRUN" | awk '{gsub(" --oversubscribe", ""); print}'` #remove --oversubscribe AC_MSG_NOTICE([ find real ( $PETSC_SCALAR ) petsc in $ffconfpetsc]); if test "$ff_mpi" != yes; then AC_MSG_NOTICE([ choose MPI from petsc : $PETSC_MPIRUN ]); MPIRUN=`echo $PETSC_MPIRUN` MPICC=`awk -F' *= *' '"CC"==$1 {print $2}' "$ffconfpetsc"` MPICXX=`awk -F' *= *' '"CXX"==$1 {print $2}' "$ffconfpetsc"` MPIFC=`awk -F' *= *' '"FC"==$1 {print $2}' "$ffconfpetsc"` case $ff_uname in CYGWIN*|MINGW*|MSYS_NT*) MPICC=$MPICC" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetsc"` MPICXX=$MPICXX" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetsc"` MPIFC=$MPIFC" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetsc"` esac m4_include(etc/config/m4/acmpi.m4) fi if test "$ff_mpi" = yes; then WPETSC_MPIRUN=`which "$PETSC_MPIRUN"` WMPIRUN=`which "$MPIRUN"` test "$PETSC_SCALAR" = "real" && test "$ff_win32" = yes -o "$PETSC_MPIRUN" = "$MPIRUN" || cmp -s "$WPETSC_MPIRUN" "$WMPIRUN" if test "$?" 
-eq 0 ; then AC_SUBST([FF_conf_petsc_real],"$ffconfpetsc") AC_SUBST([FF_prefix_petsc_real],"$PETSC_PREFIXDIR") AC_FF_PETSC_WHERELIB_BASIC(petsc,$ffconfpetsc) AC_FF_PETSC_WHERELIB(parmetis,PARMETIS_LIB,PARMETIS_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(metis,METIS_LIB,METIS_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(ptscotch,PTSCOTCH_LIB,PTSCOTCH_INCLUDE,$ffconfpetsc) ff_SCOTCH_INCLUDE=$ff_PTSCOTCH_INCLUDE ff_SCOTCH_LIB=`echo $ff_PTSCOTCH_LIB | sed -e 's/@<:@^ @:>@*ptscotch@<:@^ @:>@*//g' | sed -e 's/@<:@^ @:>@*ptes@<:@^ @:>@*//g'` if test -n "$ff_SCOTCH_LIB"; then ff_scotch_ok=yes; AC_FF_ADDWHERELIB(scotch,$ff_SCOTCH_LIB,$ff_SCOTCH_INCLUDE) AC_SUBST([TOOL_COMPILE_scotch],"") fi AC_FF_PETSC_WHERELIB(mumps,MUMPS_LIB,MUMPS_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(tetgen,TETGEN_LIB,TETGEN_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(hpddm,HPDDM_LIB,HPDDM_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(superlu,SUPERLU_LIB,SUPERLU_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(mmg,MMG_LIB,MMG_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(parmmg,PARMMG_LIB,PARMMG_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(scalapack,SCALAPACK_LIB,SCALAPACK_INCLUDE,$ffconfpetsc) AC_FF_PETSC_WHERELIB(suitesparse,SUITESPARSE_LIB,SUITESPARSE_INCLUDE,$ffconfpetsc,$ff_METIS_LIB) dnl if test -n "$ff_SUITESPARSE_LIB" -a -n "$ff_METIS_LIB"; then dnl ff_where_lib_suitesparse="$ff_METIS_LIB $ff_SUITESPARSE_LIB" dnl fi AC_FF_PETSC_WHERELIB(blaslapack,BLASLAPACK_LIB,BLASLAPACK_INCLUDE,$ffconfpetsc) if test -n "$ff_BLASLAPACK_LIB"; then ff_where_lib_blaslapack="`echo $ff_BLASLAPACK_LIB | sed -e 's/@<:@^ @:>@*scalapack@<:@^ @:>@*//g' | sed -e 's/@<:@^ @:>@*blacs@<:@^ @:>@*//g'`" fi if test -f $d/libslepc.$ff_suffix_dylib -o -f $d/libslepc.a ; then ff_slepc_ok=yes ff_SLEPC_LIB="-Wl,-rpath,$d -L$d -lslepc" AC_FF_ADDWHERELIB(slepc,$ff_SLEPC_LIB,$ff_where_inc_petsc) if test -f $d/libarpack.$ff_suffix_dylib -o -f $d/libarpack.a ; then ff_arpack_ok=yes ff_arpack_libs="-Wl,-rpath,$d -L$d -larpack" 
AC_FF_ADDWHERELIB(arpack,$ff_arpack_libs,) fi fi if test "$ff_slepc_ok" != yes ; then AC_ARG_WITH(slepc_include,AC_HELP_STRING([--with-slepc-include=],[Include directives for slepc instead of automatic download])) AC_ARG_WITH(slepc_ldflags,AC_HELP_STRING([--with-slepc-ldflags=],[Link-time directives for slepc instead of automatic download])) if test "$with_slepc_include" != "" || test "$with_slepc_ldflags" != "" then # some directives have been specified, use them instead of downloading AC_FF_ADDWHERELIB(slepc,$with_slepc_ldflags,$with_slepc_include) ff_slepc_ok=yes fi fi else AC_MSG_NOTICE([" Warning PETSC MPI and FF++ MPI not the same: $PETSC_MPIRUN != $MPIRUN or $PETSC_SCALAR != real ." ]); fi fi fi else ffconfpetsc=""; fi done done fi if test "$ff_petsc_ok" != no ; then AC_MSG_NOTICE([ with petsc $ff_petsc_ok, with slepc $ff_slepc_ok ]) else AC_MSG_NOTICE([ without petsc, slepc ***** ]) fi ## PETSc complex AC_ARG_WITH(petsc_complex,[ --with-petsc_complex=/usr/local/petsc/conf/petscvariables --without-petsc_complex ]) ff_petsccomplex_ok=no ff_slepccomplex_ok=no if test "$with_petsc_complex" != no; then for d in "$with_petsc_complex" "${prefix_petsc}/c/lib" /usr/local/ff++/petsc/c/lib ;do for dd in "" "/petsc/conf/petscvariables" "/lib/petsc/conf/petscvariables" ; do ffconfpetscc="$d$dd" if test -f "$ffconfpetscc" -a "$ff_petsccomplex_ok" = no ; then PETSC_COMPLEX_SCALAR=`awk -F' *= *' '"PETSC_SCALAR"==$1 {print $2}' "$ffconfpetscc"` PETSC_COMPLEX_MPIRUN=`awk -F' *= *' '"MPIEXEC"==$1 {print $2}' "$ffconfpetscc"` PETSC_COMPLEX_PREFIXDIR=`awk -F' *= *' '"PREFIXDIR"==$1 {print $2}' "$ffconfpetscc"` PETSC_COMPLEX_MPIRUN=`echo "$PETSC_COMPLEX_MPIRUN" | awk '{print $1}'` #remove --oversubscribe AC_MSG_NOTICE([ find complex ($PETSC_COMPLEX_SCALAR) petsc in $ffconfpetscc]); if test "$ff_mpi" != yes; then MPIRUN=`echo $PETSC_COMPLEX_MPIRUN` MPICC=`awk -F' *= *' '"CC"==$1 {print $2}' "$ffconfpetscc"` MPICXX=`awk -F' *= *' '"CXX"==$1 {print $2}' "$ffconfpetscc"` MPIFC=`awk 
-F' *= *' '"FC"==$1 {print $2}' "$ffconfpetscc"` case $ff_uname in CYGWIN*|MINGW*|MSYS_NT*) MPICC=$MPICC" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetscc"` MPICXX=$MPICXX" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetscc"` MPIFC=$MPIFC" "`awk -F' *= *' '"MPI_INCLUDE"==$1 {print $2}' "$ffconfpetscc"` esac m4_include(etc/config/m4/acmpi.m4) fi if test "$ff_mpi" = yes; then test "$PETSC_COMPLEX_SCALAR" = "complex" && test "$ff_win32" = yes -o "$PETSC_COMPLEX_MPIRUN" = "$MPIRUN" || cmp -s `which "$PETSC_COMPLEX_MPIRUN"` `which "$MPIRUN"` if test "$?" -eq 0 ; then AC_SUBST([FF_prefix_petsc_complex],"$PETSC_COMPLEX_PREFIXDIR") AC_SUBST([FF_conf_petsc_complex],"$ffconfpetscc") AC_FF_PETSC_WHERELIB_BASIC(petsccomplex,$ffconfpetscc) AC_FF_PETSC_WHERELIB(parmetis,PARMETIS_LIB,PARMETIS_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(metis,METIS_LIB,METIS_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(ptscotch,PTSCOTCH_LIB,PTSCOTCH_INCLUDE,$ffconfpetscc) ff_SCOTCH_INCLUDE=$ff_PTSCOTCH_INCLUDE ff_SCOTCH_LIB=`echo $ff_PTSCOTCH_LIB | sed -e 's/@<:@^ @:>@*ptscotch@<:@^ @:>@*//g' | sed -e 's/@<:@^ @:>@*ptes@<:@^ @:>@*//g'` if test -n "$ff_SCOTCH_LIB"; then ff_scotch_ok=yes; AC_FF_ADDWHERELIB(scotch,$ff_SCOTCH_LIB,$ff_SCOTCH_INCLUDE) AC_SUBST([TOOL_COMPILE_scotch],"") fi AC_FF_PETSC_WHERELIB(mumps,MUMPS_LIB,MUMPS_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(tetgen,TETGEN_LIB,TETGEN_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(hpddm,HPDDM_LIB,HPDDM_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(superlu,SUPERLU_LIB,SUPERLU_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(mmg,MMG_LIB,MMG_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(parmmg,PARMMG_LIB,PARMMG_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(scalapack,SCALAPACK_LIB,SCALAPACK_INCLUDE,$ffconfpetscc) AC_FF_PETSC_WHERELIB(suitesparse,SUITESPARSE_LIB,SUITESPARSE_INCLUDE,$ffconfpetscc) if test -z "$ff_BLASLAPACK_LIB"; then AC_FF_PETSC_WHERELIB(blaslapack,BLASLAPACK_LIB,BLASLAPACK_INCLUDE,$ffconfpetscc) if test -n 
"$ff_BLASLAPACK_LIB"; then ff_where_lib_blaslapack="`echo $ff_BLASLAPACK_LIB | sed -e 's/@<:@^ @:>@*scalapack@<:@^ @:>@*//g' | sed -e 's/@<:@^ @:>@*blacs@<:@^ @:>@*//g'`" fi fi if test -f $d/libslepc.$ff_suffix_dylib -o -f $d/libslepc.a ; then ff_slepccomplex_ok=yes ff_SLEPCCOMPLEX_LIB="-Wl,-rpath,$d -L$d -lslepc" AC_FF_ADDWHERELIB(slepccomplex,$ff_SLEPCCOMPLEX_LIB,$ff_where_inc_petsccomplex) if test -z "$ff_arpack_ok"; then if test -f $d/libarpack.$ff_suffix_dylib -o -f $d/libarpack.a ; then ff_arpack_ok=yes ff_arpack_libs="-Wl,-rpath,$d -L$d -larpack" AC_FF_ADDWHERELIB(arpack,$ff_arpack_libs,) fi fi fi if test "$ff_slepccomplex_ok" != yes ; then AC_ARG_WITH(slepccomplex_include,AC_HELP_STRING([--with-slepccomplex-include=],[Include directives for slepccomplex instead of automatic download])) AC_ARG_WITH(slepccomplex_ldflags,AC_HELP_STRING([--with-slepccomplex-ldflags=],[Link-time directives for slepccomplex instead of automatic download])) if test "$with_slepccomplex_include" != "" || test "$with_slepccomplex_ldflags" != "" then # some directives have been specified, use them instead of downloading AC_FF_ADDWHERELIB(slepccomplex,$with_slepccomplex_ldflags,$with_slepccomplex_include) ff_slepccomplex_ok=yes fi fi else AC_MSG_NOTICE([" Warning PETSC complex MPI and FF++ MPI not the same: $PETSC_COMPLEX_MPIRUN != $MPIRUN or $PETSC_COMPLEX_SCALAR != complex ." ]); fi fi fi done done fi if test "$ff_petsccomplex_ok" != no ; then AC_MSG_NOTICE([ with petsc complex $ff_petsccomplex_ok, with slepc complex $ff_slepccomplex_ok ]) else AC_MSG_NOTICE([ without petsc complex ***** ]) fi ## slepc ### end petsc .... # chech MPI after petsc ... 
if test "$ff_mpi" != yes; then m4_include(etc/config/m4/acmpi.m4) fi # FFCS needs to change some of the FF makefiles to compile without MPI AM_CONDITIONAL([FFCS_MPIOK],[test $ff_mpi = yes]) #replacing wget with another command ff_wget_command="wget --no-check-certificate" ff_wget=no AC_ARG_WITH(wget, [ --with-wget=command Replace "wget" with another command. Implies --enable-download], enable_download=yes ff_wget=yes ff_wget_command="${withval}") ff_MKL_libpath=yes AC_ARG_WITH(mkl, [ --with-mkl= the MKL LIBPATH : (ie. /opt/intel/mkl/RR.r.y.xxx/lib/)], ff_mkl_libpath="${withval}") # if enabling downloads find wget or curl to do download # ------------------------------------------------------ if test "$ff_wget" = no then ff_wget="" AC_CHECK_PROG(ff_wget,wget --no-check-certificate,yes,no) fi if test "$ff_wget" = no then ff_wget_command="curl --fail -O " ff_curl="" AC_CHECK_PROG(ff_curl,curl -O,yes,no) ff_wget="$ff_curl" fi # for automatique compilation of # lib in download if test "$ff_wget" != yes -a "$enable_download" = "yes" ; then enable_download=no AC_MSG_ERROR([ enable-download and no wget or curl. 
],1); fi if test "$enable_download" = "yes" ; then DOWNLOADCOMPILE="compile-pkg install-other" else DOWNLOADCOMPILE= enable_download=no fi if test "$enable_download" = "yes" ; then AC_CHECK_PROG(ff_git,git,yes,no) case "$ff_git" in *no*) AC_MSG_NOTICE([ to install missing package under debian or ubuntu, try ]) test "$ff_git" = no && AC_MSG_NOTICE([ sudo apt-get install git]) AC_MSG_ERROR([ Sorry missing git command !]);; esac fi AC_SUBST(WGET,$ff_wget_command) # modif FH ----- # -- looking for cadna # the round-off error propagation # the web site http://www-anp.lip6.fr/cadna/ # -------------------- AC_ARG_WITH(cadna,[ --with-cadna= cadna library --without-cadna ]) if test "$with_cadna" != no -o -n "$with_cadna" then ff_cadna="$with_cadna" fi ff_cadna_ok=no if test "$with_cadna" != no then ff_libs_old="$LIBS" ff_ldflags_old="$LDFLAGS" ff_cadna_dir="${curdir}/3rdparty/cadna" LDFLAGS="$LDFLAGS -L$ff_cadna_dir" AC_CHECK_LIB(cadnafree,arit_zero, ff_cadna_ok=yes) AC_CHECK_HEADERS(${ff_cadna_dir}/cadnafree.h, ff_cadna_h=yes, ff_cadna_h=no) LIBS="$ff_libs_old" LDFLAGS="$ff_ldflags_old" if test "$ff_cadna_ok" = yes -a "$ff_cadna_h" = yes then AC_DEFINE(HAVE_CADNA,1, freecadna is use to evalute the round-off error propagation ) CPPFLAGS="$CPPFLAGS -I$ff_cadna_dir" LIBS=" -L$ff_cadna_dir -lcadnafree $ff_libs_old" else AC_MSG_NOTICE([ without cadna ***** ]) fi else AC_MSG_NOTICE([ without cadna ***** ]) fi # Looking for FFTW # ---------------- # ALH - 18/9/13 - deactivates FFTW detection for testing purposes AC_ARG_ENABLE(system_fftw,AC_HELP_STRING([--disable-system-fftw],[Disable the automatic detection of FFTW])) if test "$enable_system_fftw" != no then # ALH - 4/9/13 - avoid recompiling FFTW if a copy was found on the system (request from Helmut Jarausch, 1/8/13) AC_CHECK_LIB(fftw3,fftw_execute,ff_fftw_ok=yes) AC_CHECK_HEADERS(fftw3.h,ff_fftw_h=yes, ff_fftw_h=no) fi if test "$ff_fftw_ok" = yes -a "$ff_fftw_h" = yes then AC_FF_ADDWHERELIB(fftw3,-lfftw3,) else 
AC_ARG_ENABLE(download-fftw,AC_HELP_STRING([--enable-download_fftw],[force the download of fftw])) if test "$enable_download_fftw" != no -a "$enable_download" = yes then ff_DOWNLOAD_FFTW=fftw fi fi # used in [[file:3rdparty/fftw/Makefile.am::DOWNLOAD_FFTW]] AC_SUBST(DOWNLOAD_FFTW,$ff_DOWNLOAD_FFTW) dnl --------------------------- dnl Looking for the tetgen dnl -------------------------- dnl AC_CHECK_LIB(tet,tetrahedralize,ff_fftet_ok=yes) dnl AC_CHECK_HEADERS(tetgen.h,ff_tet_h=yes, ff_tet_h=no) dnl if test "$ff_tet_ok" = yes -a "$ff_tet_h" = yes dnl then dnl AC_SUBST([TOOL_COMPILE_tetgen],"") dnl AC_FF_ADDWHERELIB(tetgen,-ltet,) dnl AC_DEFINE(HAVE_TETGEN,1, tetgen is compute tetrahedralize volume of an enclosed surface) dnl fi # Looking for the BLAS # -------------------- ff_blas_ok=no ff_blas_inc="" # ALH - 18/9/13 - give the option to deactivate system blas for testing purposes AC_ARG_ENABLE(system-blas,AC_HELP_STRING([--disable-system-blas],[Disable the search for a system-wide BLAS library])) if test "$ff_where_lib_conf_blaslapack" = 1 ; then echo " use BLAS/Lapack of petsc " # echo " lib: $ff_where_lib_blaslapack inc: $ff_where_inc_blaslapack" ff_blas_ok=yes ff_lapack_ok=yes ff_blas_libs="$ff_where_lib_blaslapack" ff_lapack_lib= ff_blas_inc="$ff_where_inc_blaslapack" AC_FF_ADDWHERELIB(lapack,$ff_mkl_lapack,$ff_blas_inc) AC_FF_ADDWHERELIB(blas,$ff_blas_libs,$ff_blas_inc) AC_MSG_NOTICE([ BLAS and LAPACK from petsc ***** ]) if echo "$ff_where_lib_blaslapack"| grep -q " -lmkl" ; then AC_MSG_NOTICE([ MKL from petsc ***** ]) AC_FF_ADDWHERELIB(mkl,$ff_blas_libs,$ff_blas_inc) ff_mkl_ok=yes ff_mkl=yes dnl AC_DEFINE(HAVE_MKL,1, the MKL intel lib is present for BLAS and LAPACK ) fi fi if test "$enable_system_blas" != no -a "$ff_blas_ok" = no then # User-specified location # add MKL seach dec 2010 FH .... 
ff_mkl_flags=""
# Probe whether the C compiler accepts -mkl. CFLAGS is restored afterwards so the probe leaves no trace.
# NOTE(review): the assignment below targets ff_ff_mkl_flags while the test further down reads
# ff_mkl_flags - possibly a typo. Left as is because renaming it would make the library search
# below be skipped whenever the compiler accepts -mkl - confirm intent.
if test "$ff_mkl_libpath" != "not" ; then
    ff_CFLAGS="$CFLAGS"
    CHECK_COMPILE_FLAG(C,-mkl,CFLAGS)
    if test "$ff_CFLAGS" != "$CFLAGS" ; then
        ff_ff_mkl_flags="-mkl"
    fi
    CFLAGS="$ff_CFLAGS"
fi
AC_MSG_CHECKING(for MKL)
ff_mkl_root=""
if test "$ff_mkl_libpath" != "not" -a -z "$ff_mkl_flags" ; then
    # With a bare yes - look for a directory containing /mkl/ in the runtime and link-time library paths.
    # The last matching entry wins.
    if test "$ff_mkl_libpath" = "yes" ; then
        ff_IFS="$IFS"
        IFS=":"
        for i in $LD_LIBRARY_PATH:$LIBRARY_PATH ; do
            case $i in
                */mkl/*) ff_mkl_libpath=$i;;
            esac ;
        done;
        IFS="$ff_IFS"
        # else
        # ff_mkl_libpath=no
    fi
    if test "$ff_mkl_libpath" != "no" -a -d "$ff_mkl_libpath" ; then
        # Split the library directory into the part before /lib and the part after lib/
        ff_mkl_root=`expr "//$ff_mkl_libpath" : '//\(.*\)/lib.*'`
        ff_mkl_arch=`expr "//$ff_mkl_libpath" : '//.*/lib/\(.*\)'`
        ff_mkl_lp=_lp64
        case "$ff_mkl_arch" in
            *64*) ff_mkl_lp=_lp64;;
        esac
        # Interface layer name follows the Fortran compiler
        case $F77 in
            *ifort*) ff_mkl_cc=intel;;
            *) ff_mkl_cc=gf;;
        esac
        # bof bof ....
        # BLACS flavour is guessed from the path of the MPI launcher
        case "$MPIRUN" in
            */sgi/*) ff_mkl_mpi=_sgimpt;;
            */intel/*) ff_mkl_mpi=_intelmpi;;
            *) ff_mkl_mpi=_openmpi;;
        esac
        # echo ................ ff_mkl_root = $ff_mkl_root .. $ff_mkl_arch
        if test ! -d "$ff_mkl_libpath" ; then
            ff_mkl_libpath="$ff_mkl_root/lib/$ff_mkl_arch"
        fi
        mkl_l="-Wl,-rpath,$ff_mkl_libpath -L$ff_mkl_libpath"
        mkl_piomp5=$(realpath $ff_mkl_libpath/../../compiler/lib/)
        ff_mkl_inc=$(realpath $ff_mkl_libpath/../include)
        ff_blas_inc="-I$ff_mkl_inc"
        # Add the compiler runtime directory to the link line when it exists
        if test -d "$mkl_piomp5" ; then
            mkl_l="$mkl_l -Wl,-rpath,$mkl_piomp5 -L$mkl_piomp5" ;
        fi
        # echo " ################### $mkl_l $ff_blas_inc "
        # Three layouts are handled: the single runtime library - a separate lapack library - the layered libraries
        if test -f "$ff_mkl_libpath/libmkl_rt.$ff_suffix_dylib" ; then
            mkl_blas=rt
            mkl_lapack=rt
            ff_mkl_mlt="$mkl_l -lmkl_rt -lmkl_intel_thread -lmkl_core $ff_iomp5 $ff_pthread"
            ff_mkl_blas="$mkl_l -lmkl_rt -lmkl_sequential -lmkl_core $ff_pthread"
            ff_mkl_lapack="$ff_mkl_blas"
        elif test -f "$ff_mkl_libpath/libmkl_lapack.$ff_suffix_dylib" ; then
            ff_mkl_mlt="$mkl_l -lmkl_${ff_mkl_cc}${ff_mkl_lp} -lmkl_lapack -lmkl_intel${ff_mkl_lp} -lmkl_intel_thread -lmkl_core -lguide -lm -lpthread"
            ff_mkl_blas="$mkl_l -lmkl_${ff_mkl_cc}${ff_mkl_lp} -lmkl_lapack -lmkl_intel${ff_mkl_lp} -lmkl_sequential -lmkl_core -lguide -lm -lpthread"
            ff_mkl_lapack="$mkl_l -lmkl_lapack"
        else
            ff_mkl_mlt="$ff_ldbg $mkl_l -lmkl_${ff_mkl_cc}${ff_mkl_lp} -lmkl_intel${ff_mkl_lp} -lmkl_intel_thread -lmkl_core -lmkl_intel_thread $ff_iomp5 $ff_ldeg -lm $ff_pthread"
            ff_mkl_blas="$ff_ldbg $mkl_l -lmkl_${ff_mkl_cc}${ff_mkl_lp} -lmkl_intel${ff_mkl_lp} -lmkl_sequential -lmkl_core -lmkl_sequential $ff_ldeg -lm $ff_pthread"
            ff_mkl_lapack="$ff_mkl_blas"
        fi
        # Only set the ScaLAPACK and BLACS link lines when the ScaLAPACK library file is really
        # present in the MKL directory - either as a shared or as a static library.
        if test -f "$ff_mkl_libpath/libmkl_scalapack${ff_mkl_lp}.$ff_suffix_dylib" -o -f "$ff_mkl_libpath/libmkl_scalapack${ff_mkl_lp}.a" ; then
            ff_mkl_scalapack="$mkl_l -lmkl_blacs${ff_mkl_mpi}${ff_mkl_lp} -lmkl_scalapack${ff_mkl_lp} "
            ff_mkl_blacs="$mkl_l -lmkl_blacs${ff_mkl_mpi}${ff_mkl_lp} -lmkl_scalapack${ff_mkl_lp}"
        fi
        ff_blas_ok=yes
        ff_lapack_ok=yes

        # <> ALH - 6/11/13 - request from Atsushi Suzuki - the default MKL library must be able to
        # handle threads when FF is connected to AS' solver. So we need an option to configure FF with the
        # multithreaded MKL by default.
AC_ARG_ENABLE(mkl_mlt,AC_HELP_STRING([--enable-mkl-mlt],[Link with the multithreaded instead of the monothreaded version of the MKL])) if test "$enable_mkl_mlt" = yes then ff_blas_libs="$ff_mkl_mlt" else ff_blas_libs="$ff_mkl_blas" fi ff_blas_inc="-I$ff_mkl_root/include" dnl scalapack, and blacs do not not work with mlk and sgi if test "$ff_mkl_mpi" = "_intelmpi" ; then AC_FF_ADDWHERELIB(scalapack,$ff_mkl_scalapack,$ff_blas_inc) AC_FF_ADDWHERELIB(blacs,$ff_mkl_blacs,$ff_blas_inc) else ff_warm=" (We do not use MKL scalapack and blacs with sgi MPI), " fi AC_FF_ADDWHERELIB(lapack,$ff_mkl_lapack,$ff_blas_inc) AC_FF_ADDWHERELIB(mkl,$ff_mkl_mlt,$ff_blas_inc) ff_mkl_ok=yes ff_mkl=yes dnl AC_DEFINE(HAVE_MKL,1, the MKL intel lib is present for BLAS and LAPACK ) else ff_mkl_libpath=no fi fi AC_MSG_RESULT( [ $ff_warm root: $ff_mkl_root , arch: $ff_mkl_arch , $ff_mkl_lp ... ]) AC_ARG_WITH(blas, AC_HELP_STRING([--with-blas=library],[Use a specific version of the Blas]), ff_blas_ok=yes ff_blas_libs="${withval}") # Specific BLAS library location for FreeBSD ff_freebsd_blas="-lf2c -lf77blas -latlas -lgslcblas" ff_dll_blas=`which libopenblas.dll` if test -z "$ff_dll_blas" ; then ff_dll_blas="no"; fi # zmach is in lapack # Trying blas library # echo "LIBS = $LIBS blas --- $ff_blas_ok" for iblas in "$ff_dll_blas" \ "-framework Accelerate" "$ff_mkl_blas" "-lblas" "-L/usr/lib/atlas -lblas" \ $ff_freebsd_blas do if test "$ff_blas_ok" = no -a "$iblas" != "no" then AC_MSG_CHECKING(for daxpy_ in $iblas) ff_save_libs="$LIBS" LIBS="$LIBS $iblas" AC_LINK_IFELSE( [AC_LANG_CALL(,daxpy_)], ff_blas_ok=yes ff_blas_libs="$iblas",) LIBS="$ff_save_libs" AC_MSG_RESULT($ff_blas_ok) fi done #cblas_zdotu_sub #echo "LIBS = " $LIBS ff_cblas_libs=no if test "$ff_blas_ok" = yes then AC_MSG_CHECKING(for blas_zdotu_sub in $ff_blas_libs) ff_save_libs="$LIBS" LIBS="$LIBS $ff_blas_libs $FLIBS" AC_LINK_IFELSE( [AC_LANG_CALL(,cblas_zdotu_sub)], ff_cblas_libs=yes) AC_MSG_RESULT($ff_cblas_libs) LIBS="$ff_save_libs" 
fi fi # <> ALH - 18/9/13 - option to compile the OpenBLAS moved from the FFCS tree to FF tree AC_ARG_ENABLE(openblas,AC_HELP_STRING([--disable-openblas],[Disable the automatic download of OpenBLAS])) if test "$ff_blas_ok" = no && test "$enable_openblas" != no && test "$enable_download" = yes then AC_CHECK_PROG(ff_git,git,yes,no) AC_MSG_CHECKING([Activating the OpenBLAS (deactivate with --disable-openblas)]) if test $ff_git = no then AC_MSG_RESULT([git not found]) else ff_download_blas=openblas # it is necessary to split the library path into -L and -l otherwise ff/upstream/plugin/seq/ff-c++ will place -lgfortran in # LIBS, .../libgoto___.lib in OTHER, and call the compiler with $LIBS before $OTHER, which will fail. But libopenblas.a is placed in # the standard directory 3rdparty/lib so the -L option is not required anymore. ff_blas_libs="-L${curdir}/3rdparty/lib -lopenblas $LIBS $ff_pthread" ff_blas_inc="-I${curdir}/3rdparty/include" # skip generic blas compilation and activate OpenBLAS (see [[file:3rdparty/blas/Makefile.am::OpenBLAS]]) AC_SUBST(DOWNLOADED_BLAS,"") AC_SUBST(COMPILE_OPENBLAS,openblas) ff_blas_ok=yes ff_cblas_h=yes ff_cblas_libs=yes AC_MSG_RESULT([ok]) fi fi # If all else fails, download a generic version if test "$ff_blas_ok" = no -a "$enable_download" = yes -a "$enable_fortran" != no -a "$enable_c" != no then AC_CHECK_PROG(ff_unzip,unzip,yes,no) AC_MSG_CHECKING(for BLAS version to download) # Do not update $LIBS, but create an extra LIB variable, because this lib does not exist yet, and this could make # the following tests fail. # When compiling a generic version, we do not need an optimized version of the BLAS. 
ff_download_blas=generic
ff_blas_libs="-L${curdir}/3rdparty/blas -lcblas -lf77blas"
ff_blas_inc="-I${curdir}/3rdparty/blas/CBLAS/src"
AC_SUBST(DOWNLOADED_BLAS,"libf77blas.a libcblas.a")
AC_SUBST(DOWNLOADED_BLAS_BUILT_SOURCES,'BLAS CBLAS $(F77BLAS_SRC) $(CBLAS_SRC)')
AC_MSG_RESULT($ff_download_blas)
ff_blas_ok=yes
ff_cblas_h=yes
ff_cblas_libs=yes
fi

# BLACSLIBS uses the ScaLAPACK plus BLAS/LAPACK link lines when ff_BLASLAPACK_LIB is set
# and falls back to the plain BLAS libraries otherwise.
if test -n "$ff_BLASLAPACK_LIB"; then
    AC_SUBST(BLACSLIBS,"$ff_SCALAPACK_LIB $ff_BLASLAPACK_LIB")
else
    AC_SUBST(BLACSLIBS,$ff_blas_libs)
fi
AC_SUBST(BLASLIBS,$ff_blas_libs)
AC_SUBST(BLASINC,$ff_blas_inc)

#looking for cblas.h FH
# Only done for a BLAS that was neither downloaded nor taken from the MKL.
# Finding the header in any one of the three locations is enough - the flag starts
# at no and a hit on any of them sets it to yes.
if test "$ff_blas_ok" = yes -a -z "$ff_download_blas" -a "$ff_mkl_ok" != yes; then
    ff_cblas_h=no
    AC_CHECK_HEADERS(cblas.h Accelerate/cblas.h atlas/cblas.h, ff_cblas_h=yes)
    # Plain assignment keeps this portable to every /bin/sh and keeps a blank before the new flag
    AC_CHECK_LIB([cblas], [cblas_zdotu_sub], [LIBS="$LIBS -lcblas"])
fi

# ALH - 18/9/13 - [[file:3rdparty/blas/Makefile.am]] does not update the WHERE mechanism so it always needs to be set up
# here even in the case of a downloaded blas library.

AC_FF_ADDWHERELIB(blas,$ff_blas_libs,)
# end of BLAS -------------------

# Looking for ARPACK
# ------------------

# We need the following g77 libraries to connect to the Fortran 77
# Arpack.

if test "$ff_blas_ok" = yes; then
    ff_g2c_lib="$FLIBS"
    if test -n "$ff_g2c_lib"
    then
        LIBS="$LIBS $ff_g2c_lib"
    else
        # FLIBS is empty - look for libg2c in the default search path and then under /sw/lib
        ff_g2c_ok=no
        AC_CHECK_LIB(g2c,G77_second_0,
            LIBS="$LIBS -lg2c"
            ff_g2c_lib="-lg2c"
            ff_g2c_ok=yes)
        if test "$ff_g2c_ok" = no; then
            AC_MSG_CHECKING(for G77_second_0 in /sw/lib/libg2c.a)
            ff_save_libs="$LIBS"
            LIBS="$LIBS -L/sw/lib -lg2c"
            AC_LINK_IFELSE(
                [AC_LANG_CALL(,G77_second_0)],
                ff_g2c_lib="-L/sw/lib -lg2c"
                ff_g2c_ok=yes,
                LIBS="$ff_save_libs")
            AC_MSG_RESULT($ff_g2c_ok)
        fi
    fi
fi

# Copy the result of g2c investigations into a separate variable
# because BLAS compilation will need it.
AC_SUBST(G2CLIB,$ff_g2c_lib) #looking of lapack if no compile the arpack lapack # warning $$ because the make eat one ff_lapack_ok=no; ff_lapack_lib= AC_MSG_CHECKING([for lapack in $LIBS, $ff_blas_libs and -llapack] ) if test "$ff_blas_ok" = yes ; then AC_LINK_IFELSE( [AC_LANG_CALL(,dgeqr2_)], ff_lapack_ok=yes) if test "$ff_lapack_ok" = no ; then ff_save_libs=$LIBS LIBS="$ff_save_libs $ff_blas_libs" AC_LINK_IFELSE( [AC_LANG_CALL(,dgeqr2_)], [ff_lapack_ok=yes;ff_lapack_lib="$ff_blas_libs"]) if test "$ff_lapack_ok" = no ; then LIBS="$ff_save_libs -llapack $ff_blas_libs" AC_LINK_IFELSE( [AC_LANG_CALL(,dgeqr2_)], [ff_lapack_ok=yes ff_lapack_lib="-llapack"] ) fi LIBS="$ff_save_libs" fi fi if test "$ff_lapack_ok" = no; then AC_ARG_WITH(lapack, AC_HELP_STRING([--with-lapack=library],[Use a specific version of Lapack]), ff_lapack_ok=yes ff_lapack_lib="${withval}" LIBS="$ff_lapack_lib $LIBS") fi AC_MSG_RESULT($ff_lapack_ok) if test "$ff_lapack_ok" != no ; then AC_SUBST(LAPACKLIBS,$ff_lapack_lib) fi # Lapack configuration for Arpack ff_lapackdir='$$(LAPACKdir)' if test "$ff_lapack_ok" = yes then # no compilation of lapack in arpack ff_lapackdir= else ff_lapack_lib="-L${curdir}/3rdparty/lib -llapack" fi # Arpack itself AC_ARG_ENABLE(download-arpack,AC_HELP_STRING([--enable-download_arpack],[force the use download of arpack])) #echo "****** enable_download_arpack=$enable_download_arpack" if test -z "$ff_arpack_ok"; then ff_arpack_ok=no ff_save_libs="$LIBS" ff_arpack_libs= if test "$ff_blas_ok" = yes ; then if test "$enable_download_arpack" != yes ; then # User-specified location AC_ARG_WITH(arpack, [ --with-arpack=library Use a specific version of Arpack], ff_arpack_ok=yes ff_arpack_libs="${withval}") # Default locations if test "$ff_arpack_ok" = no; then AC_CHECK_LIB(arpack,dsaupd_, ff_arpack_libs="-larpack -llapack" ff_arpack_ok=yes,, -llapack) fi # Trying to "locate" Arpack if test "$ff_arpack_ok" = no -a "$enable_download" != yes ; then AC_MSG_CHECKING(for libarpack with 
locate) ff_lib_arpack=`locate libarpack|grep 'libarpack.*.a$'|head -1` LIBS="$ff_lib_arpack $LIBS" AC_LINK_IFELSE( [AC_LANG_CALL(,dsaupd_)], ff_arpack_ok=yes ff_arpack_libs="$ff_lib_arpack") AC_MSG_RESULT($ff_arpack_ok) fi if test "$ff_arpack_ok" = yes then AC_FF_ADDWHERELIB(arpack,$ff_arpack_libs,) fi fi # If all else fails, download! if test "$enable_fortran" != no -o -z "$enable_fortran" then if test "$ff_arpack_ok" = no -a "$enable_download" = yes -o "$enable_download_arpack" = yes then ff_arpack_download=yes AC_MSG_NOTICE(using downloaded Arpack) # ALH - 6/11/13 - this install goal is the standard goal for all downloaded packages in # [[file:3rdparty/common.mak::install]] AC_SUBST(DOWNLOAD_ARPACK,install) AC_SUBST(FF_LAPACKdir,$ff_lapackdir) AC_SUBST(ARPACKLIB,${curdir}/3rdparty/lib/libarpack.a) AC_SUBST(LAPACK_arpack_LIB,${curdir}/3rdparty/lib/liblapack.a) # Do not update $LIBS, but create an extra LIB variable, because this lib does not exist yet, and this # could make the following tests fail. # ALH - 30/9/13 - do not use the "-L ${curdir}/3rdparty/lib" directive because it would allow other # following -l directives (eg -lumfpack) to pick an old locally compiled library instead of the system # ones. ff_arpack_libs="${curdir}/3rdparty/lib/libarpack.a $ff_lapack_lib" ff_arpack_ok=yes fi fi fi fi if test "$ff_arpack_ok" != yes then AC_MSG_NOTICE([-- NO ARPACK -- enable_download : $enable_download , wget: $ff_wget ]) fi # Do not insert ARPACK libs in $LIBS yet, because they may not exist # yet, and this could make the following tests fail. 
LIBS="$ff_save_libs" if test "$ff_arpack_ok" = yes; then AC_SUBST(ARPACKLIBS,$ff_arpack_libs) EIGENOBJ='eigenvalue.$(OBJEXT)' AC_DEFINE(HAVE_LIBARPACK,1,Arpack is used for eigenvalue computation) # Determines whether to run the eigenvalue tests else # all eigen test fails AC_SUBST([SKIP_TESTS_EIGEN],[yes]) fi AM_CONDITIONAL([ENABLE_EIGEN], [test "$ff_arpack_ok" = "yes"]) AC_SUBST([EIGENOBJ]) # ALH - 25/9/13 - <> always run the WHERE LD statement for lapack because some libraries in # plugin/seq require it (eg [[file:plugin/seq/Element_Mixte.cpp::lapack]]). Request from Fred. AC_FF_ADDWHERELIB(lapack,$ff_lapack_lib,) # Looking for UMFPACK # ------------------- UMFPACK_CPPFLAGS=""; ff_umfpack_incs="" ff_amd_ok=no ff_umfpack_ok=no # echo @@@@@@@@@ ff_where_lib_conf_suitesparse == $ff_where_lib_conf_suitesparse @@@@ if test "$ff_where_lib_conf_suitesparse" = "1" ; then echo "amd/umfpack/suitesparse of petsc" echo " lib: $ff_where_lib_suitesparse inc: $ff_where_inc_suitesparse" ff_amd_ok=yes ff_umfpack_ok=yes enable_system_umfpack=no ff_umfpack_libs="$ff_where_lib_suitesparse" ff_umfpack_incs="$ff_where_inc_suitesparse" AC_DEFINE(HAVE_LIBUMFPACK,1,Umfpack is used for sparse matrices computations) AC_DEFINE(HAVE_LIBCHOLMOD,1,Cholmod is used for sparse matrices computations) UMFPACK_CPPFLAGS="$ff_where_inc_suitesparse" dnl AC_SUBST([UMFPACK_CPPFLAGS],[$ff_where_inc_suitesparse]) AC_DEFINE(HAVE_UMFPACK_H,1,If umfpack.h is located in UMFPACK subdir) fi #echo "@@@@@@@" ff_save_libs="$LIBS" AC_ARG_ENABLE(system_umfpack,AC_HELP_STRING([--disable-system-umfpack],[Disable the automatic detection of umfpack, colmod, amd, ....])) if test "$ff_umfpack_ok" = no -a "$enable_system_umfpack" != no -a "$ff_blas_ok" = yes; then # echo "@@@@@@@qqqq" # User-specified location AC_ARG_WITH(amd, AC_HELP_STRING([--with-amd=library],[Use a specific version of AMD]), ff_amd_ok=yes ff_umfpack_libs="${withval}") AC_ARG_WITH(umfpack, AC_HELP_STRING([--with-umfpack=library],[Use a specific version 
of Umfpack]), ff_umfpack_ok=yes ff_umfpack_libs="${withval} $ff_umfpack_libs") if test "${with_umfpack_include+set}" = set; then CPPFLAGS="$CPPFLAGS $with_umfpack_include" fi AC_CHECK_HEADERS(umfpack.h umfpack/umfpack.h ufsparse/umfpack.h suitesparse/umfpack.h, [ff_umfpack_header=yes ff_umfpack_dir=`dirname $ac_header` break]) # Somes systems like FreeBSD hide umfpack.h in a directory called UMFPACK (all capitals). This breaks the # standard #define produced by autoconf in config.h.in. LIBS="$ff_blas_libs $LIBS" if test "$ff_umfpack_header" != yes; then AC_CHECK_HEADER(UMFPACK/umfpack.h, ff_umfpack_header=yes ff_umfpack_dir=UMFPACK AC_DEFINE(HAVE_BIG_UMFPACK_UMFPACK_H,1,If umfpack.h is located in UMFPACK subdir)) fi if test "$ff_amd_ok" = no; then AC_CHECK_LIB(amd,amd_info, ff_umfpack_libs="$ff_umfpack_libs -lamd" ff_amd_ok=yes) fi if test "$ff_umfpack_ok" = no -a "$ff_amd_ok" = yes; then # ALH - 30/9/13 - other libraries required by Umfpack AC_CHECK_LIB(cholmod,cholmod_add,ff_umfpack_libs="$ff_umfpack_libs -lcholmod") AC_CHECK_LIB(colamd,colamd_set_defaults,ff_umfpack_libs="$ff_umfpack_libs -lcolamd") AC_CHECK_LIB(umfpack,umf_i_malloc, ff_umfpack_libs="-lumfpack $ff_umfpack_libs" ff_umfpack_ok=yes,,$ff_umfpack_libs) fi if test "$ff_umfpack_header" != yes -o "$ff_umfpack_ok" != yes; then ff_umfpack_ok=no AC_MSG_WARN([Sorry, we could not find the UMFPACK lib or the UMFPACK headers]) ff_umfpack_incs="" UMFPACK_CPPFLAGS="" fi if test "$ff_umfpack_ok" = yes -a "$ff_amd_ok" = yes; then if test -n "$ff_umfpack_incs" ; then UMFPACK_CPPFLAGS="$ff_umfpack_incs" fi dnl AC_SUBST([UMFPACK_CPPFLAGS],[$ff_umfpack_incs]) AC_DEFINE(HAVE_LIBUMFPACK,1,Umfpack is used for sparse matrices computations) fi fi LIBS="$ff_save_libs" #echo "@@@@22@@@" # If all else fails, download! 
# attention no /usr/include in WHERE if test "$ff_umfpack_ok" = yes -a -n "$ff_umfpack_dir" -a -z "$ff_umfpack_incs" then ff_umfpack_incs="-I/usr/include/$ff_umfpack_dir" UMFPACK_CPPFLAGS="$ff_umfpack_incs" fi if test "$ff_umfpack_ok" = no -a "$enable_download" = yes -a "$enable_umfpack" != no then AC_MSG_NOTICE(using downloaded UMFPACK) AC_SUBST(DOWNLOAD_UMFPACK,"umfpack") ff_umfpack_download=yes # Do not update $LIBS, but create an extra LIB variable, because this lib does not exist yet, and this could make # the following tests fail. ff_umfpack_libs="-L${curdir}/3rdparty/lib -lumfpack -lcholmod -lcolamd -lamd -lsuitesparseconfig" ff_umfpack_incs="-I${curdir}/3rdparty/include" UMFPACK_CPPFLAGS="$ff_umfpack_incs" AC_DEFINE(HAVE_LIBUMFPACK,1,UMFPACK) if test "$ff_win32" = yes; then AC_SUBST(FF_UMFPACK_CONFIG,-DCBLAS) fi ff_umfpack_ok=yes fi AC_SUBST([UMFPACK_CPPFLAGS],[$UMFPACK_CPPFLAGS]) # ALH - 17/9/13 - moved UMFPACK configuration settings in wherelib to _after_ configuring the download version because # [[file:3rdparty/umfpack/Makefile.am]] does not set the WHERE mechanism. Also removed -I/usr/include/$ff_umfpack_dir # from include options for FFCS because it breaks the MingW64 compilation process. 
if test "$ff_umfpack_ok" = yes then if test $enable_ffcs = no then AC_FF_ADDWHERELIB(amd,$ff_umfpack_libs,$ff_umfpack_incs) AC_FF_ADDWHERELIB(umfpack,$ff_umfpack_libs,$ff_umfpack_incs) else AC_FF_ADDWHERELIB(amd,$ff_umfpack_libs,) AC_FF_ADDWHERELIB(umfpack,$ff_umfpack_libs,) fi else #echo "@@@@33@@@@" AC_MSG_NOTICE([ -- NO UMFPACK (ff_wget = $ff_wget)]) fi AC_SUBST(UMFPACKLIBS,$ff_umfpack_libs) # If times() and sysconf() are not here, UMFPACK should know if test "$ff_umfpack_ok" = yes then AC_CHECK_FUNCS(times sysconf, ff_umfpack_posix_ok=yes, ff_umfpack_posix_ok=no) if test "$ff_umfpack_posix_ok" = no then AC_SUBST(FF_UMFPACK_CONFIG,"-DCBLAS -DNPOSIX") fi fi # Checking for some functions that may not appear everywhere # ---------------------------------------------------------- AC_CHECK_HEADERS(cstddef) AC_CHECK_HEADERS(stddef.h) AC_CHECK_HEADERS(semaphore.h,ff_sem=1,ff_sem=0) AC_CHECK_HEADERS(sys/mman.h,ff_mmap=1,ff_mmap=0) if test "$ff_sem" -eq 1 -a "$ff_mmap" -eq 1 ; then BIN_ffmaster=ffmaster else enable_MMAP=no BIN_ffmaster= fi AC_CHECK_HEADERS(regex.h,ff_regex_h=1, ff_regex_h=0) AC_SUBST(FF_HAVE_REGEX_H,$ff_regex_h) AC_CHECK_HEADERS([unistd.h]) # asinh acosh atanh are not in Mingw yet gettimeofday ff_malloc_h="" AC_HEADER_TIME AC_CHECK_HEADERS(malloc.h,ff_malloc_h=1) AC_SUBST(FF_MALLOC_H,$ff_malloc_h) AC_CHECK_FUNCS(asinh acosh atanh getenv jn erfc tgamma gettimeofday mallinfo mstats) AC_CHECK_FUNCS(srandomdev) AC_CHECK_FUNCS(second_,ff_second="",ff_second=second.o) AC_SUBST(FF_SECOND,"$ff_second") # Enable static linking (no shared libraries) # ------------------------------------------- AC_CHECK_PROG(ff_libtool,libtool,yes,no) if test "$ff_mac" = "yes" -a "$ff_libtool" = yes ; then ff_AR="libtool" ff_ARFLAGS="-static -o" ff_RANLIB="echo" fi AC_ARG_ENABLE(static, [ --enable-static Build binaries with no shared library dependencies]) if test "$enable_static" = yes then AC_CHECK_PROG(ff_libtool,libtool,yes,no) if test "$ff_libtool" = yes then 
LDFLAGS="$LDFLAGS -all-static"
	AC_SUBST(STATICTOOL,libtool)
    else
	AC_MSG_ERROR(libtool not found)
    fi
fi

# For the compilation of arpack, use libtool to build a universal library on MacOS.
AC_SUBST(AR,$ff_AR)
AC_SUBST(ARFLAGS,$ff_ARFLAGS)
# NOTE(review): the substituted name is spelled RANLIN, not RANLIB. Left untouched
# because nothing visible here shows which spelling the Makefiles consume -- confirm.
AC_SUBST(RANLIN,$ff_RANLIB)

# Dynamic loading of compiled functions
# -------------------------------------
#
# Sets ff_dynload to yes only when dlfcn.h compiles AND a program calling
# dlopen() links; in that case HAVE_DLFCN_H is defined.

# Not if we don't want shared libraries (no - FH; modified by FH, June 2005)
if test "$ff_PURE_WIN32" -ne 1 ; then
    ff_dynload=no
    if test "$enable_static" != yes
    then
	# Availability of dlopen(). Use AC_COMPILE rather than
	# AC_CHECK_HEADERS because the latter has problems seeing it (in
	# Cygwin) when it does not compile (in Mingw).
	AC_MSG_CHECKING(for dlfcn.h)
	AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <dlfcn.h>]])],
	    ff_dynload=yes,
	    ff_dynload=no)
	AC_MSG_RESULT($ff_dynload)
    fi

    # FFCS - -lm missing for ffmedit link stage on Debian Testing
    AC_CHECK_LIB(m,sin)

    # Checks that we also have the corresponding library
    if test "$ff_dynload" = yes
    then
	AC_CHECK_LIB(dl,dlinfo)

	# Checks that everything works ok. The #include must sit on its own
	# line: a preprocessor directive extends to the end of the line.
	AC_MSG_CHECKING(whether dlopen links ok)
	AC_LINK_IFELSE(
[AC_LANG_SOURCE([[#include <dlfcn.h>
int main(int argc,char **argv){
    dlopen("",RTLD_LAZY);
    return 0;
}]])],
	    ff_dynload=yes,
	    ff_dynload=no)
	AC_MSG_RESULT($ff_dynload)
    fi

    if test "$ff_dynload" = yes
    then
	AC_DEFINE(HAVE_DLFCN_H,1,Dynamic loading - not mandatory)
    fi
fi # endif ff_PURE_WIN32 != 1

# the -rdynamic option does not exist on MacOS and SunOS
if test "$ff_dynload" = yes
then
    # Activate dynamic loading tests (see plugin/seq/Makefile.am)
    AC_SUBST(LOAD_TESTS,../regtests.sh)
    AC_SUBST(LOAD_COMPILE,load_compile)

    # gcc on MacOS does not produce an error with "-rdynamic" but
    # still complains about it.
if test "$ff_mac" = "no" -a "$ff_win32" = "no" -a "$ff_sunos" = "no" ; then CHECK_COMPILE_FLAG(C++,-rdynamic,LDFLAGS) dnl CHECK_COMPILE_FLAG(C,-rdynamic,CNOFLAGS) dnl CHECK_COMPILE_FLAG(C,-rdynamic,CFLAGS) fi if test "$ff_fpic" != "no" ; then CHECK_COMPILE_FLAG(C++,-fPIC,CXXFLAGS) CHECK_COMPILE_FLAG(C,-fPIC,CFLAGS) CHECK_COMPILE_FLAG(C,-fPIC,CNOFLAGS) fi if test "$enable_fortran" != no ; then if test "$ff_fpic" != "no" ; then CHECK_COMPILE_FLAG(Fortran,-fPIC,FFLAGS) CHECK_COMPILE_FLAG(Fortran,-fPIC,FNOFLAGS) CHECK_COMPILE_FLAG(Fortran,-fPIC,FCFLAGS) CHECK_COMPILE_FLAG(Fortran,-fPIC,FCNOFLAGS) fi dnl CHECK_COMPILE_FLAG(Fortran,-rdynamic,FNOFLAGS) dnl CHECK_COMPILE_FLAG(Fortran,-rdynamic,FFLAGS) fi fi AC_SUBST(DYLIB_SUFFIX,$ff_suffix_dylib) # the doc is now in https://github.com/FreeFem/FreeFem-doc-pdf/blob/master/freefem%2B%2Bdoc.pdf # Checking wether we can generate some documentation # -------------------------------------------------- case "$F77" in *gfortran*) CHECK_COMPILE_FLAG(Fortran 77,[-fallow-argument-mismatch],FFLAGS) CHECK_COMPILE_FLAG(Fortran,[-fallow-argument-mismatch],FCFLAGS) ;; esac dnl dnl AC_CHECK_PROG(ff_latex,latex,yes,no) dnl AC_CHECK_PROG(ff_makeindex,makeindex,yes,no) dnl AC_CHECK_PROG(ff_dvips,dvips,yes,no) dnl # to translate the figure dnl AC_CHECK_PROG(ff_pdf2ps,pdf2ps,yes,no) dnl AC_CHECK_PROGS(EPSTOPDF,[epstopdf pstopdf],[false]) dnl AC_CHECK_PROG(ff_convert,convert,yes,no) dnl if test "$ff_latex" = yes -a "$ff_makeindex" = yes -a "$ff_dvips" = yes -a $ff_pdf2ps = yes -a $ff_convert = yes; dnl then dnl AC_SUBST(DOCPS,"freefem++doc.ps") dnl dnl AC_CHECK_PROG(ff_gzip,gzip,yes,no) dnl if test "$ff_gzip" = yes; dnl then dnl AC_SUBST(DOCPSGZ,"freefem++doc.ps.gz") dnl fi dnl fi # PDF documentation building sometimes poses problems because of pdfsync.sty. So we need to be able to disable it. # FFCS - 9/4/14 - need to disable PDF also during FFCS build (which does not use the resulting PDF). 
dnl AC_ARG_ENABLE(pdf,[ --disable-pdf Disable PDF documentation building]) dnl if test "$enable_pdf" != no dnl then dnl AC_CHECK_PROG(ff_pdflatex,pdflatex,yes,no) dnl if test "$ff_pdflatex" = yes -a $EPSTOPDF != false -a $ff_convert = yes; dnl then dnl AC_SUBST(DOCPDF,"freefem++doc.pdf") dnl fi dnl fi # Choosing compilation options for the standard version (in src/std) # ------------------------------------------------------------------ # The "standard" configured version can use win32 (mingw) if test "$ff_mingw" = yes then # FFCS does not use FreeFem++-std, and Pcrgraph.cpp does not compile under mingwin64 if test $enable_ffcs = no then ff_stdprog="FreeFem++-std${EXEEXT}" ff_std_graph_obj=Pcrgraph.$OBJEXT fi # ALH - FFCS - 30/11/8 - I need to get the output from FF for FFCS regression tests if test $enable_ffcs = yes then ff_std_ldflags="-mconsole -mwindows" else ff_std_ldflags=-mwindows fi ff_std_libs= fi AC_SUBST(STD_GRAPH_OBJ,$ff_std_graph_obj) AC_SUBST(STD_LDFLAGS,$ff_std_ldflags) AC_SUBST(STD_LIBS,$ff_std_libs) # Allow some downloaded tools not to be compiled # ---------------------------------------------- ## try to see pakage is hon computer if the FH ZZZZ ## FH to find gsl ... 
AX_PATH_GSL(1.15, ff_with_gsl=yes, ff_with_gsl=no) if test "$ff_with_gsl" = "yes"; then AC_FF_ADDWHERELIB(gsl,$GSL_LIBS,$GSL_CFLAGS) fi m4_map([AC_FF_WHERELIB],[ [[mumps],[-ldmumps -lzmumps -lmumps_common -lpord],[dmumps_c.h],[]], [[mumps_seq],[-ldmumps_seq -lzmumps_seq -lmumps_common_seq -lpord_seq -lmpiseq_seq],[dmumps_c.h],[]], [[libseq],[-lmpiseq_seq],[${inc_usr_include}mumps_seq/mpi.h]], [[mumps_ptscotch],[-lpord_ptscotch -lmumps_common_ptscotch -ldmumps_ptscotch -lzmumps_ptscotch -lpord_ptscotch],[dmumps_c.h]], [[mumps_scotch],[-lpord_scotch -lmumps_common_scotch -ldmumps_scotch -lzmumps_scotch -lpord_scotch],[dmumps_c.h]], [[fftw3],[-lfftw3],[${inc_usr_include}fftw3.h],[]], [[superlu],[-lsuperlu],[${inc_usr_include}superlu/superlu_enum_consts.h],[]], [[blacs],[ -lblacsCinit$ff_with_mpi -lblacsF77init$ff_with_mpi -lblacs$ff_with_mpi],[]], [[scalapack],[-lscalapack$ff_with_mpi],[]], [[scotch],[-lscotch -lscotcherr],[scotch.h]], [[ptscotch],[-lptscotch -lptscotcherr],[ptscotch.h]], [[metis],[-lmetis],[${inc_usr_include}metis/metis.h],[]], [[metis],[-lmetis],[metis.h],[]], [[parmetis],[-lparmetis -lmetis],[],[]], [[freeyams],[-lfreeyams],[freeyamslib.h],[]], [[mmg3d],[-lmmg3d-v4],[libmmg3d.h],[]], [[mmg],[-lmmg],[libmmg.h],[]], [[parmmg],[-lparmmg],[libparmmg.h],[]], [[mshmet],[-lmshmet],[mshmetlib.h],[]], dnl [[gsl],[-lgsl -lgslcblas -lm],[gsl/gsl_sf.h],[]], [[tetgen],[-ltet],[tetgen.h],[]], [[ipopt],[ -lipopt],[${inc_usr_include}coin/IpTNLP.hpp],[]], [[nlopt],[ -lnlopt],[nlopt.hpp],[]] ] ) ## before try TOOL_DISABLE(tetgen,tetgen.$DYLIB_SUFFIX) TOOL_DISABLE(lapack,lapack.$DYLIB_SUFFIX) TOOL_DISABLE(metis,metis.$DYLIB_SUFFIX) TOOL_DISABLE(parmetis,parmetis.$DYLIB_SUFFIX) TOOL_DISABLE(mmg3d,mmg3d-v4.0.$DYLIB_SUFFIX) if test "$ff_cmake" = yes -o \( "$ff_mmg_ok" = yes -a -z "$TOOL_COMPILE_mmg" \); then TOOL_DISABLE(mmg,mmg.$DYLIB_SUFFIX) fi if test "$ff_cmake" = yes -o \( "$ff_parmmg_ok" = yes -a -z "$TOOL_COMPILE_parmmg" \); then 
TOOL_DISABLE(parmmg,parmmg.$DYLIB_SUFFIX)
fi
TOOL_DISABLE(mshmet,"mshmet.$DYLIB_SUFFIX aniso.$DYLIB_SUFFIX")
TOOL_DISABLE(gmm,ilut.$DYLIB_SUFFIX,,enable_gmm=no)

# Fortran-dependent tools: enabled unless Fortran was explicitly disabled
# (an empty enable_fortran also differs from "no", so a single test suffices).
if test "$enable_fortran" != no; then
    TOOL_DISABLE(ipopt,ff-Ipopt.$DYLIB_SUFFIX)
    TOOL_DISABLE(scalapack,"MUMPS.$DYLIB_SUFFIX MUMPS_mpi.$DYLIB_SUFFIX")
    TOOL_DISABLE(mumps,"MUMPS.$DYLIB_SUFFIX MUMPS_mpi.$DYLIB_SUFFIX")
    TOOL_DISABLE(mumps_seq,"MUMPS_seq.$DYLIB_SUFFIX MUMPS.$DYLIB_SUFFIX",mumps-seq)
    AC_SUBST([TOOL_DYLIB_ffnewuoa],ffnewuoa.$DYLIB_SUFFIX)
else
    TOOL_DISABLE_NO(ipopt,ff-Ipopt.$DYLIB_SUFFIX)
    TOOL_DISABLE_NO(scalapack,"MUMPS.$DYLIB_SUFFIX MUMPS_mpi.$DYLIB_SUFFIX")
    TOOL_DISABLE_NO(mumps,"MUMPS.$DYLIB_SUFFIX MUMPS_mpi.$DYLIB_SUFFIX")
    TOOL_DISABLE_NO(mumps_seq,"MUMPS_seq.$DYLIB_SUFFIX MUMPS.$DYLIB_SUFFIX",mumps-seq)
    AC_SUBST([TOOL_DYLIB_ffnewuoa],"")
    # Plain re-assignment instead of the bash-only "+=" operator: configure
    # may be executed by a POSIX sh that does not support it.
    ff_uncompile_plugin="$ff_uncompile_plugin ffnewuoa"
fi

TOOL_DISABLE(nlopt,ff-NLopt.$DYLIB_SUFFIX)
TOOL_DISABLE(scotch,scotch.$DYLIB_SUFFIX)
TOOL_DISABLE(superlu,SuperLu.$DYLIB_SUFFIX)
TOOL_DISABLE(umfpack,UMFPACK64.$DYLIB_SUFFIX)
TOOL_DISABLE(yams,freeyams.$DYLIB_SUFFIX)
TOOL_DISABLE(pipe,pipe.$DYLIB_SUFFIX,,nodownload)
AC_SUBST([BIN_ffmaster],"$BIN_ffmaster")
if test "$ff_win32" = "yes" ; then
    TOOL_DISABLE(libpthread_google,ff-mmap-semaphore.$DYLIB_SUFFIX,libpthread-google)
else
    TOOL_DISABLE(MMAP,ff-mmap-semaphore.$DYLIB_SUFFIX,,nodownload)
fi

ff_pthread_google=$ff_pthread
if test "$enable_libpthread_google_download" = "yes" ; then
    ff_pthread_google="-I${curdir}/3rdparty/include -L${curdir}/3rdparty/lib -lpthread-google"
fi
AC_SUBST(LIB_PTHREAD_GOOGLE,$ff_pthread_google)

# FFCS - MUMPS_seq has a different Win32 compiler setup from FFCS, so we need to add some extra parameters
if test "$OS" = Windows_NT
then
    CFLAGS="$CFLAGS -DWITHOUT_PTHREAD -DAdd_"

    # we also need to satisfy ff-c++ that the pthread are not a blocking point
    if test -n "$ff_pthread" ; then
	AC_FF_ADDWHERELIB(pthread,"",)
    fi

    # ALH MPI "-fno-range-check" on Windows, but this options fails on MacOS 10.8. Add no-range-check for Windows
    # for hexadecimal parameter constants like:
    # [[file:c:/cygwin/home/alh/ffcs/rel/mingw/mpif.h::PARAMETER MPI_SHORT_INT z 8c000003]]
    #
    # NOTE(review): the original line breaks of this file were lost; it cannot be
    # determined from here whether the AC_SUBST below was active or commented out.
    # It is kept as a comment, exactly as it reads in the collapsed text -- confirm.
    # AC_SUBST(NO_RANGE_CHECK,-fno-range-check)
fi

# ALH - 4/9/13 - request from Helmut Jarausch - allow to change Scotch include path
if test "$with_scotch_include" = ""
then
    with_scotch_include=$ac_pwd/3rdparty/include/scotch
fi
AC_SUBST(SCOTCH_INCLUDE,$with_scotch_include)

# Find out kernel and libc versions
# ---------------------------------

if test "$ff_win32" != yes -a "$ff_mac" != yes
then
    AC_MSG_CHECKING(kernel version)
    # Read /proc/version on stdin and print the first x.y.z version number found.
    ff_kernel_version=`cat /proc/version|perl -e '<STDIN>=~/(\d+\.\d+\.\d+)/;print $1;'`
    AC_MSG_RESULT($ff_kernel_version)
    AC_SUBST(KERNEL_VERSION,$ff_kernel_version)

    AC_MSG_CHECKING(libc version)
    # Resolve the libc that /bin/sh is linked against and strip the ".so" suffix.
    ff_libc_version=`ldd /bin/sh | awk '/libc/{print $3}' | xargs readlink | sed -e 's/\.so$//'`
    AC_MSG_RESULT($ff_libc_version)
    AC_SUBST(LIBC_VERSION,$ff_libc_version)
fi

# define variables for the makefiles

# creating all makefiles
# ----------------------

ff_bamgprog="bamg${EXEEXT} cvmsh2${EXEEXT}"
AC_SUBST(CNOFLAGS,$CNOFLAGS) dnl for superludist CFLAGS without optim ...
AC_SUBST(FNOFLAGS,$FNOFLAGS) dnl for blacs CFLAGS without optim ...

# The final list of executable programs
AC_SUBST(MEDITPROG,$ff_meditprog)
AC_SUBST(FFGLUTPROG,$ff_ffglutprog)
AC_SUBST(BAMGPROG,$ff_bamgprog)
AC_SUBST(STDPROG,$ff_stdprog)
ff_progs="FreeFem++-nw $ff_bamgprog $ff_mpiprog $ff_meditprog $ff_ffglutprog"
AC_SUBST(TEST_FFPP,$ff_TEST_FFPP)
AC_SUBST(TEST_FFPP_MPI,$ff_TEST_FFPP_MPI)
AC_SUBST(TEST_FFPPMPI,"../../src/mpi/ff-mpirun")

ff_with_mpi=-openmpi;
ff_blacs="-lblacsCinit$ff_with_mpi -lblacsF77init$ff_with_mpi -lblacs$ff_with_mpi"
ff_scalapack=-lscalapack
# change MKL interface ...
test -n "$ff_mkl_blacs" && ff_blacs="$ff_mkl_blacs" test -n "$ff_mkl_scalapack" && ff_scalapack="$ff_mkl_scalapack" test -n "$ff_mkl_root" && ff_winc="$ff_mkl_root/include/mkl_blas.h" # FFCS - 27/10/11 - Some extra conditionals for things that do not work on certain systems (eg MPI libraries under # Windows) ## search of HDF5 .... AX_LIB_HDF5() if test "$with_hdf5" = "yes"; then ff_save_cppflags="$CPPFLAGS"; CPPFLAGS="$CPPFLAGS $HDF5_CPPFLAGS" AC_CHECK_HEADER([H5Cpp.h],[],[with_hdf5=no]) CPPFLAGS="$ff_save_cppflags" if test "$with_hdf5" = "yes"; then AC_FF_ADDWHERELIB(hdf5,$HDF5_LDFLAGS $HDF5_LIBS,$HDF5_CPPFLAGS) LIBS="$LIBS $HDF5_LDFLAGS $HDF5_LIBS" # hack, if enable_download=no AC_SUBST([TOOL_COMPILE_iohdf5],ifelse(,,iohdf5,)) AC_SUBST([TOOL_DYLIB_iohdf5],iohdf5.$DYLIB_SUFFIX) ifelse(nodownload,,[ff_download_lib="iohdf5 $ff_download_lib"]) ifelse(nodownload,,[enable_iohdf5_download=yes]) fi else enable_iohdf5=no fi #TOOL_DISABLE(iohdf5,iohdf5.$DYLIB_SUFFIX,,nodownload) AM_CONDITIONAL([FFCS_WINDOWS],[test "$OS" = Windows_NT]) # remove gsl if not find ... FH # correction FH .. 18/12/2013. 
# ALH - 7/1/14 - not able to compile gsl with FFCS on Windows if test "$enable_ffcs" != yes then if test "$ff_where_lib_conf_gsl" = 1 -a "$enable_gsl" != "no" ; then enable_gsl=yes; fi; fi # hack, if enable_download=no if test "$ff_with_gsl" = "yes"; then AC_SUBST([TOOL_COMPILE_gsl],ifelse(,,gsl,)) AC_SUBST([TOOL_DYLIB_gsl],gsl.$DYLIB_SUFFIX) ifelse(nodownload,,[ff_download_lib="gsl $ff_download_lib"]) ifelse(nodownload,,[enable_gsl_download=yes]) else enable_gsl=no fi TOOL_DISABLE(NewSolver,"NewSolver.$DYLIB_SUFFIX",,nodownload) TOOL_DISABLE(mkl,"PARDISO.$DYLIB_SUFFIX",,nodownload) AC_SUBST([FF_LIBS],$LIBS) LIBS= # echo "debug cxxx11: $ff_cxx11 mpi: $ff_mpi petsc: $ff_petsc_ok " test "$enable_download" != "yes" -a "$enable_download_hpddm" != "yes" -a \( "$enable_hpddm" != "yes" -o "$ff_hpddm_ok" != "yes" \) && enable_hpddm=no test "$enable_download" != "yes" -a "$enable_htool" != yes && enable_htool=no test "$enable_download" != "yes" -a "$enable_bemtool" != yes && enable_bemtool=no test "$enable_download" != "yes" -a "$enable_boost" != yes && enable_boost=no test \( \( "$enable_download" = "yes" -o "$enable_download_hpddm" = "yes" \) -a "$enable_hpddm" != "no" \) -o "$ff_hpddm_ok" = "yes" && enable_hpddm=yes test "$enable_download" = yes -a "$enable_htool" != no && enable_htool=yes test "$enable_download" = yes -a "$enable_bemtool" != no && enable_bemtool=yes test "$enable_download" = yes -a "$enable_boost" != no && enable_boost=yes test "$ff_cxx11" != yes -o "$ff_mpi" != yes -o \( "$ff_umfpack_ok" != "yes" -a "$ff_mumps_ok" != "yes" \) && enable_hpddm=no test "$ff_cxx11" != yes -o "$ff_mpi" != yes && enable_htool=no test "$ff_cxx11" != yes -o "$ff_mpi" != yes -o \( "$enable_bemtool" != "yes" -a "$enable_boost" != "yes" \) && enable_bemtool=no FF_PETSC_DYLIB="" if test "$ff_petsc_ok" != no ; then FF_PETSC_DYLIB="PETSc.$DYLIB_SUFFIX function-PETSc.$DYLIB_SUFFIX" fi if test "$ff_petsccomplex_ok" != no ; then FF_PETSC_DYLIB="$FF_PETSC_DYLIB 
PETSc-complex.$DYLIB_SUFFIX" fi if test "$ff_slepccomplex_ok" != no ; then FF_PETSC_DYLIB="$FF_PETSC_DYLIB SLEPc-complex.$DYLIB_SUFFIX" fi if test "$ff_slepc_ok" != no ; then FF_PETSC_DYLIB="$FF_PETSC_DYLIB SLEPc.$DYLIB_SUFFIX" fi ##echo @@@@@@@@@@@@ $ff_petsc_ok @@ "hpddm.$DYLIB_SUFFIX $FF_PETCS_DYLIB" TOOL_DISABLE(hpddm,["hpddm.$DYLIB_SUFFIX hpddm_substructuring.$DYLIB_SUFFIX $FF_PETSC_DYLIB"]) TOOL_DISABLE(bem,["bem.$DYLIB_SUFFIX"]) ## if test "$ff_mkl_ok" = yes ; then AC_DEFINE(HAVE_MKL,1, the MKL intel lib is present for BLAS and LAPACK ) fi ## Conditional for check test "$ff_fftw_ok" = yes -o -n "$ff_DOWNLOAD_FFTW" && ff_fftw_=yes || ff_fftw_=no test "$ff_ipopt_ok" = yes -o -n "$TOOL_COMPILE_ipopt" && ff_ipopt_=yes || ff_ipopt_=no test "$ff_metis_ok" = yes -o -n "$TOOL_COMPILE_metis" && ff_metis_=yes || ff_metis_=no test "$ff_mmg_ok" = yes -o -n "$TOOL_COMPILE_mmg" && ff_mmg_=yes || ff_mmg_=no test "$ff_mshmet_ok" = yes -o -n "$TOOL_COMPILE_mshmet" && ff_mshmet_=yes || ff_mshmet_=no test "$ff_mumps_ok" = yes -o -n "$TOOL_COMPILE_mumps" && ff_mumps_=yes || ff_mumps_=no test "$ff_mumps_seq_ok" = yes -o -n "$TOOL_COMPILE_mumps_seq" && ff_mumps_seq_=yes || ff_mumps_seq_=no test "$ff_parmetis_ok" = yes -o -n "$TOOL_COMPILE_parmetis" && ff_parmetis_=yes || ff_parmetis_=no test "$ff_scotch_ok" = yes -o -n "$TOOL_COMPILE_scotch" && ff_scotch_=yes || ff_scotch_=no test "$ff_tetgen_ok" = yes -o -n "$TOOL_COMPILE_tetgen" && ff_tetgen_=yes || ff_tetgen_=no test "$ff_umfpack_ok" = yes -o -n "$TOOL_COMPILE_umfpack" && ff_umfpack_=yes || ff_umfpack_=no test "$ff_parmmg_ok" = yes -o -n "$TOOL_COMPILE_parmmg" && ff_parmmg_=yes || ff_parmmg_=no AC_SUBST(DOWNLOADCOMPILE,$DOWNLOADCOMPILE) AM_CONDITIONAL([DOWNLOAD], [test "$enable_download" = yes]) AM_CONDITIONAL([ARPACK], [test "$ff_arpack_ok" = yes]) AM_CONDITIONAL([BLACS], [test "$ff_blacs_ok" = yes]) AM_CONDITIONAL([BEMTOOL], [test "$enable_bemtool" = yes]) AM_CONDITIONAL([BOOST], [test "$enable_boost" = yes]) 
AM_CONDITIONAL([FFTW3], [test "$ff_fftw_" = yes]) AM_CONDITIONAL([FORTRAN], [test "$enable_fortran" != no ]) AM_CONDITIONAL([FREEYAMS], [test "$ff_freeyams_ok" = yes -o -n "$TOOL_COMPILE_yams"]) AM_CONDITIONAL([GMM], [test "$enable_gmm" = yes -o -n "$TOOL_COMPILE_gmm"]) AM_CONDITIONAL([GSL], [test "$ff_with_gsl" = yes]) AM_CONDITIONAL([HDF5], [test "$with_hdf5" = yes]) AM_CONDITIONAL([HTOOL], [test "$enable_htool" = yes]) AM_CONDITIONAL([HPDDM], [test "$enable_hpddm" = yes]) AM_CONDITIONAL([IPOPT], [test "$ff_ipopt_" = yes]) AM_CONDITIONAL([LIBSEQ], [test "$ff_libseq_ok" = yes]) AM_CONDITIONAL([METIS], [test "$ff_metis_" = yes]) AM_CONDITIONAL([MKL], [test "$ff_mkl" = yes]) AM_CONDITIONAL([MMG3D], [test "$ff_mmg3d_ok" = yes -o -n "$TOOL_COMPILE_mmg3d"]) AM_CONDITIONAL([MMG], [test "$ff_mmg_" = yes]) AM_CONDITIONAL([PARMMG], [test "$ff_parmmg_" = yes]) AM_CONDITIONAL([MPI], [test "$ff_mpi" = yes]) AM_CONDITIONAL([MSHMET], [test "$ff_mshmet_" = yes]) AM_CONDITIONAL([MUMPS], [test "$ff_mumps_" = yes]) AM_CONDITIONAL([MUMPS_ptscotch], [test "$ff_mumps_ptscotch_ok" = yes]) AM_CONDITIONAL([MUMPS_scotch], [test "$ff_mumps_scotch_ok" = yes]) AM_CONDITIONAL([MUMPS_seq], [test "$ff_mumps_seq_" = yes]) AM_CONDITIONAL([NLOPT], [test "$ff_nlopt_ok" = yes -o -n "$TOOL_COMPILE_nlopt"]) AM_CONDITIONAL([PARMETIS], [test "$ff_parmetis_" = yes]) AM_CONDITIONAL([PETSC], [test "$ff_petsc_ok" = yes]) AM_CONDITIONAL([PETSCCOMPLEX], [test "$ff_petsccomplex_ok" = yes]) AM_CONDITIONAL([PIPE], [test -n "$TOOL_COMPILE_pipe"]) AM_CONDITIONAL([PTHREAD], [test -n "$ff_pthread"]) AM_CONDITIONAL([PTSCOTCH], [test "$ff_ptscotch_ok" = yes]) AM_CONDITIONAL([SCALAPACK], [test "$ff_scalapack_ok" = yes]) AM_CONDITIONAL([SCOTCH], [test "$ff_scotch_" = yes]) AM_CONDITIONAL([SLEPC], [test "$ff_slepc_ok" = yes]) AM_CONDITIONAL([SLEPCCOMPLEX], [test "$ff_slepccomplex_ok" = yes]) AM_CONDITIONAL([SUPERLU], [test "$ff_superlu_ok" = yes]) AM_CONDITIONAL([TETGEN], [test "$ff_tetgen_" = yes]) 
AM_CONDITIONAL([UMFPACK], [test "$ff_umfpack_" = yes]) ## # All makefiles AC_OUTPUT( Makefile 3rdparty/Makefile 3rdparty/blas/Makefile 3rdparty/arpack/Makefile 3rdparty/umfpack/Makefile 3rdparty/fftw/Makefile src/Makefile src/bamglib/Makefile src/Graphics/Makefile src/femlib/Makefile src/Algo/Makefile src/lglib/Makefile src/ffgraphics/Makefile src/ffgraphics/server/Makefile src/ffgraphics/client/Makefile src/fflib/Makefile src/nw/Makefile src/mpi/Makefile src/bamg/Makefile src/libMesh/Makefile src/medit/Makefile src/bin-win32/Makefile plugin/Makefile plugin/seq/Makefile plugin/mpi/Makefile examples/Makefile examples/plugin/Makefile examples/tutorial/Makefile examples/misc/Makefile examples/mpi/Makefile examples/hpddm/Makefile examples/ffddm/Makefile examples/eigen/Makefile examples/examples/Makefile examples/bug/Makefile examples/3d/Makefile examples/3dSurf/Makefile examples/3dCurve/Makefile ) AC_MSG_NOTICE([ FreeFEM used download: $enable_download ]) AC_MSG_NOTICE([ -- Dynamic load facility: $ff_dynload ]) AC_MSG_NOTICE([ -- ARPACK (eigen value): $ff_arpack_ok ]) AC_MSG_NOTICE([ -- UMFPACK (sparse solver): $ff_umfpack_ok ]) AC_MSG_NOTICE([ -- BLAS: $ff_blas_ok ]) AC_MSG_NOTICE([ -- with MPI: $ff_mpi]) AC_MSG_NOTICE([ -- with PETSc: $ff_petsc_ok / PETSc complex: $ff_petsccomplex_ok ]); AC_MSG_NOTICE([ -- with SLEPc: $ff_slepc_ok / SLEPc complex: $ff_slepccomplex_ok ]); AC_MSG_NOTICE([ -- with hpddm: $enable_hpddm ]); AC_MSG_NOTICE([ -- with htool: $enable_htool ]); AC_MSG_NOTICE([ -- with bemtool: $enable_bemtool (need boost: $enable_boost and htool: $enable_htool) ]); AC_MSG_NOTICE([ -- without libs: $ff_unlib_plugin ]); AC_MSG_NOTICE([ -- without plugins: $ff_uncompile_plugin ]); AC_MSG_NOTICE([ progs: $ff_progs ]) if test "$ff_umfpack_download" = yes -a -n "$TOOL_COMPILE_umfpack" ;then AC_MSG_NOTICE([ use of download UMFPACK see 3rdparty/umfpack/SuiteSparse/UMFPACK/README.txt for the License]) fi if test "$ff_arpack_download" = yes -a -n "$TOOL_COMPILE_arpack" ; 
then AC_MSG_NOTICE([ use of download ARPACK see 3rdparty/arpack/ARPACK/README no License ]) fi if test "$enable_download" = yes; then if test -n "$ff_DOWNLOAD_FFTW"; then AC_MSG_NOTICE([ use of download fftw see 3rdparty/fftw/fftw-3.2/COPYRIGHT ]) fi if test -n "$TOOL_COMPILE_tetgen"; then AC_MSG_NOTICE([ use of download tetgen see 3rdparty/tetgen/tetgen1.5.1-beta/LICENSE ]) fi if test -n "$TOOL_COMPILE_yams"; then AC_MSG_NOTICE([ use of download freeyams see 3rdparty/yams/freeyams.2011.02.22 (suface mesh adaptation) ]) fi if test -n "$TOOL_COMPILE_mmg3d"; then AC_MSG_NOTICE([ use of download mmg3d (v4) see 3rdparty/mmg3d/mmg3d4/LICENCE.txt ]) fi if test -n "$TOOL_COMPILE_mmg"; then AC_MSG_NOTICE([ use of download mmg see 3rdparty/mmg/mmg-sources/LICENSE ]) fi if test -n "$TOOL_COMPILE_parmmg"; then AC_MSG_NOTICE([ use of download parmmg see 3rdparty/parmmg/parmmg-sources/LICENSE ]) fi if test -n "$TOOL_COMPILE_parmetis"; then AC_MSG_NOTICE([ use of download parmetis]) fi # ALH - some tools may be activated but not downloaded if a local version is specified (see # [[TOOL_PARAMETERS]]) if test "$enable_superlu_download" = yes; then AC_MSG_NOTICE([ use of download superlu]) fi if test -n "$MPICC"; then AC_MSG_NOTICE([ try to download: $ff_download_lib]) fi if test "$enable_boost" = yes; then AC_MSG_NOTICE([ use of download boost see 3rdparty/boost/LICENSE_1_0.txt ]) fi if test "$ff_download_blas" = "generic"; then AC_MSG_NOTICE([ use of download generic blas and cblas freefem may be slow ]) AC_MSG_NOTICE([ you can try to use the Kazushige Goto s BLAS at http://www.cs.utexas.edu/users/flame/goto/ ]) AC_MSG_NOTICE([ or at http://www.tacc.utexas.edu/~kgoto/ for the best BLAS .]) AC_MSG_NOTICE([ or try to 3rdparty/compile the altas blas .]) fi # ALH - 30/10/13 - (request from FH) all downloads are now part of a separate script ([[file:3rdparty/getall]]) # which needs to be run before make for the user to specify whether he is ok with each of the package licences. 
AC_MSG_NOTICE([ Please run "3rdparty/getall" to download all necessary packages before running make]) fi # FFCS does not use glut, so remove this message because it could make the user think that something is broken if test $enable_ffcs = no then if test "$ff_glut_ok" != yes ; then AC_MSG_NOTICE([ *********************************************** ]) AC_MSG_NOTICE([ WARNING: you do not have the new grachics tools ]) AC_MSG_NOTICE([ because the configure do not find OpenGL, GLUT or pthread developer stuff ]) AC_MSG_NOTICE([ read the README to find missing package ]) AC_MSG_NOTICE([ F. Hecht ]) AC_MSG_NOTICE([ to install missing package under debian or ubuntu, try: sudo apt-get install freeglut3-dev ]) AC_MSG_NOTICE([ *********************************************** ]) fi fi if test "$enable_summary" = yes; then if test "$enable_fortran" != yes; then enable_fortran=no fi if test "$enable_gmm" != yes; then enable_gmm=no fi if test "$ff_mkl" != yes; then ff_mkl=no fi if test -n "$ff_pthread"; then ff_pthread_ok=yes else ff_pthread_ok=no fi AC_MSG_NOTICE([ Activated libraries ]) AC_MSG_NOTICE([ - ARPACK: $ff_arpack_ok]) AC_MSG_NOTICE([ - BLACS: $ff_blacs_ok]) AC_MSG_NOTICE([ - BEMTOOL: $enable_bemtool]) AC_MSG_NOTICE([ - BOOST: $enable_boost]) AC_MSG_NOTICE([ - FFTW3: $ff_fftw_]) AC_MSG_NOTICE([ - FORTRAN: $enable_fortran]) AC_MSG_NOTICE([ - FREEYAMS: $ff_freeyams_ok]) AC_MSG_NOTICE([ - GMM: $enable_gmm]) AC_MSG_NOTICE([ - GSL: $ff_with_gsl]) AC_MSG_NOTICE([ - HDF5: $with_hdf5]) AC_MSG_NOTICE([ - HTOOL: $enable_htool]) AC_MSG_NOTICE([ - HPDDM: $enable_hpddm]) AC_MSG_NOTICE([ - IPOPT: $ff_ipopt_]) AC_MSG_NOTICE([ - LIBSEQ: $ff_libseq_ok]) AC_MSG_NOTICE([ - METIS: $ff_metis_]) AC_MSG_NOTICE([ - MKL: $ff_mkl]) AC_MSG_NOTICE([ - MMG3D: $ff_mmg3d_ok]) AC_MSG_NOTICE([ - MMG: $ff_mmg_]) AC_MSG_NOTICE([ - PARMMG: $ff_parmmg_]) AC_MSG_NOTICE([ - MPI: $ff_mpi]) AC_MSG_NOTICE([ - MSHMET: $ff_mshmet_]) AC_MSG_NOTICE([ - MUMPS: $ff_mumps_]) AC_MSG_NOTICE([ - MUMPS_ptscotch: 
$ff_mumps_ptscotch_ok])
    AC_MSG_NOTICE([ - MUMPS_scotch: $ff_mumps_scotch_ok])
    AC_MSG_NOTICE([ - MUMPS_seq: $ff_mumps_seq_])
    AC_MSG_NOTICE([ - NLOPT: $ff_nlopt_ok])
    AC_MSG_NOTICE([ - PARMETIS: $ff_parmetis_])
    AC_MSG_NOTICE([ - PETSC: $ff_petsc_ok])
    AC_MSG_NOTICE([ - PETSCCOMPLEX: $ff_petsccomplex_ok])
    AC_MSG_NOTICE([ - PTHREAD: $ff_pthread_ok])
    AC_MSG_NOTICE([ - PTSCOTCH: $ff_ptscotch_ok])
    AC_MSG_NOTICE([ - SCALAPACK: $ff_scalapack_ok])
    AC_MSG_NOTICE([ - SCOTCH: $ff_scotch_])
    AC_MSG_NOTICE([ - SLEPC: $ff_slepc_ok])
    AC_MSG_NOTICE([ - SLEPCCOMPLEX: $ff_slepccomplex_ok])
    AC_MSG_NOTICE([ - SUPERLU: $ff_superlu_ok])
    AC_MSG_NOTICE([ - TETGEN: $ff_tetgen_])
    AC_MSG_NOTICE([ - UMFPACK: $ff_umfpack_])
fi

# Remind the user how to build PETSc when neither of its lib directories exists.
if test "${prefix_petsc}" != no -a ! \( -d "${prefix_petsc}/real/lib" -o -d "${prefix_petsc}/r/lib" \) ; then
    AC_MSG_NOTICE([ PETSc directories do not exist, to build do:])
    AC_MSG_NOTICE([cd 3rdparty/ff-petsc && make])
fi
FreeFem-sources-4.9/etc/000775 000000 000000 00000000000 14037356732 015125 5ustar00rootroot000000 000000
FreeFem-sources-4.9/etc/AppImage/000775 000000 000000 00000000000 14037356732 016610 5ustar00rootroot000000 000000
FreeFem-sources-4.9/etc/AppImage/appimage-builder/000775 000000 000000 00000000000 14037356732 022017 5ustar00rootroot000000 000000
FreeFem-sources-4.9/etc/AppImage/appimage-builder/Dockerfile000664 000000 000000 00000002313 14037356732 024010 0ustar00rootroot000000 000000
# Build environment used to compile FreeFEM and package it as an AppImage.
FROM ubuntu:16.04

# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="Alexander Sashnov <sashnov@ngs.ru>"

ENV DEBIAN_FRONTEND=noninteractive
ENV HOME=/home/ubuntu

# WORKDIR directive will create the directory automatically
WORKDIR /home/ubuntu

# apt-get is used rather than apt: it is the front end intended for scripts,
# and the same RUN instruction already ends with "apt-get clean".
RUN apt-get update && apt-get dist-upgrade -y && apt-get install -y --no-install-recommends \
    autoconf \
    automake \
    autotools-dev \
    bison \
    ca-certificates \
    cmake \
    coinor-libipopt-dev \
    file \
    flex \
    freeglut3-dev \
    g++ \
    gcc \
    gdb \
    gfortran \
    ghostscript \
    git \
    gnuplot-qt \
    libgsl0-dev \
    libarpack2-dev \
    libfftw3-dev \
    libgmm++-dev \
    libhdf5-dev \
    liblapack-dev \
    libmumps-seq-dev \
libnlopt-dev \
    libopenblas-dev \
    libscotch-dev \
    libsuitesparse-dev \
    libtet1.5-dev \
    locales \
    m4 \
    make \
    mpich \
    patch \
    pkg-config \
    python \
    sudo \
    unzip \
    valgrind \
    wget \
    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Reconfigure locale
RUN locale-gen en_US.UTF-8 && dpkg-reconfigure locales

# Add group & user + sudo
RUN groupadd -r ubuntu && useradd --create-home --gid ubuntu ubuntu && echo 'ubuntu ALL=NOPASSWD: ALL' > /etc/sudoers.d/ubuntu
FreeFem-sources-4.9/etc/AppImage/build-appimage.sh000775 000000 000000 00000006270 14037356732 022034 0ustar00rootroot000000 000000
#!/usr/bin/env bash
#
# This script configures, compiles and packages FreeFem into an AppImage
#
# This script is normally run from .travis.yml
# but can be run locally in Docker as well (see rebuild-and-run-appimage-builder.sh)

set -x
set -u
set -e

# https://stackoverflow.com/questions/4774054/reliable-way-for-a-bash-script-to-get-the-full-path-to-itself
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"

# Sources top directory (two levels above this script)
SRC_DIR=$(dirname "$(dirname "$SCRIPTPATH")")

cd "$SRC_DIR"

# make sure we are in the right directory
if [ ! -f "readme/INNOVATION" ] ; then
    echo "Build script error: unable to change dir into FreeFem sources top directory"
    exit 1
fi

# Directory the AppImage tooling is downloaded and extracted into
TOOLDIR="$HOME/opt"

# Download AppImage Linux Deploy tool
LINUX_DEPLOY_TOOL="$TOOLDIR/linuxdeploy-x86_64.AppDir/AppRun"
APPIMAGE_TOOL="$TOOLDIR/appimagetool-x86_64.AppDir/AppRun"

# Download the AppImage at URL $1 into $TOOLDIR and unpack it there as
# <name>.AppDir, where <name> is the file name without its last extension.
function appimage_get_and_extract()
{
    local url="$1"
    # Declared separately from the assignment so that a failing basename is
    # not masked by the exit status of "local" (the script runs under set -e).
    local appimage
    appimage=$(basename "$url")
    local appname="${appimage%.*}"

    # Use TOOLDIR so the extraction lands where LINUX_DEPLOY_TOOL and
    # APPIMAGE_TOOL expect it, even if TOOLDIR is changed later.
    mkdir -p "$TOOLDIR"
    pushd "$TOOLDIR"

    rm -rf "squashfs-root"
    rm -rf "${appname}.AppDir"

    wget -c "$url"
    chmod +x "$appimage"
    ./"$appimage" --appimage-extract
    rm -f "$appimage"
    mv squashfs-root "${appname}.AppDir"

    popd
}

if [ ! -e "$LINUX_DEPLOY_TOOL" ] ; then
    appimage_get_and_extract \
        "https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage"
fi

if [ ! -e "$APPIMAGE_TOOL" ] ; then
    appimage_get_and_extract \
        "https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage"
fi

# TODO: when migration from Autoconf to CMake will be finished in FreeFem project,
# we need to update configure and build commands here to something like:
#    cmake . -DCMAKE_INSTALL_PREFIX=/usr
#    make
#    make install DESTDIR=$SRC_DIR/AppDir
# https://docs.appimage.org/packaging-guide/from-source/native-binaries.html#cmake

autoreconf -i
./configure \
    CFLAGS='-fpermissive' \
    CXXFLAGS='-fpermissive' \
    --prefix="$SRC_DIR/AppDir/usr" \
    --enable-download \
    --enable-optim \
    --enable-generic
./3rdparty/getall -a
make
make install

# TODO: ffglut tool is missing now
# To try to add it, add --with-glut to ./configure
# and add 'freeglut3-dev' to apt-get install list.

# Remove some extras that are not needed in the AppImage
# NOTE(review): the "4.1" path component is hard-coded; verify it matches the
# version directory that "make install" actually creates for this release.
rm -rf "$SRC_DIR/AppDir/usr/lib/ff++/4.1/include"
rm -rf "$SRC_DIR/AppDir/usr/lib/ff++/4.1/examples"
rm -rf "$SRC_DIR/AppDir/usr/share/freefem++/4.1/examples"
rm -rf "$SRC_DIR/AppDir/usr/share/doc/"

# Create the additional files the AppImage needs
mkdir -p AppDir/usr/share/applications/
cat << EOF > AppDir/usr/share/applications/FreeFem.desktop
[Desktop Entry]
Name=FreeFEM
Type=Application
Exec=freefem-apprun.sh
Icon=freefem
Comment=Easy to use PDE solver
Categories=Science;
Terminal=true
EOF

mkdir -p AppDir/usr/share/icons/hicolor/32x32/apps
cp etc/logo/logo.png AppDir/usr/share/icons/hicolor/32x32/apps/freefem.png

mkdir -p AppDir/usr/bin
cp etc/AppImage/freefem-apprun.sh AppDir/usr/bin/freefem-apprun.sh

# Use it if you want resulting file to be FreeFem-3f71f1f-x86_64.AppImage for ex.
# export VERSION=$(git rev-parse --short HEAD)

"$LINUX_DEPLOY_TOOL" --appdir AppDir

# the output will be FreeFem-x86_64.AppImage
env ARCH=x86_64 "$APPIMAGE_TOOL" AppDir
FreeFem-sources-4.9/etc/AppImage/freefem-apprun.sh000775 000000 000000 00000002015 14037356732 022061 0ustar00rootroot000000 000000 #!/usr/bin/env bash
#
# The purpose of this custom AppRun script is
# to allow symlinking the AppImage and invoking
# the corresponding binary depending on which
# symlink was used to invoke the AppImage
#
# Usage: <AppImage> TOOL [ARGS...]
#   TOOL is looked up in $APPDIR/usr/bin and exec'ed with ARGS.
#   Exit status: 0 after printing help, 2 when TOOL is not in the package.
#   When APPIMAGE_DEBUG is set, tracing is enabled and TOOL runs under strace.
#

if [ -n "$APPIMAGE_DEBUG" ] ; then
  set -x
fi

# support for running extracted AppImage like this:
# $ ./FreeFem-x86_64.AppImage --appimage-extract
# $ cd squashfs-root
# $ ./AppRun FreeFem++ ../mycode.edp
if [ -z "$APPDIR" ] ; then
  APPDIR="$(dirname -- "$0")"
fi

# No tool given, or explicit help request: print usage and stop.
if [ -z "$1" ] || [ "$1" = "--help" ] ; then
cat << EOF
This is AppImage version of FreeFem.
To run individual tools specify them as the first argument:
./FreeFem.AppImage FreeFem++ mycode.edp
EOF
exit 0
fi

export PATH="$APPDIR/usr/bin:$PATH"

# First argument selects the tool; the remaining ones are passed through.
BINARY_NAME="$1"
shift

if [ -e "$APPDIR/usr/bin/$BINARY_NAME" ] ; then
  if [ -n "$APPIMAGE_DEBUG" ] ; then
    exec strace -o log -s 120 -f "$APPDIR/usr/bin/$BINARY_NAME" "$@"
  else
    exec "$APPDIR/usr/bin/$BINARY_NAME" "$@"
  fi
else
  # Report the requested tool: after 'shift', $1 no longer holds its name.
  echo "Error: Tool $BINARY_NAME is not a part of FreeFem package"
  exit 2
fi
FreeFem-sources-4.9/etc/AppImage/rebuild-and-run-appimage-builder.sh000775 000000 000000 00000003166 14037356732 025352 0ustar00rootroot000000 000000 #!/usr/bin/env bash
#
# You need to prepare this docker container if you want to build AppImage locally.
# Normally it should be built with TravisCI (see .travis.yml)
#
# Install Docker following the instructions at
# https://docs.docker.com/install/linux/docker-ce/ubuntu/
# if you are using Ubuntu, or choose what's appropriate for your system:
# https://www.docker.com/get-started

set -x
set -u
set -e

# https://stackoverflow.com/questions/4774054/reliable-way-for-a-bash-script-to-get-the-full-path-to-itself
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"

# Sources top directory (two levels above this script); quoted so that
# checkouts located in a path containing spaces still work.
SRC_DIR="$(dirname "$(dirname "$SCRIPTPATH")")"

CONTAINER_NAME="freefem-appimage-builder"
IMAGE_NAME="freefem-appimage-builder"
SOURCES_MOUNT_POINT="/home/ubuntu/FreeFem-sources"

# If a container with this name already exists, stop and remove it so that
# 'docker run --name' below does not fail on a name conflict.
if docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" ; then
  docker stop "$CONTAINER_NAME"
  docker rm "$CONTAINER_NAME"
fi

docker build --network=host -t "$IMAGE_NAME" "$SCRIPTPATH/appimage-builder/"

# Run a container with interactive Bash as the main process (PID 1)
# with the FreeFem sources directory mounted:
docker run --detach \
  --interactive \
  --tty \
  --name "$CONTAINER_NAME" \
  --volume "$SRC_DIR:$SOURCES_MOUNT_POINT" \
  "$IMAGE_NAME"

docker ps

cat << EOF
Docker container for AppImage build is running.
To start build use the following command:
docker exec $CONTAINER_NAME $SOURCES_MOUNT_POINT/etc/AppImage/build-appimage.sh
If you want to attach interactive Bash shell inside the container use
docker attach "$CONTAINER_NAME"
Press CTRL-p CTRL-q to detach and leave the container running.
EOF FreeFem-sources-4.9/etc/FreeFem++.app/000775 000000 000000 00000000000 14037356732 017343 5ustar00rootroot000000 000000 FreeFem-sources-4.9/etc/FreeFem++.app/Contents/000775 000000 000000 00000000000 14037356732 021140 5ustar00rootroot000000 000000 FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Info.plist000664 000000 000000 00000002653 14037356732 023116 0ustar00rootroot000000 000000 CFBundleAllowMixedLocalizations CFBundleDevelopmentRegion English CFBundleDocumentTypes CFBundleTypeExtensions * CFBundleTypeOSTypes **** CFBundleTypeRole Viewer CFBundleExecutable droplet CFBundleIconFile droplet CFBundleInfoDictionaryVersion 6.0 CFBundleName ff++ CFBundlePackageType APPL CFBundleSignature dplt LSMinimumSystemVersionByArchitecture x86_64 10.6 LSRequiresCarbon WindowState dividerCollapsed eventLogLevel 2 name ScriptWindowState positionOfDivider 567 savedFrame 1305 592 630 946 0 0 2560 1578 selectedTabView event log FreeFem-sources-4.9/etc/FreeFem++.app/Contents/PkgInfo000664 000000 000000 00000000010 14037356732 022407 0ustar00rootroot000000 000000 APPLdpltFreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/000775 000000 000000 00000000000 14037356732 023112 5ustar00rootroot000000 000000 FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/Credits.rtf000664 000000 000000 00000000641 14037356732 025225 0ustar00rootroot000000 000000 {\rtf1\mac\ansicpg10000\cocoartf102 {\fonttbl\f0\fswiss\fcharset77 Helvetica;} {\colortbl;\red255\green255\blue255;} \vieww9000\viewh9000\viewkind0 \pard\tx1440\tx2880\tx4320\tx5760\tx7200\ql\qnatural \f0\fs24 \cf0 \ This application FreeFem++ 1.26.\ \ The Lauching tools is base on: \ \ DropScript BSD Daemon \'a9 1988 Marshall Kirk McKusick.\ All rights reserved.\ Written for Mac OS using the Cocoa framework.\ }FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/English.lproj/000775 000000 000000 00000000000 14037356732 025630 5ustar00rootroot000000 000000 
FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/000775 000000 000000 00000000000 14037356732 030110 5ustar00rootroot000000 000000 etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/NSAppleMenuImage.tiff000664 000000 000000 00000001452 14037356732 033777 0ustar00rootroot000000 000000 FreeFem-sources-4.9MM*pBBFi`wFF00dr0077idΛidr3iΛidid33dii3d333i`33r3>3`iii7d0i37d3  h"(R ' 'FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/classes.nib000664 000000 000000 00000000571 14037356732 032242 0ustar00rootroot000000 000000 { IBClasses = ( { ACTIONS = {open = id; }; CLASS = DropController; LANGUAGE = ObjC; SUPERCLASS = NSObject; }, { ACTIONS = {showHelp = id; }; CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; } ); IBVersion = 1; }FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/info.nib000664 000000 000000 00000000776 14037356732 031547 0ustar00rootroot000000 000000 IBEditorPositions 29 69 252 122 44 0 0 1280 832 IBFramework Version 283.0 IBOpenObjects 160 29 IBSystem Version 6C115 FreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/English.lproj/MainMenu.nib/objects.nib000664 000000 000000 00000006365 14037356732 032245 0ustar00rootroot000000 000000  typedstream@NSIBObjectDataNSObjectNSCustomObject)@@NSMutableStringNSString+ NSApplicationi NSTextField NSControl)NSView) NSResponder @@@@ffffffffNSMutableArrayNSArray NSSliderficc@ NSSliderCell℄ NSActionCellNSCellAii@@@@NSFont[28c]Helveticaf ci:ddddiiid2NSTextFieldCell>@How cool are you?([40c]Helvetica-Bold c@@NSColor@@@System controlColorff?*controlTextColor:KVII@ kinda cool$[36c]LucidaGrande V88@not coolV==@way coolîd2III@ kinda coolƱ88@not coolɱ==@way cooḻNSBox*ϚϒIff@@cccBox 2@How cool is this application? 
NSMenuItemNSMenu̔i@@@MainMenuؒل i@@IIi@@@@:i@ނNSCustomResource)NSImageNSAppleMenuImageᄙNSMenuCheckmarkᄙNSMenuMixedStatesubmenuAction:ڻInfoؒAbout DropScriptؒ缂ނؒPreferences...ނؒServicesނڻServices_NSServicesMenuؒHidehؒ缂ނؒQuitq _NSAppleMenuؒټFileނڻFileؒOpen...o׆ _NSMainMenuHelpނڻHelpؒDropScript Help?ٖٖ疣ݖNSWindowTemplate iiffffi@@@@@cI px PreferencesNSPanelViewffff@Ձ疄DropControllerٖ̟Ɵϟß ɟ ٕזϚ τNSBox1 File's Owner 2턘111 Panelׄ1لMainMenu҄NSView NSMutableSetNSSetINSNibControlConnectorτNSNibConnector showHelp:#ꕄorderFrontStandardAboutPanel:#hide:# terminate:NSNibOutletConnector$delegate#open:# orderFront:#ãtakeDoubleValueFrom:)@iƁȟ&|Ț9ρ~8}( gj/1Ƚȹ3:"z,* óȬȣÁȕɁҁIBCocoaFrameworkFreeFem-sources-4.9/etc/FreeFem++.app/Contents/Resources/FreeFem++.icns000664 000000 000000 00000120520 14037356732 025427 0ustar00rootroot000000 000000 icnsPics#His32qBc)!Bc)!s8mkICN#il32c)   1{ցB!1 s R91 )c!1 J) !k9 kcc)   1{ցB!1 s R91 )c!1 J) !k9 kc)֓  ck1)֒ ! 9B cB 9ށ  1!Ɓ l8mkich#Hih32 XkZZJ{9c) ֝! 9Ɓ{RJ!Bck   !1BZ {k1Js {)sc!  11)BBkƅs)  1!)sJsΟckZZJ{9c) ֝! 9Ɓ{RJ!Bck   !1BZ {k1Js {)sc!  
11)BBkƅs)  1!)sJsΟc )J )9!!B J9!B ޵1  !罭{1{ZR1)Z9c {ZZ1Z!JށB9 ΁s!1 cR ΂ ch8mk it32/tmp/tutu if [ -z "$TERM_PROGRAM" ] ; then echo ' tell application "Terminal" activate try do script "'$begin$cmd$end'" end try end tell ' | osascript else set -e eval $cmd fi; FreeFem-sources-4.9/etc/config/000775 000000 000000 00000000000 14037356732 016372 5ustar00rootroot000000 000000 FreeFem-sources-4.9/etc/config/FreeFem++.app.in.tgz000664 000000 000000 00000075412 14037356732 021754 0ustar00rootroot000000 000000 \} `cWuy&ˣ&l & 2<[e)1I&uL Y1FdEI DH\JiSHbAiQU JiQr{<3ars~[-/"L^JL_9{s$9w6?9 tK.fsdLfC w%$&Ht׵d*sU\42ڌaLGC#s&o|ΥW%/8t ɡ&c#̠I.dױ.K^<OCFt&?}`]xSAiwm?ԾVHۏh#ٮ*ruGqP~.'ƒGgp$y(: \4z;oH&f5%3C𕉉+F~fPW"w0u ǎ-k 9<|9ǫ`btpf\{UkvȾc79]u##t&yN*~%L2qT?.ittuoq ]GU'_yp$L_:{򎒣M.39C#׏ %SN NC@ #S8jbU"F`%S鹰u[AƜKF&3GuJɉŮ l}cپc]H^"mٱ;һ z}y NPɡ݃{Eӟb!26GeWs>_?wwL 0G;cUQW}o8Y˻%N%~1~osǓ_Mqtlǣ<]N_ӓ}sF/ Xі+~lOپsv}VOV/+)?^'cÅޒ  ,SzRo(?FiXOc?c6*!׎$ҧysWy 2Lasyn%V8W}${rBϵ%p O9ΒSQ(9__y[dqjfiLsLyl)Ӛz(g=2M+HmDe=sʺCY4)->(/S~^B+/4 ]FD[f>N"%*þM mI&w1_'WBzqf br?^ F\xĥjԢiGM!pE;_5ke`f?V@U+8TC8ZQ-l5*,Lu-׋PIaYŻiFao[$^Âb2w9YI-rKcn:mJwh15-r,YC0NTpaQ@eX{5*F9^v~>f7 o`J#q`{,lh[͒/mnvKnE%R+K׫3ai8ɰ-,KWcHQ7KMU2puyC:cEj稵 qz/ƂB\UcF*t~8*d͐YAUF3*!b^q5KU%S%,6 , a БSL,,qF`TV> m1-LΩcV o(e@~#Un~LyW_^IW{y׽~Ë%,[o3*f4o ˙:G?e ӽfz]#ޚP7 '89P1Q>q46mQ3&|0h-lzR=#bgiO&G;VkS@T˱^?ޢ긞l=c 1,D,|lM*.8g`:+S7Ӊ@']'.'I,帝h |ħ[r&p::|,Xsi7e%%קޟ3Wt#Mri 'G5-Q"K꜀#[_Vt '&:Wn/X3#`:w>aP\XIjc9:J< 7FGyդSȦn,3ލ2pf|Z)ge8JD8M|CUKhKzy0j.cyun妨a\`I; #R>i\)8` &؄Ł=Ox:!? 
NN4rdy~msQ?۹^Q tZhL Qi 5A}"Ȉ4ָOYD>@"FYڇtp촛N*dp"l/)MBҵ!y\VogYtJ a^0)HOyLVueS1I)]?+ NwdAŨx)ڭn'UIisrY lN'^A%sjS=@9mO*b8~#QҨ&7x' \\$j,foꤷ]qiQR[}i_+mrd W`_.}'Z)ka*D ݙ% B%%d,̙6(*n,0,ER)r0Tbq jL?T7x6[c}lHK&4r:YAk1:k:+J k(l3 }(8Trk75i=mSr2k8RcZ4æ)ʔ8%6䷳)mbZ76Mͽ!X^aėi9!I8Cµk8WX^Q;ÔU!_fż Vy Vc< , MB!JA+zZ{>Gnj[v.Qg"r2!~>S8}Lp~[Y`8[9y^A-:!_3싂LpRTƵ̥rbLAUT3<2/XjNT1Ba'qRaBPӯ Uej.SE8&g5Zn" hXr R4+s6'2gE[Q*Z6lŴ)dQ Z,Pۼ쭚?D}T{kIV灸ʫ܁iP-5p7TŷY)­/KA!TbKwL -(_=P̚A3* ЉSaeQZA>:92Fs4U"֒U0jt>2Y)!rӊ8lq&ISW5Ө8cݢNpԚI5L1$-E%@Q S$ӟ4Iz>Ua_$*{UkybT)phjd1%ɡ@'ط"j[q*hB,'Fe Sz%#FP5D074i[Iu y%+[sb1L h2c:NikI df.U$Y% ?/SkWOKNU넖%ݲFyEJB!7O Yeфj1 PRQ\kh}PmP' q=^1wU/JC, ܾ%h<.*(7+R4˯{o7`joL6c2O] 13xZUy8d-)%+%_{VDB9}LZƏt!!V:f}1\%hA%Ĕ˭XF֣ Uy.@W'JX1X*hC6)Z}~QFY>=EJ_YF\h5uey@{ׇua+\y0: Da@R_TJToz:yJ6#(3W\$WUyFBiPWU7P D *R{Yrah @p[ 9ewfVBB xkqɪi bMͱ?jRlVvPV~吸*ULi?@<%Éf7 Y-X[p.^}`^~ozM/7^~ozM/7^~F~SJ6݉V͐}T,R?B9!B /|S~yqPX5w&IRo4!ƁDl^ ,L~ɼ%T՟a>[IlJ[Tb"bkl"u gmX0<}Z_фrꃈJx} RS\s.&P1ޣ Eb@a,،xAMj$[M&nɻ1v+ѣ"`rCbV/RQ{ ZvLV?/lfW)^ 9M{eXweXʻԄrm]T5#gć9bjZ\t'e>9KjM&6-Kb'V$]8 \4mmtp4#F}QW[`Gs֊-lҐwjli#N#-ԡ4X%}-KF-->M;谅 Ay 嬒$ x6m]ay0m9yv>:8mTRz/ʇ TXap3x`&L"ˆ HZiHpq0jAl(Z95sJb yI ^Ě+-g헴G`Qe44OS, ߰SP_a.uKkXV"\cIȌZč5* uMHhm^=@fpzBCdTfj=/f{8 |;0oSjg@R0lKP)}ۀZUHN`  XKyҴ G~fMa%Am 1UOȵʬZWRa*4phG+Ge~eښ)R¥âEjORZg]U'٪@YKo+g}Pc1|>͚9%Wr܆VCM[yaXؤr6,oo9_a>jҩu^Ujr>"GeMA9v^>w'c*o7Jv o..J3g+,oxz)5 e9փS#+Y "W|AC"Nƀ5 jjJ9{sQ$fQm1rm&n$Z@܆}+(a6b#eW9giU,-:9toQԌ]qkXl=WT魎*K5C9*|4ofbآfNjQ2eD!]0lc}ie^,R{QU$g F9ދC \_|٬m7r(Qy,sV&[\W^O4&Xõ7)W L-pI4e#L>;Uli0pGTL5@@k.DEr.ÒV\BSrT8+insg3c_ "4BCkCIͺ6|JZ6nfzQ@yͮr-`-uU1a:.Qnj[c (c s*F*qCRb(d\C\QbW&)Pr7qbƓp[ͭG433ߦ>'Έ g_D_`G\#N/Wk_EDj5x+^Wx+^Wx+^Wx+^Wx+^Wx+^Wx+^Wx+^BfT2yE9ltb<Ϥ鉃D2ݕNF&3?;oݳvll߹cm==[#=۷H/h',ә@ٟL?4Qmx\"_rZkxWzHx\wkDΏlޒ܎rZD"9IEGƒ (#i4u\{9ZD(?Qɤu[Ώ\314'PˇF2gG^8<į\D.ᕗ^~׼hk$#Ñ뒩GȁH&y pZ`h$u˱:wEOǙ-[+/|@r|F2lMhsb(r/ 'd'#C}djPp9Qrͨ|%0v8<#?m1;P#(@&"gMD l>SZL4a4VM80}MfBρ1ԁ#J}=n>w_n`25" Lg$́ɮ8/o_r51؈Xؘ20Sۼۂ9>Ȍ\?I3QU)y9hJǁ3#>H{ 9Nnd y=8=. 
TޒʜH@ضcض=U= llρĞHbr_LߓHL ʥԞaEf؞=C#uc'3 ~Uc)Խ`g=a㲳w`zC2o썎uLdnm֍ΓMggrl+3Sc|'1ؽ$}[ؖ[N2r}P<cHDF&ϏHR_wYtH"W!IFN< 8/Ⱦt$L&%pE d,"Jlg dxr`=2?4N~L+CX2e$18tGbۺؾ{m[=UCIO~g$ M+__וfs8sYfAl>Ep[,fL~blRݪy[dqj ^1-B%9viX|(U+|@Me:>aVW9_>"_rל&xxxȘ`̇IqKb8N+^a%tYJ&:",?fx 1Z3h,bްmۚE>V?0 M;j a;5%}k2~$µAVRp?' -a7X ̇.u?6+GVe‚,+=C>lFao[$^ÂbU֕[Zׅb Wa8A,xX3"k*(G/QnKUŲVfY6@ȳG0Su DXd,B6knQl@+3K0($n㶘sf>/4ԓfkeY /H@jMU׺ aRdmEYuAWgR6$b[OfƂB\5={`Q 0EPUQrrW\6:o WLeڴ,i"sfd~,@C0yYY;|c~h]7ZfǔZx+^啑~x=AW~~7XB˂hacx}}x7uOVS_$G)c7քY8 1ρGhD{o1჉MEft)W^a?[zF-΂M^R|#FHZ+̲~/l\*GDrr=z=k{Ł~g>E 1 'v؉;Y٬z%5bfp|@.ck%CKƒAJOUˮoU 0|,Xsie)EMYI{ril!]Sv$9̢&LhO$HLn! tvK-fem9Y>-_יO dDk2FL, _֩XbYA*kDn/X3#`:w>oPrtRgx>.UmQ^55sw# \8kCݕx GiS#\ot)%\LY@G%"q,st+7E R]UL+B-~'9;d uUǩo0 0u z{:!tϧC>lW,EsM2s^yCv-%Y^Q tZhL Qi 5A}"Ȉ,Z>4͚ j/Ew6iɝY]_ZVZ)ka,ZEp+TZbJraȜi㺩*n,0,`3sWan `K#!X~E;B.fՒ.ONX_U׃ aV[h` 섍54~JFS5O+^W救W/DrG_ܭ O|_MNhfM#tH&rQ\O\/rkIY5!xŜw3ph5 K^A~$p+T/"+2kvxP,S|քgtViYHGM EHȣ$uB8M^<7sZ|Wv g\ٻ|ejA)_udzhQ zN507Bvjs]L stst<5diPa.awu07aΛtCFvdfZB݇wkmi݄FYnSg:+Á?ha;UgMEI{am:ắGJn& q}jZNee5LtV|Uҵp[4MSY`ǑO÷Æv9MLƦ D9\ Y+2-5$)T~HVք V,/aJȯ]J`V3b^kQ|QͱL@[VZ&]uڮ,R*`Cx?pKTĵÇu:ѐD? 
!~,u񇅆slƉ3Z7P̙Ŕfgn q-sux-""j\# Rmv܉j9F(L =T"}P$Kkal4@.k i!Tə8y-kCn@d-K@tCaQfeC欈p=JEˆ 4SȚ~: 6/{/rtZ5y .*w`Z3TK MU-AV 'p+9(B&"7J9n+%Z$Q>jzA͡5gTs^+V$(2bYȃ}tCseDL%iD%O`"DU"E}df`+SBqrM"jQsqyv:Qk'-׬zE-LGNҌ&녆XT}Yb2TVm&r :R$¡KCep$7C`ߊmQENh y0RE+NmT8T때ˢ3jdqx$ż9KM1b TZ4 1 VJ'lҴB23SP)5+ŧ%ϪuB˒nY"Vfϧf Mr,ղhB(}\)(k54D>(ֶWLp8GZһvF]ZFo[D;,*pkmbGjRQvї_szsGtNSx EajX }Ei("V0dQجbԒ,(ΛnWrk9.CAG,~L78gįa:B9 D+9}N ЪF1^7Kgt\Z5=7-Kcpi˔UإGXkV3ͺMd'\d]3qo`.$s,[h3ebaU̫7h N%7;©G;-k-v4(Rp*LJaKUyq;a㝴/Yg;d8JK RTѾ0Æj+ ~6Lp׻-:+<ެH>.^ VQS11`>ugkiQtz?WQ*0x^|yZuCU iZ1i?jЅp3[A35 깆 R@SP{zMz1-{3S7;%w]SqxEyy⎴q7"Yshj*|'}P\p$whĥeh7P\-j`[t~kU1➖ h߰M\mƸro4ܣUS.c%UX2TNr@X)._{+`a iڰSh)W@EUe)~IgE rB춫2{Xx i]oJԅrE 4[.X:sK}R);Rl *F\$@ڌ_q^opTYY [@mG_U6@!b>2UPHY>n gI?qJGP)$֚N%Vzoe&DɚIX 枇 f-Kw`(myu2&nNwÄWg׵B}R8ç7 xZ<< Zs ߧ^wF7.71:k DYKMcyQ֬DGQ#::jMXR6GKVU8Oܧ2?N5o[MF]A}aī+J~Z7nl HS-=ĹFd'&^C1255IUZډ BZ-EvCsV1hJ'J_4f`1n¹.zmy^~ozM/7^~ozM/7^~ozM)ut'>={Z5CPH~#r Is 5MЗoAmWbը"$I;ӄF_AyMRȃF2%PZW l%+enQ=K֊5i&1a&Zi}EޯϳS^ȩ"*Q+HclLiN_pg 4ʻDЛ@żz2$h!`3b7KXRl n36%ۭDJWLƃ˝YHGM쁈V;h5a3mZmd1_fxQr/4`5ޝq`?+SzkʵuSՌ l爩kY/sѝЪv/Y5ش|,lXvE,pѴ$qE4b&)+{k Ed kv}~/&M}f囄f}{Nu| ,?C-$n^[V0_u:Oc|_ߠ*:5G-ey =4`-A# pF].o͂QZ+Z HC~Jm:ԷPZbіek۷.PGd,d4-bTX(hlCKE/J,kڴumQFxT+L+qk4_֜8DhEtQ>|#IRyZj}&4, % Cß8Wq1 $VSes~n"VINq *rhV)AYG+ug& .wöBnP3@ޥ~O >0L<6wMDDF.xO9%1nqQ@" 95Y1#bK/)i.x3"z\׋ Dr5܆x拵;]\{T(׷,TkӽE}FĶjS3/7vFa_IPզ:.q `KhB俛c ߊa~ ;BEɠ͆ԇw]`&˳ɂ yHEUŒ0+hx{/&i+p}+ef(QG屠YزRODlr^{q?(`yZ ׎\570M+p'\㗍dl0cVinZaS3Qa KjF[sG V< M[S⬤!$n`ϝ͌u|Pf23 Wh%5)j=ظE ?52r^"Wd w.D +7S3n18v5̩ɢҪ<կIs usKFu\0(CݘgOm7:94R2~f:#2nB|sB;G:$_!&ԿP%QcP`$i(JLR8I 8RWx+^Wx+^Wx+^Wx+^Wx+^Wx+^Wx+^Wx+ +R眳epr҉Lr^㨟VZı C4| 6 &t L:%N%'$ "저qzatD2rcJt@J:I bil}# 0Hzb8sh04L gn f"cc[ h' hODO i3b-#1tSAl0O'9>S4:5M:=ȗ6` &Ӊʡ{#geF2Kƒȋpjlb_KgxtFY va+?KF2rE^dk>6ѩ+B 6 mt6ٟLuС--] 20&0N穘doΧ.;(W`/X*CJBecQ}A bQDgYAl4::;sR7u 3/N'3BNMq&luhC7Ae7hsl ={w;~(qEgm 6M/nQO?Z G[} 6qpO8N%gB$A:@D//NGAP7 B'eKQ@: OЅAl| =>.qKqb$wjO':F]<Ӄ mHQa OO>:M v_Ap}.R듩trH&t>.˂脞>b{oߢ]I}Al'ѓu$O*|&#H2 FW S>4~plAU_~ЫAa:_9Z42GiiTzW:r ZD'}S6؟g/5 º5Pn:ɈK"q0J^ &/ߵwAP 
oS?6sYt,י>!3HMgљH'*\:B }d$veN+{'cog9,& ӉT2Kʭ*)VZ~FZz/ nHtZf.EhRk^n|q*X.G:.@J&^J{4j>'~H*=(OJ= 6; 7]^Fe3AĦ-W)OG7 9:dOo?W`nH`f tP}F%dbco㿢WG:#T+7JcAl({$'IaE`&6r>sd}Bo^F>1OQW/].Ce}¯Pr*mx+{}o;:Sa '^u/WFzU)S{> v@7>A)???l` &~h3= nD9 l! y*QN,4/{QNDlt:x{);Hs@ ws"yx]1qc]2a-07&Lfbf 6j,BQ* 6M;"6*7A*:v~gS]2u;(H$g;4v'x{x}ۚm8י>[;Ǡ"Ma-~Kf]n;x1r8A˰@Aۈt7v]st?J捲yleN [7i}t}A*(* os5_Oo7 M}M|M>a |ol#g-Alt/=| a^n⻅> 2{jz }`ìoDoi;v.9|plJRm{;޾Fw:0O$LJ$zer0l$  wALj&(]Al_˷::U| ^~ [kdjn)YT= tzJD-d%LdF&de2DՈ=r#a:Gch zso`)}lC_&h KZ bAm>̇;#fg.0>sAȺHvKM 3'srMV+57k״h5ʗ" b :7Loq :X.]EQ}>Fo[a^e|~Q5osAZ3c!vA?z ̱OցC tS+*O~|P$`f~GFgh>ցtxD -0$D 'pLH6s&<=|c?|o/~L!WE ۋSaFGGKT bcI@_bTW6W ·ٯ,-)3!A7@!LXp-n%!s%9pR? .H߰^M9yV޹zd` P9H_6TwGQ1H3g6dB_D_ +%4J`.@ Z?ҷ4,+y'~tR6R['vQ?;1IDdh"+6JMLZp&MpHj/}jӌyN6zѿipK |h{⏲kfѿGgfGNFQ=j8aϣ@Lg&&*  A~EB}(.QkԞAD? bb=4o '~!1&$ țdNvpX:,}1VsxCyF69Oܗ- %"wo&HBLwΦuf~ahqxl^*d݈';O :O6 P xw 9;cX9O:O]^¿-~-m9.vxTF6WKSt*<}󹏯tRO6WWA@y3g:'lvNsyG¿W|_z}OWj"j1bW'щM{>99;;pxglsFg9M|9N.|f<ق3,>u>x> 0|o(G:p~ i'fp⹃#GOgw}!wzm_Uz`Tfoy螁Fg]'ξj3*X VIgӝs%^է9n V-X}j?XtAPp]G;sd~UJF'ɼ 'mفG!)Q,`oTJ|鮯Lѳئ]O3qg| h{5, QOιpx|}'=ƙ(su G#2lvs[Dmo0mGSxO/elFΊz *D[.nfۉqS[OZp{rz|oo9[ȱ'+(9ƩOwet'20Yi6| N6mwgvט\I=T*KǏ]^~_Cɴ>jK*3<ԵŻeuoݾ}g$۱-ֽuΝ;"_2O&1/ֳgOb"11(b[ Lcd#O ;{@2}nQ7116B C=۷c X=f=x2{wt{F2=ġelp_{}cao ŮݛJev|d=1vH5N$e#?5c^42ӳcG73i=*{n7KcF7'Ǝ=#OLezXcooc/%{= 39= @G׻!@ J xs C4JcHXA7J5bJ5EzbhZ~oZϪ*5گ>o޼޾7;PfXU4h@@$#QYD:TIL.*pb*_[8aC]B"EWC[@Qd`+q\Xv %1N)pMJk0\)Ij,Ojh5,!cDFt>^&zD`DM) Pb*'2X@/$_SCPI'%jqNV:jLfa5!2?J5Z4 ΔI4vd+\Ee'H 1"GK^#@)q,TeUJ\+éw+ӀXt s43U$ IU{8Uh՞*C\Ŵk@x\6ylrTk&ǽ3<1>9y͏TI'$! B"H[n ]L,+?2$F9% ^~Uo-<@(DWiN*յH-lPM< ұT%1~r"`I Rz T] ƅ/6: `x( +c5cSG`R;їb$RKv!B*_,*'=PTjzKIFS:AW6¦V {9͖u!(T520_%-_Ef 22z0.^ք v~>U%לW̳w~?TJXDB [F30GsH`Y$xXC?^(3r/2f IB(p_IIȟJKAVjn1>P:}Xud @|dݖ@'ar BDԈ"1YNcXQ"Mr`o"*#cS#bcQbE@[$.Xb/gQʍ&-9 sTPlR.&$>r$'_"CN!p _ $%!+;32ym/p[ƒtL2XC? b 8)s)g}A4~ms&}Q!q.C̥K? 
L}h}̥D}d{\q@ H@3DX.`@܍ kxk'sE@<Gk)ZrͥM*hZ DCKXGv,$LfF#Asi Vr~P~8 []p*1na(C2 mo d[m \BXiEsx}82FFhTdJMP+h02X4Z`_Kx`"5(/HB&‚4m)Q*2c4$aABkt:B+JHRB2qrP@(HjXG@,I<W4RhT@@H=K" P;ᯍ]: ya:jrp!Dѣz%Y:.ռD\y9RTBDS)@zN8L,JjRu(qףiO`(2Q3ɢDQo2'>z]h$PXu5d~~II"D ;8^dK9/rC$lRV.CTE[RVR\B* HdI:*A-7OA\[蹐7' BJv.z1a4;8 pңR@*CQ |e!! }i]I8:drRrV jU}d~B8U.&U<\ OlЯ[0, ÈPU L +_Oq]CaU +6.8tS86zNFF^kס۴ 4Xz1C5@7!P$TaABRwKhAY PF ߱/16"^432 p2'odϊE$޿Wsx^&&"U{-(-Bh:fin0:%| XxRm #m$}FFxT*c5U6>p̜( Rꐏ) NS vkuCPnjXm6lzQ}ˌePn;Y աKh)t@~!Т<@ؔϕTAd|<GPXR`*(E'ox@PNPZDq#qOl$b".i$EMW@ZӂV~*&;q{xN*?"]PK)IK E׊n!tK`p\.ڋ8K S;@7<O7G@` `Es 2«;LKh(5Gbd8 !xeQwL%7I٬Y.pyKrP,9S\;FGAN*jcye(}G9:|0|P'\jÉqhAP3L8Z'=lR$J<P2z.- @7:DWRT DqQoΥ>yq[*j2222qWx1V6VO[h< 0`PcCsсz 4[1ޅN\ p$l2U׵ UZh$7t4 VdQ2eQCJHrR.(:m&"e+˞C?mt CE}q )s Y`1\Сz;PFk޴52\gR 0[S ʱ&xO9vENFJZ (Q:"t:x52c9LwuHR19ŴHTjutLl0=xC|SOR`3dCfd?1:SSYaڥCVmC0gd!3LtH2!*tJSkRl*pִ5!M'tFЗP(2[`xphfO3"km _Z a e_ |Z!4je.tE4ƦR`&X`X`X`X?Ǟ}&(OG+AS"bcމ>EA/st~_-s!H!D\+?ǚ.hJhһkoCϮ=1N6A>^ho?2Pe&a4+K Y :j6?ilvaoԮ&c( !i"̓Xyt\!_דǷ[i咖1:X[q==JP) 004q b-m1Xv|Yr@CxX}`' ܳs]g>$juA-ocpaX ȉt,O.c;3|ls|(-jeQ0ml@6W |.Hty5 :E;3D"y] JE] KyUP-haOX||,u.:KHPWDz>at;jX`XOH\ffk Cw_CWW ]5P(_#GT;}1}1xcGp\qӸkWHH5aDQn0i{G}4fA ,ܤEk;=:{+p uag}Pw F:Q4tEB' GЉ|dσ2t>#Z&s8\H2|+ б0fof{i2}ܹDz , , , ܼHiu:A#{E0pqD.|RMs$i"A$HSZ񉄆JFbEگ,\hy {w@-o@u lPzײOZ-Ych aFaHS%n.E^hR"# !! 
S%hU~؛a]:4@Ua}wGm=*sl e^EXT[sXcT_poܯG}w-N>;H/ IA|kX<mpݼv'MzQj i3X k;vichI=hٵe3mv_?jsho;LX6gŚVm}ktNq`3b ~^[}~w+zIs~Ym[b*$wg?t<^wo'uzج,{ymilrw Wc箛V};a ŒEAuBP(jrA2Aww:zہneoc#tm`܄0ێ!rond2yΟz7>NΟ+&M+ \O{MD :76=IHv~e%q;;{/yezo?F.i/Z2<7[n?x9/o̹>aa %OOOtt7c +ڼ%^CDo90i-H:uvTݭkrp}/;l{[2*x7 :<9.>}ϒ-?m]-QK/zfZEoGGN)Y75a5MYȏ9s(|Ba J( C /}-Ns9=U\9Iid!oMhǦO~]鱔GdmƞT9[Y_Es]2=;ݲͥ.g o#Wqcv/[W^I(uMOsĘ=S{5ˆA3h}nବMs<.jssʂejdBoort=0n.u2[T{(e0]W8 nO2GIi8.'$t;CDuGK* R|PDsN~™"dލ.m۫l[Mi>ҭW3[~Z}_\vwo^!r=}jnOUQ<˧qDž}73A8t\P+?FV) }w\oWw*tUn >}:(һu[+5%Lkv?ʾ+avB\`@a`H( Gmi]@*C+!4G|KʳI?{|8oDCЊ ɜuĠ|e}kq*F͇} x~,s?-7Olpϝ0ܽ-oʭ)2V);ܽ_Vޯ#1Aݧd\оL\sҽk9!}tm.?֟R[PVwǜobV'\سg?Ѳ)[WKz.,Ĝ4V~Ƞ=vdݪ"O_|&IꅳOm,0SCyf?MT'8BY^9nWTmGNPQ74juBI=4EL2JyfaRRo!{|s c!{xϏ oYҔo(l꺷u1&-`ܛy}}D;?QOǂoO06 ~qG>(הi>/4ñ'΋wjol$Tzrj{O>U3羷k쵉bYyrn[{cSn=\ o7; C}g):M5u7/:"OMvX;k\Ǒ3FsQ"djPDd tVƅÃ˧f8s_}ܧn>?zq+*W[CEWvs*vɊtGoq]d%>vS$~\ggo=ݲDɓo^[}h;ӐËn[\u={r>2(їrEtkէ\>&_سܡX-qn{|Eꨜ E+c.6xDkcrr]JQa+#_5|9ÚZ`X`X`XGijbk yh[&m[L7)[>/@|@ws6b60oF'ON_fhwז_h,$ 'mXƸ1=M[W#M-CrkDA}hmaрh־'V-iXhd_/^ڽhOowhp4mo˔)w l靛OS?߲O^ʋƧ Ex)Ȑ]uGpQ';(H<vmŧF7\Prgpe6|zõH-+xM٥϶rrWX7YYe\BZ!s3ɣ#L)sC0&mqT)ѾX)2I,WBfIbRJDCqTB"j O[ mNg2!LL kuz]{b[ʕN[!/j5j.r1W%u_bc۰EfG}{Er1 /kz [D{>'*ʻ;n_oxLܯrٔ5L~=?0B>OqAͥS]ĝN\c*B[63z~K]p;[g]E 9z{%"NNnڟK箏Z?rXn5^3Kg&cUSkAwNsEEwr4[o="V3C[N_aG΄ ?Hzwn䡧ŷ=ތr{A1v3'G\71LS+, , , ,_$XFreeFem-sources-4.9/etc/config/FreeFem++.scrpt-txt.in000664 000000 000000 00000003246 14037356732 022335 0ustar00rootroot000000 000000 # Author F. hecht with the help of Vincent Bernardi # frederic.hecht@upmc.fr # this is a free software with no garanty at all. #--------------- # take from the web (thank to Alec Web log). 
# http://www.alecjacobson.com/weblog/

# Return the 1-based position of the last occurrence of char in the_text,
# or 0 when char does not occur or any step raises an error.
on last_offset(the_text, char)
	try
		set len to count of the_text
		# Search the reversed text: its first match is the last match of the original.
		set reversed to reverse of characters of the_text as string
		set last_occurrence to len - (offset of char in reversed) + 1
		# offset yields 0 when nothing is found, which gives len + 1 here.
		if last_occurrence > len then
			return 0
		end if
	on error
		return 0
	end try
	return last_occurrence
end last_offset

# Return the directory part of a "/"-separated path:
# "." when the path has no "/", "/" when the only "/" is the first character,
# and otherwise everything before the last "/". A trailing "/" is stripped first.
on dirname(the_path)
	set last_occurrence to last_offset(the_path, "/")
	if last_occurrence is equal to 0 then
		return "."
	end if
	if last_occurrence is equal to 1 then
		return "/"
	end if
	# Path ends with "/": drop it and retry on the shortened path.
	if last_occurrence is equal to (count of the_path) then
		set the_path to items 1 thru (last_occurrence - 1) of the_path as string
		return dirname(the_path)
	end if
	return items 1 thru (last_occurrence - 1) of the_path as string
end dirname
# end of web part ...

# the freefem++ action
# -------------------
# Run FreeFem++-CoCoa on the given .edp file from the directory containing it.
# The shell PATH is extended with the content of /etc/paths.d/FreeFem++.
# NOTE(review): edpdir and edppath are spliced into the shell command between
# escaped double quotes, inside a single-quoted bash -c argument; file names
# containing quote characters would presumably break the command. Consider
# "quoted form of".
on freefem(edpfile)
	set edppath to POSIX path of edpfile
	set edpdir to dirname(edppath)
	tell application "Terminal"
		if not (exists window 1) then reopen
		activate
		# Errors from the shell command are deliberately ignored by the empty try block.
		try
			do shell script "/usr/bin/env bash -c 'export PATH=\"$PATH:$(cat /etc/paths.d/FreeFem++)\";cd \"" & edpdir & "\";FreeFem++-CoCoa \"" & edppath & "\"'"
		end try
	end tell
end freefem

# to do on drag and drop
# -------------------
# Called with the list of files dropped on the droplet: run each one in turn.
on open filelist
	repeat with edpfile in filelist
		freefem(edpfile)
	end repeat
	return
end open

# other default case ….
# ------------------- set edpfile to choose file with prompt "Choose a edp script:" freefem(edpfile) FreeFem-sources-4.9/etc/config/m4/000775 000000 000000 00000000000 14037356732 016712 5ustar00rootroot000000 000000 FreeFem-sources-4.9/etc/config/m4/WindowsPackage.m4000664 000000 000000 00000022151 14037356732 022063 0ustar00rootroot000000 000000 ; Creating a FreeFem++ package for Microsoft Windows with Inno Setup ; $Id$ ;; m4 def ;; `DHOSTOS' = HOSTOS ;; `SIZEOFPTR' = SIZEOFPTR ;; ifelse(SIZEOFPTR,64, define(`SUFF64',`-64' ),define(`SUFF64',`' )) ;; define(IFMPI,ifelse(len(MPIPROG),0,; ,)) ;; define(IFMGW32,ifelse(SIZEOFPTR,32,,;)) ;; define(IFMGW64,ifelse(SIZEOFPTR,64,,;)) ;; -- end def m4 ; The Inno Setup configuration file WindowsPackage.iss is built from ; WindowsPackage.m4 with the command "make WindowsPackage.iss". ; No source file here. They are in the source tar ball. ; suppress -cs version no fltk to day , wait the next version ; FH version 3.0-1 [Setup] AppName=FreeFem++-win`'SIZEOFPTR-VERSION AppVerName=FreeFem++ version VERSION (win SIZEOFPTR bits) DefaultDirName={pf}\FreeFem++`'SUFF64 DefaultGroupName=FreeFem++`'SUFF64 Compression=lzma SolidCompression=yes ChangesAssociations=yes OutputBaseFilename=FreeFem++-VERSION-win`'SIZEOFPTR ChangesEnvironment=yes [Dirs] Name: "{app}"; ; set writing permissions for examples with write and read files Name: "{app}\examples\misc"; Permissions: everyone-full Name: "{app}\examples\plugin"; Permissions: everyone-full Name: "{app}\examples\tutorial"; Permissions: everyone-full Name: "{app}\examples\3d"; Permissions: everyone-full Name: "{app}\examples\3dSurf"; Permissions: everyone-full Name: "{app}\examples\3dCurve"; Permissions: everyone-full Name: "{app}\examples\examples"; Permissions: everyone-full Name: "{app}\examples\eigen"; Permissions: everyone-full Name: "{app}\idp"; Permissions: everyone-full IFMPI Name: "{app}\examples\mpi"; Permissions: everyone-full IFMPI Name: "{app}\examples\hpddm"; Permissions: 
everyone-full [Files] ; README Source: "README.md"; DestDir: "{app}" Source: "readme\README_WINDOWS.md"; DestDir: "{app}" Source: "readme\INNOVATION"; DestDir: "{app}" Source: "readme\AUTHORS"; DestDir: "{app}" Source: "readme\BUGS"; DestDir: "{app}" Source: "readme\COPYRIGHT"; DestDir: "{app}" Source: "readme\COPYING"; DestDir: "{app}" ;Source: "README"; DestDir: "{app}" ;Source: "crimson-freefem++.zip"; DestDir: "{app}" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}" ; Programs Source: "src\bin-win32\FreeFem++.exe"; DestDir: "{app}" Source: "freefem++.pref"; DestDir: "{app}" ifelse(len(MPIPROG),0,; ,)Source: "src\bin-win32\FreeFem++-mpi.exe"; DestDir: "{app}" ifelse(len(MPIPROG),0,; ,)Source: "src\mpi\ff-mpirun"; DestDir: "{app}" Source: "src\bin-win32\launchff++.exe"; DestDir: "{app}" ; no freefem++-cs today see ALH (FH) ;Source: "src\bin-win32\FreeFem++-cs.exe"; DestDir: "{app}" ;Source: "src\ide\FreeFem++-cs.exe"; DestDir: "{app}" Source: "src\nw\ffglut.exe"; DestDir: "{app}" Source: "src\medit\ffmedit.exe"; DestDir: "{app}" ;Source: "src\bin-win32\FreeFem++-nw.exe"; DestDir: "{app}" Source: "src\bin-win32\bamg.exe"; DestDir: "{app}" Source: "src\bin-win32\cvmsh2.exe"; DestDir: "{app}" ; Source: "src\bin-win32\drawbdmesh.exe"; DestDir: "{app}" Source: "src\bin-win32\*.dll"; DestDir: "{app}" Source: "plugin\seq\ff-c++"; DestDir: "{app}" Source: "plugin\seq\ff-get-dep.awk"; DestDir: "{app}" Source: "plugin\seq\WHERE_LIBRARY-config"; DestDir: "{app}" Source: "plugin\seq\WHERE_LIBRARY"; DestDir: "{app}" Source: "plugin\seq\WHERE_LIBRARY-download"; DestDir: "{app}" Source: "plugin\seq\ff-pkg-download"; DestDir: "{app}" Source: "plugin\seq\ff-get-dep"; DestDir: "{app}" ; mingwm10.dll is necessary when "-mthreads" is used as a compilation ; flag. ; ;ldd.exe src/bin-win32/FreeFem++.exe |awk '/mingw64/ {print "cygpath -w ",$3}'|sh|awk '{print "IFMGW64 Source: @" $0 "@ DestDir: @{app}@"}'|sed 's/@/"/g' IFMGW32 ; mingw32 .... FH. I have put all dll in bin-win32 dir .... 
IFMGW32 Source: "C:\MinGW\bin\mingwm10.dll"; DestDir: "{app}" ; Source: "C:\Cygwin\bin\glut32.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\msys\1.0\bin\freeglut.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\bin\pthreadGC2.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\bin\libgcc_s_dw2-1.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\bin\libstdc++-6.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\bin\libgfortran-*.dll"; DestDir: "{app}" IFMGW32 Source: "C:\MinGW\bin\libquadmath-*.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libgcc_s_seh-*.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libstdc++-*.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libwinpthread-1.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libgfortran-*.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libquadmath-*.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libfreeglut.dll"; DestDir: "{app}" IFMGW64 Source: "C:\msys64\mingw64\bin\libgsl*.dll"; DestDir: "{app}" IFMGW64 ; mingw64 .... FH. I have put all dll in bin-win32 dir .... 
;; end of mingw ------------ ; Does not include FreeFem++-x11 which would need the Cygwin X-Server ; Does not include FreeFem++-glx which would need the Cygwin X-Server ; Examples Source: "idp\*.idp"; DestDir: "{app}\idp" Source: "examples\misc\*.edp"; DestDir: "{app}\examples\misc" Source: "examples\eigen\*.edp"; DestDir: "{app}\examples\eigen" Source: "examples\tutorial\*.edp"; DestDir: "{app}\examples\tutorial" Source: "examples\tutorial\aile.msh"; DestDir: "{app}\examples\tutorial" Source: "examples\tutorial\xyf"; DestDir: "{app}\examples\tutorial" Source: "examples\examples\*.edp"; DestDir: "{app}\examples\examples" Source: "examples\plugin\*.edp"; DestDir: "{app}\examples\plugin" Source: "examples\plugin\*.pgm"; DestDir: "{app}\examples\plugin" Source: "examples\plugin\*.pts"; DestDir: "{app}\examples\plugin" Source: "examples\plugin\cube.msh"; DestDir: "{app}\examples\plugin" Source: "examples\plugin\g.gmesh"; DestDir: "{app}\examples\plugin" Source: "plugin\seq\load.link"; DestDir: "{app}\plugin" Source: "plugin\include-tmp\*"; DestDir: "{app}\include" Source: "examples\3d\*.edp"; DestDir: "{app}\examples\3d" Source: "examples\3d\dodecaedre01.mesh"; DestDir: "{app}\examples\3d" Source: "examples\3d\lac-leman-v4.msh"; DestDir: "{app}\examples\3d" Source: "examples\3dSurf\*.edp"; DestDir: "{app}\examples\3dSurf" Source: "examples\3dCurve\*.edp"; DestDir: "{app}\examples\3dCurve" IFMPI Source: "examples\mpi\ff*.txt"; DestDir: "{app}\examples\mpi" IFMPI Source: "examples\mpi\*.edp"; DestDir: "{app}\examples\mpi" IFMPI Source: "examples\hpddm\*.edp"; DestDir: "{app}\examples\hpddm" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}\examples\load" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}\examples\tutorial" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}\examples\examples" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}\examples\misc" ;Source: "0ldUserReadMe.txt"; DestDir: "{app}\examples\eigen" ; Documentation files may need to be copied from another machine if ; 
Cygwin refuses to build them.
Source: "FreeFEM-documentation.pdf"; DestDir: "{app}"

; Icons for Windows can be created from a 32x32 image with icotool
; (Linux Debian unstable), or IrfanView (Windows, not very good
; results) or paint (Windows, save in .bmp then rename to .ico).
Source: "etc\logo\logo.ico"; DestDir: "{app}"

[Icons]

; Menu
Name: "{group}\FreeFem++"; Filename: "{app}\launchff++.exe"; IconFilename: "{app}\logo.ico"
;Name: "{group}\FreeFem++ GUI"; Filename: "{app}\FreeFem++-cs.exe"
; The shortcut target must be the PDF file name installed by the Source entry above.
Name: "{group}\PDF manual"; Filename: "{app}\FreeFEM-documentation.pdf"
Name: "{group}\Examples\Tutorial"; Filename: "{app}\examples\tutorial"
Name: "{group}\Examples\chapt3"; Filename: "{app}\examples\examples"
Name: "{group}\Examples\load"; Filename: "{app}\examples\plugin"
Name: "{group}\Examples\Main"; Filename: "{app}\examples\misc"
Name: "{group}\Examples\Eigenvalues"; Filename: "{app}\examples\eigen"
Name: "{group}\Examples\3d"; Filename: "{app}\examples\3d"
Name: "{group}\Examples\3dSurf"; Filename: "{app}\examples\3dSurf"
Name: "{group}\Examples\3dCurve"; Filename: "{app}\examples\3dCurve"
IFMPI Name: "{group}\Examples\mpi"; Filename: "{app}\examples\mpi"
IFMPI Name: "{group}\Examples\hpddm"; Filename: "{app}\examples\hpddm"
Name: "{group}\Uninstall FreeFem++ VERSION"; Filename: "{uninstallexe}"

; Desktop
Name: "{userdesktop}\FreeFem++ VERSION"; Filename: "{app}\launchff++.exe"; IconFilename: "{app}\logo.ico"
;Name: "{userdesktop}\FreeFem++ VERSION GUI"; Filename: "{app}\FreeFem++-cs.exe"
Name: "{userdesktop}\FreeFem++ VERSION Examples"; Filename: "{group}\Examples"

[Registry]

; Link .edp file extension to FreeFem++
Root: HKCR; Subkey: ".edp"; ValueType: string; ValueName: ""; ValueData: "FreeFemVERSIONScript"; Flags: uninsdeletevalue
Root: HKCR; Subkey: "FreeFemVERSIONScript"; ValueType: string; ValueName: ""; ValueData: "FreeFem++ Script"; Flags: uninsdeletekey
Root: HKCR; Subkey: "FreeFemVERSIONScript\DefaultIcon"; ValueType: string; ValueName: ""; ValueData:
"{app}\logo.ico" Root: HKCR; Subkey: "FreeFemVERSIONScript\shell\open\command"; ValueType: string; ValueName: ""; ValueData: """{app}\launchff++.exe"" ""%1""" [Tasks] Name: modifypath; Description: &Add application directory to your system path (if missing you can have trouble with on-the-fly graphic ) ; Flags: checkedonce ; unchecked [Code] function ModPathDir(): TArrayOfString; var Dir: TArrayOfString; begin setArrayLength(Dir, 1) Dir[0] := ExpandConstant('{app}'); Result := Dir; end; #include "bin\modpath.iss" FreeFem-sources-4.9/etc/config/m4/acmacros.m4000664 000000 000000 00000001571 14037356732 020750 0ustar00rootroot000000 000000 # Checks whether a compiler accepts a given flag # ---------------------------------------------- # $1 = compiler name # $2 = flag # $3 = make macro containing flags for that compiler # $4 = exec is true .. # Note: changes AC_LANG() AC_DEFUN([CHECK_COMPILE_FLAG], [AC_MSG_CHECKING(whether the $1 compiler accepts $2) check_save_flags="$$3" AC_LANG_PUSH($1) $3="$$3 $2" # The program needs to contain something for the test source # file to be created by autoconf. # Some options really need to be linked (not only compiled) to # check whether they work. AC_LINK_IFELSE([ifelse($1,Fortran 77, [ program x end], [AC_LANG_PROGRAM])], check_flag_ok=yes, check_flag_ok=no) AC_MSG_RESULT($check_flag_ok) if test "$check_flag_ok" = no; then $3="$check_save_flags" fi if test -n "$4" ; then $4="$check_flag_ok" fi AC_LANG_POP($1) ]) FreeFem-sources-4.9/etc/config/m4/acmpi.m4000664 000000 000000 00000032151 14037356732 020247 0ustar00rootroot000000 000000 # Checking wether we can produce a parallel version # ------------------------------------------------- dnl m4_include(ax_mpi.m4) ff_save_path="$PATH" # We need to choose between mpich, openmpi and lam for the Debian package AC_ARG_WITH(mpipath,[ --with-mpipath= the path of mpich under windows (no command mpic++, ... )]) AC_ARG_WITH(mpilibs,[ --with-mpilibs= the libs to add to c++,fc, ... 
(to link with c++ - ex: -L/usr/local/lib -lmpi_f90 -lmpi_cxx -lmpi -lopen-rte -lopen-pal -lutil) ])
AC_ARG_WITH(mpilibsc,[ --with-mpilibsc= the libs to add to c ... (to link with cc (for pastix lib) ex: -L/usr/local/lib -lmpi -lopen-rte -lopen-pal -lutil) ])
AC_ARG_WITH(mpiinc,[ --with-mpiinc= the include directory directive and preprocess directive (no mpicc++, just use the compiler)) ])
AC_ARG_WITH(mpi,[ --with-mpi=[yes|no|mpic++|lam|mpich|openmpi|/usr/local/bin/mpic++|... ] or --without-mpi Choose MPI implementation (default is mpic++)])

if test "$with_mpi" != no ; then
    #if test "$with_mpi" != no ; then
    #AX_MPI(with_mpi=yes, with_mpi=no)
    #fi

    # Default is mpic++
    ff_mpi_suffix="";
    if test "$with_mpi" = yes -o -z "$with_mpi"
    then
	ff_mpicxx=mpic++
    else
	# A known implementation name selects the suffixed wrapper,
	# anything else is taken as the MPI C++ compiler command itself.
	case "$with_mpi" in
	    lam|mpich|openmpi) ff_mpi_suffix=.$with_mpi;ff_mpicxx=mpic++.$with_mpi;;
	    *) ff_mpicxx="$with_mpi" ;;
	esac
    fi
    dnl AC_MSG_NOTICE([ xxxxxxxxxxxxxxxxxxxx --$with_mpilibs--]);
    if test -n "$with_mpiinc" -a "$with_mpiinc" != no ; then
	if test "$with_mpi" = 'no' ; then with_mpi='yes'; fi
	ff_MPI_INCLUDE="$with_mpiinc"
    fi
    # Explicit MPI libraries: use the plain compilers plus the MPI include flags
    # instead of the MPI compiler wrappers.
    if test -n "$with_mpilibs" -a "$with_mpilibs" != no ; then
	ff_MPI_LIB="$with_mpilibs"
	ff_MPI_LIBC="$with_mpilibs"
	ff_MPI_LIBFC="$with_mpilibs"
	MPICXX="$CXX $ff_MPI_INCLUDE"
	MPIFC="$FC $ff_MPI_INCLUDE"
	MPICC="$CC $ff_MPI_INCLUDE"
	AC_MSG_NOTICE([ --- set all MPI compile to compiler: $MPICXX, $MPIFC, $MPICC ])
    fi
    if test -n "$with_mpilibsc" -a "$with_mpilibsc" != no ; then
	ff_MPI_LIBC="$with_mpilibsc"
    fi
    AC_ARG_VAR(MPIRUN,[MPI run command ])
    AC_MSG_CHECKING(for MPIRUN)
    if test -z "$MPIRUN" ; then
	if test -n "$MSMPI_BIN" -a -x "$MSMPI_BIN/mpiexec.exe" ; then
	    MPIRUN="$MSMPI_BIN"\mpiexec.exe
	else
	    AC_PATH_PROGS(MPIRUN,mpiexec mpirun mpiexec.exe,no)
	fi
	if test "$MPIRUN" = no
	then
	    ff_mpi=no
	fi
    fi
    AC_MSG_RESULT($MPIRUN)
    # Probe the launcher only when one was found; the variable must be
    # dereferenced here, a bare word would make this test always true.
    if test "$ff_mpi" != "no" ; then
	AC_MSG_CHECKING(for MPIRUN option: )
	ff_mpi_option=""
	# Two echoed lines mean the launcher accepted --oversubscribe with 2 processes.
	ff_okkk=`"$MPIRUN" -np 2 --oversubscribe echo ff__okkk 2>/dev/null| grep ff__okkk |wc -l`
	if test
"$ff_okkk" -eq 2 ; then ff_mpi_option="--oversubscribe" ; fi AC_MSG_RESULT($ff_mpi_option) fi AC_MSG_CHECKING(for mpipath ) if test "$with_mpi" != no -a ! -d "$with_mpipath" -a "$MPIRUN" != no ; then # if "$MPIRUN" != no ; tehn with_mpipath=`AS_DIRNAME(["$MPIRUN"])` with_mpipath=`AS_DIRNAME(["$with_mpipath"])` # echo " ***** with_mpipath $with_mpipath \n" # else # for i in '/c/Program Files (x86)/MPICH2' '/c/Program Files/MPICH2' 'c:\Program Files (x86)\MPICH2' 'c:\Program Files\MPICH2' ; do # test -d "$i" && with_mpipath="$i" && break # done # fi fi #echo "**** with_mpipath '$with_mpipath' $MPIRUN *****" dnl if test "$with_mpilibs" != "no" ; then dnl fi case "$MPIRUN" in */sgi/mpt/*) ff_MPI_INCLUDE_DIR= ff_MPI_LIB_DIR= test -f "$with_mpipath/include/mpif.h" && ff_MPI_INCLUDE_DIR="$with_mpipath/include" test -f "$with_mpipath/lib/libmpi.so" && ff_MPI_LIB_DIR="$with_mpipath/lib" if test -n "$ff_MPI_INCLUDE_DIR" -a -n "$ff_MPI_LIB_DIR" ; then ff_MPI_INCLUDE="-I'$ff_MPI_INCLUDE_DIR' " with_mpiinc="$ff_MPI_INCLUDE" ff_MPI_LIBC="-L'$ff_MPI_LIB_DIR' -lmpi" ff_MPI_LIB="-L'$ff_MPI_LIB_DIR' -lmpi++ -lmpi" ff_MPI_LIBFC="-L'$ff_MPI_LIB_DIR' -lmpi" ff_mpitype=sgi test -z "$MPICXX" && MPICXX="$CXX $ff_MPI_INCLUDE" test -z "$MPIFC" && MPIFC="$FC $ff_MPI_INCLUDE" test -z "$MPICC" && MPICC="$CC $ff_MPI_INCLUDE" # echo " *** MPI sgi ..... 
" fi ;; esac # echo " #### --$MSMPI_INC--$MSMPI_BIN--$ff_win32" if test -n "$MSMPI_INC" -a -n "$MSMPI_BIN" -a "$ff_win32" = yes ; then echo " #### check MSMPI" # MSMPI_LIB64 MSMPI_LIB32 $ff_ptrbit is 32 or 64 ffMSMPI_BIN=`cygpath $MSMPI_BIN` ffMSMPI_INC=`cygpath $MSMPI_INC` ffMSMPI_LIB32=`cygpath $MSMPI_LIB32` ffMSMPI_LIB64=`cygpath $MSMPI_LIB64` mkdir -p 3rdparty/include/msmpi mkdir -p 3rdparty/lib/msmpi cp "$MSMPI_INC"/*.h 3rdparty/include/msmpi grep -v INT_PTR_KIND "$MSMPI_INC"/mpif.h >3rdparty/include/msmpi/mpif.h test "$ff_ptrbit" -eq 64 && cp "$MSMPI_INC"/x64/*.h 3rdparty/include/msmpi test "$ff_ptrbit" -eq 32 && cp "$MSMPI_INC"/x86/*.h 3rdparty/include/msmpi ff_MPI_INCLUDE_DIR=`pwd`/3rdparty/include/msmpi ff_msmpi_lib="$MSMPI_LIB64" test "$ff_ptrbit" -eq 32 && ff_msmpi_lib="$MSMPI_LIB32" cp "$ff_msmpi_lib/msmpifec.lib" "$ff_msmpi_lib/msmpi.lib" 3rdparty/lib/msmpi ff_msmpi_lib=`pwd`/3rdparty/lib/msmpi # to reinstall msmpi .. # MSMPI msmpi_dll="`which msmpi.dll`" if test -x "$msmpi_dll" then # Remove for scotch and parmetis ff_MPI_INCLUDE="-I$ff_MPI_INCLUDE_DIR -D__int64=long\ long" with_mpiinc="$ff_MPI_INCLUDE" test -z "$MPIRUN" -a -x "$ffMSMPI_BIN/mpiexe.exe" && MPIRUN="$MSMPI_BIN\mpiexe.exe" ff_MPI_LIBC="$msmpi_dll" ff_MPI_LIB="$msmpi_dll" ff_MPI_LIBFC="$msmpi_dll" ff_mpiexec_win="C:\Program Files\Microsoft MPI\Bin\mpiexec.exe" test -z "$ff_mpiexec_win" && MPIRUN="$ff_mpiexec_win" test -z "$MPICXX" && MPICXX="$CXX $ff_MPI_INCLUDE" test -z "$MPIFC" && MPIFC="$FC $ff_MPI_INCLUDE" test -z "$MPICC" && MPICC="$CC $ff_MPI_INCLUDE" ff_mpitype=MSMPI else echo " #### no msmpi.dll => no mpi under windows .... (FH) " >&AS_MESSAGE_LOG_FD echo " #### no msmpi.dll => no mpi under windows .... (FH) " >&AS_MESSAGE_FD with_mpipath=no with_mpi=no fi elif test -d "$with_mpipath" -a "$ff_win32" = yes ; then # sed -e "s?@MPIDIR@?$with_mpipath?" -e "s?@F77@?$F77?" -e "s?@CC@?$CC?" -e "s?@CXX@?$CXX?" -e "s?@FC@?$FC?" 
mpic++ # chmod a+rx mpic++ # for i in mpicc mpif90 mpifc mpif77 ; do cp mpic++ $i; done # ff_pwd=`pwd` # with_mpi="$ff_pwd"/mpic++ # MPICXX="$ff_pwd/mpic++" # MPIFC="$ff_pwd/mpif90" # MPICC="$ff_pwd/mpicc" zzzzzzzzzzz if with_mpilibs=`which msmpi.dll` then case "$ff_size_ptr" in 4) with_mpipathlib="$with_mpipath/Lib/i386";; 8) with_mpipathlib="$with_mpipath/Lib/amd64";; *) with_mpipath=no;; esac test -d "$with_mpipath/Inc" && ff_MPI_INCLUDE_DIR="$with_mpipath/Inc" test -d "$with_mpipath/Include" && ff_MPI_INCLUDE_DIR="$with_mpipath/Include" # Remove for scotch and parmetis # ff_MPI_INCLUDE="-I'$ff_MPI_INCLUDE_DIR' '-D_MSC_VER' '-D__int64=long long'" ff_MPI_INCLUDE="-I'$ff_MPI_INCLUDE_DIR' '-D__int64=long long'" with_mpiinc="$ff_MPI_INCLUDE" test -z "$MPIRUN" && MPIRUN="$with_mpipath/bin/mpiexe.exe" ff_MPI_LIBC="$with_mpilibs" ff_MPI_LIB="$with_mpilibs" ff_MPI_LIBFC="$with_mpilibs" test -z "$MPICXX" && MPICXX="$CXX $ff_MPI_INCLUDE" test -z "$MPIFC" && MPIFC="$FC $ff_MPI_INCLUDE" test -z "$MPICC" && MPICC="$CC $ff_MPI_INCLUDE" else echo " #### no msmpi.dll => no mpi under windows .... (FH) " >&AS_MESSAGE_LOG_FD echo " #### no msmpi.dll => no mpi under windows .... (FH) " >&AS_MESSAGE_FD with_mpipath=no with_mpi=no fi else with_mpipath=no fi AC_MSG_RESULT($ff_mpi_path) dnl correct ff_mpi_path august 2010 -- FH ... ff_save_cxx="$CXX" ff_save_libs="$LIBS" if test "$with_mpi" != no then ff_mpi_path=`AS_DIRNAME(["$MPIRUN"])` dnl echo "ff_mpi_path '$ff_mpi_path' .............." case "$ff_mpi_path" in .|"") ff_mpi_path="$PATH";ff_defmpicxx="$ff_mpicxx";; *) ff_mpi_path="$ff_mpi_path";ff_defmpicxx=`expr "//$ff_mpicxx" : '.*/\(.*\)'`;; dnl if also add $PATH they could be missing some different mpi version... 
esac
    AC_ARG_VAR(MPICXX,[MPI C++ compiler command])
    if test -z "$MPICXX" ; then
      AC_PATH_PROGS(MPICXX,$ff_defmpicxx mpic++$ff_mpi_suffix mpicxx$ff_mpi_suffix mpiCC$ff_mpi_suffix mpCC hcp mpxlC mpxlC_r cmpic++,no,$ff_mpi_path)
      AC_MSG_CHECKING(for MPICXX)
    fi
    # Temporarily compile and link with the MPI wrapper and MPI libraries.
    ff_mpicxx="eval $MPICXX"
    CXX=$ff_mpicxx
    LIBS="$LIBS $ff_MPI_LIB"
    test -z "$ff_mpi" && ff_mpi=yes
    # The probe needs mpi.h for the MPI calls and stdio.h for BUFSIZ and printf.
    # The header names had been lost, which made the probe uncompilable.
    AC_LINK_IFELSE(
[AC_LANG_SOURCE([
#include <mpi.h>
#include <stdio.h>
int main(int argc,char **argv){
  char name[[BUFSIZ]];
  int length;
  MPI_Init(&argc, &argv);
  MPI_Get_processor_name(name, &length);
  printf("%s: hello world\n", name);
  MPI_Finalize();
  return 0;
}])],ff_mpi=yes,ff_mpi=no)
    AC_MSG_RESULT($ff_mpi)

    # Also check that mpirun is there. If it isn't, then MPI is
    # not fully installed.
    if test "$ff_mpi" = yes;
    then
      AC_MSG_CHECKING( MPI_DOUBLE_COMPLEX)
      # Compile-only probe for the MPI_DOUBLE_COMPLEX datatype.
      AC_COMPILE_IFELSE(
[AC_LANG_SOURCE([
#include <mpi.h>
MPI_Datatype xxxx=MPI_DOUBLE_COMPLEX;
])],
      ff_mpi_double_complex=yes,
      ff_mpi_double_complex=no)
      AC_MSG_RESULT($ff_mpi_double_complex)
      if test "$ff_mpi_double_complex" = yes ; then
        AC_DEFINE(HAVE_MPI_DOUBLE_COMPLEX,1, mpi_double_complex)
      fi
      echo "MPI CC $ff_mpi" >config_LIB_INFO
      # We do not AC_DEFINE any special flag for parallel
      # computation here, because it must only be set when the
      # parallel program is compiled (see src/mpi/Makfile.am)
      ff_mpiprog="FreeFem++-mpi${EXEEXT}"
      AC_SUBST(MPIPROG,"$ff_mpiprog")
      AC_SUBST(MPISCRIPT,"ff-mpirun")
      AC_SUBST(MPIRUN,"$MPIRUN")
      AC_SUBST(MPICXX,$MPICXX)
    else
      AC_SUBST(MPICXX,$ff_save_cxx)
    fi
    if test "$ff_mpi" = yes;
    then
      if test "$enable_fortran" != no
      then
        AC_ARG_VAR(MPIFC,[MPI Fortran 90 compiler command])
        if test -z "$MPIFC" ; then
          AC_PATH_PROGS(MPIFC, mpif90$ff_mpi_suffix mpxlf95_r mpxlf90_r mpxlf95 mpxlf90 mpf90 cmpif90c, "",$ff_mpi_path)
        fi
        AC_SUBST(MPIFC)
      fi
      # echo " ********************ffmpi= '$ff_mpi' ************* "
      ff_MPI_INCLUDE="$with_mpiinc"
      if test -z "$ff_mpitype" ; then
        # Ask each wrapper for the underlying command line it would run.
        test -n "$MPICXX" && ff_mpishow=`$MPICXX -show` 2>/dev/null
        test -n "$MPICC" && ff_mpicshow=`$MPICC -show` 2>/dev/null
        test
-n "$MPIFC" && ff_mpifcshow=`$MPIFC -show` 2>/dev/null
        # Without explicit --with-mpilibs, split the wrapper command lines into
        # include flags and link flags.
        if test "$with_mpilibs" = no -o -z "$with_mpilibs" ; then
          [ff_MPI_INCLUDE=`echo $ff_mpishow|tr ' ' '\n'| grep -E '^[-/][^WLlOgpf]|^-Wp,'|tr '\n' ' '`]
          ff_MPI_LIB_DIRS=""
          [ff_MPI_LIB=`echo $ff_mpishow|tr ' ' '\n'| grep -E '^-[Llp]|^-Wl,'|tr '\n' ' '`]
          [ff_MPI_LIBC=`echo $ff_mpicshow|tr ' ' '\n'| grep -E '^-[Llp]|^-Wl,'|tr '\n' ' '`]
          [ff_MPI_LIBFC=`echo $ff_mpifcshow|tr ' ' '\n'| grep -E '^-[Llp]|^-Wl,'|grep -v 'commons,use_dylibs' |tr '\n' ' '`]
          [ff_mpi_idir=`echo $ff_mpishow|tr ' ' '\n'| grep -E '^-I'|sed s/^-I//|tr '\n' ' '`' /usr/include']
        fi
        # Candidate include and library directories, with a system fallback.
        [ff_mpi_idir=`echo $ff_MPI_INCLUDE|tr ' ' '\n'| grep -E '^-I'|sed s/^-I//|tr '\n' ' '`' /usr/include']
        # The second sed expression used to contain a stray closing bracket. It
        # ended the m4 quotation early and left a bracket glued to the
        # /usr/lib fallback, so that directory was never found.
        [ff_mpi_ldir=`echo $ff_MPI_LIB|tr ' ' '\n'| grep -E '^-[Llp]|^-Wl,'|sed -e 's/^-[Llp]//' -e 's/^-Wl,//' |tr '\n' ' '`' /usr/lib']
        # Keep the first directory that really contains mpi.h.
        if test -z "$ff_MPI_INCLUDE_DIR" ; then
          for i in $ff_mpi_idir; do
            if test -f "$i/mpi.h" -a -z "$ff_MPI_INCLUDE_DIR" ;then
              ff_MPI_INCLUDE_DIR=$i
            fi
          done
        fi
        # Keep the first directory that contains a libmpi file.
        for i in $ff_mpi_ldir; do
          if test -d $i ; then
            ff_tmp=`ls $i/libmpi.*|head -1`
            if test -f "$ff_tmp" -a -z "$ff_MPI_LIB_DIRS" ;then
              ff_MPI_LIB_DIRS=$i
            fi
          fi
        done
      fi
      AC_SUBST(MPICXX,$MPICXX)
      AC_ARG_VAR(MPICC,[MPI C compiler command in $ff_mpi_path])
      if test -z "$MPICC" ; then
        AC_PATH_PROGS(MPICC,mpicc$ff_mpi_suffix hcc mpcc mpcc_r mpxlc cmpicc, "",$ff_mpi_path)
      fi
      AC_SUBST(MPICC,$MPICC)
      if test !
-f "$ff_MPI_INCLUDE_DIR/mpif.h" ; then AC_MSG_NOTICE([ MPI without fortran no file "$ff_MPI_INCLUDE_DIR/mpif.h" ]) else if test -n "$MPIFC" ; then AC_FF_ADDWHERELIB(mpifc,$ff_MPI_LIBFC,$ff_MPI_INCLUDE) AC_FF_ADDWHERELIB(mpif77,$ff_MPI_LIBFC,$ff_MPI_INCLUDE) dnl [echo mpifc LD "'$ff_MPI_LIBFC'" >>$ff_where_lib_conf ] dnl [echo mpifc INCLUDE "'$ff_MPI_INCLUDE'" >>$ff_where_lib_conf ] dnl [echo mpif77 LD "'$ff_MPI_LIBFC'" >>$ff_where_lib_conf ] dnl [echo mpif77 INCLUDE "'$ff_MPI_INCLUDE'" >>$ff_where_lib_conf ] fi fi if test -n "$MPICXX" ; then AC_FF_ADDWHERELIB(mpi,$ff_MPI_LIB,$ff_MPI_INCLUDE) dnl [echo mpi LD "'$ff_MPI_LIB'" >>$ff_where_lib_conf ] dnl [echo mpi INCLUDE "'$ff_MPI_INCLUDE'" >>$ff_where_lib_conf ] fi AC_SUBST(MPI_INC_DIR,$ff_MPI_INCLUDE_DIR) AC_SUBST(MPI_INCLUDE,$ff_MPI_INCLUDE) AC_SUBST(MPI_LIB_DIRS,$ff_MPI_LIB_DIRS) AC_SUBST(MPI_LIB,$ff_MPI_LIB) AC_SUBST(MPI_LIBC,$ff_MPI_LIBC) AC_SUBST(MPI_LIBFC,$ff_MPI_LIBFC) AC_SUBST(MPI_RUN_OPTION,$ff_mpi_option) AC_SUBST(SKIP_TESTS_MPI,"no") fi CXX="$ff_save_cxx" LIBS="$ff_save_libs" fi fi ## clean on MPI variable if not MPI ... 
if test "$ff_mpi" != yes ; then
  # MPI is unusable: substitute empty values and mark the MPI tests as skipped.
  AC_SUBST(MPIRUN,"")
  AC_SUBST(MPICC,"")
  AC_SUBST(MPICXX,"")
  AC_SUBST(MPIFC,"")
  AC_SUBST(MPI_INCLUDE,"")
  AC_SUBST(MPI_LIB_DIRS,"")
  AC_SUBST(MPI_LIB,"")
  AC_SUBST(MPI_LIBC,"")
  AC_SUBST(MPI_LIBFC,"")
  AC_SUBST(SKIP_TESTS_MPI,"yes")
  ff_mpi=no
  dnl AC_MSG_ERROR([ Sorry nompi compiler !])
fi

# Local Variables:
# mode:shell-script
# ispell-local-dictionary:"british"
# coding:utf-8
# End:
FreeFem-sources-4.9/etc/config/m4/acoptim.m4000664 000000 000000 00000020650 14037356732 020613 0ustar00rootroot000000 000000 
# Choosing debugging and/or optimization flags for compilation
# ------------------------------------------------------------

# get CPU Type
cputype=unknow
if test -x /usr/bin/machine ; then
  cputype=`/usr/bin/machine`
elif test -x /usr/bin/arch ; then
  cputype=`/usr/bin/arch`
fi
cpuintel=no;
case "$cputype" in
  i386|i486| x86_64*) cpuintel=yes;;
  *) cpuintel=no;;
esac
AC_MSG_NOTICE([ ----- CPU kind: $cputype , intel/amd: $cpuintel ])

AC_ARG_ENABLE(profiling,[ --enable-profiling Turn on profiling])
if test "$enable_profiling" = yes
then
  CXXFLAGS="$CXXFLAGS -pg"
  LDFLAGS="$LDFLAGS -pg"
fi

# Reject the request only when BOTH word sizes are explicitly enabled. The
# second operand used to be a bare non-empty test, so --disable-m32 combined
# with --enable-m64 was refused as well.
if test "$enable_m64" = yes -a "$enable_m32" = yes
then
  AC_MSG_ERROR([ Choose 32 or 64 architecture not the both ],1);
fi

AC_ARG_ENABLE(m64,[ --enable-m64 Turn on 64 bits architecture])
if test "$enable_m64" = yes
then
  ff_m64=-m64
  ff_ok=no
  CHECK_COMPILE_FLAG(C,$ff_m64,CFLAGS,ff_ok)
  if test "$ff_ok" = yes ;then CNOFLAGS="$CFLAGS $ff_m64";fi
  CHECK_COMPILE_FLAG(C++,$ff_m64,CXXFLAGS)
  CHECK_COMPILE_FLAG(Fortran 77,$ff_m64,FFLAGS)
  # add -fPIC on on 64 architecture
  if test "$ff_ok" = yes -a "$ff_fpic" != "no" ;then
    CHECK_COMPILE_FLAG(C,-fPIC,CFLAGS,ff_ok)
    CHECK_COMPILE_FLAG(C++,-fPIC,CXXFLAGS)
    CHECK_COMPILE_FLAG(Fortran 77,-fPIC,FFLAGS)
  fi
fi

AC_ARG_ENABLE(m32,[ --enable-m32 Turn on 32 bits architecture])
if test "$enable_m32" = yes
then
  ff_m32=-m32
  ff_ok=no
  CHECK_COMPILE_FLAG(C,$ff_m32,CFLAGS,ff_ok)
  if test "$ff_ok" = yes ;then CNOFLAGS="$CFLAGS $ff_m32";fi
CHECK_COMPILE_FLAG(C,$ff_m32,CNOFLAGS) CHECK_COMPILE_FLAG(C++,$ff_m32,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,$ff_m32,FFLAGS) # add -fPIC on on 64 architecture # CHECK_COMPILE_FLAG(C,-fPIC,CFLAGS) # CHECK_COMPILE_FLAG(C++,-fPIC,CXXFLAGS) # CHECK_COMPILE_FLAG(Fortran 77,-fPIC,FFLAGS) fi # Debug mode (no optimisation) # ---------------------------- AC_MSG_CHECKING(whether to generate debugging information) AC_ARG_ENABLE(debug,[ --enable-debug Turn on debug versions of FreeFem++]) AC_ARG_ENABLE(optim,[ --enable-optim Turn on compiler optimization]) if test "$enable_debug" = yes; then AC_MSG_RESULT(yes) CFLAGS="`echo $CFLAGS | sed 's/-O2//g'`" FFLAGS="`echo $FFLAGS | sed 's/-O2//g'`" CXXFLAGS="`echo $CXXFLAGS | sed 's/-O2//g'`" CHECK_COMPILE_FLAG(C,-g,CFLAGS) CHECK_COMPILE_FLAG(C++,-g,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-g,FFLAGS) else AC_MSG_RESULT(no) # No debugging information in optimized code CFLAGS="$CFLAGS -DNDEBUG" FFLAGS="$FFLAGS -DNDEBUG" CXXFLAGS="$CXXFLAGS -DNDEBUG" fi # Hardware-independant optimization # --------------------------------- if test "$enable_debug" != yes -a "$enable_optim" != no; then CHECK_COMPILE_FLAG(C,-O3,CFLAGS) CHECK_COMPILE_FLAG(C++,-O3,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-O3,FFLAGS) fi AC_ARG_ENABLE(generic, [ --enable-generic Turn off hardware-dependant optimization options]) # FFCS: remove "-mcpu=common" to allow other hardware-dependant values of cpu for PowerPC - thank you Fred (20/02/11) # FH # Generic code if test "$enable_generic" = yes then CHECK_COMPILE_FLAG(C,-mtune=generic,CFLAGS) CHECK_COMPILE_FLAG(C++,-mtune=generic,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-mtune=generic,FFLAGS) fi # Hardware-dependant optimization # ------------------------------- if test "$enable_debug" != yes \ -a "$enable_optim" != no \ -a "$enable_generic" != yes then # Autoconf always chooses -O2. -O2 in gcc makes some functions # disappear. This is not ideal for debugging. And when we optimize, we # do not use -O2 anyway. 
CFLAGS="`echo $CFLAGS | sed 's/-O2//g'`" FFLAGS="`echo $FFLAGS | sed 's/-O2//g'`" CXXFLAGS="`echo $CXXFLAGS | sed 's/-O2//g'`" # MacOS X Darwin if test -x /usr/bin/hostinfo then # If we are on MacOS X to choise the optimisaztion AC_MSG_CHECKING(GCC version) ff_gcc4=`$CC --version |awk ' NR==1 {print $3}'|sed -e 's/\..*$//'` ff_clang=`$CC --version |awk '/clang/ {print $4}'` if test -n "$ff_clang" ; then ff_gcc4="llvm"; fi AC_MSG_RESULT($ff_gcc4) # At the moment, we do not know how to produce correct # optimizated code on G5. AC_MSG_CHECKING(PowerPC architecture) ff_machine=`(test -x /usr/bin/machine && /usr/bin/machine) || echo unknow` ff_fast="-O3" if test -n "$ff_clang" ; then ff_fast='-O3' elif test `uname` = Darwin then # Optimization flags: -fast option do not work because the # -malign-natural flags create wrong IO code if test "$ff_gcc4" -eq 4 then ff_fast='-fast' else ff_fast='-O3 -funroll-loops -fstrict-aliasing -fsched-interblock -falign-loops=16 -falign-jumps=16 -falign-functions=16 -falign-jumps-max-skip=15 -falign-loops-max-skip=15 -ffast-math -mpowerpc-gpopt -force_cpusubtype_ALL -fstrict-aliasing -mpowerpc64 ' fi fi # CPU detection case $ff_machine in ppc7450) # G4 ff_fast="$ff_fast -mtune=G4 -mcpu=G4";; ppc970) # G5 # remove -fstrict-aliasing on G5 to much optim the # code cash in GC ff_fast="`echo $ff_fast -mtune=G5 -mcpu=G5| sed 's/-fstrict-aliasing //g'`";; ppc*) # G3 ???? 
ff_fast="-O3";; i486) ff_fast="-O3 $ff_fast";; x86_64*) ff_fast="-O3 $ff_fast";; arm64*) ff_fast="-O3 $ff_fast";; *) AC_MSG_WARN(cannot determine apple cpu type ) ff_fast="-O3";; esac AC_MSG_RESULT($ff_fast) CHECK_COMPILE_FLAG(C,$ff_fast,CFLAGS) CHECK_COMPILE_FLAG(C++,$ff_fast,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,$ff_fast,FFLAGS) # Linux elif test -f /proc/cpuinfo then # Specific processors proc_type=unknown ff_optim_type= if test `grep 'Pentium III (Coppermine)' /proc/cpuinfo|wc -l` -gt 0 then proc_type=pentium3 ff_optim_type=-P3 elif test `grep 'Intel(R) Pentium(R) III ' /proc/cpuinfo|wc -l` -gt 0 then proc_type=pentium3 ff_optim_type=-P3 elif test `grep 'Intel(R) Pentium(R) 4 ' /proc/cpuinfo|wc -l` -gt 0 then proc_type=pentium4 ff_optim_type=-P4 elif test `grep 'Intel(R) Xeon(TM) CPU' /proc/cpuinfo|wc -l` -gt 0 then proc_type=pentium4 ff_optim_type=-P4 elif test `grep 'AMD Athlon(tm) Processor' /proc/cpuinfo|wc -l` -gt 0 then proc_type=athlon ff_optim_type=-Athlon elif test `grep 'AMD Athlon(tm) XP' /proc/cpuinfo|wc -l` -gt 0 then proc_type=athlon-xp ff_optim_type=-AthlonXP fi if test "$proc_type" != unknown then CHECK_COMPILE_FLAG(C,-march=$proc_type,CFLAGS) CHECK_COMPILE_FLAG(C++,-march=$proc_type,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-march=$proc_type,FFLAGS) fi # If we did not find a processor type (this happens with # cygwin), try and select separate capabilities instead. 
if test "$proc_type" = unknown then if test `grep -e '^flags.*mmx' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-mmmx,CFLAGS) CHECK_COMPILE_FLAG(C++,-mmmx,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-mmmx,FFLAGS) fi if test `grep -e '^flags.*avx' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-mavx,CFLAGS) CHECK_COMPILE_FLAG(C++,-mavx,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-mavx,FFLAGS) else if test `grep -e '^flags.*sse4_2' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-msse4.2,CFLAGS) CHECK_COMPILE_FLAG(C++,-msse4.2,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-msse4.2,FFLAGS) else if test `grep -e '^flags.*sse2' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-msse2,CFLAGS) CHECK_COMPILE_FLAG(C++,-msse2,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-msse2,FFLAGS) else if test `grep -e '^flags.*sse ' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-msse,CFLAGS) CHECK_COMPILE_FLAG(C++,-msse,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-msse,FFLAGS) fi if test `grep -e '^flags.*3dnow' /proc/cpuinfo|wc -l` -gt 0 then CHECK_COMPILE_FLAG(C,-m3dnow,CFLAGS) CHECK_COMPILE_FLAG(C++,-m3dnow,CXXFLAGS) CHECK_COMPILE_FLAG(Fortran 77,-m3dnow,FFLAGS) fi fi fi fi fi fi fi # Defines a variable containing the optimization type, to be used in # binary archive names. It may be empty if only generic optimization # is used. AC_SUBST(OPTIM_TYPE,$ff_optim_type) AC_MSG_NOTICE([ CXXFLAGS = $CXXFLAGS ]) FreeFem-sources-4.9/etc/config/m4/ax_lib_gsl.m4000664 000000 000000 00000011743 14037356732 021265 0ustar00rootroot000000 000000 # Configure path for the GNU Scientific Library # Christopher R. 
Gabriel , April 2000

dnl First half of AX_PATH_GSL: locate gsl-config, read the compile and link
dnl flags and the installed version from it, then start the run-time check
dnl that compares the installed version with the requested minimum.
AC_DEFUN([AX_PATH_GSL],
[
AC_ARG_WITH(gsl-prefix,[ --with-gsl-prefix=PFX Prefix where GSL is installed (optional)],
            gsl_prefix="$withval", gsl_prefix="")
AC_ARG_WITH(gsl-exec-prefix,[ --with-gsl-exec-prefix=PFX Exec prefix where GSL is installed (optional)],
            gsl_exec_prefix="$withval", gsl_exec_prefix="")
AC_ARG_ENABLE(gsltest, [ --disable-gsltest Do not try to compile and run a test GSL program],
              , enable_gsltest=yes)

  # An explicit GSL_CONFIG from the environment wins over both prefixes.
  if test "x${GSL_CONFIG+set}" != xset ; then
     if test "x$gsl_prefix" != x ; then
         GSL_CONFIG="$gsl_prefix/bin/gsl-config"
     fi
     if test "x$gsl_exec_prefix" != x ; then
        GSL_CONFIG="$gsl_exec_prefix/bin/gsl-config"
     fi
  fi

  AC_PATH_PROG(GSL_CONFIG, gsl-config, no)
  min_gsl_version=ifelse([$1], ,0.2.5,$1)
  AC_MSG_CHECKING(for GSL - version >= $min_gsl_version)
  no_gsl=""
  if test "$GSL_CONFIG" = "no" ; then
    no_gsl=yes
  else
    GSL_CFLAGS=`$GSL_CONFIG --cflags`
    GSL_LIBS=`$GSL_CONFIG --libs`

    # Split the reported version into major, minor and micro, using 0 for any
    # missing field.
    gsl_major_version=`$GSL_CONFIG --version | \
           sed 's/^\([[0-9]]*\).*/\1/'`
    if test "x${gsl_major_version}" = "x" ; then
       gsl_major_version=0
    fi

    gsl_minor_version=`$GSL_CONFIG --version | \
           sed 's/^\([[0-9]]*\)\.\{0,1\}\([[0-9]]*\).*/\2/'`
    if test "x${gsl_minor_version}" = "x" ; then
       gsl_minor_version=0
    fi

    gsl_micro_version=`$GSL_CONFIG --version | \
           sed 's/^\([[0-9]]*\)\.\{0,1\}\([[0-9]]*\)\.\{0,1\}\([[0-9]]*\).*/\3/'`
    if test "x${gsl_micro_version}" = "x" ; then
       gsl_micro_version=0
    fi

    if test "x$enable_gsltest" = "xyes" ; then
      ac_save_CFLAGS="$CFLAGS"
      ac_save_LIBS="$LIBS"
      CFLAGS="$CFLAGS $GSL_CFLAGS"
      LIBS="$LIBS $GSL_LIBS"

      rm -f conf.gsltest
      AC_TRY_RUN([
/* stdio.h: printf and sscanf; stdlib.h: malloc, exit and system;
   string.h: strlen and strcpy.  The header names had been lost. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char* my_strdup (const char *str);

char*
my_strdup (const char *str)
{
  char *new_str;

  if (str)
    {
      new_str = (char *)malloc ((strlen (str) + 1) * sizeof(char));
      strcpy (new_str, str);
    }
  else
    new_str = NULL;

  return new_str;
}

int main (void)
{
  int major = 0, minor = 0, micro = 0;
  int n;
  char *tmp_version;

  system ("touch conf.gsltest");

  /* HP/UX 9 (%@#!)
writes to sscanf strings */
  tmp_version = my_strdup("$min_gsl_version");

  /* The third conversion stores into micro; the address-of expression had
     been corrupted into a single non-ASCII character. */
  n = sscanf(tmp_version, "%d.%d.%d", &major, &minor, &micro) ;

  if (n != 2 && n != 3) {
     printf("%s, bad version string\n", "$min_gsl_version");
     exit(1);
   }

   if (($gsl_major_version > major) ||
      (($gsl_major_version == major) && ($gsl_minor_version > minor)) ||
      (($gsl_major_version == major) && ($gsl_minor_version == minor) && ($gsl_micro_version >= micro)))
    {
      exit(0);
    }
  else
    {
      exit(1);
    }
}

],, no_gsl=yes,[echo $ac_n "cross compiling; assumed OK... $ac_c"])
       CFLAGS="$ac_save_CFLAGS"
       LIBS="$ac_save_LIBS"
     fi
  fi
  if test "x$no_gsl" = x ; then
     AC_MSG_RESULT(yes)
     ifelse([$2], , :, [$2])
  else
     AC_MSG_RESULT(no)
     if test "$GSL_CONFIG" = "no" ; then
       echo "*** The gsl-config script installed by GSL could not be found"
       echo "*** If GSL was installed in PREFIX, make sure PREFIX/bin is in"
       echo "*** your path, or set the GSL_CONFIG environment variable to the"
       echo "*** full path to gsl-config."
     else
       # conf.gsltest is created by the test program as soon as it starts, so
       # its absence means the program never ran.
       if test -f conf.gsltest ; then
        :
       else
          echo "*** Could not run GSL test program, checking why..."
          CFLAGS="$CFLAGS $GSL_CFLAGS"
          LIBS="$LIBS $GSL_LIBS"
          AC_TRY_LINK([
#include <stdio.h>
],      [ return 0; ],
        [ echo "*** The test program compiled, but did not run. This usually means"
          echo "*** that the run-time linker is not finding GSL or finding the wrong"
          echo "*** version of GSL. If it is not finding GSL, you'll need to set your"
          echo "*** LD_LIBRARY_PATH environment variable, or edit /etc/ld.so.conf to point"
          echo "*** to the installed location Also, make sure you have run ldconfig if that"
          echo "*** is required on your system"
          echo "***"
          echo "*** If you have an old version installed, it is best to remove it, although"
          echo "*** you may also be able to get things to work by modifying LD_LIBRARY_PATH"],
        [ echo "*** The test program failed to compile or link. See the file config.log for the"
          echo "*** exact error that occured. This usually means GSL was incorrectly installed"
          echo "*** or that you have moved GSL since it was installed.
In the latter case, you" echo "*** may want to edit the gsl-config script: $GSL_CONFIG" ]) CFLAGS="$ac_save_CFLAGS" LIBS="$ac_save_LIBS" fi fi # GSL_CFLAGS="" # GSL_LIBS="" ifelse([$3], , :, [$3]) fi AC_SUBST(GSL_CFLAGS) AC_SUBST(GSL_LIBS) rm -f conf.gsltest ]) AU_ALIAS([AM_PATH_GSL], [AX_PATH_GSL]) FreeFem-sources-4.9/etc/config/m4/ax_lib_hdf5.m4000664 000000 000000 00000025042 14037356732 021323 0ustar00rootroot000000 000000 # =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_lib_hdf5.html # =========================================================================== # # SYNOPSIS # # AX_LIB_HDF5([serial/parallel]) # # DESCRIPTION # # This macro provides tests of the availability of HDF5 library. # # The optional macro argument should be either 'serial' or 'parallel'. The # former only looks for serial HDF5 installations via h5cc. The latter # only looks for parallel HDF5 installations via h5pcc. If the optional # argument is omitted, serial installations will be preferred over # parallel ones. # # The macro adds a --with-hdf5 option accepting one of three values: # # no - do not check for the HDF5 library. # yes - do check for HDF5 library in standard locations. # path - complete path to the HDF5 helper script h5cc or h5pcc. # # If HDF5 is successfully found, this macro calls # # AC_SUBST(HDF5_VERSION) # AC_SUBST(HDF5_CC) # AC_SUBST(HDF5_CFLAGS) # AC_SUBST(HDF5_CPPFLAGS) # AC_SUBST(HDF5_LDFLAGS) # AC_SUBST(HDF5_LIBS) # AC_SUBST(HDF5_FC) # AC_SUBST(HDF5_FFLAGS) # AC_SUBST(HDF5_FLIBS) # AC_DEFINE(HAVE_HDF5) # # and sets with_hdf5="yes". Additionally, the macro sets # with_hdf5_fortran="yes" if a matching Fortran wrapper script is found. # Note that Autconf's Fortran support is not used to perform this check. # H5CC and H5FC will contain the appropriate serial or parallel HDF5 # wrapper script locations. 
# # If HDF5 is disabled or not found, this macros sets with_hdf5="no" and # with_hdf5_fortran="no". # # Your configuration script can test $with_hdf to take any further # actions. HDF5_{C,CPP,LD}FLAGS may be used when building with C or C++. # HDF5_F{FLAGS,LIBS} should be used when building Fortran applications. # # To use the macro, one would code one of the following in "configure.ac" # before AC_OUTPUT: # # 1) dnl Check for HDF5 support # AX_LIB_HDF5() # # 2) dnl Check for serial HDF5 support # AX_LIB_HDF5([serial]) # # 3) dnl Check for parallel HDF5 support # AX_LIB_HDF5([parallel]) # # One could test $with_hdf5 for the outcome or display it as follows # # echo "HDF5 support: $with_hdf5" # # You could also for example, override the default CC in "configure.ac" to # enforce compilation with the compiler that HDF5 uses: # # AX_LIB_HDF5([parallel]) # if test "$with_hdf5" = "yes"; then # CC="$HDF5_CC" # else # AC_MSG_ERROR([Unable to find HDF5, we need parallel HDF5.]) # fi # # LICENSE # # Copyright (c) 2009 Timothy Brown # Copyright (c) 2010 Rhys Ulerich # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 11 AC_DEFUN([AX_LIB_HDF5], [ AC_REQUIRE([AC_PROG_SED]) AC_REQUIRE([AC_PROG_AWK]) AC_REQUIRE([AC_PROG_GREP]) dnl Check first argument is one of the recognized values. dnl Fail eagerly if is incorrect as this simplifies case statements below. if test "m4_normalize(m4_default([$1],[]))" = "" ; then : # Recognized value elif test "m4_normalize(m4_default([$1],[]))" = "serial" ; then : # Recognized value elif test "m4_normalize(m4_default([$1],[]))" = "parallel"; then : # Recognized value else AC_MSG_ERROR([ Unrecognized value for AX[]_LIB_HDF5 within configure.ac. If supplied, argument 1 must be either 'serial' or 'parallel'. 
]) fi dnl Add a default --with-hdf5 configuration option. AC_ARG_WITH([hdf5], AS_HELP_STRING( [--with-hdf5=[yes/no/PATH]], m4_case(m4_normalize([$1]), [serial], [location of h5cc for serial HDF5 configuration], [parallel], [location of h5pcc for parallel HDF5 configuration], [location of h5cc or h5pcc for HDF5 configuration]) ), [if test "$withval" = "no"; then with_hdf5="no" elif test "$withval" = "yes"; then with_hdf5="yes" else with_hdf5="yes" H5CC="$withval" fi], [with_hdf5="yes"] ) dnl Set defaults to blank HDF5_CC="" HDF5_VERSION="" HDF5_CFLAGS="" HDF5_CPPFLAGS="" HDF5_LDFLAGS="" HDF5_LIBS="" HDF5_FC="" HDF5_FFLAGS="" HDF5_FLIBS="" dnl Try and find hdf5 compiler tools and options. if test "$with_hdf5" = "yes"; then if test -z "$H5CC"; then dnl Check to see if H5CC is in the path. AC_PATH_PROGS( [H5CC], m4_case(m4_normalize([$1]), [serial], [h5cc], [parallel], [h5pcc], [h5cc h5pcc]), []) else AC_MSG_CHECKING([Using provided HDF5 C wrapper]) AC_MSG_RESULT([$H5CC]) fi AC_MSG_CHECKING([for HDF5 libraries]) if test ! -f "$H5CC" || test ! -x "$H5CC"; then AC_MSG_RESULT([no]) AC_MSG_WARN(m4_case(m4_normalize([$1]), [serial], [ Unable to locate serial HDF5 compilation helper script 'h5cc'. Please specify --with-hdf5= as the full path to h5cc. HDF5 support is being disabled (equivalent to --with-hdf5=no). ], [parallel],[ Unable to locate parallel HDF5 compilation helper script 'h5pcc'. Please specify --with-hdf5= as the full path to h5pcc. HDF5 support is being disabled (equivalent to --with-hdf5=no). ], [ Unable to locate HDF5 compilation helper scripts 'h5cc' or 'h5pcc'. Please specify --with-hdf5= as the full path to h5cc or h5pcc. HDF5 support is being disabled (equivalent to --with-hdf5=no). 
])) with_hdf5="no" with_hdf5_fortran="no" else dnl Get the h5cc output HDF5_SHOW=$(eval $H5CC -show) dnl Get the actual compiler used HDF5_CC=$(eval $H5CC -show | $AWK '{print $[]1; exit}') if test "$HDF5_CC" = "ccache"; then HDF5_CC=$(eval $H5CC -show | $AWK '{print $[]2}') fi dnl h5cc provides both AM_ and non-AM_ options dnl depending on how it was compiled either one of dnl these are empty. Lets roll them both into one. dnl Look for "HDF5 Version: X.Y.Z" HDF5_VERSION=$(eval $H5CC -showconfig | $GREP 'HDF5 Version:' \ | $AWK '{print $[]3}') dnl A ideal situation would be where everything we needed was dnl in the AM_* variables. However most systems are not like this dnl and seem to have the values in the non-AM variables. dnl dnl We try the following to find the flags: dnl (1) Look for "NAME:" tags dnl (2) Look for "H5_NAME:" tags dnl (3) Look for "AM_NAME:" tags dnl HDF5_tmp_flags=$(eval $H5CC -showconfig \ | $GREP 'FLAGS\|Extra libraries:' \ | $AWK -F: '{printf("%s "), $[]2}' ) dnl Find the installation directory and append include/ HDF5_tmp_inst=$(eval $H5CC -showconfig \ | $GREP 'Installation point:' \ | $AWK '{print $[]NF}' ) dnl Add this to the CPPFLAGS HDF5_CPPFLAGS="-I${HDF5_tmp_inst}/include" dnl Now sort the flags out based upon their prefixes for arg in $HDF5_SHOW $HDF5_tmp_flags ; do case "$arg" in -I*) echo $HDF5_CPPFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ || HDF5_CPPFLAGS="$arg $HDF5_CPPFLAGS" ;; -L*) echo $HDF5_LDFLAGS | $GREP -e "$arg" 2>&1 >/dev/null \ || HDF5_LDFLAGS="$arg $HDF5_LDFLAGS" ;; -l*) echo $HDF5_LIBS | $GREP -e "$arg" 2>&1 >/dev/null \ || HDF5_LIBS="$arg $HDF5_LIBS" ;; esac done HDF5_LIBS="$HDF5_LIBS -lhdf5" AC_MSG_RESULT([yes (version $[HDF5_VERSION])]) dnl See if we can compile ax_lib_hdf5_save_CC=$CC ax_lib_hdf5_save_CPPFLAGS=$CPPFLAGS ax_lib_hdf5_save_LIBS=$LIBS ax_lib_hdf5_save_LDFLAGS=$LDFLAGS CC=$HDF5_CC CPPFLAGS=$HDF5_CPPFLAGS LIBS=$HDF5_LIBS LDFLAGS=$HDF5_LDFLAGS AC_CHECK_HEADER([hdf5.h], [ac_cv_hadf5_h=yes], 
[ac_cv_hadf5_h=no]) AC_CHECK_LIB([hdf5], [H5Fcreate], [ac_cv_libhdf5=yes], [ac_cv_libhdf5=no]) if test "$ac_cv_hadf5_h" = "no" && test "$ac_cv_libhdf5" = "no" ; then AC_MSG_WARN([Unable to compile HDF5 test program]) fi dnl Look for HDF5's high level library AC_HAVE_LIBRARY([hdf5_hl], [HDF5_LIBS="$HDF5_LIBS -lhdf5_hl"], [], []) CC=$ax_lib_hdf5_save_CC CPPFLAGS=$ax_lib_hdf5_save_CPPFLAGS LIBS=$ax_lib_hdf5_save_LIBS LDFLAGS=$ax_lib_hdf5_save_LDFLAGS AC_MSG_CHECKING([for matching HDF5 Fortran wrapper]) dnl Presume HDF5 Fortran wrapper is just a name variant from H5CC H5FC=$(eval echo -n $H5CC | $SED -n 's/cc$/fc/p') if test -x "$H5FC"; then AC_MSG_RESULT([$H5FC]) with_hdf5_fortran="yes" AC_SUBST([H5FC]) dnl Again, pry any remaining -Idir/-Ldir from compiler wrapper for arg in `$H5FC -show` do case "$arg" in #( -I*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ || HDF5_FFLAGS="$arg $HDF5_FFLAGS" ;;#( -L*) echo $HDF5_FFLAGS | $GREP -e "$arg" >/dev/null \ || HDF5_FFLAGS="$arg $HDF5_FFLAGS" dnl HDF5 installs .mod files in with libraries, dnl but some compilers need to find them with -I echo $HDF5_FFLAGS | $GREP -e "-I${arg#-L}" >/dev/null \ || HDF5_FFLAGS="-I${arg#-L} $HDF5_FFLAGS" ;; esac done dnl Make Fortran link line by inserting Fortran libraries for arg in $HDF5_LIBS do case "$arg" in #( -lhdf5_hl) HDF5_FLIBS="$HDF5_FLIBS -lhdf5hl_fortran $arg" ;; #( -lhdf5) HDF5_FLIBS="$HDF5_FLIBS -lhdf5_fortran $arg" ;; #( *) HDF5_FLIBS="$HDF5_FLIBS $arg" ;; esac done else AC_MSG_RESULT([no]) with_hdf5_fortran="no" fi AC_SUBST([HDF5_VERSION]) AC_SUBST([HDF5_CC]) AC_SUBST([HDF5_CFLAGS]) AC_SUBST([HDF5_CPPFLAGS]) AC_SUBST([HDF5_LDFLAGS]) AC_SUBST([HDF5_LIBS]) AC_SUBST([HDF5_FC]) AC_SUBST([HDF5_FFLAGS]) AC_SUBST([HDF5_FLIBS]) AC_DEFINE([HAVE_HDF5], [1], [Defined if you have HDF5 support]) fi fi ]) FreeFem-sources-4.9/etc/config/m4/ax_openmp.m4000664 000000 000000 00000010776 14037356732 021155 0ustar00rootroot000000 000000 # 
=========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_openmp.html # =========================================================================== # # SYNOPSIS # # AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro tries to find out how to compile programs that use OpenMP a # standard API and set of compiler directives for parallel programming # (see http://www-unix.mcs/) # # On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS # output variable to the flag (e.g. -omp) used both to compile *and* link # OpenMP programs in the current language. # # NOTE: You are assumed to not only compile your program with these flags, # but also link it with them as well. # # If you want to compile everything with OpenMP, you should set: # # CFLAGS="$CFLAGS $OPENMP_CFLAGS" # #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" # #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" # # (depending on the selected language). # # The user can override the default choice by setting the corresponding # environment variable (e.g. OPENMP_CFLAGS). # # ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is # found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is # not found. If ACTION-IF-FOUND is not specified, the default action will # define HAVE_OPENMP. # # LICENSE # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2015 John W. Peterson # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. 
#
#   You should have received a copy of the GNU General Public License along
#   with this program. If not, see <http://www.gnu.org/licenses/>.
#
#   As a special exception, the respective Autoconf Macro's copyright owner
#   gives unlimited permission to copy, distribute and modify the configure
#   scripts that are the output of Autoconf when processing the Macro. You
#   need not follow the terms of the GNU General Public License when using
#   or distributing such scripts, even though portions of the text of the
#   Macro appear in them. The GNU General Public License (GPL) does govern
#   all other use of the material that constitutes the Autoconf Macro.
#
#   This special exception to the GPL applies to versions of the Autoconf
#   Macro released by the Autoconf Archive. When you make and distribute a
#   modified version of the Autoconf Macro, you may extend this special
#   exception to the GPL to apply to your modified version as well.

#serial 11

dnl AX_OPENMP: probe for the compiler flag that enables OpenMP in the
dnl current Autoconf language.
dnl
dnl Each candidate flag is appended to the saved language FLAGS variable and a
dnl small OpenMP program is linked with it. The first candidate that links is
dnl stored in the per-language cache variable; the literal value "none" means
dnl the program linked without any extra flag, and "unknown" means nothing
dnl worked. A user-supplied OPENMP_*FLAGS value is tried before the built-in
dnl candidates. The language FLAGS variable is restored after the probe.
dnl
dnl When a flag, or "none", was found, ACTION-IF-FOUND runs, defaulting to
dnl defining HAVE_OPENMP, and OPENMP_*FLAGS is set unless the result is
dnl "none". Otherwise ACTION-IF-NOT-FOUND runs, defaulting to a no-op.
AC_DEFUN([AX_OPENMP], [
AC_PREREQ([2.69]) dnl for _AC_LANG_PREFIX

AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI),
#                -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none
ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none"
if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
  ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags"
fi
for ax_openmp_flag in $ax_openmp_flags; do
  case $ax_openmp_flag in
dnl Probe without an extra flag, but keep the flags the user already had.
dnl The saved variable is save + prefix + FLAGS, so the FLAGS suffix is
dnl required here; without it the assignment reads an unset variable.
    none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ;;
    *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;;
  esac
  AC_LINK_IFELSE([AC_LANG_SOURCE([[
@%:@include <omp.h>

static void
parallel_fill(int * data, int n)
{
  int i;
@%:@pragma omp parallel for
  for (i = 0; i < n; ++i)
    data[i] = i;
}

int
main()
{
  int arr[100000];
  omp_set_num_threads(2);
  parallel_fill(arr, 100000);
  return 0;
}
]])],[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break],[])
done
[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS
])
if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then
  m4_default([$2],:)
else
  if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then
    OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp
  fi
  m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])])
fi
])dnl AX_OPENMP
FreeFem-sources-4.9/etc/config/m4/regtests.m4000664 000000 000000 00000002674 14037356732 021025 0ustar00rootroot000000 000000 // Regression tests // ---------------- // The tests are checked against reference values by "make check" in // each examples subdirectory // "ref.edp" contains all reference values and may be rebuilt with // "make Ref" // $Id$ // The values tested here may not have a physical or mathematical // meaning. Their main property is to gather numerical values from the // whole domain, to be checked for consistency with previous runs. NoUseOfWait=true; int verbosityy=verbosity; dnl May write or read a reference file changequote([[,]]) define(REFFILE,"ref.edp") ifdef([[ASSERT]], include REFFILE;, ofstream ref(REFFILE);) dnl $1=file name dnl $2=reference value (if there is one) dnl $3=precision of reference value (if there is one) dnl or minimum absolute variation if $4 is defined dnl $4=maximum absolute variation (if defined) define(ONETEST, [[cout << "--------- file : $1.edp -----------------" << endl; verbosity=verbosityy; { dnl Place the dash first to avoid any confusion with things like "a-z" define([[TESTVAR]],TEST[[]]translit($1,-_,XX)) define([[REFVAR]],REF[[]]translit($1,-_,XX)) include "$1.edp"; ifelse($2,,, [[real TESTVAR=$2; ifdef([[ASSERT]], cout<<"$1 reference value = "<