\[ \newcommand\inv{^{-1}}\newcommand\invt{^{-t}} \newcommand\bbP{\mathbb{P}} \newcommand\bbR{\mathbb{R}} \newcommand\defined{ \mathrel{\lower 5pt \hbox{${\equiv\atop\mathrm{\scriptstyle D}}$}}} \] Back to Table of Contents

0 Index

\begin{theindex}

  • {\texttt{.bashrc}}, \hyperindexformat{\see{shell, startup files}}{394}

  • {\texttt{.profile}}, \hyperindexformat{\see{shell, startup files}}{394}

  • 2's complement, \hyperpage{154}

    \indexspace

  • $\alpha$, \hyperindexformat{\see{latency}}{236}

  • $\beta$, \hyperindexformat{\see{bandwidth}}{236}

  • $\gamma$, \hyperindexformat{\see{computation rate}}{236}

    \indexspace

  • active messages, \hyperpage{115}

  • acyclic graph, \hyperpage{359}

  • \acl {AMR} (AMR), \hyperpage{308}

  • address space, \hyperpage{81} \subitem shared, \hyperpage{81}

  • adjacency \subitem matrix, \hyperpage{323}

  • adjacency graph, \hyperpage{210}, \hyperpage{361}

  • adjacency matrix, \hyperpage{360}

  • \acl {AVX} (AVX), \hyperpage{79}, \hyperpage{137}, \hyperpage{328}

  • affinity, \hyperpage{94--95}

  • alignment \subitem gene, \hyperpage{335}

  • allgather, \hyperpage{238--239}, \hyperpage{243}, \hyperpage{247}

  • Alliant FX/8, \hyperpage{37}

  • allocation \subitem static, \hyperpage{516}

  • allreduce, \hyperpage{238}

  • AMD, \hyperpage{79}, \hyperpage{149} \subitem Barcelona, \hyperpage{31}, \hyperpage{34} \subitem Opteron, \hyperpage{35}, \hyperpage{46}, \hyperpage{51, 52}

  • Amdahl's law, \hyperpage{72--74}

  • AMR, \hyperindexformat{\see{Adaptive Mesh Refinement}}{536}

  • analytics \subitem graph, \hyperpage{315}

  • AOS, \hyperindexformat{\see{Array-Of-Structures}}{536}

  • API, \hyperindexformat{\see{Application Programmer Interface}}{536}

  • Apple \subitem iCloud, \hyperpage{145}

  • archive utility, \hyperpage{400}

  • arithmetic \subitem computer, \hyperindexformat{\see{floating point arithmetic}}{153} \subitem finite precision, \hyperpage{153} \subitem intensity, \hyperpage{39}

  • array processors, \hyperpage{78}, \hyperpage{133}

  • array syntax, \hyperpage{109}

  • \acl {AOS} (AOS), \hyperpage{117}

  • assembly \subitem inline, \hyperpage{33} \subitem language, \hyperpage{24}

  • assertion, \hyperindexformat{\textbf}{462}

  • assertions, \hyperpage{461--463}

  • associative, \hyperindexformat{\seealso{cache, associative}}{31}

  • associativity \subitem of mathematical operations, \hyperpage{164}

  • asynchronous communication, \hyperpage{119}

  • atomic operation, \hyperpage{37}, \hyperpage{91--93}

  • atomicity, \hyperindexformat{\see{atomic operation}}{91}

  • automaton, \hyperpage{370--371} \subitem linear bounded, \hyperpage{371}

  • autotuning, \hyperpage{56}

  • AVX, \hyperindexformat{\see{Advanced Vector Extensions}}{536}

  • axpy, \hyperpage{40}

    \indexspace

  • background process, \hyperpage{388}

  • backwards stability, \hyperpage{168}

  • banded matrix, \hyperpage{183} \subitem storage, \hyperpage{206--207}

  • bandwidth, \hyperpage{22}, \hyperpage{131}, \hyperpage{136}, \hyperpage{236} \subitem aggregate, \hyperpage{121} \subitem measure in GT/s, \hyperpage{134} \subitem of a matrix, \hyperindexformat{\see{halfbandwidth}}{206} \subitem of a matrix, \hyperpage{206}

  • bandwidth bound algorithm, \hyperpage{149}

  • bandwidth-bound, \hyperindexformat{\textbf}{41}, \hyperpage{262}

  • Barnes-Hut algorithm, \hyperpage{326--327}

  • barrier, \hyperpage{116}

  • base, \hyperpage{156}

  • bash, \hyperpage{377}

  • \acl {BLAS} (BLAS), \hyperpage{450}

  • BBN \subitem Butterfly, \hyperpage{126}

  • BEM, \hyperindexformat{\see{Boundary Element Method}}{536}

  • benchmarking, \hyperpage{28}, \hyperpage{40}

  • BFS, \hyperindexformat{\see{Breadth-First Search}}{536}

  • bidirectional exchange, \hyperpage{239}

  • big-endian, \hyperpage{430}, \hyperpage{438}

  • binary-coded-decimal, \hyperpage{157}

  • bisection bandwidth, \hyperpage{121}

  • bisection width, \hyperpage{121}

  • BitBucket, \hyperpage{418}

  • bitonic sequence, \hyperpage{312}

  • Bitonic sort, \hyperpage{309}

  • bitonic sort, \hyperpage{75}, \hyperindexformat{\textbf}{312--313} \subitem sequential complexity of, \hyperpage{313}

  • bits, \hyperpage{153}

  • Bjam, \hyperpage{402}

  • Black-Scholes model, \hyperpage{331}

  • BLAS, \hyperindexformat{\see{Basic Linear Algebra Subprograms}}{536} \subitem data format, \hyperpage{453}

  • blis, \hyperpage{454}

  • block Jacobi, \hyperpage{265}, \hyperpage{281}

  • block matrix, \hyperpage{185}, \hyperpage{253}, \hyperpage{273}

  • block tridiagonal, \hyperpage{185}, \hyperpage{216}, \hyperpage{276}

  • blocking communication, \hyperpage{99}, \hyperpage{106}

  • blocking for cache reuse, \hyperpage{49}

  • \acl {BEM} (BEM), \hyperpage{257}

  • \acl {BVP} (BVP), \hyperpage{180--187}

  • branch misprediction, \hyperpage{21}

  • branch penalty, \hyperpage{21}

  • breakpoint, \hyperpage{476}, \hyperindexformat{\textbf}{477--478}

  • Brent's theorem, \hyperindexformat{\textbf}{70}, \hyperpage{139}

  • broadcast, \hyperpage{102}, \hyperpage{236--237}

  • BSP, \hyperindexformat{\see{Bulk Synchronous Parallel}}{536}

  • bubble sort, \hyperpage{309}

  • bucket brigade algorithm, \hyperpage{237}

  • buffering, \hyperpage{241}

  • bug, \hyperpage{461}

  • \acl {BSP} (BSP), \hyperpage{116}

  • bus, \hyperpage{21} \subitem memory, \hyperpage{122} \subitem speed, \hyperpage{23} \subitem width, \hyperpage{23}

  • butterfly exchange, \hyperpage{126}

  • BVP, \hyperindexformat{\see{Boundary Value Problem}}{536}

  • by reference, \hyperpage{492}

  • bytes, \hyperpage{153}

    \indexspace

  • C \subitem array layout, \hyperpage{489--490} \subitem language standard, \hyperpage{169}

  • C++ \subitem language standard, \hyperpage{169} \subitem linking to, \hyperpage{488--489} \subitem name mangling, \hyperpage{488}

  • cache, \hyperpage{20, 21}, \hyperindexformat{\textbf}{25--32} \subitem associative, \hyperpage{31} \subitem block, \hyperpage{28} \subitem blocking, \hyperpage{49}, \hyperpage{54}, \hyperpage{204} \subitem coherence, \hyperpage{27}, \hyperpage{36--39}, \hyperpage{46}, \hyperpage{83}, \hyperpage{122}, \hyperpage{287} \subitem hierarchy, \hyperpage{26}, \hyperpage{57} \subitem hit, \hyperpage{26} \subitem hot, \hyperpage{516} \subitem line, \hyperpage{28--29}, \hyperpage{43} \subitem mapping, \hyperpage{29} \subitem miss, \hyperpage{26}, \hyperpage{32} \subitem miss, capacity, \hyperpage{27} \subitem miss, compulsory, \hyperpage{27} \subitem miss, conflict, \hyperpage{27} \subitem miss, invalidation, \hyperpage{27} \subitem oblivious programming, \hyperpage{57--58} \subitem replacement policies, \hyperpage{28} \subitem shared, \hyperpage{82} \subitem warming, \hyperpage{516}

  • cacheline \subitem boundary alignment, \hyperpage{29}, \hyperpage{516} \subitem invalidation, \hyperpage{37}

  • CAF, \hyperindexformat{\see{Co-array Fortran}}{536}

  • Cannon's algorithm, \hyperindexformat{\see{matrix-matrix product, Cannon's algorithm}}{251}

  • capability computing, \hyperpage{146}

  • capacity computing, \hyperpage{146}

  • cartesian mesh, \hyperpage{122}

  • cartesian product, \hyperpage{115}

  • \texttt {cat}, \hyperpage{378}

  • Cayley-Hamilton theorem, \hyperpage{226}

  • ccNUMA, \hyperpage{83}

  • CCS, \hyperindexformat{\see{Compressed Column Storage}}{536}

  • \texttt {cd}, \hyperpage{380}

  • CDC \subitem Cyber205, \hyperpage{80}

  • Cell processor, \hyperpage{133}

  • CG, \hyperindexformat{\see{Conjugate Gradients}}{536}

  • CGS, \hyperindexformat{\see{Classical Gram-Schmidt}}{536}

  • channel rate, \hyperpage{131}

  • channel width, \hyperpage{131}

  • Chapel, \hyperpage{110}, \hyperpage{113}

  • characteristic polynomial, \hyperpage{226}

  • Charm++, \hyperpage{115}

  • checkerboard ordering, \hyperpage{185}

  • Cheeger's \subitem constant, \hyperpage{365} \subitem inequality, \hyperpage{365}

  • chess, \hyperpage{86}

  • \texttt {chgrp}, \hyperpage{383}

  • \texttt {chmod}, \hyperpage{382}

  • Cholesky factorization, \hyperpage{199}, \hyperpage{286}

  • Chomsky hierarchy, \hyperpage{371}

  • Cilk Plus, \hyperpage{96}

  • cleanup code, \hyperpage{48}

  • clique, \hyperpage{321}, \hyperpage{361}

  • clock speed, \hyperpage{19}

  • Clos network, \hyperpage{129}

  • cloud computing, \hyperpage{143--146} \subitem service models, \hyperpage{145}

  • cluster \subitem node, \hyperindexformat{\see{node}}{537}

  • clustering, \hyperindexformat{\textbf}{338} \subitem $k$-means, \hyperpage{338}

  • clusters, \hyperpage{79, 80} \subitem Beowulf, \hyperpage{80}

  • \acl {CAF} (CAF), \hyperpage{112}

  • co-processor, \hyperindexformat{\textbf}{133--134}, \hyperpage{150}

  • coherence \subitem cache, \hyperpage{109}

  • collective communication, \hyperpage{102}, \hyperpage{105}, \hyperpage{236--239}

  • collective operation, \hyperpage{103}, \hyperpage{131}, \hyperpage{236--239} \subitem long vector, \hyperpage{237} \subitem short vector, \hyperpage{237}

  • colour number, \hyperpage{276}, \hyperpage{360}

  • colouring, \hyperpage{275}

  • column-major, \hyperpage{61}, \hyperindexformat{\textbf}{453}, \hyperpage{489}

  • communication \subitem blocking, \hyperpage{105} \subitem overhead, \hyperpage{72} \subitem overlapping computation with, \hyperpage{118}, \hyperpage{284}

  • compare-and-swap, \hyperpage{309}, \hyperpage{313}

  • compiler, \hyperpage{56}, \hyperpage{85}, \hyperpage{111} \subitem directives, \hyperpage{96}, \hyperpage{112} \subitem flags, \hyperpage{170} \subitem optimization, \hyperpage{24} \subsubitem vs round-off, \hyperpage{169--170} \subitem optimization levels, \hyperpage{160} \subitem parallelizing, \hyperpage{87}

  • complexity, \hyperpage{352} \subitem computational, \hyperpage{203--204}, \hyperpage{352} \subitem of iterative methods, \hyperpage{234} \subitem space, \hyperpage{213--215}, \hyperpage{352}

  • \acl {CCS} (CCS), \hyperpage{209}

  • \acl {CRS} (CRS), \hyperpage{208--210}, \hyperpage{257}

  • Compressed Row Storage (CRS) \subitem performance of the matrix-vector product, \hyperpage{263}

  • computation rate, \hyperpage{131}, \hyperpage{236}

  • computational finance, \hyperpage{146}

  • compute-bound, \hyperindexformat{\textbf}{41}, \hyperpage{149}

  • concurrency, \hyperpage{95}

  • condition number, \hyperpage{168}, \hyperpage{346}

  • conditionally stable, \hyperpage{177}

  • congestion, \hyperpage{121}

  • \acl {CG} (CG), \hyperpage{233}, \hyperpage{285}

  • Connection Machine, \hyperpage{78}, \hyperpage{149}

  • contention, \hyperpage{121}

  • context, \hyperindexformat{\textbf}{91} \subitem switch, \hyperindexformat{\textbf}{91}, \hyperpage{132}, \hyperpage{136}

  • control flow, \hyperpage{12}, \hyperindexformat{\textbf}{20}, \hyperpage{77}, \hyperpage{287}

  • conveniently parallel, \hyperpage{86}

  • COO, \hyperindexformat{\see{Coordinate Storage}}{536}

  • coordinate storage, \hyperpage{209}

  • coordination language, \hyperpage{114}

  • core, \hyperpage{36}, \hyperindexformat{\textbf}{14--36} \subitem vs processor, \hyperpage{36}

  • core dump, \hyperpage{469}

  • correct rounding, \hyperpage{162}

  • Courant-Friedrichs-Lewy condition, \hyperpage{180}

  • CPU-bound, \hyperindexformat{\see{compute-bound}}{537}

  • cpu-bound, \hyperpage{14}

  • Cramer's rule, \hyperpage{193}

  • Crank-Nicolson method, \hyperpage{191}

  • Cray, \hyperpage{253} \subitem Cray-1, \hyperpage{80} \subitem Cray-2, \hyperpage{80} \subitem T3E, \hyperpage{107} \subitem X/MP, \hyperpage{80} \subitem XE6, \hyperpage{83} \subitem XMT, \hyperpage{91}, \hyperpage{133} \subitem Y/MP, \hyperpage{80}

  • Cray Inc., \hyperpage{133}

  • Cray Research, \hyperpage{133}

  • critical path, \hyperpage{70}, \hyperpage{287}

  • critical section, \hyperpage{92}

  • crossbar, \hyperpage{82}, \hyperpage{126}

  • CRS, \hyperindexformat{\see{Compressed Row Storage}}{536}

  • csh, \hyperpage{377}

  • CUDA, \hyperpage{78}, \hyperpage{134, 135}, \hyperpage{150}

  • \texttt {cut}, \hyperpage{385}

  • Cuthill-McKee ordering, \hyperpage{216--217}, \hyperpage{279}

  • cycle (in graph), \hyperpage{359}

  • cyclic distribution, \hyperpage{251}

  • Cygwin, \hyperpage{377}

    \indexspace

  • DAG, \hyperindexformat{\see{Directed Acyclic Graph}}{536}

  • data decomposition, \hyperpage{240}

  • data flow, \hyperpage{12}, \hyperindexformat{\textbf}{20}, \hyperpage{77}, \hyperpage{287}

  • data parallel, \hyperpage{132}, \hyperpage{135}, \hyperpage{137}

  • data parallelism, \hyperpage{67}, \hyperpage{134}

  • data race, \hyperindexformat{\see{race condition}}{92}

  • data reuse, \hyperpage{21}, \hyperpage{39}

  • ddd, \hyperpage{469}

  • DDT, \hyperpage{469}, \hyperindexformat{\textbf}{479}

  • deadlock, \hyperpage{96}, \hyperpage{101}, \hyperindexformat{\textbf}{106}, \hyperpage{478}

  • debug flag, \hyperpage{470}

  • debugger, \hyperpage{469}

  • debugging, \hyperpage{469--480} \subitem in parallel, \hyperpage{478--480}

  • DEC \subitem Alpha, \hyperpage{17}, \hyperpage{80}

  • defensive programming, \hyperpage{461}

  • degree, \hyperpage{120}, \hyperpage{358}

  • Delaunay mesh refinement, \hyperpage{85}

  • Dennard scaling, \hyperpage{61}

  • denormal, \hyperindexformat{\see{floating point numbers, unnormalized}}{158}

  • Dense linear algebra, \hyperpage{239--252}

  • dependency, \hyperpage{19}

  • dgemm, \hyperindexformat{\texttt}{59}

  • diagonal dominance, \hyperpage{202}

  • diagonal storage, \hyperpage{206--208}, \hyperpage{254}

  • diameter, \hyperpage{120}, \hyperpage{359}

  • die, \hyperpage{14}, \hyperpage{26}

  • difference stencil, \hyperindexformat{\textbf}{186}, \hyperpage{340}

  • differential operator, \hyperindexformat{\textbf}{181}

  • Dijkstra's shortest path algorithm, \hyperpage{319}

  • direct mapping, \hyperpage{30}

  • direct methods \subitem for linear systems, \hyperpage{262}

  • \acl {DAG} (DAG), \hyperpage{287}, \hyperpage{359}

  • directives, \hyperindexformat{\see{compiler, directives}}{537}

  • directories, \hyperpage{377}

  • Dirichlet boundary condition, \hyperpage{180}, \hyperpage{182}

  • discretization, \hyperpage{176}

  • distributed computing, \hyperpage{143--146}

  • divide-and-conquer, \hyperpage{57}

  • domain decomposition, \hyperpage{268}

  • double precision, \hyperpage{136}

  • DRAM, \hyperindexformat{\see{Dynamic Random-Access Memory}}{536}

  • DSP, \hyperindexformat{\see{Digital Signal Processing}}{536}

  • dynamic programming, \hyperpage{317}, \hyperpage{335}

  • \acl {DRAM} (DRAM), \hyperpage{25}

    \indexspace

  • Earth Simulator, \hyperpage{149}

  • ebook, \hyperpage{145}

  • edges, \hyperpage{358}

  • efficiency, \hyperindexformat{\textbf}{68}

  • eigenvector \subitem dominant, \hyperpage{349}

  • Eispack, \hyperpage{451}

  • Elliptic PDEs, \hyperpage{181}

  • embarrassingly parallel, \hyperpage{68}, \hyperpage{86}

  • embedding, \hyperpage{122}

  • environment variable, \hyperpage{386}, \hyperindexformat{\textbf}{389--391}

  • Eratosthenes \subitem sieve of, \hyperpage{313}

  • escape, \hyperpage{384}, \hyperpage{395}

  • ETA-10, \hyperpage{80}

  • Euler \subitem explicit, \hyperpage{175--177} \subitem implicit, \hyperpage{177--179}

  • evicted, \hyperpage{38}

  • excess, \hyperpage{156}

  • executable, \hyperpage{377}

  • exponent, \hyperpage{156}

  • export, \hyperindexformat{\texttt}{390}

  • extended precision, \hyperpage{163}, \hyperpage{170}, \hyperindexformat{\textbf}{171}

    \indexspace

  • Facebook, \hyperpage{315}

  • factorization, \hyperindexformat{\see{LU factorization}}{197}

  • false sharing, \hyperindexformat{\underline}{39}, \hyperpage{99}, \hyperpage{109}

  • \acl {FFT} (FFT), \hyperpage{51}

  • \acl {FMM} (FMM), \hyperpage{327}

  • fast solvers, \hyperpage{223}

  • fat tree, \hyperpage{126--129} \subitem bisection width of a, \hyperpage{128} \subitem clusters based on, \hyperpage{129}

  • fault tolerance, \hyperpage{148}

  • FD, \hyperindexformat{\see{Finite Difference}}{536}

  • FDM, \hyperindexformat{\see{Finite Difference Method}}{536}

  • features, \hyperpage{339}

  • FEM, \hyperindexformat{\see{Finite Element Method}}{536}

  • fenv.h, \hyperindexformat{\texttt}{161}

  • FFT, \hyperindexformat{\see{Fast Fourier Transform}}{536}

  • Fiedler vector, \hyperpage{338}, \hyperindexformat{\textbf}{365}

  • Fiedler's theorem, \hyperpage{365}

  • field scaling, \hyperpage{61}

  • \acl {FPGA} (FPGA), \hyperpage{146}

  • files, \hyperpage{377}

  • fill locations, \hyperpage{213}

  • fill-in, \hyperpage{212--217}, \hyperpage{276} \subitem estimates, \hyperpage{213--215} \subitem reduction, \hyperpage{215--217}

  • \texttt {finger}, \hyperpage{395}

  • finite difference, \hyperpage{176}

  • \acl {FDM} (FDM), \hyperpage{182}

  • \acl {FEM} (FEM), \hyperpage{187}, \hyperpage{261}

  • \acl {FSA} (FSA), \hyperpage{38}, \hyperpage{370--371}

  • finite volume method, \hyperpage{187}

  • first-touch policy, \hyperpage{95}

  • floating point \subitem unit, \hyperindexformat{\textbf}{15--16}

  • floating point arithmetic \subitem associativity of, \hyperpage{168}, \hyperpage{372}

  • floating point arithmetic, \hyperindexformat{\textbf}{153--172}

  • floating point numbers \subitem normalized, \hyperpage{158} \subitem representation, \hyperpage{156--157} \subitem unnormalized, \hyperpage{158}

  • floating point pipeline, \hyperpage{47}

  • flops, \hyperpage{19}

  • Floyd-Warshall algorithm, \hyperpage{317--319} \subitem parallelization of the, \hyperpage{324}

  • flushed, \hyperpage{28}

  • flushing \subitem pipeline, \hyperpage{93}

  • Flynn's taxonomy, \hyperpage{77}

  • FMA, \hyperindexformat{\see{Fused Multiply-Add}}{536}

  • FMM, \hyperindexformat{\see{Fast Multipole Method}}{536}

  • FOM, \hyperindexformat{\see{Full Orthogonalization Method}}{536}

  • foreground process, \hyperpage{388}

  • fork-join, \hyperpage{88, 89}

  • Fortran, \hyperpage{453} \subitem array layout, \hyperpage{489--490} \subitem declarations in, \hyperpage{170} \subitem ISO C bindings, \hyperpage{487} \subitem language standard, \hyperpage{169}

  • Fortress, \hyperpage{113--114}

  • Fourier Transform, \hyperpage{367--369}

  • FPGA, \hyperindexformat{\see{Field-Programmable Gate Array}}{536}

  • FPU, \hyperindexformat{\see{Floating Point Unit}}{536}

  • \acl {FSB} (FSB), \hyperpage{21}

  • FSA, \hyperindexformat{\see{Finite State Automaton}}{536}

  • FSB, \hyperindexformat{\see{Front-Side Bus}}{536}

  • \acl {FOM} (FOM), \hyperpage{229}

  • fully associative, \hyperpage{31}

  • fully connected, \hyperpage{120}

  • functional parallelism, \hyperpage{67}

  • functional programming, \hyperpage{147--148}

  • \acl {FMA} (FMA), \hyperpage{15}, \hyperpage{163}, \hyperpage{171}

    \indexspace

  • gather, \hyperpage{102}, \hyperpage{168}, \hyperpage{238}

  • Gauss-Seidel, \hyperpage{221}

  • GCC, \hyperpage{97}

  • gdb, \hyperpage{469--478}

  • \acl {GPGPU} (GPGPU), \hyperpage{134}

  • \acl {GMRES} (GMRES), \hyperpage{234}

  • genome alignment, \hyperpage{337}

  • ghost region, \hyperpage{114}, \hyperpage{255}, \hyperpage{282, 283}, \hyperpage{334}

  • Global Arrays, \hyperpage{115}

  • GMRES, \hyperindexformat{\see{Generalized Minimum Residual}}{536}

  • GNU, \hyperpage{469}, \hyperpage{483} \subitem gdb, \hyperindexformat{\see{gdb}}{469} \subitem gnuplot, \hyperindexformat{\see{gnuplot}}{458} \subitem Make, \hyperindexformat{\see{Make}}{402}

  • gnuplot, \hyperpage{458}

  • Goodyear \subitem MPP, \hyperpage{78}

  • Google, \hyperpage{117}, \hyperpage{257}, \hyperpage{320} \subitem code, \hyperpage{418} \subitem Google Docs, \hyperpage{144, 145}

  • Goto \subitem Kazushige, \hyperpage{59}

  • GPGPU, \hyperindexformat{\see{General Purpose Graphics Processing Unit}}{536}

  • gprof, \hyperpage{483}

  • GPU, \hyperindexformat{\see{Graphics Processing Unit}}{536}

  • Gram-Schmidt, \hyperpage{229}, \hyperpage{346--348} \subitem modified, \hyperpage{229}, \hyperpage{347}

  • granularity, \hyperpage{87}, \hyperpage{91}

  • Grape computer, \hyperpage{133}, \hyperpage{327}

  • graph \subitem adjacency, \hyperpage{216}, \hyperindexformat{\see{adjacency graph}}{360} \subitem colouring, \hyperpage{265}, \hyperpage{276}, \hyperpage{360} \subitem directed, \hyperpage{358} \subitem Laplacian, \hyperpage{140}, \hyperpage{364} \subitem random, \hyperpage{320} \subitem social, \hyperpage{315} \subitem theory, \hyperpage{358--366} \subsubitem of parallel computers, \hyperpage{120} \subitem undirected, \hyperpage{120}, \hyperpage{211}, \hyperpage{358}

  • graphics, \hyperpage{340--341}

  • \acl {GPU} (GPU), \hyperpage{80}, \hyperpage{134--137}, \hyperpage{149}, \hyperpage{161}, \hyperpage{258}, \hyperpage{278}

  • Gray code, \hyperpage{125}

  • \texttt {grep}, \hyperpage{383}

  • grid (CUDA), \hyperpage{135}

  • grid computing, \hyperpage{143}

  • \texttt {groups}, \hyperpage{383}

  • GS, \hyperindexformat{\see{Gram-Schmidt}}{536}

  • guard digit, \hyperindexformat{\textbf}{162}, \hyperpage{164}

  • Gustafson's law, \hyperpage{73}

    \indexspace

  • Hadoop, \hyperpage{147}

  • \acl {HDFS} (HDFS), \hyperpage{148}

  • halfbandwidth, \hyperpage{214} \subitem left, \hyperpage{207} \subitem right, \hyperpage{207}

  • halo, \hyperindexformat{\see{ghost region}}{537}

  • handshake protocol, \hyperpage{109}

  • hardware counters, \hyperpage{483}

  • hardware prefetch, \hyperpage{33}

  • Harwell-Boeing matrix format, \hyperpage{209}

  • HDFS, \hyperindexformat{\see{Hadoop File System}}{536}

  • heap, \hyperpage{88}

  • heat equation, \hyperpage{180}, \hyperpage{187}

  • Hessenberg matrix, \hyperpage{228}

  • heterogeneous computing, \hyperpage{150--151}

  • hg, \hyperindexformat{\see{mercurial}}{425}

  • \acl {HPF} (HPF), \hyperpage{109}, \hyperpage{112}

  • \acl {HPC} (HPC), \hyperpage{114}, \hyperpage{143}

  • HITS, \hyperpage{322}

  • Horner's rule, \hyperpage{281}, \hyperpage{324}

  • host process, \hyperpage{133}

  • host processor, \hyperpage{150}

  • Householder reflectors, \hyperpage{351} \subitem in LU factorization, \hyperpage{198}

  • HPC, \hyperindexformat{\see{High-Performance Computing}}{536}

  • HPF, \hyperindexformat{\see{High Performance Fortran}}{536}

  • hybrid computing, \hyperpage{108--109}

  • hyper-threading, \hyperpage{89}

  • Hyperbolic PDEs, \hyperpage{180}

  • hypercube, \hyperpage{124}

  • hyperthreading, \hyperpage{35}, \hyperpage{91}, \hyperpage{96}

  • hypervisor, \hyperpage{83}

    \indexspace

  • I/O subsystem, \hyperpage{20}

  • IBM, \hyperpage{114}, \hyperpage{120}, \hyperpage{156--158}, \hyperpage{438} \subitem BlueGene, \hyperpage{83}, \hyperpage{130}, \hyperpage{149} \subitem BlueGene Q, \hyperpage{33} \subitem Power 5, \hyperpage{17} \subitem Power series, \hyperpage{80} \subitem Power6, \hyperpage{157} \subitem Roadrunner, \hyperpage{63}, \hyperpage{133}

  • IBVP, \hyperindexformat{\see{Initial Boundary Value Problem}}{536}

  • ICL \subitem DAP, \hyperpage{78}, \hyperpage{133}

  • idle, \hyperpage{138}

  • idle time, \hyperpage{106}, \hyperpage{484}

  • ILP, \hyperindexformat{\see{Instruction Level Parallelism}}{536}

  • ILU, \hyperindexformat{\see{Incomplete LU}}{536}

  • imbalance \subitem load, \hyperpage{116}

  • incidence matrix, \hyperpage{322}

  • \acl {ILU} (ILU), \hyperpage{223}

  • Incomplete LU (ILU) \subitem parallel, \hyperpage{265--266}

  • independent sets, \hyperpage{360}

  • indirect addressing, \hyperpage{209}

  • Inf, \hyperindexformat{\texttt}{157}

  • \acl {IBVP} (IBVP), \hyperpage{187--191}

  • \aclp {IBVP} (IBVP), \hyperpage{180}

  • \acl {IVP} (IVP), \hyperpage{173--179}

  • inner products, \hyperpage{261}

  • input redirection, \hyperindexformat{\see{redirection}}{386}

  • instruction \subitem handling \subsubitem in-order, \hyperpage{15} \subsubitem out-of-order, \hyperpage{15} \subitem issue, \hyperpage{79} \subitem pipeline, \hyperpage{21}

  • \acl {ILP} (ILP), \hyperpage{14}, \hyperpage{20}, \hyperpage{62}, \hyperpage{85}

  • instrumentation, \hyperpage{484} \subitem dynamic, \hyperpage{484}

  • Intel, \hyperpage{20}, \hyperpage{35, 36}, \hyperpage{79}, \hyperpage{149} \subitem 80287 co-processor, \hyperpage{171} \subitem Haswell, \hyperpage{17} \subitem i860, \hyperpage{80}, \hyperpage{133} \subitem Itanium, \hyperpage{24}, \hyperpage{149} \subitem MIC, \hyperpage{151} \subitem Paragon, \hyperpage{133} \subitem Sandy Bridge, \hyperpage{14}, \hyperpage{17}, \hyperpage{26} \subitem Woodcrest, \hyperpage{31} \subitem Xeon Phi, \hyperpage{15}, \hyperpage{78}, \hyperpage{91}, \hyperpage{96}, \hyperpage{109}, \hyperpage{134}, \hyperpage{137} \subsubitem bandwidth, \hyperpage{134}

  • inter-node communication, \hyperpage{108}

  • interior nodes, \hyperpage{360}

  • interoperability \subitem C to Fortran, \hyperpage{486--492}

  • interrupt, \hyperpage{158}

  • intra-node communication, \hyperpage{108}

  • inverse iteration, \hyperpage{338}

  • irreducible, \hyperpage{362}, \hyperindexformat{\see{reducible}}{537}

  • Ising model, \hyperpage{333--334}

  • ispc, \hyperpage{118}

  • iterative methods, \hyperpage{217--234} \subitem floating point performance of, \hyperpage{262}

  • iterative refinement, \hyperpage{222}

  • IVP, \hyperindexformat{\see{Initial Value Problem}}{536}

    \indexspace

  • Jacobi method, \hyperpage{220}

  • jumpshot, \hyperpage{485}

    \indexspace

  • kernel \subitem CUDA, \hyperpage{135}, \hyperpage{341}

  • Krylov methods, \hyperpage{217--234}

  • ksh, \hyperpage{377}

    \indexspace

  • LAN, \hyperindexformat{\see{Local Area Network}}{536}

  • language \subitem context-free, \hyperpage{371} \subitem context-sensitive, \hyperpage{371} \subitem recursively enumerable, \hyperpage{371} \subitem regular, \hyperpage{371}

  • language interoperability, \hyperindexformat{\see{interoperability}}{486}

  • Lapack, \hyperpage{149}, \hyperpage{451} \subitem routines, \hyperpage{452--453}

  • Laplace equation, \hyperpage{353}, \hyperpage{365}

  • latency, \hyperpage{22}, \hyperpage{130}, \hyperpage{236} \subitem hiding, \hyperpage{23}, \hyperpage{39}

  • latency hiding, \hyperpage{118}

  • {\LaTeX}, \hyperindexformat{\seealso{\TeX}}{493}, \hyperpage{493--506}

  • \aclp {LBM} (LBM), \hyperpage{161}

  • LBM, \hyperindexformat{\see{Lattice Boltzmann Method}}{536}

  • leaf nodes, \hyperpage{360}

  • level sets, \hyperpage{216}

  • lexicographic ordering, \hyperpage{184, 185}, \hyperpage{267}

  • libraries \subitem creating and using, \hyperpage{400--401} \subitem numerical, \hyperpage{440--457}, \hyperindexformat{\see{Lapack}}{457}, \hyperindexformat{\see{PETSc}}{457}

  • Linda, \hyperpage{114}

  • linear algebra, \hyperpage{345--351} \subitem software \subsubitem dense, \hyperpage{450--457} \subsubitem sparse, \hyperpage{440--450}

  • linear array (of processors), \hyperpage{122}

  • linker, \hyperpage{400}

  • LINPACK \subitem benchmark, \hyperpage{40}

  • Linpack, \hyperpage{149}, \hyperpage{451} \subitem benchmark, \hyperpage{46}, \hyperpage{149}, \hyperpage{451}

  • Linux \subitem distributions, \hyperpage{377} \subitem kernel, \hyperpage{83}

  • Lisp, \hyperpage{148}

  • Little's law, \hyperpage{33}

  • little-endian, \hyperpage{430}, \hyperpage{438}

  • load \subitem balancing, \hyperpage{138--142} \subitem balancing, dynamic, \hyperpage{139} \subitem balancing, static, \hyperpage{139} \subitem rebalancing, \hyperpage{140, 141}, \hyperpage{328} \subitem redistributing, \hyperpage{140, 141} \subitem unbalance, \hyperpage{68}, \hyperpage{80}, \hyperpage{87}, \hyperpage{98}, \hyperpage{138}, \hyperpage{484}

  • \acl {LAN} (LAN), \hyperpage{143}

  • local solve, \hyperpage{265}

  • locality, \hyperpage{14} \subitem core, \hyperpage{46} \subitem in parallel computing, \hyperpage{131--132} \subitem spatial, \hyperpage{41}, \hyperpage{43} \subitem temporal, \hyperpage{41}

  • lock, \hyperpage{90}, \hyperindexformat{\textbf}{92}

  • loop unrolling, \hyperpage{47}, \hyperpage{59}, \hyperpage{169}

  • LRU, \hyperindexformat{\see{Least Recently Used}}{536}

  • \texttt {ls}, \hyperpage{378}

  • LU factorization, \hyperindexformat{\textbf}{197--205} \subitem computation in parallel, \hyperpage{250--251} \subitem solution in parallel, \hyperpage{249--250}

    \indexspace

  • M-matrix, \hyperpage{183}, \hyperpage{224}

  • machine epsilon, \hyperindexformat{\see{machine precision}}{159}

  • machine precision, \hyperpage{159}

  • Make, \hyperpage{402--415} \subitem {and \LaTeX}, \hyperpage{414--415} \subitem automatic variables, \hyperpage{407} \subitem debugging, \hyperpage{413} \subitem template rules, \hyperpage{408}

  • malloc, \hyperindexformat{\texttt}{88}

  • Mandelbrot set, \hyperpage{86}, \hyperpage{139}

  • mantissa, \hyperpage{156--160}, \hyperpage{163, 164}

  • manycore, \hyperpage{135}

  • MapReduce, \hyperpage{147--148}

  • Markov \subitem chains, \hyperpage{363} \subitem matrix, \hyperpage{364}

  • MasPar, \hyperpage{78}

  • master-worker paradigm, \hyperpage{85}

  • matrix \subitem adjacency, \hyperpage{316}, \hyperindexformat{\see{adjacency matrix}}{360} \subitem nonnegative, \hyperpage{322} \subitem norms, \hyperpage{346} \subsubitem associated, \hyperpage{346} \subitem permutation, \hyperpage{361} \subitem reducible, \hyperpage{323} \subitem stochastic, \hyperpage{323} \subitem storage, dense, \hyperpage{250--251} \subitem storage, sparse, \hyperpage{206--210} \subitem strictly upper triangular, \hyperpage{281} \subitem structurally symmetric, \hyperpage{140}, \hyperindexformat{\textbf}{211}, \hyperpage{259} \subitem tessellation, \hyperpage{253} \subitem transposition, \hyperpage{57} \subitem unit upper triangular, \hyperpage{281}

  • Matrix Market, \hyperpage{209} \subitem matrix format, \hyperpage{209}

  • matrix-matrix product, \hyperpage{59}, \hyperpage{251} \subitem Cannon's algorithm for, \hyperpage{252} \subitem Goto implementation, \hyperindexformat{\textbf}{59--61}, \hyperpage{454} \subitem reuse in, \hyperpage{40}

  • matrix-vector product, \hyperpage{58} \subitem sparse, \hyperindexformat{\see{sparse, matrix-vector product}}{253}

  • memory \subitem access pattern, \hyperpage{27} \subitem banks, \hyperpage{34} \subitem distributed, \hyperpage{81} \subitem distributed shared, \hyperpage{83} \subitem hierarchy, \hyperpage{21} \subitem leak, \hyperpage{444}, \hyperpage{466} \subitem model, \hyperpage{93} \subitem pages, \hyperpage{35} \subitem shared, \hyperpage{81} \subitem stall, \hyperpage{22} \subitem violations, \hyperpage{465} \subitem virtual shared, \hyperpage{83} \subitem wall, \hyperpage{21}, \hyperpage{514}

  • memory model \subitem relaxed, \hyperpage{93}

  • memory-bound, \hyperpage{14}

  • mercurial, \hyperpage{425--429}

  • \acl {MPI} (MPI), \hyperpage{102--108}

  • Metropolis algorithm, \hyperpage{334}

  • MGS, \hyperindexformat{\see{Modified Gram-Schmidt}}{536}

  • MIC, \hyperindexformat{\see{Many Integrated Cores}}{536}

  • MIMD, \hyperindexformat{\see{Multiple Instruction Multiple Data}}{536}

  • minimum \subitem spanning tree, \hyperpage{236}, \hyperpage{319}

  • minimum degree ordering, \hyperpage{217}

  • MIPS, \hyperpage{80}

  • \texttt {mkdir}, \hyperpage{380}

  • MKL, \hyperpage{137}, \hyperpage{451}, \hyperpage{454}

  • modified Gram-Schmidt, \hyperindexformat{\see{Gram-Schmidt, modified}}{537}

  • Moore's law, \hyperpage{61}

  • MPI, \hyperindexformat{\see{Message Passing Interface}}{536} \subitem MPI 3.0 draft, \hyperpage{105}

  • MSI, \hyperindexformat{\see{Modified-Shared-Invalid}}{536}

  • MTA, \hyperindexformat{\see{Multi-Threaded Architecture}}{536}

  • multi-colouring, \hyperpage{276}

  • \acl {MTA} (MTA), \hyperpage{132}

  • multi-threading, \hyperpage{96}, \hyperpage{132}

  • multicore, \hyperpage{21}, \hyperpage{27}, \hyperindexformat{\textbf}{35--39}, \hyperpage{62}, \hyperpage{82}, \hyperpage{204}, \hyperpage{285} \subitem motivated by power, \hyperpage{62}

  • multigrid, \hyperpage{228}

  • \acl {MIMD} (MIMD), \hyperpage{80}

  • MUMPS, \hyperpage{513}

    \indexspace

  • $n_{1/2}$, \hyperpage{17}

  • N-body problems, \hyperpage{325--329}

  • NaN, \hyperindexformat{\texttt}{157}

  • natural ordering, \hyperpage{185}

  • NBODY6, \hyperpage{327}

  • nearest neighbour, \hyperpage{123}

  • Needleman-Wunsch algorithm, \hyperpage{335}

  • neighbourhood, \hyperpage{321}

  • nested dissection, \hyperpage{216}, \hyperpage{267--273}, \hyperpage{320}

  • Netflix, \hyperpage{338}

  • netlib, \hyperpage{454}

  • Neumann boundary condition, \hyperpage{182}

  • Newton method, \hyperpage{224}

  • nm, \hyperindexformat{\texttt}{400}, \hyperpage{486}

  • node, \hyperpage{108}, \hyperpage{123}

  • nodes, \hyperpage{358}

  • non-blocking communication, \hyperpage{102}, \hyperpage{105, 106}, \hyperpage{119}

  • non-local operation, \hyperpage{106}

  • \acl {NUMA} (NUMA), \hyperpage{82}, \hyperpage{132}

  • norm, \hyperpage{345}

  • NP-complete, \hyperpage{146}

  • null termination, \hyperpage{491}

  • NUMA, \hyperindexformat{\see{Non-Uniform Memory Access}}{536}

  • NVidia, \hyperpage{134} \subitem Tesla, \hyperpage{161}

    \indexspace

  • object file, \hyperpage{400}, \hyperpage{486}

  • octtree, \hyperpage{327} \subitem hashed, \hyperpage{328}

  • odd-even transposition sort, \hyperpage{309}, \hyperindexformat{\textbf}{309}

  • ODE, \hyperindexformat{\see{Ordinary Differential Equation}}{536}

  • offloading, \hyperpage{137}

  • one-sided communication, \hyperpage{107--108}, \hyperpage{114--116}

  • OpenMP, \hyperpage{96--99} \subitem version 4, \hyperpage{80}

  • \acl {OS} (OS), \hyperpage{95}, \hyperpage{377}

  • option pricing, \hyperpage{331}

  • order, \hyperpage{352}

  • \acl {ODE} (ODE), \hyperpage{173--179}

  • OS, \hyperindexformat{\see{Operating System}}{536}

  • out-of-order, \hyperindexformat{\see{instruction, handling, out-of-order}}{15}

  • output redirection, \hyperindexformat{\see{redirection}}{386}

  • overdecomposition, \hyperpage{87}, \hyperpage{116}, \hyperpage{139}, \hyperpage{250}

  • overflow, \hyperpage{155}, \hyperindexformat{\textbf}{157}, \hyperpage{461}

  • overflow bit, \hyperpage{155}

  • overlays, \hyperpage{35}

  • owner computes, \hyperpage{138}

    \indexspace

  • page table, \hyperpage{35}

  • PageRank, \hyperpage{257}, \hyperpage{320}, \hyperindexformat{\textbf}{322--324}

  • pages \subitem memory, \hyperindexformat{\see{memory, pages}}{35}

  • PAPI, \hyperpage{483}

  • Parabolic PDEs, \hyperpage{180}

  • parallel fraction, \hyperpage{72}

  • parallel prefix, \hyperindexformat{\see{prefix operation}}{537}

  • \acl {PRAM} (PRAM), \hyperpage{71}

  • parallelism \subitem average, \hyperpage{69} \subitem data, \hyperpage{78}, \hyperpage{84}, \hyperpage{111} \subitem dynamic, \hyperpage{97} \subitem fine-grained, \hyperpage{84} \subitem instruction-level, \hyperpage{64} \subitem irregular, \hyperpage{86}

  • parallelization \subitem incremental, \hyperpage{97}

  • parameter sweep, \hyperpage{86}

  • partial derivatives, \hyperpage{353}

  • \acl {PDE} (PDE), \hyperpage{180--191}

  • partial differential equations, \hyperpage{353--354}

  • partial ordering, \hyperpage{287}

  • partial pivoting, \hyperpage{196}

  • \acl {PGAS} (PGAS), \hyperpage{83}, \hyperpage{109--115}

  • path (graph theory), \hyperpage{359}

  • patsubst, \hyperindexformat{\texttt}{409}

  • PCI bus, \hyperpage{134}

  • PCI-X, \hyperpage{137}

  • PCI-X bus, \hyperpage{136}

  • PDE, \hyperindexformat{\see{Partial Differential Equation}}{536}

  • peak performance, \hyperpage{19}, \hyperpage{40, 41}, \hyperpage{46}

  • penta-diagonal, \hyperpage{185}

  • permutation, \hyperpage{216}

  • Perron vector, \hyperpage{322}, \hyperpage{324}

  • Perron-Frobenius theorem, \hyperpage{350}

  • PETSc, \hyperpage{440--450}, \hyperpage{483}

  • PGAS, \hyperindexformat{\see{Partitioned Global Address Space}}{536}

  • pipeline, \hyperindexformat{\textbf}{16--19}, \hyperpage{46--48} \subitem depth, \hyperpage{21}, \hyperpage{62} \subitem flush, \hyperpage{21} \subitem instruction, \hyperpage{341} \subitem length, \hyperpage{21} \subitem processor, \hyperpage{79} \subitem stall, \hyperpage{21}

  • pivoting, \hyperpage{194--196}, \hyperpage{201--202} \subitem diagonal, \hyperpage{196} \subitem full, \hyperpage{196} \subitem partial, \hyperpage{196}

  • pivots, \hyperpage{194}

  • PLapack, \hyperpage{451}

  • point-to-point communication, \hyperpage{102}

  • Poisson equation, \hyperpage{181}, \hyperpage{353}

  • posix\_memalign, \hyperindexformat{\texttt}{516}

  • power \subitem consumption, \hyperpage{61--63} \subitem efficiency, \hyperpage{78} \subitem wall, \hyperpage{62}

  • power method, \hyperpage{322, 323}, \hyperpage{348}

  • PowerPC, \hyperpage{79}

  • PRAM, \hyperindexformat{\see{Parallel Random Access Machine}}{536}

  • precision \subitem double, \hyperindexformat{\see{double precision}}{160} \subitem extended, \hyperindexformat{\see{extended precision}}{163} \subitem machine, \hyperindexformat{\see{machine precision}}{159} \subitem of the intermediate result, \hyperpage{163}, \hyperpage{169} \subitem single, \hyperindexformat{\see{single precision}}{160}

  • preconditioner, \hyperpage{222--224}

  • predicate, \hyperpage{316}

  • prefetch data stream, \hyperpage{32}

  • prefix operation, \hyperpage{280}, \hyperpage{311}, \hyperpage{372--374}

  • prefix operations \subitem sparse matrix vector product, \hyperpage{373--374}

  • Pregel, \hyperpage{117}

  • Prim's algorithm, \hyperpage{319}

  • prime number \subitem finding, \hyperpage{313--314}

  • priority queue, \hyperpage{320}

  • probability vector, \hyperpage{323}

  • process, \hyperpage{88} \subitem affinity, \hyperindexformat{\see{affinity}}{94}

  • program counter, \hyperpage{13}, \hyperpage{88}, \hyperpage{96}

  • projects \subitem genome, \hyperpage{335}

  • protein interactions, \hyperpage{315}

  • \texttt {ps}, \hyperpage{388}

  • pseudo-random numbers, \hyperindexformat{\see{random numbers}}{332}

  • pthreads, \hyperpage{89--91}

  • purify, \hyperpage{474}

  • PVM, \hyperpage{107}

    \indexspace

  • QR factorization, \hyperpage{347}

  • Quicksort, \hyperpage{308}, \hyperpage{310--312}

    \indexspace

  • race condition, \hyperpage{92, 93}

  • radix point, \hyperpage{156}

  • random numbers, \hyperpage{332--333} \subitem generator, \hyperpage{332--333} \subsubitem lagged Fibonacci, \hyperpage{332} \subsubitem linear congruential, \hyperpage{332}

  • random placement, \hyperpage{116}, \hyperpage{140}

  • rate \subitem computational, \hyperpage{76}

  • RDMA, \hyperindexformat{\see{Remote Direct Memory Access}}{536}

  • re-association, \hyperpage{169}

  • real numbers \subitem representation of, \hyperpage{153}

  • recursive doubling, \hyperpage{19}, \hyperindexformat{\textbf}{29}, \hyperpage{276}, \hyperpage{279--280}

  • Red Hat, \hyperpage{377}

  • red-black ordering, \hyperpage{185}, \hyperpage{273--276}

  • redirection, \hyperpage{385}, \hyperindexformat{\textbf}{387}

  • reduce-scatter, \hyperpage{239}, \hyperpage{246, 247}, \hyperpage{259}

  • reducible, \hyperpage{211}

  • reduction, \hyperpage{237--238} \subitem and thread safety, \hyperpage{92} \subitem under multi-threading, \hyperpage{92}

  • reduction operations, \hyperpage{98}

  • redundancy, \hyperpage{121}

  • redundant computation, \hyperpage{284}

  • refinement \subitem adaptive, \hyperpage{141}

  • region of influence, \hyperpage{180}, \hyperpage{260}

  • register, \hyperpage{13}, \hyperpage{21}, \hyperindexformat{\textbf}{23--25} \subitem file, \hyperpage{23} \subitem resident in, \hyperpage{24, 25} \subitem spill, \hyperpage{24}, \hyperpage{48} \subitem variable, \hyperpage{25} \subitem vector, \hyperpage{79}, \hyperindexformat{\textbf}{79}

  • \acl {RDMA} (RDMA), \hyperpage{107}

  • remote method invocation, \hyperpage{115}

  • remote procedure call, \hyperpage{143}

  • repository, \hyperpage{416}

  • representation error \subitem absolute, \hyperpage{159} \subitem relative, \hyperpage{159}

  • reproducibility, \hyperpage{169} \subitem bitwise, \hyperpage{169}

  • residual, \hyperpage{218}

  • resource contention, \hyperpage{95}

  • revision control systems, \hyperindexformat{\see{source code control}}{416}

  • Riemann sums, \hyperpage{331}

  • ring network, \hyperpage{122}

  • roofline model, \hyperpage{41}

  • round-off error analysis, \hyperpage{162--168} \subitem in parallel computing, \hyperpage{168}

  • round-robin \subitem storage assignment, \hyperpage{111} \subitem task scheduling, \hyperpage{98}

  • rounding, \hyperpage{158} \subitem correct, \hyperpage{160}

  • row-major, \hyperpage{453}, \hyperpage{489}

    \indexspace

  • SAN, \hyperindexformat{\see{Storage Area Network}}{536}

  • SAS, \hyperindexformat{\see{Software As a Service}}{536}

  • satisfiability, \hyperpage{67}

  • scalability, \hyperpage{74--77} \subitem strong, \hyperpage{74}, \hyperpage{244} \subitem weak, \hyperpage{74}, \hyperpage{244}

  • Scalapack, \hyperpage{149}, \hyperpage{451}

  • ScaleMP, \hyperpage{83}

  • scaling, \hyperpage{74--77}

  • scheduling \subitem dynamic, \hyperindexformat{\textbf}{87}, \hyperpage{98} \subitem fair-share, \hyperpage{146} \subitem job, \hyperpage{146} \subitem static, \hyperpage{98}

  • Scons, \hyperpage{402}

  • search direction, \hyperpage{230}, \hyperpage{234}

  • \texttt {sed}, \hyperpage{385}

  • segmentation fault, \hyperpage{472}

  • segmentation violation, \hyperpage{465}

  • semaphore, \hyperpage{92}

  • separable problem, \hyperpage{223}

  • separator, \hyperpage{267}, \hyperpage{320}

  • Sequent Symmetry, \hyperpage{37}

  • sequential complexity, \hyperpage{309}

  • sequential consistency, \hyperpage{93}, \hyperpage{96}, \hyperpage{287}

  • sequential fraction, \hyperpage{72}

  • serialized execution, \hyperpage{100}

  • SFC, \hyperindexformat{\see{Space-Filling Curve}}{536}

  • SGE, \hyperpage{146}

  • SGI, \hyperpage{133} \subitem UV, \hyperpage{83}

  • sh, \hyperpage{377}

  • shared libraries, \hyperpage{400}

  • shell, \hyperpage{377} \subitem command history, \hyperpage{414} \subitem startup files, \hyperindexformat{\textbf}{394--395}

  • shortest path \subitem all-pairs, \hyperpage{315} \subitem single source, \hyperpage{315}

  • side effects, \hyperpage{148}

  • side-effects, \hyperpage{462}

  • sign bit, \hyperpage{154}, \hyperpage{156}

  • signature selection, \hyperpage{337}

  • significand, \hyperpage{156}

  • significant digits, \hyperpage{164}

  • SIMD, \hyperindexformat{\see{Single Instruction Multiple Data}}{536} \subitem lanes, \hyperindexformat{\textbf}{79} \subitem width, \hyperpage{41}, \hyperpage{118}

  • \acl {SSE} (SSE), \hyperpage{79}, \hyperpage{133}, \hyperpage{328}

  • SIMT, \hyperindexformat{\see{Single Instruction Multiple Thread}}{536}

  • \acl {SIMD} (SIMD), \hyperpage{118}

  • \acl {SIMT} (SIMT), \hyperpage{135}

  • single precision, \hyperpage{136}

  • \acl {SPMD} (SPMD), \hyperpage{80}, \hyperpage{118}

  • skyline storage, \hyperpage{214}

  • slog2 file format, \hyperpage{485}

  • Slurm, \hyperpage{146}

  • SM, \hyperindexformat{\see{Streaming Multiprocessor}}{536}

  • small world, \hyperpage{321}

  • Smith-Waterman algorithm, \hyperpage{335}

  • smoothers, \hyperpage{228}

  • SMP, \hyperindexformat{\see{Symmetric Multi Processing}}{536}

  • snooping, \hyperpage{38}, \hyperpage{122}

  • SOA, \hyperindexformat{\see{Structure-Of-Arrays}}{536}

  • socket, \hyperpage{27}, \hyperpage{36}, \hyperpage{108}, \hyperpage{150}

  • \acl {SAS} (SAS), \hyperpage{144}

  • SOR, \hyperindexformat{\see{Successive Over-Relaxation}}{536}

  • sorting, \hyperpage{308--313} \subitem network, \hyperpage{309}, \hyperpage{313}

  • source code control, \hyperpage{416} \subitem distributed, \hyperpage{416, 417}, \hyperpage{425}

  • source-to-source transformations, \hyperpage{48}

  • SP, \hyperindexformat{\see{Streaming Processor}}{536}

  • space-filling curve, \hyperpage{138}, \hyperindexformat{\textbf}{140--142}

  • spanning tree, \hyperpage{319}

  • Spark, \hyperpage{148}

  • sparse \subitem linear algebra, \hyperpage{205--217} \subitem matrix, \hyperpage{183}, \hyperpage{206}, \hyperpage{361} \subsubitem from PDEs, \hyperpage{184} \subitem matrix-vector product \subsubitem implementation, \hyperpage{209} \subsubitem in graph theory, \hyperpage{315} \subsubitem locality in, \hyperpage{210} \subsubitem parallel, \hyperpage{254--259} \subsubitem parallel setup, \hyperpage{259} \subsubitem performance of, \hyperpage{263}

  • spatial locality, \hyperindexformat{\see{locality, spatial}}{41}

  • SPD, \hyperindexformat{\see{symmetric positive definite}}{536}

  • speculative execution, \hyperpage{21}

  • speedup, \hyperindexformat{\textbf}{68}, \hyperpage{80}

  • SPMD, \hyperindexformat{\see{Single Program Multiple Data}}{536}

  • SRAM, \hyperindexformat{\see{Static Random-Access Memory}}{536}

  • SSE, \hyperindexformat{\see{SIMD Streaming Extensions}}{536}

  • stall, \hyperpage{32}

  • static libraries, \hyperpage{400}

  • \acl {SRAM} (SRAM), \hyperpage{25}

  • stationary iteration, \hyperpage{219}

  • statistical mechanics, \hyperpage{334}

  • steady state, \hyperpage{178}, \hyperpage{180, 181}, \hyperpage{188}, \hyperpage{354}, \hyperpage{512}

  • stochastic matrix, \hyperpage{364}

  • \acl {SAN} (SAN), \hyperpage{143}

  • storage by diagonals, \hyperpage{206}

  • stride, \hyperpage{29}, \hyperpage{43}, \hyperpage{50}

  • strip mining, \hyperpage{59}

  • structurally symmetric, \hyperpage{361}

  • \acl {SOA} (SOA), \hyperpage{117}

  • subdomain, \hyperpage{267}

  • substructuring, \hyperpage{268}

  • Subversion, \hyperpage{416}, \hyperpage{418--424}

  • \acl {SOR} (SOR), \hyperpage{222}

  • suffix tree, \hyperpage{337}

  • Sun \subitem Ray, \hyperpage{144}

  • superlinear speedup, \hyperpage{68}

  • superscalar, \hyperpage{14}, \hyperindexformat{\textbf}{20}, \hyperpage{85}

  • superstep, \hyperpage{66}

  • supersteps, \hyperpage{108}, \hyperpage{116}

  • surface-to-volume, \hyperpage{87}

  • svn, \hyperindexformat{\see{Subversion}}{418}

  • swapped, \hyperpage{34}

  • switch, \hyperpage{126} \subitem leaf, \hyperpage{128}, \hyperpage{132}

  • symbol table, \hyperpage{470}

  • \acl {SMP} (SMP), \hyperpage{81}, \hyperpage{122}

  • \acl {SPD} (SPD), \hyperpage{202}, \hyperpage{228}, \hyperpage{233, 234}

  • system\_clock, \hyperpage{481}

    \indexspace

  • TACC, \hyperpage{128} \subitem ranger cluster, \hyperpage{129} \subitem Stampede cluster, \hyperpage{129}, \hyperpage{134}

  • task \subitem parallelism, \hyperpage{85--86} \subitem queue, \hyperpage{86}, \hyperpage{95}, \hyperpage{317}

  • task parallelism, \hyperpage{67}

  • TAU, \hyperpage{484--485}

  • Taylor series, \hyperpage{175}, \hyperpage{355--357}

  • tcsh, \hyperpage{377}

  • temporal locality, \hyperindexformat{\see{locality, temporal}}{41}

  • Tera Computer \subitem MTA, \hyperpage{133}

  • ternary arithmetic, \hyperpage{157}

  • {\TeX}, \hyperpage{493} \subitem environment variables, \hyperpage{500}

  • thin client, \hyperpage{144}

  • thread, \hyperindexformat{\textbf}{88--96}, \hyperpage{136} \subitem affinity, \hyperindexformat{\see{affinity}}{94} \subitem blocks, \hyperpage{135} \subitem master, \hyperpage{88} \subitem private data, \hyperpage{91} \subitem safe, \hyperindexformat{\textbf}{92}, \hyperpage{97} \subitem shared data, \hyperpage{91} \subitem spawning, \hyperindexformat{\textbf}{88} \subitem team, \hyperpage{88} \subitem use in OpenMP, \hyperpage{97}

  • throughput computing, \hyperpage{134}

  • Tianhe-1A, \hyperpage{134}

  • Tianhe-2, \hyperpage{134}

  • Titanium, \hyperpage{112}

  • TLB, \hyperindexformat{\see{Translation Look-aside Buffer}}{536} \subitem miss, \hyperpage{35}

  • \texttt {top}, \hyperpage{396}

  • top 500, \hyperpage{148--150}

  • topology, \hyperpage{119}

  • torus, \hyperpage{123} \subitem clusters based on, \hyperpage{130}

  • TotalView, \hyperpage{469}

  • \texttt {touch}, \hyperpage{380}

  • transactional memory, \hyperpage{93}

  • \acl {TLB} (TLB), \hyperpage{35}, \hyperpage{522}

  • tree graph, \hyperpage{360}

  • tridiagonal, \hyperpage{185}

  • tridiagonal matrix, \hyperpage{183}

  • truncation, \hyperpage{158}

  • truncation error, \hyperpage{176}, \hyperpage{182}

  • tuple space, \hyperpage{114}

  • Turing machine, \hyperpage{371}

    \indexspace

  • Ubuntu, \hyperpage{377}

  • UMA, \hyperindexformat{\see{Uniform Memory Access}}{536}

  • unconditionally stable, \hyperpage{178}

  • underflow, \hyperindexformat{\textbf}{157} \subitem gradual, \hyperpage{157, 158}, \hyperpage{170}

  • \acl {UPC} (UPC), \hyperpage{110--112}

  • \acl {UMA} (UMA), \hyperpage{81}, \hyperpage{122}, \hyperpage{126}

  • unitary basis transformations, \hyperpage{216}

  • Unix \subitem user account, \hyperpage{395}

  • unsigned, \hyperpage{154}

  • UPC, \hyperindexformat{\see{Unified Parallel C}}{536}

  • \texttt {uptime}, \hyperpage{396}

  • utility computing, \hyperpage{143}

    \indexspace

  • valgrind, \hyperpage{474--475}

  • value safety, \hyperpage{169}

  • vector \subitem instructions, \hyperindexformat{\textbf}{79}, \hyperpage{278}, \hyperpage{341} \subitem norms, \hyperpage{345} \subitem pipeline, \hyperindexformat{\see{pipeline, processor}}{79} \subitem register, \hyperindexformat{\see{register, vector}}{79}

  • vector processor, \hyperpage{79}, \hyperpage{278}

  • verbatim mode, \hyperpage{498}

  • version control systems, \hyperindexformat{\see{source code control}}{416}

  • vertices, \hyperpage{358}

  • Virtual memory, \hyperpage{34}

  • virtualization, \hyperpage{143}

  • von Neumann architectures, \hyperpage{12}

    \indexspace

  • wallclock time, \hyperpage{483}

  • WAN, \hyperindexformat{\see{Wide Area Network}}{536}

  • wavefront, \hyperpage{278--279}, \hyperpage{336}

  • weak scaling, \hyperpage{72}

  • weighted graph, \hyperpage{359}

  • \texttt {which}, \hyperpage{379}, \hyperpage{386}

  • \texttt {who}, \hyperpage{395}

  • \texttt {whoami}, \hyperpage{395}

  • \acl {WAN} (WAN), \hyperpage{143}

  • wildcard, \hyperpage{383}, \hyperindexformat{\texttt}{409}

  • work pool, \hyperpage{139}

  • World Wide Web, \hyperpage{315}

    \indexspace

  • X10, \hyperpage{114}

  • x86, \hyperpage{149}

    \indexspace

  • zsh, \hyperpage{377}

    \end{theindex}