Added IBM Cell SDK 2.x software_cache to Bullet/Extras. There is an option to enable it for the BulletMultiThreaded Cell version.

See USE_SOFTWARE_CACHE in Bullet\src\BulletMultiThreaded\SpuNarrowPhaseCollisionTask\SpuGatheringCollisionTask.cpp
It improves the Bullet midphase collision detection (triangle/vertex fetch)
The license is CommonPublicLicense-1.0, see included license docs.
This commit is contained in:
erwin.coumans 2008-11-18 01:33:30 +00:00
parent 50f475feb5
commit dc8692ba94
26 changed files with 2554 additions and 0 deletions

View File

@ -0,0 +1,14 @@
# SCE CONFIDENTIAL
# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
# Copyright (C) 2005 Sony Computer Entertainment Inc.
# All Rights Reserved.
#
# Directory holding the Cell SDK sample makefile fragments. "?=" only assigns
# when CELL_MK_DIR has not already been set (environment or command line).
CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
# Shared SDK make definitions.
include $(CELL_MK_DIR)/sdk.makedef.mk
# Sub-makefiles that make up this sample: one PPU-side and one SPU-side,
# judging by the file names. Presumably consumed by sdk.target.mk below;
# confirm against the SDK makefiles.
MK_TARGET = spu_thr_printf.ppu.mk hello.spu.mk
include $(CELL_MK_DIR)/sdk.target.mk

View File

@ -0,0 +1,239 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="SPU_printf"
ProjectGUID="{47EE939D-CB3D-4600-B8B6-79FDF607E133}"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<!-- "PS3 Debug" configuration: unoptimized SPU build with debug symbols
     (GCC-style flags -g -O0) and _DEBUG defined. Intermediate files go to
     PS3_Debug; the linked ELF is written one directory above the project. -->
<Configuration
Name="PS3 Debug|Win32"
OutputDirectory="PS3_Debug"
IntermediateDirectory="PS3_Debug"
ConfigurationType="1"
DeleteExtensionsOnClean="*.obj;*.d;*.map;*.lst;*.pch;$(TargetPath);$(TargetDir)$(TargetName).self"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<!-- Compiler: GCC-style switches are passed through AdditionalOptions.
     Include paths cover the SN and SCE SPU headers plus the software
     cache headers under $(TargetDir)/cache/include. -->
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="-g -O0 -Wall -fno-exceptions"
AdditionalIncludeDirectories="&quot;$(SN_PS3_PATH)\spu\include\sn&quot;;&quot;$(SCE_PS3_ROOT)\target\spu\include&quot;;&quot;$(SCE_PS3_ROOT)\target\common\include&quot;;&quot;$(TargetDir)/cache/include&quot;"
PreprocessorDefinitions="SN_TARGET_PS3_SPU;_DEBUG;__GCC__;SPU"
ProgramDataBaseFileName="$(IntDir)/"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<!-- Linker: no extra libraries or library paths; the output is
     ../$(ProjectName).spu.elf with debug information enabled. -->
<Tool
Name="VCLinkerTool"
AdditionalOptions=""
AdditionalDependencies=""
OutputFile="../$(ProjectName).spu.elf"
AdditionalLibraryDirectories=""
GenerateManifest="false"
GenerateDebugInformation="true"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<!-- Post-build: runs make_fself on the linked ELF and writes
     $(TargetName).self next to it ("fake signing"). -->
<Tool
Name="VCPostBuildEventTool"
Description="Fake Signing ELF &quot;$(TargetDir)$(TargetName).self&quot;"
CommandLine="$(SCE_PS3_ROOT)\host-win32\bin\make_fself &quot;$(TargetPath)&quot; &quot;$(TargetDir)$(TargetName).self&quot;"
/>
</Configuration>
<!-- "PS3 Release" configuration: optimized SPU build (GCC-style flag -O2)
     with NDEBUG defined. Intermediate files go to PS3_Release; the linked
     ELF is written to the same ../$(ProjectName).spu.elf path as the Debug
     configuration. -->
<Configuration
Name="PS3 Release|Win32"
OutputDirectory="PS3_Release"
IntermediateDirectory="PS3_Release"
ConfigurationType="1"
DeleteExtensionsOnClean="*.obj;*.d;*.map;*.lst;*.pch;$(TargetPath);$(TargetDir)$(TargetName).self"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<!-- Compiler: same include paths as the Debug configuration; only the
     optimization switch and the NDEBUG / _DEBUG definition differ. -->
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="-O2 -Wall -fno-exceptions"
AdditionalIncludeDirectories="&quot;$(SN_PS3_PATH)\spu\include\sn&quot;;&quot;$(SCE_PS3_ROOT)\target\spu\include&quot;;&quot;$(SCE_PS3_ROOT)\target\common\include&quot;;&quot;$(TargetDir)/cache/include&quot;"
PreprocessorDefinitions="SN_TARGET_PS3_SPU;NDEBUG;__GCC__;SPU"
ProgramDataBaseFileName="$(IntDir)/"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<!-- Linker: identical to the Debug configuration. Note that
     GenerateDebugInformation stays "true" in Release as well. -->
<Tool
Name="VCLinkerTool"
AdditionalOptions=""
AdditionalDependencies=""
OutputFile="../$(ProjectName).spu.elf"
AdditionalLibraryDirectories=""
GenerateManifest="false"
GenerateDebugInformation="true"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<!-- Post-build: runs make_fself on the linked ELF and writes
     $(TargetName).self next to it ("fake signing"). -->
<Tool
Name="VCPostBuildEventTool"
Description="Fake Signing ELF &quot;$(TargetDir)$(TargetName).self&quot;"
CommandLine="$(SCE_PS3_ROOT)\host-win32\bin\make_fself &quot;$(TargetPath)&quot; &quot;$(TargetDir)$(TargetName).self&quot;"
/>
</Configuration>
</Configurations>
<References>
</References>
<!-- Project file list. All paths are relative to the project directory and
     point one level up, where the sample source and the cache/ tree live. -->
<Files>
<!-- The single SPU translation unit of this sample. -->
<Filter
Name="Source Files"
Filter="cpp;c;cxx;cc;s;asm"
>
<File
RelativePath="..\hello.spu.c"
>
</File>
</Filter>
<!-- Software cache headers from ..\cache\include. -->
<Filter
Name="Header Files"
Filter="h;hpp"
>
<File
RelativePath="..\cache\include\api.h"
>
</File>
<File
RelativePath="..\cache\include\cbe_mfc.h"
>
</File>
<File
RelativePath="..\cache\include\defs.h"
>
</File>
<File
RelativePath="..\cache\include\dma.h"
>
</File>
<File
RelativePath="..\cache\include\ilog2.h"
>
</File>
<File
RelativePath="..\cache\include\memset.h"
>
</File>
<File
RelativePath="..\cache\include\nway-lookup.h"
>
</File>
<File
RelativePath="..\cache\include\nway-miss.h"
>
</File>
<File
RelativePath="..\cache\include\nway-opt.h"
>
</File>
<File
RelativePath="..\cache\include\nway-replace.h"
>
</File>
<File
RelativePath="..\cache\include\nway.h"
>
</File>
<File
RelativePath="..\cache\include\spe_cache.h"
>
</File>
<File
RelativePath="..\cache\include\vec_literal.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,213 @@
Common Public License Version 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC
LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and
documentation distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are
distributed by that particular Contributor. A Contribution 'originates' from a
Contributor if it was added to the Program by such Contributor itself or anyone
acting on such Contributor's behalf. Contributions do not include additions to
the Program which: (i) are separate modules of software distributed in
conjunction with the Program under their own license agreement, and (ii) are not
derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents " mean patent claims licensable by a Contributor which are
necessarily infringed by the use or sale of its Contribution alone or when
combined with the Program.
"Program" means the Contributions distributed in accordance with this Agreement.
"Recipient" means anyone who receives the Program under this Agreement,
including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free copyright license to
reproduce, prepare derivative works of, publicly display, publicly perform,
distribute and sublicense the Contribution of such Contributor, if any, and such
derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed
Patents to make, use, sell, offer to sell, import and otherwise transfer the
Contribution of such Contributor, if any, in source code and object code form.
This patent license shall apply to the combination of the Contribution and the
Program if, at the time the Contribution is added by the Contributor, such
addition of the Contribution causes such combination to be covered by the
Licensed Patents. The patent license shall not apply to any other combinations
which include the Contribution. No hardware per se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses
to its Contributions set forth herein, no assurances are provided by any
Contributor that the Program does not infringe the patent or other intellectual
property rights of any other entity. Each Contributor disclaims any liability to
Recipient for claims brought by any other entity based on infringement of
intellectual property rights or otherwise. As a condition to exercising the
rights and licenses granted hereunder, each Recipient hereby assumes sole
responsibility to secure any other intellectual property rights needed, if any.
For example, if a third party patent license is required to allow Recipient to
distribute the Program, it is Recipient's responsibility to acquire that license
before distributing the Program.
d) Each Contributor represents that to its knowledge it has sufficient
copyright rights in its Contribution, if any, to grant the copyright license set
forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under its
own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties and
conditions, express and implied, including warranties or conditions of title and
non-infringement, and implied warranties or conditions of merchantability and
fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for
damages, including direct, indirect, special, incidental and consequential
damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are offered
by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such
Contributor, and informs licensees how to obtain it in a reasonable manner on or
through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained within the
Program.
Each Contributor must identify itself as the originator of its Contribution, if
any, in a manner that reasonably allows subsequent Recipients to identify the
originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with
respect to end users, business partners and the like. While this license is
intended to facilitate the commercial use of the Program, the Contributor who
includes the Program in a commercial product offering should do so in a manner
which does not create potential liability for other Contributors. Therefore, if
a Contributor includes the Program in a commercial product offering, such
Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
every other Contributor ("Indemnified Contributor") against any losses, damages
and costs (collectively "Losses") arising from claims, lawsuits and other legal
actions brought by a third party against the Indemnified Contributor to the
extent caused by the acts or omissions of such Commercial Contributor in
connection with its distribution of the Program in a commercial product
offering. The obligations in this section do not apply to any claims or Losses
relating to any actual or alleged intellectual property infringement. In order
to qualify, an Indemnified Contributor must: a) promptly notify the Commercial
Contributor in writing of such claim, and b) allow the Commercial Contributor to
control, and cooperate with the Commercial Contributor in, the defense and any
related settlement negotiations. The Indemnified Contributor may participate in
any such claim at its own expense.
For example, a Contributor might include the Program in a commercial product
offering, Product X. That Contributor is then a Commercial Contributor. If that
Commercial Contributor then makes performance claims, or offers warranties
related to Product X, those performance claims and warranties are such
Commercial Contributor's responsibility alone. Under this section, the
Commercial Contributor would have to defend claims against the other
Contributors related to those performance claims and warranties, and if a court
requires any other Contributor to pay any damages as a result, the Commercial
Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
Recipient is solely responsible for determining the appropriateness of using and
distributing the Program and assumes all risks associated with its exercise of
rights under this Agreement, including but not limited to the risks and costs of
program errors, compliance with applicable laws, damage to or loss of data,
programs or equipment, and unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS
GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under applicable
law, it shall not affect the validity or enforceability of the remainder of the
terms of this Agreement, and without further action by the parties hereto, such
provision shall be reformed to the minimum extent necessary to make such
provision valid and enforceable.
If Recipient institutes patent litigation against a Contributor with respect to
a patent applicable to software (including a cross-claim or counterclaim in a
lawsuit), then any patent licenses granted by that Contributor to such Recipient
under this Agreement shall terminate as of the date such litigation is filed. In
addition, if Recipient institutes patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that the Program
itself (excluding combinations of the Program with other software or hardware)
infringes such Recipient's patent(s), then such Recipient's rights granted under
Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to
comply with any of the material terms or conditions of this Agreement and does
not cure such failure in a reasonable period of time after becoming aware of
such noncompliance. If all Recipient's rights under this Agreement terminate,
Recipient agrees to cease use and distribution of the Program as soon as
reasonably practicable. However, Recipient's obligations under this Agreement
and any licenses granted by Recipient relating to the Program shall continue and
survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in
order to avoid inconsistency the Agreement is copyrighted and may only be
modified in the following manner. The Agreement Steward reserves the right to
publish new versions (including revisions) of this Agreement from time to time.
No one other than the Agreement Steward has the right to modify this Agreement.
IBM is the initial Agreement Steward. IBM may assign the responsibility to serve
as the Agreement Steward to a suitable separate entity. Each new version of the
Agreement will be given a distinguishing version number. The Program (including
Contributions) may always be distributed subject to the version of the Agreement
under which it was received. In addition, after a new version of the Agreement
is published, Contributor may elect to distribute the Program (including its
Contributions) under the new version. Except as expressly stated in Sections
2(a) and 2(b) above, Recipient receives no rights or licenses to the
intellectual property of any Contributor under this Agreement, whether
expressly, by implication, estoppel or otherwise. All rights in the Program not
expressly granted under this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the
intellectual property laws of the United States of America. No party to this
Agreement will bring a legal action under this Agreement more than one year
after the cause of action arose. Each party waives its rights to a jury trial in
any resulting litigation.

View File

@ -0,0 +1,28 @@
# ---------------------------------------------------------------
# PLEASE DO NOT MODIFY THIS SECTION
# This prolog section is automatically generated.
#
# (C) Copyright 2001,2006,
# International Business Machines Corporation,
#
# All Rights Reserved.
# ---------------------------------------------------------------
# PROLOG END TAG zYx
########################################################################
# Common Makefile
########################################################################
# Destination directory for the public cache headers, below the SPU SDK
# include root.
INSTALL_DIR = $(SDKINC_spu)/cache
# Headers to install. The list matches the include hierarchy described in the
# accompanying README (spe_cache.h, defs.h, dma.h, nway*.h, api.h).
INSTALL_FILES := api.h \
defs.h \
dma.h \
nway.h \
nway-lookup.h \
nway-miss.h \
nway-opt.h \
nway-replace.h \
spe_cache.h
# Shared SDK make rules, five directory levels up. Presumably these consume
# INSTALL_DIR / INSTALL_FILES -- confirm against make.footer.
include ../../../../../make.footer

View File

@ -0,0 +1,32 @@
%% ---------------------------------------------------------------
%% PLEASE DO NOT MODIFY THIS SECTION
%% This prolog section is automatically generated.
%%
%% (C) Copyright 2001,2006,
%% International Business Machines Corporation,
%%
%% All Rights Reserved.
%% ---------------------------------------------------------------
%% PROLOG END TAG zYx
This directory contains an implementation of a software-managed cache for
the SPE. Whenever possible, the cache interfaces are implemented as macros
or inline-able functions.
Depending on compile-time settings, different cache implementations can
be selected.
The include file hierarchy is:
+ spe_cache.h Top level header.
|
+ defs.h Common definitions.
+ dma.h Initiate DMA transfers.
+ nway.h Top level n-way header.
|
+ nway-lookup.h n-way lookup operations.
+ nway-miss.h n-way cache miss handler.
+ nway-replace.h n-way cache replace handler.
+ nway-opt.h "optimized" n-way interfaces.
|
+ api.h Basic application interfaces.

View File

@ -0,0 +1,31 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* api.h
*
* Copyright (C) 2005 IBM Corp.
*
* Simple API for software managed cache on SPEs.
* A sophisticated application would not use these,
* but rather use the low-level lookup functions.
*/
#ifndef __SPE_CACHE_API_H__
#define __SPE_CACHE_API_H__
/* Opaque handle for a cache entry; a plain void pointer. */
typedef void *spe_cache_entry_t;
/*
 * Convenience wrappers around the low-level lookup helpers. The helpers
 * (_spe_cache_lookup_xfer_wait_, _spe_cache_lookup_xfer_, _spe_cache_lookup_,
 * _spe_cache_wait_) are not defined in this header; presumably they come from
 * the n-way headers -- confirm. The constant arguments (0, 1) are forwarded
 * unchanged; their meaning is defined by those helpers.
 *
 *   spe_cache_rd(ea)      lookup + transfer + wait, going by the helper name
 *   spe_cache_tr(ea)      lookup + transfer, without the wait
 *   spe_cache_lr(ea)      lookup only
 *   spe_cache_wait(entry) wait on a previously obtained entry
 */
#define spe_cache_rd(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1)
#define spe_cache_tr(ea) _spe_cache_lookup_xfer_(ea, 0, 1)
#define spe_cache_lr(ea) _spe_cache_lookup_(ea, 0)
#define spe_cache_wait(entry) _spe_cache_wait_(entry)
#endif

View File

@ -0,0 +1,245 @@
/* @(#)17 1.4 src/include/cbe_mfc.h, sw.includes, sdk_pub 10/11/05 16:00:25 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _CBEA_MFC_H_
#define _CBEA_MFC_H_
/* This header file contains various definitions related to the Memory Flow
* Controller (MFC) portion of the Cell Broadband Engine Architecture (CBEA).
*/
/**************************************/
/* MFC DMA Command Opcode Definitions */
/**************************************/
/****************************************************************************/
/* MFC DMA Command flags which identify classes of operations. */
/****************************************************************************/
/* Note: These flags may be used in conjunction with the base command types
 * (i.e. MFC_PUT_CMD, MFC_PUTR_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD)
 * to construct the various command permutations.
 *
 * Each flag is a single bit that is OR-ed into a base opcode. The named
 * opcodes defined further down in this header follow that scheme, e.g.
 *   MFC_PUT_CMD (0x20) | MFC_BARRIER_ENABLE (0x01) == MFC_PUTB_CMD (0x21)
 *   MFC_PUT_CMD (0x20) | MFC_FENCE_ENABLE   (0x02) == MFC_PUTF_CMD (0x22)
 *   MFC_PUT_CMD (0x20) | MFC_LIST_ENABLE    (0x04) == MFC_PUTL_CMD (0x24)
 *   MFC_PUT_CMD (0x20) | MFC_START_ENABLE   (0x08) == MFC_PUTS_CMD (0x28)
 *   MFC_PUT_CMD (0x20) | MFC_RESULT_ENABLE  (0x10) == MFC_PUTR_CMD (0x30)
 */
#define MFC_BARRIER_ENABLE 0x01
#define MFC_FENCE_ENABLE 0x02
#define MFC_LIST_ENABLE 0x04 /* SPU Only */
#define MFC_START_ENABLE 0x08 /* proxy Only */
#define MFC_RESULT_ENABLE 0x10
/****************************************************************************/
/* MFC DMA Put Commands */
/****************************************************************************/
#define MFC_PUT_CMD 0x20
#define MFC_PUTS_CMD 0x28 /* proxy Only */
#define MFC_PUTR_CMD 0x30
#define MFC_PUTF_CMD 0x22
#define MFC_PUTB_CMD 0x21
#define MFC_PUTFS_CMD 0x2A /* proxy Only */
#define MFC_PUTBS_CMD 0x29 /* proxy Only */
#define MFC_PUTRF_CMD 0x32
#define MFC_PUTRB_CMD 0x31
#define MFC_PUTL_CMD 0x24 /* SPU Only */
#define MFC_PUTRL_CMD 0x34 /* SPU Only */
#define MFC_PUTLF_CMD 0x26 /* SPU Only */
#define MFC_PUTLB_CMD 0x25 /* SPU Only */
#define MFC_PUTRLF_CMD 0x36 /* SPU Only */
#define MFC_PUTRLB_CMD 0x35 /* SPU Only */
/****************************************************************************/
/* MFC DMA Get Commands */
/****************************************************************************/
#define MFC_GET_CMD 0x40
#define MFC_GETS_CMD 0x48 /* proxy Only */
#define MFC_GETF_CMD 0x42
#define MFC_GETB_CMD 0x41
#define MFC_GETFS_CMD 0x4A /* proxy Only */
#define MFC_GETBS_CMD 0x49 /* proxy Only */
#define MFC_GETL_CMD 0x44 /* SPU Only */
#define MFC_GETLF_CMD 0x46 /* SPU Only */
#define MFC_GETLB_CMD 0x45 /* SPU Only */
/****************************************************************************/
/* MFC DMA Storage Control Commands */
/****************************************************************************/
/* Note: These are only supported on implementations with a SL1 cache
* They are no-ops on the initial (CBE) implementation.
*/
#define MFC_SDCRT_CMD 0x80
#define MFC_SDCRTST_CMD 0x81
#define MFC_SDCRZ_CMD 0x89
#define MFC_SDCRS_CMD 0x8D
#define MFC_SDCRF_CMD 0x8F
/****************************************************************************/
/* MFC Synchronization Commands */
/****************************************************************************/
#define MFC_GETLLAR_CMD 0xD0 /* SPU Only */
#define MFC_PUTLLC_CMD 0xB4 /* SPU Only */
#define MFC_PUTLLUC_CMD 0xB0 /* SPU Only */
#define MFC_PUTQLLUC_CMD 0xB8 /* SPU Only */
#define MFC_SNDSIG_CMD 0xA0
#define MFC_SNDSIGB_CMD 0xA1
#define MFC_SNDSIGF_CMD 0xA2
#define MFC_BARRIER_CMD 0xC0
#define MFC_EIEIO_CMD 0xC8
#define MFC_SYNC_CMD 0xCC
/****************************************************************************/
/* Definitions for constructing a 32-bit command word including the transfer
 * and replacement class id and the command opcode.
 *
 * Layout produced by MFC_CMD_WORD:
 *   transfer class id    shifted left by 24
 *   replacement class id shifted left by 16
 *   command opcode       in the low bits (OR-ed in unshifted)
 *
 * NOTE(review): the shifts are performed at the argument's own type, so an
 * int _tid of 0x80 or more shifts into the sign bit -- confirm callers only
 * pass small or unsigned class ids.
 */
/****************************************************************************/
#define MFC_TCLASS(_tid) ((_tid) << 24)
#define MFC_RCLASS(_rid) ((_rid) << 16)
#define MFC_CMD_WORD(_tid, _rid, _cmd) (MFC_TCLASS(_tid) | MFC_RCLASS(_rid) | (_cmd))
/****************************************************************************/
/* Definitions for constructing a 64-bit command word including the size, tag,
 * transfer and replacement class id and the command opcode.
 *
 * Layout produced by MFC_CMD_DWORD (every field is widened to
 * unsigned long long before shifting):
 *   size  shifted left by 48
 *   tag   shifted left by 32
 *   trcmd in the low bits; presumably a 32-bit word built with MFC_CMD_WORD
 */
/****************************************************************************/
#define MFC_SIZE(_size) ((unsigned long long)(_size) << 48)
#define MFC_TAG(_tag_id) ((unsigned long long)(_tag_id) << 32)
#define MFC_TR_CMD(_trcmd) ((unsigned long long)(_trcmd))
#define MFC_CMD_DWORD(_size, _tag_id, _trcmd) (MFC_SIZE(_size) | MFC_TAG(_tag_id) | MFC_TR_CMD(_trcmd))
/****************************************************************************/
/* Mask definitions for obtaining DMA commands and class ids from packed words.
*/
/****************************************************************************/
#define MFC_CMD_MASK 0x0000FFFF
#define MFC_CLASS_MASK 0x000000FF
/****************************************************************************/
/* DMA max/min size definitions. */
/****************************************************************************/
#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */
#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */
#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT)
#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT)
#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1)
#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1)
#define MFC_MIN_DMA_LIST_SIZE 0x0008 /* 8 bytes */
#define MFC_MAX_DMA_LIST_SIZE 0x4000 /* 16K bytes */
/****************************************************************************/
/* Mask definition for checking proper address alignment. */
/****************************************************************************/
#define MFC_ADDR_MATCH_MASK 0xF
#define MFC_BEST_ADDR_ALIGNMENT 0x80
/****************************************************************************/
/* Definitions related to the Proxy DMA Command Status register (DMA_CMDStatus).
*/
/****************************************************************************/
#define MFC_PROXY_DMA_CMD_ENQUEUE_SUCCESSFUL 0x00
#define MFC_PROXY_DMA_CMD_SEQUENCE_ERROR 0x01
#define MFC_PROXY_DMA_QUEUE_FULL 0x02
/****************************************************************************/
/* Definitions related to the DMA Queue Status register (DMA_QStatus). */
/****************************************************************************/
#define MFC_PROXY_MAX_QUEUE_SPACE 0x08
#define MFC_PROXY_DMA_Q_EMPTY 0x80000000
#define MFC_PROXY_DMA_Q_FREE_SPACE_MASK 0x0000FFFF
#define MFC_SPU_MAX_QUEUE_SPACE 0x10
/****************************************************************************/
/* Definitions related to the Proxy Tag-Group Query-Type register
* (Prxy_QueryType).
*/
/****************************************************************************/
#define MFC_PROXY_DMA_QUERYTYPE_ANY 0x1
#define MFC_PROXY_DMA_QUERYTYPE_ALL 0x2
/****************************************************************************/
/* Definitions related to the Proxy Tag-Group Query-Mask (Prxy_QueryMask)
 * and PU Tag Status (DMA_TagStatus) registers.
 *
 * MFC_TAGID_TO_TAGMASK(tag_id) converts a tag id into its single-bit mask.
 *
 * NOTE: Only the bottom 5 bits of the tag id value passed are used, to ensure
 *       that a valid tag id (0..31) is used. The argument is parenthesized
 *       so that a compound expression such as (a | b) is masked as a whole
 *       rather than only its right-most operand.
 *
 * NOTE(review): the result has type int, so tag id 31 shifts into the sign
 *       bit. Callers presumably store the mask in an unsigned 32-bit value;
 *       confirm before changing the literal to an unsigned one.
 */
/****************************************************************************/
#define MFC_TAGID_TO_TAGMASK(tag_id) (1 << ((tag_id) & 0x1F))
/****************************************************************************/
/* Definitions related to the Mailbox Status register (SPU_Mbox_Stat) and the
* depths of the outbound Mailbox Register (SPU_OutMbox), the outbound
* interrupting Mailbox Register (SPU_OutIntrMbox), and the inbound Mailbox
* Register (SPU_In_Mbox).
*/
/****************************************************************************/
#define MFC_SPU_OUT_MBOX_COUNT_STATUS_MASK 0x000000FF
#define MFC_SPU_OUT_MBOX_COUNT_STATUS_SHIFT 0x0
#define MFC_SPU_IN_MBOX_COUNT_STATUS_MASK 0x0000FF00
#define MFC_SPU_IN_MBOX_COUNT_STATUS_SHIFT 0x8
#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_MASK 0x00FF0000
#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_SHIFT 0x10
/****************************************************************************/
/* Definitions related to the SPC Multi Source Synchronization register
 * (MFC_MSSync).
 *
 * MFC_SPC_MSS_STATUS_MASK selects the single status bit; the two values
 * below are the possible states of that bit.
 */
/****************************************************************************/
#define MFC_SPC_MSS_STATUS_MASK 0x1
#define MFC_SPC_MSS_COMPLETE 0x0
#define MFC_SPC_MSS_NOT_COMPLETE 0x1
/*******************************************
* Channel Defines
*******************************************/
/* Events Defines for channels:
* 0 (SPU_RdEventStat),
* 1 (SPU_WrEventMask), and
* 2 (SPU_WrEventAck).
*/
#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001
#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002
#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008
#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010
#define MFC_DECREMENTER_EVENT 0x00000020
#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040
#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080
#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100
#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200
#define MFC_LLR_LOST_EVENT 0x00000400
#define MFC_PRIV_ATTN_EVENT 0x00000800
#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000
/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate)
*/
#define MFC_TAG_UPDATE_IMMEDIATE 0x0
#define MFC_TAG_UPDATE_ANY 0x1
#define MFC_TAG_UPDATE_ALL 0x2
/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat)
*/
#define MFC_PUTLLC_STATUS 0x00000001
#define MFC_PUTLLUC_STATUS 0x00000002
#define MFC_GETLLAR_STATUS 0x00000004
#endif /* _CBEA_MFC_H_ */

View File

@ -0,0 +1,149 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* spe_cache_defs.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal definitions for software managed cache.
*/
#ifndef __SPE_CACHE_DEFS_H__
#define __SPE_CACHE_DEFS_H__
/**
 ** Definitions for the number of cache sets.
 ** Default is 64 sets.
 **
 ** Maps a compile-time SPE_CACHE_NSETS (a power of two from 2 to 1024) to
 ** its log2, SPE_CACHE_NSETS_SHIFT. Any other value falls through to the
 ** #else branch, where SPE_CACHE_NSETS is silently reset to the default
 ** 64 (shift 6). That includes leaving SPE_CACHE_NSETS undefined, which
 ** the preprocessor evaluates as 0.
 */
#if (SPE_CACHE_NSETS==1024)
#define SPE_CACHE_NSETS_SHIFT 10
#elif (SPE_CACHE_NSETS==512)
#define SPE_CACHE_NSETS_SHIFT 9
#elif (SPE_CACHE_NSETS==256)
#define SPE_CACHE_NSETS_SHIFT 8
#elif (SPE_CACHE_NSETS==128)
#define SPE_CACHE_NSETS_SHIFT 7
#elif (SPE_CACHE_NSETS==64)
#define SPE_CACHE_NSETS_SHIFT 6
#elif (SPE_CACHE_NSETS==32)
#define SPE_CACHE_NSETS_SHIFT 5
#elif (SPE_CACHE_NSETS==16)
#define SPE_CACHE_NSETS_SHIFT 4
#elif (SPE_CACHE_NSETS==8)
#define SPE_CACHE_NSETS_SHIFT 3
#elif (SPE_CACHE_NSETS==4)
#define SPE_CACHE_NSETS_SHIFT 2
#elif (SPE_CACHE_NSETS==2)
#define SPE_CACHE_NSETS_SHIFT 1
#else
#undef SPE_CACHE_NSETS
#define SPE_CACHE_NSETS 64
#define SPE_CACHE_NSETS_SHIFT 6
#endif
/**
 ** Definitions for the cacheline size (bytes).
 ** Default is 128 bytes.
 **
 ** Maps a compile-time SPE_CACHELINE_SIZE (32, 64, 128, 256 or 512) to its
 ** log2, SPE_CACHELINE_SHIFT. Any other value, or leaving the macro
 ** undefined, is silently replaced by the default 128 (shift 7).
 */
#if (SPE_CACHELINE_SIZE==512)
#define SPE_CACHELINE_SHIFT 9
#elif (SPE_CACHELINE_SIZE==256)
#define SPE_CACHELINE_SHIFT 8
#elif (SPE_CACHELINE_SIZE==128)
#define SPE_CACHELINE_SHIFT 7
#elif (SPE_CACHELINE_SIZE==64)
#define SPE_CACHELINE_SHIFT 6
#elif (SPE_CACHELINE_SIZE==32)
#define SPE_CACHELINE_SHIFT 5
#else
#undef SPE_CACHELINE_SIZE
#define SPE_CACHELINE_SIZE 128
#define SPE_CACHELINE_SHIFT 7
#endif
/**
 ** Definitions derived from the above settings.
 **
 ** Both SPE_CACHE_NSETS and SPE_CACHELINE_SIZE are powers of two after the
 ** selection chains above, so "value - 1" yields a mask of the low bits.
 */
#define SPE_CACHE_NSETS_MASK (SPE_CACHE_NSETS - 1)
#define SPE_CACHELINE_MASK (SPE_CACHELINE_SIZE - 1)
/**
 ** Definitions for managing cacheline state.
 **
 ** DIRTY and LOCKED are independent single-bit flags;
 ** SPE_CACHELINE_STATE_MASK (0x3) covers both of them.
 */
#define SPE_CACHELINE_DIRTY 0x1
#define SPE_CACHELINE_LOCKED 0x2
#define SPE_CACHELINE_STATE_MASK (SPE_CACHELINE_DIRTY | SPE_CACHELINE_LOCKED)
/**
 ** Branch-prediction hints.
 **
 ** likely(c) / unlikely(c) evaluate to the condition itself. When not
 ** building with XLC they additionally pass the expected outcome to the
 ** compiler via __builtin_expect. Both macros are defined together and only
 ** when `likely` is not already defined, so an existing definition supplied
 ** by the includer is left alone.
 */
#ifdef _XLC
/**
 * FIXME: For now disable manual branch hints
 * on XLC due to performance degradation.
 */
#ifndef likely
#define likely(_c) (_c)
#define unlikely(_c) (_c)
#endif
#else /* !_XLC */
#ifndef likely
#define likely(_c) __builtin_expect((_c), 1)
#define unlikely(_c) __builtin_expect((_c), 0)
#endif
#endif
/**
 ** Debug controls. Set -DNDEBUG to
 ** disable both panic and assert.
 **
 ** _spe_cache_panic_(c)  always maps to assert(c).
 ** _spe_cache_assert_(c) maps to assert(c) only when SPE_CACHE_DBG is
 **                       defined; otherwise it expands to nothing, so its
 **                       argument is not evaluated.
 */
#include <assert.h>
#define _spe_cache_panic_(c) assert(c)
#ifdef SPE_CACHE_DBG
#define _spe_cache_assert_(c) assert(c)
#else
#define _spe_cache_assert_(c) /* No-op. */
#endif
/*
 * Byte offset of an effective address within its cacheline: the low
 * SPE_CACHELINE_SHIFT bits of the address. The _x4 variant applies the same
 * mask through the spu_and intrinsic, presumably to a vector of four
 * addresses at once -- confirm against its callers.
 */
#define _spe_cacheline_byte_offset_(ea) \
((ea) & SPE_CACHELINE_MASK)
#define _spe_cacheline_byte_offset_x4(ea) \
spu_and ((ea), SPE_CACHELINE_MASK)
#endif
static __inline vector unsigned int _load_vec_uint4(unsigned int ui1, unsigned int ui2, unsigned int ui3, unsigned int ui4)
{
vector unsigned int result;
vector unsigned int iv1, iv2, iv3, iv4;
vector unsigned char shuffle = VEC_LITERAL(vector unsigned char,
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
iv1 = spu_promote(ui1, 0);
iv2 = spu_promote(ui2, 0);
iv3 = spu_promote(ui3, 0);
iv4 = spu_promote(ui4, 0);
result = spu_or(spu_shuffle(iv1, iv2, shuffle), spu_shuffle(iv3, iv4, spu_rlqwbyte(shuffle, 8)));
return (result);
}
/**
 * _pack_vec_uint4
 *	Gather element 0 of each of the four input vectors into a single
 *	vector, in argument order.
 *
 * Uses the same two-shuffle-and-OR scheme as _load_vec_uint4: ui1/ui2
 * land in words 0/1, ui3/ui4 in words 2/3.
 */
static __inline vector unsigned int _pack_vec_uint4(vector unsigned int ui1, vector unsigned int ui2, vector unsigned int ui3, vector unsigned int ui4)
{
    vector unsigned char front_pattern = VEC_LITERAL(vector unsigned char,
        0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
    vector unsigned int front_half = spu_shuffle(ui1, ui2, front_pattern);
    vector unsigned int back_half  = spu_shuffle(ui3, ui4, spu_rlqwbyte(front_pattern, 8));

    return spu_or(front_half, back_half);
}

View File

@ -0,0 +1,40 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* dma.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal DMA utilities for software
* managed cache.
*/
#ifndef __SPE_CACHE_DMA_H__
#define __SPE_CACHE_DMA_H__
/* Combined shift of line size and associativity. Not referenced by any
 * other macro in this header.
 */
#define SPE_CACHE_TAGID_SHIFT (SPE_CACHELINE_SHIFT + SPE_CACHE_NWAY_SHIFT)
/* Per-line DMA tag: the argument is ignored and tag 16 is always used.
 * NOTE(review): this differs from the per-set default below
 * ((set) & 0x1f) unless SPE_CACHE_SET_TAGID is overridden to 16 as well.
 */
#define _SPE_CACHELINE_TAGID(_ptr) (16)
#define _SPE_CACHELINE_TAGMASK(_ptr) (1 << 16)
/* Convenience forms taking a byte offset into spe_cache_mem. */
#define SPE_CACHELINE_TAGID(_line) \
_SPE_CACHELINE_TAGID(&spe_cache_mem[_line])
#define SPE_CACHELINE_TAGMASK(_line) \
_SPE_CACHELINE_TAGMASK(&spe_cache_mem[_line])
/* Per-set DMA tag. May be pre-defined by the including file; the default
 * folds the set number into the range 0..31.
 */
#ifndef SPE_CACHE_SET_TAGID
#define SPE_CACHE_SET_TAGID(set) ((set) & 0x1f)
#endif
#define SPE_CACHE_SET_TAGMASK(set) (1 << SPE_CACHE_SET_TAGID(set))
/* MFC commands used for write-back and line fill. */
#define SPE_CACHE_PUT MFC_PUTF_CMD
#define SPE_CACHE_GET MFC_GET_CMD
#endif

View File

@ -0,0 +1,35 @@
/* @(#)12 1.5 src/lib/math/ilog2.h, sw.lib, sdk_pub 10/11/05 15:35:56 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _ILOG2_H_
#define _ILOG2_H_ 1
/*
* FUNCTION
* signed int _ilog2(signed int x)
*
* DESCRIPTION
* _ilog2 computes ceiling of log (base 2) of the input value x.
* The input value, x, must be a non-zero positive value.
*/
/* Returns ceil(log2(x)) for x >= 1, i.e. the number of significant bits
 * in (x - 1). On the SPU this is 32 minus the leading-zero count of
 * (x - 1); elsewhere (x - 1) is shifted right until it reaches zero.
 */
static __inline signed int _ilog2(signed int x)
{
#ifdef __SPU__
    return (32 - spu_extract(spu_cntlz(spu_promote(x - 1, 0)), 0));
#else
    signed int bits = 0;
    signed int remaining = x - 1;

    while (remaining > 0) {
        bits++;
        remaining >>= 1;
    }
    return bits;
#endif
}
#endif /* _ILOG2_H_ */

View File

@ -0,0 +1,68 @@
/* @(#)85 1.4 src/lib/c/memset.h, sw.lib, sdk_pub 10/13/05 10:17:09 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* Include guard: this header defines a static inline function, so a
 * second inclusion in the same translation unit would otherwise be a
 * redefinition error.
 */
#ifndef _MEMSET_H_
#define _MEMSET_H_ 1
#include <spu_intrinsics.h>
#include <stddef.h>
/* Fills the first n bytes of the memory area pointed to by s
 * with the constant byte c. Returns a pointer to the memory area s.
 *
 * The fill is done in whole quadwords (16 bytes); partial quadwords at
 * the start and end are merged with the existing contents via spu_sel
 * so bytes outside [s, s+n) keep their value.
 *
 * NOTE(review): n is narrowed to int, and for an unaligned s the code
 * dereferences the unaligned vector pointer, relying on the access
 * addressing the enclosing aligned quadword -- confirm for the target.
 */
static __inline void * _memset(void *s, int c, size_t n)
{
    int skip, cnt;
    vec_uchar16 *vs;
    vec_uchar16 vc, mask;

    vs = (vec_uchar16 *)(s);
    vc = spu_splats((unsigned char)c);
    cnt = (int)(n);

    /* Handle any leading partial quadword as well as very short
     * settings (i.e. such that the n characters all reside in a
     * single quadword).
     */
    skip = (int)(s) & 15;
    if (skip) {
        /* Select everything after the first 'skip' bytes. */
        mask = spu_rlmaskqwbyte((vec_uchar16)(-1), 0-skip);
        cnt -= 16 - skip;
        if (cnt < 0) {
            /* The fill ends inside this quadword: also drop the
             * trailing -cnt bytes from the selection.
             */
            mask = spu_and(mask, spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(-cnt)));
        }
        *vs = spu_sel(*vs, vc, mask);
        vs++;
    }

    /* Handle 8 quadwords (128 bytes) at a time. */
    while (cnt > 127) {
        vs[0] = vc;
        vs[1] = vc;
        vs[2] = vc;
        vs[3] = vc;
        vs[4] = vc;
        vs[5] = vc;
        vs[6] = vc;
        vs[7] = vc;
        vs += 8;
        cnt -= 8*16;
    }

    /* Finish all remaining complete quadwords. */
    while (cnt > 15) {
        *vs++ = vc;
        cnt -= 16;
    }

    /* Handle any trailing partial quadword: select its first cnt bytes. */
    if (cnt > 0) {
        mask = spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(16-cnt));
        *vs = spu_sel(*vs, vc, mask);
    }
    return (s);
}
#endif /* _MEMSET_H_ */

View File

@ -0,0 +1,194 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-lookup.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal lookup operations for software
* managed cache.
*
* See nway-opt.h for "optimized" nway
* lookup operations.
*/
#ifndef __SPE_CACHE_NWAY_LOOKUP_H_
#define __SPE_CACHE_NWAY_LOOKUP_H_
/**
 * _decl_set_entries_
 *	Declare a local vector 'name' and load four directory entries of
 *	'set' into it, starting at column 'index', with one quadword load.
 *
 *	NOTE(review): no state bits are masked off here; the entries are
 *	used exactly as stored in spe_cache_dir.
 *
 * _spe_cache_4_way_lookup_
 *	Compare all four entries of 'set' against 'ea' and return the
 *	spu_gather() of the comparison, i.e. one result bit per way in
 *	element 0. The directory-indexing code elsewhere in this cache
 *	addresses a way as SPE_CACHE_NWAY_MASK - idx, so directory column
 *	0 corresponds to the highest of the four bits.
 *	Declares a local named e0123; 'ea' must not refer to a variable
 *	of that name.
 */
#define _decl_set_entries_(set, name, index) \
vec_uint4 name = *((vec_uint4 *) &spe_cache_dir[set][index])
#define _spe_cache_4_way_lookup_(set, ea) \
({ \
_decl_set_entries_(set, e0123, 0); \
spu_gather(spu_cmpeq(e0123, ea)); \
})
/**
 * _spe_cache_set_lookup_
 *	Compare 'ea' against all entries of
 *	a set, and return a result that is
 *	consistent with spu_gather().
 *
 *	Currently a plain alias for the 4-way lookup, the only
 *	associativity implemented in this header.
 */
#define _spe_cache_set_lookup_(set, ea) \
_spe_cache_4_way_lookup_(set, ea)
/**
 * _spe_cache_nway_lookup_x4
 *	Declare local variables and lookup four addresses
 *	in the n-way set associative cache. Upon return,
 *	'idx_x4' contains the matching elements in the sets,
 *	or -1 if not found.
 *
 *	ea_x4   four effective addresses (vector unsigned int); expanded
 *	        several times, so it should be side-effect free.
 *	set_x4  lvalue receiving the four set numbers
 *	        (from _spe_cache_set_num_x4).
 *	idx_x4  lvalue (vector signed int) receiving the four way indices.
 *
 *	Steps: align the addresses to line boundaries, splat each aligned
 *	address across its own vector (splat0..splat3 replicate words
 *	0..3), run one set lookup per address, pack element 0 of the four
 *	lookup results into one vector and convert the result bits to way
 *	indices.
 */
#define _spe_cache_nway_lookup_x4(ea_x4, set_x4, idx_x4) \
({ \
vector unsigned int ea_aligned_x4 = spu_and ((ea_x4), ~SPE_CACHELINE_MASK); \
vector unsigned char splat0 = VEC_LITERAL(vector unsigned char, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03, \
0x00, 0x01, 0x02, 0x03); \
vector unsigned char splat1 = VEC_LITERAL(vector unsigned char, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07, \
0x04, 0x05, 0x06, 0x07); \
vector unsigned char splat2 = VEC_LITERAL(vector unsigned char, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b, \
0x08, 0x09, 0x0a, 0x0b); \
vector unsigned char splat3 = VEC_LITERAL(vector unsigned char, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f, \
0x0c, 0x0d, 0x0e, 0x0f); \
vec_uint4 ea_aligned0 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat0); \
vec_uint4 ea_aligned1 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat1); \
vec_uint4 ea_aligned2 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat2); \
vec_uint4 ea_aligned3 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat3); \
vec_uint4 found0, found1, found2, found3; \
vec_uint4 found_x4; \
(set_x4) = _spe_cache_set_num_x4(ea_x4); \
found0 = _spe_cache_set_lookup_(spu_extract (set_x4, 0), ea_aligned0); \
found1 = _spe_cache_set_lookup_(spu_extract (set_x4, 1), ea_aligned1); \
found2 = _spe_cache_set_lookup_(spu_extract (set_x4, 2), ea_aligned2); \
found3 = _spe_cache_set_lookup_(spu_extract (set_x4, 3), ea_aligned3); \
found_x4 = _pack_vec_uint4 (found0, found1, found2, found3); \
(idx_x4) = (vector signed int)_spe_cache_idx_num_x4(found_x4); \
})
/**
 * _spe_cache_nway_lookup_
 *	Scalar lookup of one effective address.
 *
 *	ea   effective address; expanded more than once.
 *	set  lvalue receiving the set number (_spe_cache_set_num_).
 *	idx  lvalue receiving the way index produced by
 *	     _spe_cache_idx_num_; the callers in this cache treat a
 *	     negative value as "not found".
 *
 *	Declares locals ea_aligned, ea_aligned4 and found inside its own
 *	block; the arguments must not use those names.
 */
#define _spe_cache_nway_lookup_(ea, set, idx) \
({ \
unsigned int ea_aligned = (ea) & ~SPE_CACHELINE_MASK; \
vec_uint4 ea_aligned4 = spu_splats(ea_aligned); \
vec_uint4 found; \
(set) = _spe_cache_set_num_(ea); \
found = _spe_cache_set_lookup_(set, ea_aligned4); \
(idx) = _spe_cache_idx_num_(found); \
})
/**
 * _spe_cache_lookup_
 *	Lookup and return the LSA of an EA
 *	that is known to be in the cache.
 *
 *	No miss handling is done: if the EA is not cached, idx is
 *	negative and the returned address is meaningless.
 *	'is_write' is accepted but not used.
 *	Evaluates to a void * into spe_cache_mem.
 */
#define _spe_cache_lookup_(ea, is_write) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
 * _spe_cache_wait_
 *	Wait for transfer of a cache line
 *	to complete.
 *
 *	Writes the line's tag mask to SPU channel 22 and then blocks in
 *	spu_mfcstat(MFC_TAG_UPDATE_ALL).
 *	NOTE(review): channel 22 is presumably the MFC tag-group query
 *	mask (MFC_WrTagMask) -- confirm against the channel map.
 *	NOTE(review): _SPE_CACHELINE_TAGMASK ignores its argument and
 *	always selects tag 16, whereas line fills are issued with
 *	SPE_CACHE_SET_TAGID(set); the two only agree when that macro is
 *	overridden to 16.
 */
#define _spe_cache_wait_(_lsa) \
({ \
spu_writech(22, _SPE_CACHELINE_TAGMASK(_lsa)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
})
/**
 * _spe_cache_lookup_wait_
 *	Lookup and return the LSA of an EA
 *	that is known to be in the cache,
 *	and guarantee that its transfer is
 *	complete.
 *
 *	Same as _spe_cache_lookup_, followed by a wait on the tag group
 *	of the EA's set (SPE_CACHE_SET_TAGMASK). No miss handling;
 *	'is_write' is accepted but not used.
 */
#define _spe_cache_lookup_wait_(ea, is_write) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
 * _spe_cache_lookup_xfer_
 *	Lookup and return the LSA of an EA, where
 *	the line may either be in the cache or not.
 *	If not, initiate transfer but do not wait
 *	for completion.
 *
 *	On a miss, _spe_cache_miss_ is called with avail == -1 (every way
 *	may be replaced). The returned pointer must not be dereferenced
 *	until the set's tag group has completed.
 *	'is_write' and 'rb' are accepted but not used.
 */
#define _spe_cache_lookup_xfer_(ea, is_write, rb) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
} \
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
/**
 * _spe_cache_lookup_xfer_wait_
 *	Lookup and return the LSA of an EA, where
 *	the line may either be in the cache or not.
 *	If not, initiate transfer and guarantee
 *	completion.
 *
 *	The wait on the set's tag group is only performed on the miss
 *	path; a hit returns immediately.
 *	'is_write' and 'rb' are accepted but not used.
 */
#define _spe_cache_lookup_xfer_wait_(ea, is_write, rb) \
({ \
int set, idx, line, byte; \
_spe_cache_nway_lookup_(ea, set, idx); \
\
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
} \
line = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
(void *) &spe_cache_mem[line + byte]; \
})
#endif

View File

@ -0,0 +1,51 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-miss.h
*
* Copyright (C) 2005 IBM Corp.
*
* Internal handler for cache misses.
*/
#ifndef __SPE_CACHE_NWAY_MISS_H__
#define __SPE_CACHE_NWAY_MISS_H__
/**
 * _spe_cache_miss_
 *	Bring the line containing 'ea' into 'set' and return its way index.
 *
 *	ea     effective address that missed (any byte within the line).
 *	set    set number the address maps to.
 *	avail  bit mask of ways that may be replaced (-1 allows all).
 *
 *	The DMA get is only started, not waited for; it is issued with tag
 *	SPE_CACHE_SET_TAGID(set). The replaced line is not written back.
 */
static int _spe_cache_miss_(unsigned int ea, int set, int avail)
{
    unsigned int line_ea = ea & ~SPE_CACHELINE_MASK;
    vec_uint4 present = _spe_cache_set_lookup_(set, line_ea);
    vec_uint4 victim;
    int way, mem_offset;

    /* The line may already have been allocated in this set, e.g. when
     * several lookups are resolved together; reuse that way if so.
     */
    if (unlikely(spu_extract(present, 0) != 0)) {
        return _spe_cache_idx_num_(present);
    }

    /* Choose a way to replace among those still available. */
    victim = _spe_cache_replace_(set, avail);
    way = _spe_cache_idx_num_(victim);
    mem_offset = _spe_cacheline_num_(set, way);

    /* Start the line fill, then record the new owner in the directory. */
    spu_mfcdma32(&spe_cache_mem[mem_offset], line_ea, SPE_CACHELINE_SIZE,
                 SPE_CACHE_SET_TAGID(set), SPE_CACHE_GET);
    spe_cache_dir[set][SPE_CACHE_NWAY_MASK - way] = line_ea;

    return way;
}
#endif

View File

@ -0,0 +1,153 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-opt.h
*
* Copyright (C) 2006 IBM Corp.
*
* "Optimized" lookup operations for n-way set associative
* software managed cache.
*/
#include <spu_intrinsics.h>
#ifndef __SPE_CACHE_NWAY_OPT_H_
#define __SPE_CACHE_NWAY_OPT_H_
/* __spe_cache_rd
 *	Look up and return data from the cache. If the data
 *	is not currently in cache then transfer it from main
 *	storage.
 *
 *	This code uses a conditional branch to the cache miss
 *	handler in the event that the requested data is not
 *	in the cache. A branch hint is used to avoid paying
 *	the branch stall penalty.
 *
 *	type  C type of the element to read; the cached bytes at 'ea' are
 *	      reinterpreted as this type.
 *	ea    effective address; expanded several times, so it should be
 *	      side-effect free and must not refer to the local names
 *	      declared below (set, idx, lnum, byte, ret).
 *
 *	On a miss the macro waits for the set's tag group before reading.
 *	Evaluates to the value read.
 */
#define __spe_cache_rd(type, ea) \
({ \
int set, idx, lnum, byte; \
type ret; \
_spe_cache_nway_lookup_(ea, set, idx); \
if (unlikely(idx < 0)) { \
idx = _spe_cache_miss_(ea, set, -1); \
spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \
spu_mfcstat(MFC_TAG_UPDATE_ALL); \
} \
lnum = _spe_cacheline_num_(set, idx); \
byte = _spe_cacheline_byte_offset_(ea); \
ret = *((type *) (&spe_cache_mem[lnum + byte])); \
ret; \
})
/**
 * __spe_cache_rd_x4
 *	Fetch four data elements from the cache.
 *
 *	This code uses one conditional branch in
 *	the event that any of the four elements
 *	are missing.
 *
 *	On a miss, light weight locking is used to
 *	avoid casting out entries that were found.
 *	Further, we wait just once for the transfers,
 *	allowing for parallel [rather than serial]
 *	transfers.
 *
 *	type   C type of each element; every element is read through a
 *	       (type *) and stored into an unsigned int lane.
 *	ea_x4  four effective addresses (vector unsigned int); expanded
 *	       several times, so it should be side-effect free.
 *
 *	Evaluates to a vector unsigned int holding the four values.
 *
 *	The fast path loads all four lanes speculatively before checking
 *	for misses. NOTE(review): for a lane that missed, the speculative
 *	index is derived from idx == -1 and can lie outside spe_cache_mem;
 *	the value is discarded and re-read on the miss path.
 *
 *	The 'avail' mask is a single way mask shared by all four lanes
 *	even when they map to different sets; this only narrows the
 *	replacement choice.
 *
 *	The single wait covers the tag groups of all four sets. (The
 *	previous code referenced an undeclared variable 'set' here.)
 */
#define __spe_cache_rd_x4(type, ea_x4) \
({ \
    vector unsigned int missing; \
    unsigned int ms; \
    vector unsigned int cindex; \
    unsigned int d0, d1, d2, d3; \
    vector unsigned int s_x4; \
    vector signed int i_x4; \
    vector unsigned int ibyte, iline; \
    vector unsigned int ret; \
    unsigned int idx0, idx1, idx2, idx3; \
    \
    _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4); \
    missing = spu_rlmask ((vector unsigned int)i_x4, -8); \
    ms = spu_extract (spu_gather (missing), 0); \
    \
    ibyte = _spe_cacheline_byte_offset_x4(ea_x4); \
    \
    iline = _spe_cacheline_num_x4(s_x4, \
                                  (vector unsigned int)i_x4); \
    \
    cindex = spu_add (iline, ibyte); \
    \
    idx0 = spu_extract (cindex, 0); \
    idx1 = spu_extract (cindex, 1); \
    idx2 = spu_extract (cindex, 2); \
    idx3 = spu_extract (cindex, 3); \
    \
    d0 = *((type *) (&spe_cache_mem[idx0])); \
    d1 = *((type *) (&spe_cache_mem[idx1])); \
    d2 = *((type *) (&spe_cache_mem[idx2])); \
    d3 = *((type *) (&spe_cache_mem[idx3])); \
    \
    ret = _load_vec_uint4 (d0, d1, d2, d3); \
    \
    if (unlikely(ms)) { \
        int b0 = spu_extract (ibyte, 0); \
        int b1 = spu_extract (ibyte, 1); \
        int b2 = spu_extract (ibyte, 2); \
        int b3 = spu_extract (ibyte, 3); \
        int lnum0; \
        int lnum1; \
        int lnum2; \
        int lnum3; \
        int s0 = spu_extract (s_x4, 0); \
        int s1 = spu_extract (s_x4, 1); \
        int s2 = spu_extract (s_x4, 2); \
        int s3 = spu_extract (s_x4, 3); \
        int i0 = spu_extract (i_x4, 0); \
        int i1 = spu_extract (i_x4, 1); \
        int i2 = spu_extract (i_x4, 2); \
        int i3 = spu_extract (i_x4, 3); \
        unsigned int ea0 = spu_extract(ea_x4, 0); \
        unsigned int ea1 = spu_extract(ea_x4, 1); \
        unsigned int ea2 = spu_extract(ea_x4, 2); \
        unsigned int ea3 = spu_extract(ea_x4, 3); \
        int avail = -1; \
        \
        avail &= ~(((i0 < 0) ? 0 : (1 << i0)) | \
                   ((i1 < 0) ? 0 : (1 << i1)) | \
                   ((i2 < 0) ? 0 : (1 << i2)) | \
                   ((i3 < 0) ? 0 : (1 << i3))); \
        \
        i0 = _spe_cache_miss_(ea0, s0, avail); \
        avail &= ~(1 << i0); \
        i1 = _spe_cache_miss_(ea1, s1, avail); \
        avail &= ~(1 << i1); \
        i2 = _spe_cache_miss_(ea2, s2, avail); \
        avail &= ~(1 << i2); \
        i3 = _spe_cache_miss_(ea3, s3, avail); \
        \
        lnum0 = _spe_cacheline_num_(s0, i0); \
        lnum1 = _spe_cacheline_num_(s1, i1); \
        lnum2 = _spe_cacheline_num_(s2, i2); \
        lnum3 = _spe_cacheline_num_(s3, i3); \
        \
        spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) | \
                        SPE_CACHE_SET_TAGMASK(s1) | \
                        SPE_CACHE_SET_TAGMASK(s2) | \
                        SPE_CACHE_SET_TAGMASK(s3)); \
        spu_mfcstat(MFC_TAG_UPDATE_ALL); \
        \
        d0 = *((type *) (&spe_cache_mem[lnum0 + b0])); \
        d1 = *((type *) (&spe_cache_mem[lnum1 + b1])); \
        d2 = *((type *) (&spe_cache_mem[lnum2 + b2])); \
        d3 = *((type *) (&spe_cache_mem[lnum3 + b3])); \
        \
        ret = _load_vec_uint4 (d0, d1, d2, d3); \
    } \
    ret; \
})
#endif /* _SPE_CACHE_NWAY_OPT_H_ */

View File

@ -0,0 +1,38 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway-replace.h
*
* Copyright (C) 2005 IBM Corp.
*
* Implement replacement for software
* managed cache.
*/
#ifndef __SPE_CACHE_NWAY_REPLACE_H_
#define __SPE_CACHE_NWAY_REPLACE_H_
/* Per-set replacement cursor; only element 0 of each vector is read
 * and written by _spe_cache_replace_.
 */
static vec_uint4 spe_cache_replace_cntr[SPE_CACHE_NSETS+1];

/**
 * _spe_cache_replace_
 *	Pick the way(s) that may be replaced in 'set'.
 *
 *	set    set number; selects the cursor to use and advance.
 *	avail  bit mask of ways the caller allows to be replaced.
 *
 *	Returns, in element 0, a way bit mask: the single bit of the
 *	set's cursor when that way is allowed, otherwise the unmodified
 *	mask of allowed ways. The cursor advances by one (modulo the
 *	associativity) on every call, whether or not it was used.
 */
static inline vec_uint4 _spe_cache_replace_(int set, int avail)
{
    unsigned int cursor = spu_extract(spe_cache_replace_cntr[set], 0) & SPE_CACHE_NWAY_MASK;
    unsigned int cursor_bit = (1 << cursor);
    unsigned int candidates = avail & ((1 << SPE_CACHE_NWAY) - 1);
    unsigned int advanced = (cursor + 1) & SPE_CACHE_NWAY_MASK;

    spe_cache_replace_cntr[set] = (vec_uint4) spu_promote(advanced, 0);

    if (candidates & cursor_bit) {
        candidates = cursor_bit;
    }
    return (vec_uint4) spu_promote(candidates, 0);
}
#endif

View File

@ -0,0 +1,105 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* nway.h
*
* Copyright (C) 2005 IBM Corp.
*
* Support for n-way set associative software
* managed cache. The 4-way associative cache
* is the only interface exposed currently.
*/
#ifndef __SPE_CACHE_NWAY_H_
#define __SPE_CACHE_NWAY_H_
/**
** Defn's for n-way set associativity.
** Default is 4-way.
**
** SPE_CACHE_NWAY_SHIFT is log2(SPE_CACHE_NWAY). The lookup macros load
** one quadword of four directory entries per set, so these two values
** are not independently configurable.
** SPE_CACHE_NENTRIES is the total number of lines, SPE_CACHE_MEM_SIZE
** the number of bytes of line storage.
*/
#define SPE_CACHE_NWAY 4
#define SPE_CACHE_NWAY_SHIFT 2
#define SPE_CACHE_NWAY_MASK (SPE_CACHE_NWAY - 1)
#define SPE_CACHE_NENTRIES (SPE_CACHE_NWAY * SPE_CACHE_NSETS)
#define SPE_CACHE_MEM_SIZE (SPE_CACHE_NENTRIES * SPE_CACHELINE_SIZE)
/**
 * _spe_cache_set_num_
 *	Map an effective address to its set number: XOR the line number
 *	(ea >> SPE_CACHELINE_SHIFT) with the same address shifted two
 *	further bits, then keep the low SPE_CACHE_NSETS bits.
 *
 *	Declares locals ead, eadm and ret; 'ea' is expanded twice and must
 *	not refer to variables with those names.
 */
#define _spe_cache_set_num_(ea) \
({ \
unsigned int ead, eadm, ret; \
ead = ((ea) >> SPE_CACHELINE_SHIFT); \
eadm = ((ea) >> (SPE_CACHELINE_SHIFT+2)); \
ret = (ead ^ eadm) & SPE_CACHE_NSETS_MASK; \
ret; \
})
/**
 * _spe_cache_set_num_x4
 *	Vector form of _spe_cache_set_num_: map four effective addresses
 *	to their set numbers.
 *
 *	The hash must be identical to the scalar one (line number XOR the
 *	address shifted by SPE_CACHELINE_SHIFT+2); otherwise the scalar and
 *	x4 access paths would place the same line in different sets. The
 *	second shift previously used SPE_CACHELINE_SHIFT+1.
 *
 *	Declares locals tmp0 and tmp1; 'ea_x4' is expanded twice.
 */
#define _spe_cache_set_num_x4(ea_x4) \
({ \
    vector unsigned int tmp0; \
    vector unsigned int tmp1; \
    tmp0 = spu_rlmask (ea_x4, -SPE_CACHELINE_SHIFT); \
    tmp1 = spu_rlmask (ea_x4, -(SPE_CACHELINE_SHIFT+2)); \
    spu_and (spu_xor (tmp0, tmp1), SPE_CACHE_NSETS_MASK); \
})
/**
 * _spe_cache_idx_num_x4 / _spe_cache_idx_num_
 *	Convert lookup result bits into a way index: 31 minus the number
 *	of leading zeros, i.e. the position of the highest set bit.
 *	An all-zero input has 32 leading zeros; the callers in this cache
 *	interpret the resulting value as a negative "not found" index.
 *	The x4 form converts all four elements, the scalar form extracts
 *	element 0 of the result.
 */
#define _spe_cache_idx_num_x4(found) \
spu_sub((unsigned int) 31, spu_cntlz(found))
#define _spe_cache_idx_num_(found) \
spu_extract(spu_sub((unsigned int) 31, spu_cntlz(found)), 0)
/**
 * _spe_cacheline_num_ / _spe_cacheline_num_x4
 *	Byte offset into spe_cache_mem of the line held by way 'idx' of
 *	'set': (set * SPE_CACHE_NWAY + idx) * SPE_CACHELINE_SIZE, computed
 *	with shifts.
 *
 *	The scalar form parenthesizes its arguments so that expressions
 *	such as (a | b) or (i & 1) are not re-associated by << and +.
 */
#define _spe_cacheline_num_(set, idx) \
    ((((set) << SPE_CACHE_NWAY_SHIFT) + (idx)) << SPE_CACHELINE_SHIFT)
#define _spe_cacheline_num_x4(set, idx) \
    spu_sl (spu_add (spu_sl (set, SPE_CACHE_NWAY_SHIFT), idx), SPE_CACHELINE_SHIFT)
/**
 * Cache line state accessors.
 *	The state flags live in the low bits of the directory entry of the
 *	line. A way index 'idx' addresses directory column
 *	SPE_CACHE_NWAY_MASK - idx.
 *	The is_dirty/is_locked forms evaluate to the masked flag (zero or
 *	non-zero); lock/unlock expand to a compound assignment expression.
 *
 *	NOTE(review): the set lookup compares directory entries against
 *	the line-aligned address without masking these bits, so an entry
 *	with a state flag set would presumably no longer match -- confirm
 *	before using the lock/dirty flags.
 */
#define _spe_cacheline_is_dirty_(set, idx) \
(spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_DIRTY)
#define _spe_cacheline_is_locked_(set, idx) \
(spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_LOCKED)
#define _spe_lock_cacheline_(set, idx) \
spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] |= SPE_CACHELINE_LOCKED
#define _spe_unlock_cacheline_(set, idx) \
spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] &= ~SPE_CACHELINE_LOCKED
/**
 * spe_cache_dir
 *	This is the n-way set associative cache
 *	directory. Entries are either zero (unused)
 *	or non-zero (used).
 *
 *	State for one additional (dummy) set is
 *	allocated to improve efficiency of cache
 *	line locking.
 *
 *	Each row holds the line-aligned effective addresses of one set and
 *	is 16-byte aligned so that a whole row can be loaded as a single
 *	quadword by the lookup macros.
 *	Being 'static' in a header, every translation unit that includes
 *	this file gets its own private directory.
 *
 *	volatile seems not to be necessary here; the SCE toolchain
 *	guarantees a barrier after a DMA transfer.
 */
static unsigned int spe_cache_dir[SPE_CACHE_NSETS+1][SPE_CACHE_NWAY]
__attribute__ ((aligned(16)));
/**
 * spe_cache_mem
 *	A contiguous set of cachelines in LS memory,
 *	one line for each entry in the cache.
 *
 *	Indexed by the byte offsets produced by _spe_cacheline_num_.
 *	No storage is reserved for the dummy directory set.
 *
 *	volatile seems not to be necessary here; the SCE toolchain
 *	guarantees a barrier after a DMA transfer.
 */
static char spe_cache_mem[SPE_CACHE_MEM_SIZE]
__attribute__ ((aligned(128)));
#include "nway-lookup.h"
#include "nway-replace.h"
#include "nway-miss.h"
#include "nway-opt.h"
#endif

View File

@ -0,0 +1,32 @@
/* --------------------------------------------------------------- */
/* PLEASE DO NOT MODIFY THIS SECTION */
/* This prolog section is automatically generated. */
/* */
/* (C) Copyright 2001,2006, */
/* International Business Machines Corporation, */
/* */
/* All Rights Reserved. */
/* --------------------------------------------------------------- */
/* PROLOG END TAG zYx */
/* spe_cache.h
*
* Copyright (C) 2005 IBM Corp.
*
* Top level include file implementing
* software managed cache.
*/
#ifndef __SPE_CACHE_H__
#define __SPE_CACHE_H__ 1
#include "vec_literal.h"
#include "ilog2.h"
#include "memset.h"
//#include <cbe_mfc.h>
#include "defs.h"
#include "dma.h"
#include "nway.h"
#include "api.h"
#endif

View File

@ -0,0 +1,74 @@
/* @(#)86 1.3 src/include/vec_literal.h, sw.includes, sdk_pub 10/11/05 16:00:27 */
/* -------------------------------------------------------------- */
/* (C) Copyright 2001,2005, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment Incorporated, */
/* Toshiba Corporation. */
/* */
/* All Rights Reserved. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifndef _VEC_LITERAL_H_
#define _VEC_LITERAL_H_
/* This header file provides an abstraction for the various implementations
* of vector literal construction. The two formats are:
*
* 1) Altivec style, using parentheses
* 2) C-grammar-friendly style, using curly braces
*
* The macro VEC_LITERAL has been developed to provide some portability
* between these two styles. To achieve true portability, the user must
* specify all elements of the vector being initialized. A single element
* can be provided, but only the first element is guaranteed to be the same
* across both construction styles.
*
* The VEC_SPLAT_* macros have been provided for portability of vector literal
* construction when all the elements of the vector contain the same value.
*/
#ifdef __SPU__
#include <spu_intrinsics.h>
#endif
#ifdef __ALTIVEC_LITERAL_STYLE__
/* Use altivec style.
 * The element list is written in parentheses after a cast to the vector
 * type; the VEC_SPLAT_* forms pass a single value.
*/
#define VEC_LITERAL(_type, ...) ((_type)(__VA_ARGS__))
#define VEC_SPLAT_U8(_val) ((vector unsigned char)(_val))
#define VEC_SPLAT_S8(_val) ((vector signed char)(_val))
#define VEC_SPLAT_U16(_val) ((vector unsigned short)(_val))
#define VEC_SPLAT_S16(_val) ((vector signed short)(_val))
#define VEC_SPLAT_U32(_val) ((vector unsigned int)(_val))
#define VEC_SPLAT_S32(_val) ((vector signed int)(_val))
#define VEC_SPLAT_F32(_val) ((vector float)(_val))
#define VEC_SPLAT_U64(_val) ((vector unsigned long long)(_val))
#define VEC_SPLAT_S64(_val) ((vector signed long long)(_val))
#define VEC_SPLAT_F64(_val) ((vector double)(_val))
#else
/* Use curly brace style.
 * The VEC_SPLAT_* forms repeat _val once per element (16, 8, 4 or 2
 * times), so _val is expanded that many times and should be free of
 * side effects.
*/
#define VEC_LITERAL(_type, ...) ((_type){__VA_ARGS__})
#define VEC_SPLAT_U8(_val) ((vector unsigned char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_S8(_val) ((vector signed char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_U16(_val) ((vector unsigned short){_val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_S16(_val) ((vector signed short){_val, _val, _val, _val, _val, _val, _val, _val})
#define VEC_SPLAT_U32(_val) ((vector unsigned int){_val, _val, _val, _val})
#define VEC_SPLAT_S32(_val) ((vector signed int){_val, _val, _val, _val})
#define VEC_SPLAT_F32(_val) ((vector float){_val, _val, _val, _val})
#define VEC_SPLAT_U64(_val) ((vector unsigned long long){_val, _val})
#define VEC_SPLAT_S64(_val) ((vector signed long long){_val, _val})
#define VEC_SPLAT_F64(_val) ((vector double){_val, _val})
#endif
#endif /* _VEC_LITERAL_H_ */

View File

@ -0,0 +1,83 @@
/*
* SCE CONFIDENTIAL
* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
* Copyright (C) 2005 Sony Computer Entertainment Inc.
* All Rights Reserved.
*/
#include <sys/spu_thread.h>
#include <spu_printf.h>
#include <spu_intrinsics.h>
#include <sys/spu_thread.h>
#include <sys/spu_event.h>
#include <stdint.h>
/* Software cache configuration. These must be defined before
 * cache/include/spe_cache.h is included, because its headers derive
 * their shift and mask constants from them.
 *   4 ways x 32 sets x 512-byte lines = 64 KB of line storage.
 */
#define SPE_CACHE_NWAY 4
#define SPE_CACHE_NSETS 32
#define SPE_CACHELINE_SIZE 512
/* Use a single fixed DMA tag (16) for every set instead of the cache
 * header's per-set default.
 */
#define SPE_CACHE_SET_TAGID(set) 16
/* Compile-time switch for the software cache code paths in this file. */
#define USE_SOFTWARE_CACHE 1
#ifdef USE_SOFTWARE_CACHE
#include "cache/include/spe_cache.h"
/* Return a local-store pointer to the cached copy of the byte at main
 * memory address 'ea'.
 *
 * If the line is not cached yet it is fetched and the function blocks
 * until the transfer tagged for that set has completed, so the returned
 * pointer can be dereferenced immediately. The pointer is only valid for
 * bytes within the same cache line.
 */
void * spe_readcache(unsigned int ea)
{
    int set_no, way;
    int line_off, byte_off;

    _spe_cache_nway_lookup_(ea, set_no, way);

    if (unlikely(way < 0)) {
        /* Miss: start the line fill, then wait for its tag group. */
        way = _spe_cache_miss_(ea, set_no, -1);
        spu_writech(22, SPE_CACHE_SET_TAGMASK(set_no));
        spu_mfcstat(MFC_TAG_UPDATE_ALL);
    }

    line_off = _spe_cacheline_num_(set_no, way);
    byte_off = _spe_cacheline_byte_offset_(ea);
    return (void *) &spe_cache_mem[line_off + byte_off];
}
#endif //USE_SOFTWARE_CACHE
/* SPU entry point of the sample.
 *
 * spu_num     first thread argument; not used.
 * mainmemPtr  main-memory address of a 0-terminated string.
 *
 * Copies the string byte by byte through the software cache into a local
 * buffer and prints it with spu_printf(). At most MAX_BUF bytes are read;
 * if no terminator is found within them, a warning is printed and the
 * local copy is truncated so that it can still be printed safely.
 */
int main(int spu_num,uint64_t mainmemPtr)
{
    int memPtr = (int) mainmemPtr;
#define MAX_BUF 256
    char spuBuffer[MAX_BUF];
    char* result,*result2; //= spe_cache_rd(mainmemPtr);
    /* Declared outside the #ifdef because it is tested below even when
     * the software cache is compiled out. */
    int i=0;

    (void)spu_num;
    spuBuffer[0] = 0;

#ifdef USE_SOFTWARE_CACHE
    //this is a brute-force sample.
    //you can use the software cache more efficiently using __spe_cache_rd_x4 to read 4 elements at a time
    do
    {
        result = spe_readcache(mainmemPtr+i);
        //spe_readcache is the expanded version of the spe_cache_rd MACRO
        spuBuffer[i] = result[0];
        i++;
    } while (result[0] && (i<MAX_BUF)); //assume that the buffer ends with [0] in main memory

    //result = _spe_cache_lookup_xfer_wait_(mainmemPtr, 0, 1);
    /* Repeated reads of an address that is now cached. */
    result = spe_readcache(mainmemPtr);
    result2 = spe_readcache(mainmemPtr);
    (void)result2;
#endif //USE_SOFTWARE_CACHE

    /* Overflow only if the buffer is full AND its last byte is not the
     * terminator; a string that exactly fills the buffer is fine. */
    if (i>= MAX_BUF && spuBuffer[MAX_BUF-1] != 0)
    {
        spu_printf("spe_readcache buffer overflow. is the buffer 0-terminated?\n");
        /* Terminate the copy so the %s below cannot run past the buffer. */
        spuBuffer[MAX_BUF-1] = 0;
    }

    spu_printf("spe_cache_rd(%x) = %s\n", memPtr,spuBuffer);
    sys_spu_thread_exit(0);
    /* Presumably not reached (sys_spu_thread_exit is expected not to
     * return); keeps the non-void function well-formed. */
    return 0;
}

View File

@ -0,0 +1,19 @@
# SCE CONFIDENTIAL
# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
# Copyright (C) 2005 Sony Computer Entertainment Inc.
# All Rights Reserved.
#
# Location of the SDK sample makefile fragments; may be overridden from
# the environment or the command line.
CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
include $(CELL_MK_DIR)/sdk.makedef.mk
# Add the software cache headers (cache/include) to the SPU include path.
SPU_INCDIRS += -Icache/include
# Single-source SPU program and the name of the ELF to produce.
SPU_SRCS = hello.spu.c
SPU_TARGET = hello.spu.elf
# Build the SPU code without optimization.
SPU_OPTIMIZE_LV=-O0
include $(CELL_MK_DIR)/sdk.target.mk

View File

@ -0,0 +1,19 @@
/* SCE CONFIDENTIAL */
/* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 */
/* Copyright (C) 2005 Sony Computer Entertainment Inc. */
/* All Rights Reserved. */
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Entry function of the server PPU thread; 'arg' is not used. */
void spu_printf_server_entry(uint64_t arg);
/* Create the event queue, the server thread and the terminating port.
 * Returns 0 on success, -1 on failure. */
int spu_printf_server_initialize(void);
/* Ask the server thread to terminate, join it and release the port and
 * the event queue. Returns 0 on success, -1 on failure. */
int spu_printf_server_finalize(void);
/* Connect an SPU thread's printf event port to the server's queue.
 * Returns 0 on success, -1 on failure. */
int spu_printf_server_register(sys_spu_thread_t spu);
/* Disconnect an SPU thread's printf event port again.
 * Returns 0 on success, -1 on failure. */
int spu_printf_server_unregister(sys_spu_thread_t spu);
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -0,0 +1,204 @@
/*
* SCE CONFIDENTIAL
* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
* Copyright (C) 2005 Sony Computer Entertainment Inc.
* All Rights Reserved.
*
* The SPU printf server is a PPU thread which collaborates with an SPU to
* output strings.
*
* On the SPU side, spu_printf() places the output string and arguments on a
* stack in the local storage, and passes its local-storage address with an
* SPU thread user event from SPU port 1 to the PPU. On the PPU side,
* spu_thread_printf() fetches the stack in the local storage by DMA, and
* parses it into a formatted string.
*
* The SPU printf server takes charge of the tasks on the PPU side. The
* sequence of its tasks is as follows.
* 1. Receive events by sys_event_queue_receive().
* 2. Parse the received spu_printf stack address by spu_thread_printf().
* 3. Output the parsed string, and go back to step 1.
*
* Initialization of the SPU printf server and registration of SPU threads to
* the SPU printf server are required. These can be done by
* spu_printf_server_initialize() and spu_printf_server_register().
* What they actually do is to create a PPU thread and event queue, and
* connect the SPU thread to the event queue.
*/
#include <stdio.h>
#include <sys/ppu_thread.h>
#include <sys/spu_thread.h>
#include <sys/event.h>
#include <spu_printf.h>
#include "spu_printf_server.h"
/* Stack size and priority passed to sys_ppu_thread_create() for the
 * server thread. */
#define STACK_SIZE 4096
#define PRIO 200
/* Server state shared by all functions in this file: the server thread,
 * the event queue it receives from (with its attributes) and the local
 * port used to tell it to terminate. */
static sys_ppu_thread_t thread;
static sys_event_queue_t equeue;
static sys_event_queue_attribute_t eattr;
static sys_event_port_t terminating_port;
/* Name given to the terminating port; the server thread compares the
 * source of each received event against it. */
#define TERMINATING_PORT_NAME 0xFEE1DEAD
/* SPU-side user event port used for printf requests. */
#define SPU_PORT_PRINTF 0x1
/*
 * Set up the SPU printf server: create the event queue, start the server
 * PPU thread and create/connect the terminating port.
 *
 * Returns 0 on success and -1 on failure (a diagnostic is printed).
 * Objects created before a failing step are released where that is
 * possible with the calls used in this file. If one of the port steps
 * fails, the already started server thread keeps running on the queue.
 */
int spu_printf_server_initialize()
{
    int ret;

    /* Create event */
    sys_event_queue_attribute_initialize(eattr);
    ret = sys_event_queue_create(&equeue, &eattr, SYS_EVENT_PORT_LOCAL, 127);
    if (ret) {
        printf("sys_event_queue_create faild %d\n", ret);
        return -1;
    }

    /* Create PPU thread */
    ret = sys_ppu_thread_create(&thread, spu_printf_server_entry, 0UL, PRIO,
                                STACK_SIZE,
                                SYS_PPU_THREAD_CREATE_JOINABLE,
                                (char*)"spu_printf_server");
    if (ret) {
        printf ("spu_printf_server_initialize: sys_ppu_thread_create failed %d\n", ret);
        /* No thread is using the queue yet, so it can be released. */
        sys_event_queue_destroy(equeue, 0);
        return -1;
    }

    /*
     * Create the terminating port. This port is used only in
     * spu_printf_server_finalize().
     */
    ret = sys_event_port_create(&terminating_port,
                                SYS_EVENT_PORT_LOCAL,
                                TERMINATING_PORT_NAME);
    if (ret) {
        printf ("spu_printf_server_initialize: sys_event_port_create failed %d\n", ret);
        return -1;
    }

    ret = sys_event_port_connect_local(terminating_port, equeue);
    if (ret) {
        printf ("spu_printf_server_initialize: sys_event_port_connect_local failed %d\n", ret);
        /* The port was created but never connected; do not leak it. */
        sys_event_port_destroy(terminating_port);
        return -1;
    }

    return 0;
}
/*
 * Shut down the SPU printf server.
 *
 * Precondition: every registered SPU thread has finished sending printf
 * events before this is called.
 *
 * Sends an event from the terminating port, joins the server thread and
 * then releases the port and the event queue. Returns 0 on success and -1
 * if sending, joining or destroying the queue fails. Failures while
 * disconnecting or destroying the port are reported but not fatal.
 */
int spu_printf_server_finalize()
{
    int ret;
    uint64_t exit_status;

    /*
     * Send an event from the terminating port to notify the termination to
     * the SPU printf server
     */
    ret = sys_event_port_send(terminating_port, 0, 0, 0);
    if (ret) {
        printf("sys_event_port_send failed %d\n", ret);
        return -1;
    }

    /* Wait for the termination of the SPU printf server */
    ret = sys_ppu_thread_join(thread, &exit_status);
    if (ret) {
        printf("sys_ppu_thread_join failed %d\n", ret);
        return -1;
    }

    /* Disconnect and destroy the terminating port */
    ret = sys_event_port_disconnect(terminating_port);
    if (ret) {
        printf("sys_event_port_disconnect failed %d\n", ret);
    }
    ret = sys_event_port_destroy(terminating_port);
    if (ret) {
        printf("sys_event_port_destroy failed %d\n", ret);
    }

    /* Destroy the event queue */
    ret = sys_event_queue_destroy(equeue, 0);
    if (ret) {
        printf("sys_event_queue_destroy failed %d\n", ret);
        return -1;
    }

    return 0;
}
/*
 * Connect the printf user-event port of SPU thread 'spu' to the server's
 * event queue. Returns 0 on success; on failure prints a diagnostic and
 * returns -1.
 */
int spu_printf_server_register(sys_spu_thread_t spu)
{
    int status = sys_spu_thread_connect_event(spu, equeue,
                                              SYS_SPU_THREAD_EVENT_USER,
                                              SPU_PORT_PRINTF);

    if (status == 0) {
        return 0;
    }
    printf("sys_spu_thread_connect_event faild %d\n", status);
    return -1;
}
/*
 * Disconnect the printf user-event port of SPU thread 'spu'.
 * Returns 0 on success; on failure prints a diagnostic and returns -1.
 */
int spu_printf_server_unregister(sys_spu_thread_t spu)
{
    int status = sys_spu_thread_disconnect_event(spu,
                                                 SYS_SPU_THREAD_EVENT_USER,
                                                 SPU_PORT_PRINTF);

    if (status == 0) {
        return 0;
    }
    printf("sys_spu_thread_disconnect_event faild %d\n", status);
    return -1;
}
/*
 * Body of the server PPU thread.
 *
 * Loops on the event queue. An event whose source is the terminating port
 * ends the loop. For any other event, data1 is taken as the sending SPU
 * thread and data3 is handed to spu_thread_printf(); the value it returns
 * is written to that SPU thread's mailbox. Any failing system call also
 * ends the loop. The thread then exits with status 0.
 */
void spu_printf_server_entry(uint64_t arg)
{
    sys_event_t received;
    int status;

    (void)arg; /* This thread does not use the argument */

    while (1) {
        sys_spu_thread_t sender;
        int printed;

        status = sys_event_queue_receive(equeue, &received, SYS_NO_TIMEOUT);
        if (status != 0) {
            printf("sys_event_queue_receive failed %d\n", status);
            break;
        }

        /* An event from the terminating port stops the server. */
        if (received.source == TERMINATING_PORT_NAME) {
            printf("Finalize the SPU printf server.\n");
            break;
        }

        sender = received.data1;
        printed = spu_thread_printf(sender, received.data3);

        /* Hand the result back through the sender's mailbox. */
        status = sys_spu_thread_write_spu_mb(sender, printed);
        if (status != 0) {
            printf("sys_spu_thread_write_spu_mb failed %d\n", status);
            break;
        }
    }

    sys_ppu_thread_exit(0);
}

View File

@ -0,0 +1,243 @@
/*
* SCE CONFIDENTIAL
* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
* Copyright (C) 2005 Sony Computer Entertainment Inc.
* All Rights Reserved.
*
* File: spu_thr_printf.c
* Description:
* This sample shows how to output strings by SPU programs. spu_printf()
* called by
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/spu_initialize.h>
#include <sys/spu_image.h>
#include <sys/spu_thread.h>
#include <sys/spu_thread_group.h>
#include <sys/spu_utility.h>
#include <sys/paths.h>
#include "spu_printf_server.h" /* SPU printf server */
/* Arguments passed to sys_spu_initialize() in main(). */
#define MAX_PHYSICAL_SPU 4
#define MAX_RAW_SPU 0
#define NUM_SPU_THREADS 4 /* The number of SPU threads in the group */
/* Priority passed to sys_spu_thread_group_create() in main(). */
#define PRIORITY 100
/*
 * Path of the SPU program image opened with sys_spu_image_open().
 * The file name depends on whether SN_TARGET_PS3 is defined at compile time.
 */
#ifdef SN_TARGET_PS3
#define SPU_PROG (SYS_APP_HOME "/SPU_printf.spu.self")
#else
#define SPU_PROG (SYS_APP_HOME "/hello.spu.self")
#endif
/* Sizes, in bytes, of the two buffers below. */
#define IN_BUF_SIZE 256
#define OUT_BUF_SIZE 256
/* main() stores a greeting here and passes this buffer's address to every SPU thread as arg2. */
volatile uint8_t in_buf[IN_BUF_SIZE];
/* NOTE(review): out_buf, in_size and out_size are not referenced by main(); presumably read by other code - confirm. */
volatile uint8_t out_buf[OUT_BUF_SIZE];
uint32_t in_size = IN_BUF_SIZE;
uint32_t out_size = OUT_BUF_SIZE;
int main(void)
{
sprintf(in_buf,"hello world");
sys_spu_thread_group_t group; /* SPU thread group ID */
const char *group_name = "Group";
sys_spu_thread_group_attribute_t group_attr;/* SPU thread group attribute*/
sys_spu_thread_t threads[NUM_SPU_THREADS]; /* SPU thread IDs */
sys_spu_thread_attribute_t thread_attr; /* SPU thread attribute */
const char *thread_names[NUM_SPU_THREADS] =
{"SPU Thread 0",
"SPU Thread 1",
"SPU Thread 2",
"SPU Thread 3"}; /* The names of SPU threads */
sys_spu_image_t spu_img;
int ret;
/*
* Initialize SPUs
*/
printf("Initializing SPUs\n");
ret = sys_spu_initialize(MAX_PHYSICAL_SPU, MAX_RAW_SPU);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_initialize failed: %#.8x\n", ret);
exit(ret);
}
/*
* Create an SPU thread group
*/
printf("Creating an SPU thread group.\n");
group_attr.name = group_name;
group_attr.nsize = strlen(group_attr.name) + 1; /* Add 1 for '\0' */
group_attr.type = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
ret = sys_spu_thread_group_create(&group,
NUM_SPU_THREADS,
PRIORITY,
&group_attr);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_group_create failed: %#.8x\n", ret);
exit(ret);
}
ret = sys_spu_image_open(&spu_img, SPU_PROG);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_image_open failed: %#.8x\n", ret);
exit(ret);
}
/*
* Initialize the SPU printf server
*
* What spu_printf_server_initialize() actually does is to create an
* PPU thread and an event queue which handle the events sent by
* spu_printf().
*/
ret = spu_printf_server_initialize();
if (ret != SUCCEEDED) {
fprintf(stderr, "spu_printf_server_initialize failed: %#.8x\n", ret);
exit(ret);
}
/*
* In this loop, all SPU threads in the SPU thread group are initialized
* with the loaded SPU ELF image.
*/
for (int i = 0; i < NUM_SPU_THREADS; i++) {
sys_spu_thread_argument_t thread_args;
int spu_num = i;
printf("Initializing SPU thread %d\n", i);
/*
* nsegs, segs and entry_point have already been initialized by
* sys_spu_thread_elf_loader().
*/
thread_attr.name = thread_names[i];
thread_attr.nsize = strlen(thread_names[i]) + 1;
thread_attr.option = SYS_SPU_THREAD_OPTION_NONE;
/*
* Pass the SPU number to the SPU thread as the first parameter.
*/
thread_args.arg1 = SYS_SPU_THREAD_ARGUMENT_LET_32(spu_num);
thread_args.arg2 = SYS_SPU_THREAD_ARGUMENT_LET_64((uint64_t)in_buf);
/*
* The third argument specifies the SPU number.
* The SPU number of each SPU thread must be unique within the SPU
* thread group.
*/
ret = sys_spu_thread_initialize(&threads[i],
group,
spu_num,
&spu_img,
&thread_attr,
&thread_args);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_initialize failed: %#.8x\n", ret);
exit(ret);
}
/*
* Register the SPU thread to the SPU printf server.
*
* spu_printf_server_register() establishes the connection between
* the SPU thread and the SPU printf server's event queue.
*/
ret = spu_printf_server_register(threads[i]);
if (ret != CELL_OK) {
fprintf(stderr, "spu_printf_server_register failed: %#.8x\n", ret);
exit(ret);
}
}
printf("All SPU threads have been successfully initialized.\n");
/*
* Start the SPU thread group
*
* The SPU thread group will be in the READY state, and will become in
* the RUNNING state when the kernel assigns and executes it onto SPUs.
*/
printf("Starting the SPU thread group.\n");
ret = sys_spu_thread_group_start(group);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_group_start failed: %#.8x\n", ret);
exit(ret);
}
/*
* Wait for the termination of the SPU thread group.
*/
printf("Waiting for the SPU thread group to be terminated.\n");
int cause, status;
ret = sys_spu_thread_group_join(group, &cause, &status);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_group_join failed: %#.8x\n", ret);
exit(ret);
}
/*
* Show the exit cause and status.
*/
switch(cause) {
case SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT:
printf("The SPU thread group exited by sys_spu_thread_group_exit().\n");
printf("The group's exit status = %d\n", status);
break;
case SYS_SPU_THREAD_GROUP_JOIN_ALL_THREADS_EXIT:
printf("All SPU thread exited by sys_spu_thread_exit().\n");
for (int i = 0; i < NUM_SPU_THREADS; i++) {
int thr_exit_status;
ret = sys_spu_thread_get_exit_status(threads[i], &thr_exit_status);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_get_exit_status failed: %#.8x\n", ret);
}
printf("SPU thread %d's exit status = %d\n", i, thr_exit_status);
}
break;
case SYS_SPU_THREAD_GROUP_JOIN_TERMINATED:
printf("The SPU thread group is terminated by sys_spu_thread_terminate().\n");
printf("The group's exit status = %d\n", status);
break;
default:
fprintf(stderr, "Unknown exit cause: %d\n", cause);
break;
}
/*
* Destroy the SPU thread group and clean up resources.
*/
ret = sys_spu_thread_group_destroy(group);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_thread_group_destroy failed: %#.8x\n", ret);
}
ret = sys_spu_image_close(&spu_img);
if (ret != CELL_OK) {
fprintf(stderr, "sys_spu_image_close failed: %.8x\n", ret);
}
/*
* Finalize the SPU printf server.
*
* This function let the PPU thread exit.
* The event queue will be destroyed.
*/
ret = spu_printf_server_finalize();
if (ret != CELL_OK) {
fprintf(stderr, "spu_printf_server_finalize failed: %#.8x\n", ret);
}
printf("Exiting.\n");
return 0;
}

View File

@ -0,0 +1,16 @@
# SCE CONFIDENTIAL
# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
# Copyright (C) 2005 Sony Computer Entertainment Inc.
# All Rights Reserved.
#
# Directory holding the SDK sample makefile fragments. "?=" only assigns when
# CELL_MK_DIR is not already set, so it can be overridden by the caller.
CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
# NOTE(review): presumably supplies the SDK's common variable definitions - confirm.
include $(CELL_MK_DIR)/sdk.makedef.mk
# PPU source files of this program and the name of the ELF built from them.
PPU_SRCS = spu_thr_printf.ppu.c spu_printf_server.ppu.c
PPU_TARGET = spu_thr_printf.ppu.elf
# NOTE(review): presumably turns the variables above into build rules, so it
# has to come after them - confirm.
include $(CELL_MK_DIR)/sdk.target.mk

View File

@ -0,0 +1,29 @@

Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spu_thr_printf_2", "spu_thr_printf_2.vcproj", "{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}"
ProjectSection(ProjectDependencies) = postProject
{47EE939D-CB3D-4600-B8B6-79FDF607E133} = {47EE939D-CB3D-4600-B8B6-79FDF607E133}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU_printf", "SPU_printf\SPU_printf.vcproj", "{47EE939D-CB3D-4600-B8B6-79FDF607E133}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
PS3 Debug|Win32 = PS3 Debug|Win32
PS3 Release|Win32 = PS3 Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32
{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32
{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32
{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.Build.0 = PS3 Release|Win32
{47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32
{47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32
{47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32
{47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.Build.0 = PS3 Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,200 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="spu_thr_printf_2"
ProjectGUID="{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<!--
  PS3 Debug configuration of the PPU program.
  Compiles with "-g -Wall -fno-exceptions", links to $(ProjectName).ppu.elf,
  then a post-build step runs make_fself to produce the fake-signed .self.
  The make_fself path is quoted, like every other path in this file, so the
  post-build step still works when SCE_PS3_ROOT contains spaces.
-->
<Configuration
	Name="PS3 Debug|Win32"
	OutputDirectory="PS3_PPU_Debug"
	IntermediateDirectory="PS3_PPU_Debug"
	ConfigurationType="1"
	DeleteExtensionsOnClean="*.obj;*.d;*.map;*.lst;*.pch;$(TargetPath);$(TargetDir)$(TargetName).self"
	>
	<Tool
		Name="VCPreBuildEventTool"
	/>
	<Tool
		Name="VCCustomBuildTool"
	/>
	<Tool
		Name="VCXMLDataGeneratorTool"
	/>
	<Tool
		Name="VCWebServiceProxyGeneratorTool"
	/>
	<Tool
		Name="VCMIDLTool"
	/>
	<Tool
		Name="VCCLCompilerTool"
		AdditionalOptions="-g -Wall -fno-exceptions"
		AdditionalIncludeDirectories="&quot;$(SN_PS3_PATH)\ppu\include\sn&quot;;&quot;$(SCE_PS3_ROOT)\target\ppu\include&quot;;&quot;$(SCE_PS3_ROOT)\target\common\include&quot;"
		PreprocessorDefinitions="SN_TARGET_PS3;_DEBUG;__GCC__"
		ProgramDataBaseFileName="$(IntDir)/"
		CompileAs="0"
		ForcedIncludeFiles=""
		ForcedUsingFiles=""
	/>
	<Tool
		Name="VCManagedResourceCompilerTool"
	/>
	<Tool
		Name="VCResourceCompilerTool"
	/>
	<Tool
		Name="VCPreLinkEventTool"
	/>
	<Tool
		Name="VCLinkerTool"
		AdditionalOptions="-fno-exceptions"
		AdditionalDependencies="&quot;$(SN_PS3_PATH)\ppu\lib\sn\libsn.a&quot; &quot;$(SCE_PS3_ROOT)\target\ppu\lib\libm.a&quot; &quot;$(SCE_PS3_ROOT)\target\ppu\lib\libio.a&quot;"
		OutputFile="$(ProjectName).ppu.elf"
		AdditionalLibraryDirectories=""
		GenerateManifest="false"
		GenerateDebugInformation="true"
		ProgramDatabaseFile=""
		ImportLibrary=""
	/>
	<Tool
		Name="VCALinkTool"
	/>
	<Tool
		Name="VCManifestTool"
	/>
	<Tool
		Name="VCXDCMakeTool"
	/>
	<Tool
		Name="VCBscMakeTool"
	/>
	<Tool
		Name="VCFxCopTool"
	/>
	<Tool
		Name="VCAppVerifierTool"
	/>
	<Tool
		Name="VCWebDeploymentTool"
	/>
	<Tool
		Name="VCPostBuildEventTool"
		Description="Fake Signing ELF &quot;$(TargetDir)$(TargetName).self&quot;"
		CommandLine="&quot;$(SCE_PS3_ROOT)\host-win32\bin\make_fself&quot; &quot;$(TargetPath)&quot; &quot;$(TargetDir)$(TargetName).self&quot;"
	/>
</Configuration>
<!--
  PS3 Release configuration of the PPU program.
  Compiles with "-O2 -Wall -fno-exceptions" and NDEBUG, links to
  $(ProjectName).ppu.elf, then a post-build step runs make_fself to produce
  the fake-signed .self.
  The make_fself path is quoted, like every other path in this file, so the
  post-build step still works when SCE_PS3_ROOT contains spaces.
-->
<Configuration
	Name="PS3 Release|Win32"
	OutputDirectory="PS3_PPU_Release"
	IntermediateDirectory="PS3_PPU_Release"
	ConfigurationType="1"
	DeleteExtensionsOnClean="*.obj;*.d;*.map;*.lst;*.pch;$(TargetPath);$(TargetDir)$(TargetName).self"
	>
	<Tool
		Name="VCPreBuildEventTool"
	/>
	<Tool
		Name="VCCustomBuildTool"
	/>
	<Tool
		Name="VCXMLDataGeneratorTool"
	/>
	<Tool
		Name="VCWebServiceProxyGeneratorTool"
	/>
	<Tool
		Name="VCMIDLTool"
	/>
	<Tool
		Name="VCCLCompilerTool"
		AdditionalOptions="-O2 -Wall -fno-exceptions"
		AdditionalIncludeDirectories="&quot;$(SN_PS3_PATH)\ppu\include\sn&quot;;&quot;$(SCE_PS3_ROOT)\target\ppu\include&quot;;&quot;$(SCE_PS3_ROOT)\target\common\include&quot;"
		PreprocessorDefinitions="SN_TARGET_PS3;NDEBUG;__GCC__"
		ProgramDataBaseFileName="$(IntDir)/"
		CompileAs="0"
	/>
	<Tool
		Name="VCManagedResourceCompilerTool"
	/>
	<Tool
		Name="VCResourceCompilerTool"
	/>
	<Tool
		Name="VCPreLinkEventTool"
	/>
	<Tool
		Name="VCLinkerTool"
		AdditionalOptions="-fno-exceptions"
		AdditionalDependencies="&quot;$(SN_PS3_PATH)\ppu\lib\sn\libsn.a&quot; &quot;$(SCE_PS3_ROOT)\target\ppu\lib\libm.a&quot; &quot;$(SCE_PS3_ROOT)\target\ppu\lib\libio.a&quot;"
		OutputFile="$(ProjectName).ppu.elf"
		AdditionalLibraryDirectories=""
		GenerateManifest="false"
		GenerateDebugInformation="true"
		ProgramDatabaseFile=""
	/>
	<Tool
		Name="VCALinkTool"
	/>
	<Tool
		Name="VCManifestTool"
	/>
	<Tool
		Name="VCXDCMakeTool"
	/>
	<Tool
		Name="VCBscMakeTool"
	/>
	<Tool
		Name="VCFxCopTool"
	/>
	<Tool
		Name="VCAppVerifierTool"
	/>
	<Tool
		Name="VCWebDeploymentTool"
	/>
	<Tool
		Name="VCPostBuildEventTool"
		Description="Fake Signing ELF &quot;$(TargetDir)$(TargetName).self&quot;"
		CommandLine="&quot;$(SCE_PS3_ROOT)\host-win32\bin\make_fself&quot; &quot;$(TargetPath)&quot; &quot;$(TargetDir)$(TargetName).self&quot;"
	/>
</Configuration>
</Configurations>
<References>
</References>
<!-- Files shown in the project: two PPU C sources and the printf server header. -->
<Files>
	<Filter Name="Source Files" Filter="cpp;c;cxx;cc;s;asm">
		<File RelativePath=".\spu_printf_server.ppu.c"/>
		<File RelativePath=".\spu_thr_printf.ppu.c"/>
	</Filter>
	<Filter Name="Header Files" Filter="h;hpp">
		<File RelativePath=".\spu_printf_server.h"/>
	</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>