From dc8692ba9401a2f3240573db773f8878cb5332a8 Mon Sep 17 00:00:00 2001 From: "erwin.coumans" Date: Tue, 18 Nov 2008 01:33:30 +0000 Subject: [PATCH] Added IBM Cell SDK 2.x software_cache to Bullet/Extras. There is an option to enable it for the BulletMultiThreaded Cell version. See USE_SOFTWARE_CACHE in Bullet\src\BulletMultiThreaded\SpuNarrowPhaseCollisionTask\SpuGatheringCollisionTask.cpp It improves the Bullet midphase collision detection (triangle/vertex fetch) The license is CommonPublicLicense-1.0, see included license docs. --- Extras/software_cache/Makefile | 14 + .../SPU_printf/SPU_printf.vcproj | 239 +++++++++++++++++ .../cache/CommonPublicLicense-1.0 | 213 +++++++++++++++ Extras/software_cache/cache/include/Makefile | 28 ++ Extras/software_cache/cache/include/README | 32 +++ Extras/software_cache/cache/include/api.h | 31 +++ Extras/software_cache/cache/include/cbe_mfc.h | 245 ++++++++++++++++++ Extras/software_cache/cache/include/defs.h | 149 +++++++++++ Extras/software_cache/cache/include/dma.h | 40 +++ Extras/software_cache/cache/include/ilog2.h | 35 +++ Extras/software_cache/cache/include/memset.h | 68 +++++ .../cache/include/nway-lookup.h | 194 ++++++++++++++ .../software_cache/cache/include/nway-miss.h | 51 ++++ .../software_cache/cache/include/nway-opt.h | 153 +++++++++++ .../cache/include/nway-replace.h | 38 +++ Extras/software_cache/cache/include/nway.h | 105 ++++++++ .../software_cache/cache/include/spe_cache.h | 32 +++ .../cache/include/vec_literal.h | 74 ++++++ Extras/software_cache/hello.spu.c | 83 ++++++ Extras/software_cache/hello.spu.mk | 19 ++ Extras/software_cache/spu_printf_server.h | 19 ++ Extras/software_cache/spu_printf_server.ppu.c | 204 +++++++++++++++ Extras/software_cache/spu_thr_printf.ppu.c | 243 +++++++++++++++++ Extras/software_cache/spu_thr_printf.ppu.mk | 16 ++ Extras/software_cache/spu_thr_printf_2.sln | 29 +++ Extras/software_cache/spu_thr_printf_2.vcproj | 200 ++++++++++++++ 26 files changed, 2554 insertions(+) create 
mode 100644 Extras/software_cache/Makefile create mode 100644 Extras/software_cache/SPU_printf/SPU_printf.vcproj create mode 100644 Extras/software_cache/cache/CommonPublicLicense-1.0 create mode 100644 Extras/software_cache/cache/include/Makefile create mode 100644 Extras/software_cache/cache/include/README create mode 100644 Extras/software_cache/cache/include/api.h create mode 100644 Extras/software_cache/cache/include/cbe_mfc.h create mode 100644 Extras/software_cache/cache/include/defs.h create mode 100644 Extras/software_cache/cache/include/dma.h create mode 100644 Extras/software_cache/cache/include/ilog2.h create mode 100644 Extras/software_cache/cache/include/memset.h create mode 100644 Extras/software_cache/cache/include/nway-lookup.h create mode 100644 Extras/software_cache/cache/include/nway-miss.h create mode 100644 Extras/software_cache/cache/include/nway-opt.h create mode 100644 Extras/software_cache/cache/include/nway-replace.h create mode 100644 Extras/software_cache/cache/include/nway.h create mode 100644 Extras/software_cache/cache/include/spe_cache.h create mode 100644 Extras/software_cache/cache/include/vec_literal.h create mode 100644 Extras/software_cache/hello.spu.c create mode 100644 Extras/software_cache/hello.spu.mk create mode 100644 Extras/software_cache/spu_printf_server.h create mode 100644 Extras/software_cache/spu_printf_server.ppu.c create mode 100644 Extras/software_cache/spu_thr_printf.ppu.c create mode 100644 Extras/software_cache/spu_thr_printf.ppu.mk create mode 100644 Extras/software_cache/spu_thr_printf_2.sln create mode 100644 Extras/software_cache/spu_thr_printf_2.vcproj diff --git a/Extras/software_cache/Makefile b/Extras/software_cache/Makefile new file mode 100644 index 000000000..6d1bbc505 --- /dev/null +++ b/Extras/software_cache/Makefile @@ -0,0 +1,14 @@ +# SCE CONFIDENTIAL +# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 +# Copyright (C) 2005 Sony Computer Entertainment Inc. +# All Rights Reserved. 
+# + +CELL_MK_DIR ?= $(CELL_SDK)/samples/mk +include $(CELL_MK_DIR)/sdk.makedef.mk + +MK_TARGET = spu_thr_printf.ppu.mk hello.spu.mk + +include $(CELL_MK_DIR)/sdk.target.mk + + diff --git a/Extras/software_cache/SPU_printf/SPU_printf.vcproj b/Extras/software_cache/SPU_printf/SPU_printf.vcproj new file mode 100644 index 000000000..072afd444 --- /dev/null +++ b/Extras/software_cache/SPU_printf/SPU_printf.vcproj @@ -0,0 +1,239 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Extras/software_cache/cache/CommonPublicLicense-1.0 b/Extras/software_cache/cache/CommonPublicLicense-1.0 new file mode 100644 index 000000000..5723258b5 --- /dev/null +++ b/Extras/software_cache/cache/CommonPublicLicense-1.0 @@ -0,0 +1,213 @@ +Common Public License Version 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM +CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial code and +documentation distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + + i) changes to the Program, and + + ii) additions to the Program; + + where such changes and/or additions to the Program originate from and are +distributed by that particular Contributor. A Contribution 'originates' from a +Contributor if it was added to the Program by such Contributor itself or anyone +acting on such Contributor's behalf. Contributions do not include additions to +the Program which: (i) are separate modules of software distributed in +conjunction with the Program under their own license agreement, and (ii) are not +derivative works of the Program. + +"Contributor" means any person or entity that distributes the Program. 
+ +"Licensed Patents " mean patent claims licensable by a Contributor which are +necessarily infringed by the use or sale of its Contribution alone or when +combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free copyright license to +reproduce, prepare derivative works of, publicly display, publicly perform, +distribute and sublicense the Contribution of such Contributor, if any, and such +derivative works, in source code and object code form. + + b) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed +Patents to make, use, sell, offer to sell, import and otherwise transfer the +Contribution of such Contributor, if any, in source code and object code form. +This patent license shall apply to the combination of the Contribution and the +Program if, at the time the Contribution is added by the Contributor, such +addition of the Contribution causes such combination to be covered by the +Licensed Patents. The patent license shall not apply to any other combinations +which include the Contribution. No hardware per se is licensed hereunder. + + c) Recipient understands that although each Contributor grants the licenses +to its Contributions set forth herein, no assurances are provided by any +Contributor that the Program does not infringe the patent or other intellectual +property rights of any other entity. Each Contributor disclaims any liability to +Recipient for claims brought by any other entity based on infringement of +intellectual property rights or otherwise. 
As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby assumes sole +responsibility to secure any other intellectual property rights needed, if any. +For example, if a third party patent license is required to allow Recipient to +distribute the Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + + d) Each Contributor represents that to its knowledge it has sufficient +copyright rights in its Contribution, if any, to grant the copyright license set +forth in this Agreement. + +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its +own license agreement, provided that: + + a) it complies with the terms and conditions of this Agreement; and + + b) its license agreement: + + i) effectively disclaims on behalf of all Contributors all warranties and +conditions, express and implied, including warranties or conditions of title and +non-infringement, and implied warranties or conditions of merchantability and +fitness for a particular purpose; + + ii) effectively excludes on behalf of all Contributors all liability for +damages, including direct, indirect, special, incidental and consequential +damages, such as lost profits; + + iii) states that any provisions which differ from this Agreement are offered +by that Contributor alone and not by any other party; and + + iv) states that source code for the Program is available from such +Contributor, and informs licensees how to obtain it in a reasonable manner on or +through a medium customarily used for software exchange. + +When the Program is made available in source code form: + + a) it must be made available under this Agreement; and + + b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the +Program. 
+ +Each Contributor must identify itself as the originator of its Contribution, if +any, in a manner that reasonably allows subsequent Recipients to identify the +originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with +respect to end users, business partners and the like. While this license is +intended to facilitate the commercial use of the Program, the Contributor who +includes the Program in a commercial product offering should do so in a manner +which does not create potential liability for other Contributors. Therefore, if +a Contributor includes the Program in a commercial product offering, such +Contributor ("Commercial Contributor") hereby agrees to defend and indemnify +every other Contributor ("Indemnified Contributor") against any losses, damages +and costs (collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to the +extent caused by the acts or omissions of such Commercial Contributor in +connection with its distribution of the Program in a commercial product +offering. The obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. In order +to qualify, an Indemnified Contributor must: a) promptly notify the Commercial +Contributor in writing of such claim, and b) allow the Commercial Contributor to +control, and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may participate in +any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial product +offering, Product X. That Contributor is then a Commercial Contributor. 
If that +Commercial Contributor then makes performance claims, or offers warranties +related to Product X, those performance claims and warranties are such +Commercial Contributor's responsibility alone. Under this section, the +Commercial Contributor would have to defend claims against the other +Contributors related to those performance claims and warranties, and if a court +requires any other Contributor to pay any damages as a result, the Commercial +Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each +Recipient is solely responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its exercise of +rights under this Agreement, including but not limited to the risks and costs of +program errors, compliance with applicable laws, damage to or loss of data, +programs or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY +CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS +GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable +law, it shall not affect the validity or enforceability of the remainder of the +terms of this Agreement, and without further action by the parties hereto, such +provision shall be reformed to the minimum extent necessary to make such +provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with respect to +a patent applicable to software (including a cross-claim or counterclaim in a +lawsuit), then any patent licenses granted by that Contributor to such Recipient +under this Agreement shall terminate as of the date such litigation is filed. In +addition, if Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or hardware) +infringes such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to +comply with any of the material terms or conditions of this Agreement and does +not cure such failure in a reasonable period of time after becoming aware of +such noncompliance. If all Recipient's rights under this Agreement terminate, +Recipient agrees to cease use and distribution of the Program as soon as +reasonably practicable. However, Recipient's obligations under this Agreement +and any licenses granted by Recipient relating to the Program shall continue and +survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in +order to avoid inconsistency the Agreement is copyrighted and may only be +modified in the following manner. The Agreement Steward reserves the right to +publish new versions (including revisions) of this Agreement from time to time. 
+No one other than the Agreement Steward has the right to modify this Agreement. +IBM is the initial Agreement Steward. IBM may assign the responsibility to serve +as the Agreement Steward to a suitable separate entity. Each new version of the +Agreement will be given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the Agreement +under which it was received. In addition, after a new version of the Agreement +is published, Contributor may elect to distribute the Program (including its +Contributions) under the new version. Except as expressly stated in Sections +2(a) and 2(b) above, Recipient receives no rights or licenses to the +intellectual property of any Contributor under this Agreement, whether +expressly, by implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to this +Agreement will bring a legal action under this Agreement more than one year +after the cause of action arose. Each party waives its rights to a jury trial in +any resulting litigation. diff --git a/Extras/software_cache/cache/include/Makefile b/Extras/software_cache/cache/include/Makefile new file mode 100644 index 000000000..27a5c4347 --- /dev/null +++ b/Extras/software_cache/cache/include/Makefile @@ -0,0 +1,28 @@ +# --------------------------------------------------------------- +# PLEASE DO NOT MODIFY THIS SECTION +# This prolog section is automatically generated. +# +# (C) Copyright 2001,2006, +# International Business Machines Corporation, +# +# All Rights Reserved. 
+# --------------------------------------------------------------- +# PROLOG END TAG zYx + +######################################################################## +# Common Makefile +######################################################################## + +INSTALL_DIR = $(SDKINC_spu)/cache + +INSTALL_FILES := api.h \ + defs.h \ + dma.h \ + nway.h \ + nway-lookup.h \ + nway-miss.h \ + nway-opt.h \ + nway-replace.h \ + spe_cache.h + +include ../../../../../make.footer diff --git a/Extras/software_cache/cache/include/README b/Extras/software_cache/cache/include/README new file mode 100644 index 000000000..b0e532433 --- /dev/null +++ b/Extras/software_cache/cache/include/README @@ -0,0 +1,32 @@ +%% --------------------------------------------------------------- +%% PLEASE DO NOT MODIFY THIS SECTION +%% This prolog section is automatically generated. +%% +%% (C) Copyright 2001,2006, +%% International Business Machines Corporation, +%% +%% All Rights Reserved. +%% --------------------------------------------------------------- +%% PROLOG END TAG zYx +This directory contains an implementation of a software-managed cache for the +SPE. Whenever possible, the cache interfaces are implemented as macros +or inline-able functions. + +Depending on compile-time settings, different cache implementations can +be selected. + +The include file hierarchy is: + + + spe_cache.h Top level header. + | + + defs.h Common definitions. + + dma.h Initiate DMA transfers. + + nway.h Top level n-way header. + | + + nway-lookup.h n-way lookup operations. + + nway-miss.h n-way cache miss handler. + + nway-replace.h n-way cache replace handler. + + nway-opt.h "optimized" n-way interfaces. + | + + api.h Basic application interfaces.
+ diff --git a/Extras/software_cache/cache/include/api.h b/Extras/software_cache/cache/include/api.h new file mode 100644 index 000000000..77594221b --- /dev/null +++ b/Extras/software_cache/cache/include/api.h @@ -0,0 +1,31 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* api.h + * + * Copyright (C) 2005 IBM Corp. + * + * Simple API for software managed cache on SPEs. + * A sophisticated application would not use these, + * but rather use the low-level lookup functions. + */ + +#ifndef __SPE_CACHE_API_H__ +#define __SPE_CACHE_API_H__ + +typedef void *spe_cache_entry_t; + +#define spe_cache_rd(ea) _spe_cache_lookup_xfer_wait_(ea, 0, 1) +#define spe_cache_tr(ea) _spe_cache_lookup_xfer_(ea, 0, 1) +#define spe_cache_lr(ea) _spe_cache_lookup_(ea, 0) + +#define spe_cache_wait(entry) _spe_cache_wait_(entry) + +#endif diff --git a/Extras/software_cache/cache/include/cbe_mfc.h b/Extras/software_cache/cache/include/cbe_mfc.h new file mode 100644 index 000000000..c685118ac --- /dev/null +++ b/Extras/software_cache/cache/include/cbe_mfc.h @@ -0,0 +1,245 @@ +/* @(#)17 1.4 src/include/cbe_mfc.h, sw.includes, sdk_pub 10/11/05 16:00:25 */ +/* -------------------------------------------------------------- */ +/* (C) Copyright 2001,2005, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment Incorporated, */ +/* Toshiba Corporation. */ +/* */ +/* All Rights Reserved. 
*/ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _CBEA_MFC_H_ +#define _CBEA_MFC_H_ + +/* This header file contains various definitions related to the Memory Flow + * Controller (MFC) portion of the Cell Broadband Engine Architecture (CBEA). + */ + +/**************************************/ +/* MFC DMA Command Opcode Definitions */ +/**************************************/ + +/****************************************************************************/ +/* MFC DMA Command flags which identify classes of operations. */ +/****************************************************************************/ +/* Note: These flags may be used in conjunction with the base command types + * (i.e. MFC_PUT_CMD, MFC_PUTR_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD) + * to construct the various command permutations. + */ + +#define MFC_BARRIER_ENABLE 0x01 +#define MFC_FENCE_ENABLE 0x02 +#define MFC_LIST_ENABLE 0x04 /* SPU Only */ +#define MFC_START_ENABLE 0x08 /* proxy Only */ +#define MFC_RESULT_ENABLE 0x10 + +/****************************************************************************/ +/* MFC DMA Put Commands */ +/****************************************************************************/ + +#define MFC_PUT_CMD 0x20 +#define MFC_PUTS_CMD 0x28 /* proxy Only */ +#define MFC_PUTR_CMD 0x30 +#define MFC_PUTF_CMD 0x22 +#define MFC_PUTB_CMD 0x21 +#define MFC_PUTFS_CMD 0x2A /* proxy Only */ +#define MFC_PUTBS_CMD 0x29 /* proxy Only */ +#define MFC_PUTRF_CMD 0x32 +#define MFC_PUTRB_CMD 0x31 +#define MFC_PUTL_CMD 0x24 /* SPU Only */ +#define MFC_PUTRL_CMD 0x34 /* SPU Only */ +#define MFC_PUTLF_CMD 0x26 /* SPU Only */ +#define MFC_PUTLB_CMD 0x25 /* SPU Only */ +#define MFC_PUTRLF_CMD 0x36 /* SPU Only */ +#define MFC_PUTRLB_CMD 0x35 /* SPU Only */ + +/****************************************************************************/ +/* MFC DMA Get Commands */ +/****************************************************************************/ + 
+#define MFC_GET_CMD 0x40 +#define MFC_GETS_CMD 0x48 /* proxy Only */ +#define MFC_GETF_CMD 0x42 +#define MFC_GETB_CMD 0x41 +#define MFC_GETFS_CMD 0x4A /* proxy Only */ +#define MFC_GETBS_CMD 0x49 /* proxy Only */ +#define MFC_GETL_CMD 0x44 /* SPU Only */ +#define MFC_GETLF_CMD 0x46 /* SPU Only */ +#define MFC_GETLB_CMD 0x45 /* SPU Only */ + +/****************************************************************************/ +/* MFC DMA Storage Control Commands */ +/****************************************************************************/ +/* Note: These are only supported on implementations with a SL1 cache + * They are no-ops on the initial (CBE) implementation. + */ + +#define MFC_SDCRT_CMD 0x80 +#define MFC_SDCRTST_CMD 0x81 +#define MFC_SDCRZ_CMD 0x89 +#define MFC_SDCRS_CMD 0x8D +#define MFC_SDCRF_CMD 0x8F + +/****************************************************************************/ +/* MFC Synchronization Commands */ +/****************************************************************************/ + +#define MFC_GETLLAR_CMD 0xD0 /* SPU Only */ +#define MFC_PUTLLC_CMD 0xB4 /* SPU Only */ +#define MFC_PUTLLUC_CMD 0xB0 /* SPU Only */ +#define MFC_PUTQLLUC_CMD 0xB8 /* SPU Only */ + +#define MFC_SNDSIG_CMD 0xA0 +#define MFC_SNDSIGB_CMD 0xA1 +#define MFC_SNDSIGF_CMD 0xA2 +#define MFC_BARRIER_CMD 0xC0 +#define MFC_EIEIO_CMD 0xC8 +#define MFC_SYNC_CMD 0xCC + + +/****************************************************************************/ +/* Definitions for constructing a 32-bit command word including the transfer + * and replacement class id and the command opcode. 
+ */ +/****************************************************************************/ +#define MFC_TCLASS(_tid) ((_tid) << 24) +#define MFC_RCLASS(_rid) ((_rid) << 16) + +#define MFC_CMD_WORD(_tid, _rid, _cmd) (MFC_TCLASS(_tid) | MFC_RCLASS(_rid) | (_cmd)) + +/****************************************************************************/ +/* Definitions for constructing a 64-bit command word including the size, tag, + * transfer and replacement class id and the command opcode. + */ +/****************************************************************************/ +#define MFC_SIZE(_size) ((unsigned long long)(_size) << 48) +#define MFC_TAG(_tag_id) ((unsigned long long)(_tag_id) << 32) +#define MFC_TR_CMD(_trcmd) ((unsigned long long)(_trcmd)) + +#define MFC_CMD_DWORD(_size, _tag_id, _trcmd) (MFC_SIZE(_size) | MFC_TAG(_tag_id) | MFC_TR_CMD(_trcmd)) + +/****************************************************************************/ +/* Mask definitions for obtaining DMA commands and class ids from packed words. + */ +/****************************************************************************/ +#define MFC_CMD_MASK 0x0000FFFF +#define MFC_CLASS_MASK 0x000000FF + +/****************************************************************************/ +/* DMA max/min size definitions. */ +/****************************************************************************/ +#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */ +#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */ + +#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT) +#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT) + +#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1) +#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1) + +#define MFC_MIN_DMA_LIST_SIZE 0x0008 /* 8 bytes */ +#define MFC_MAX_DMA_LIST_SIZE 0x4000 /* 16K bytes */ + +/****************************************************************************/ +/* Mask definition for checking proper address alignment. 
*/ +/****************************************************************************/ +#define MFC_ADDR_MATCH_MASK 0xF +#define MFC_BEST_ADDR_ALIGNMENT 0x80 + +/****************************************************************************/ +/* Definitions related to the Proxy DMA Command Status register (DMA_CMDStatus). + */ +/****************************************************************************/ +#define MFC_PROXY_DMA_CMD_ENQUEUE_SUCCESSFUL 0x00 +#define MFC_PROXY_DMA_CMD_SEQUENCE_ERROR 0x01 +#define MFC_PROXY_DMA_QUEUE_FULL 0x02 + +/****************************************************************************/ +/* Definitions related to the DMA Queue Status register (DMA_QStatus). */ +/****************************************************************************/ +#define MFC_PROXY_MAX_QUEUE_SPACE 0x08 +#define MFC_PROXY_DMA_Q_EMPTY 0x80000000 +#define MFC_PROXY_DMA_Q_FREE_SPACE_MASK 0x0000FFFF + +#define MFC_SPU_MAX_QUEUE_SPACE 0x10 + +/****************************************************************************/ +/* Definitions related to the Proxy Tag-Group Query-Type register + * (Prxy_QueryType). + */ +/****************************************************************************/ +#define MFC_PROXY_DMA_QUERYTYPE_ANY 0x1 +#define MFC_PROXY_DMA_QUERYTYPE_ALL 0x2 + +/****************************************************************************/ +/* Definitions related to the Proxy Tag-Group Query-Mask (Prxy_QueryMask) + * and PU Tag Status (DMA_TagStatus) registers. + * + * NOTE: Only the bottom 5 bits of the tag id value passed are used, to ensure + * a valid tag id is used.
+ */ +/****************************************************************************/ + +#define MFC_TAGID_TO_TAGMASK(tag_id) (1 << ((tag_id) & 0x1F)) /* argument parenthesized; tag id is reduced modulo 32 */ + +/****************************************************************************/ +/* Definitions related to the Mailbox Status register (SPU_Mbox_Stat) and the + * depths of the outbound Mailbox Register (SPU_OutMbox), the outbound + * interrupting Mailbox Register (SPU_OutIntrMbox), and the inbound Mailbox + * Register (SPU_In_Mbox). + */ +/****************************************************************************/ +#define MFC_SPU_OUT_MBOX_COUNT_STATUS_MASK 0x000000FF +#define MFC_SPU_OUT_MBOX_COUNT_STATUS_SHIFT 0x0 +#define MFC_SPU_IN_MBOX_COUNT_STATUS_MASK 0x0000FF00 +#define MFC_SPU_IN_MBOX_COUNT_STATUS_SHIFT 0x8 +#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_MASK 0x00FF0000 +#define MFC_SPU_OUT_INTR_MBOX_COUNT_STATUS_SHIFT 0x10 + +/****************************************************************************/ +/* Definitions related to the SPC Multi Source Synchronization register + * (MFC_MSSync). + */ +/****************************************************************************/ +#define MFC_SPC_MSS_STATUS_MASK 0x1 +#define MFC_SPC_MSS_COMPLETE 0x0 +#define MFC_SPC_MSS_NOT_COMPLETE 0x1 + + +/******************************************* + * Channel Defines + *******************************************/ + +/* Events Defines for channels: + * 0 (SPU_RdEventStat), + * 1 (SPU_WrEventMask), and + * 2 (SPU_WrEventAck).
+ */ +#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001 +#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002 +#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008 +#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010 +#define MFC_DECREMENTER_EVENT 0x00000020 +#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040 +#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080 +#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100 +#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200 +#define MFC_LLR_LOST_EVENT 0x00000400 +#define MFC_PRIV_ATTN_EVENT 0x00000800 +#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000 + + + +/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate) + */ +#define MFC_TAG_UPDATE_IMMEDIATE 0x0 +#define MFC_TAG_UPDATE_ANY 0x1 +#define MFC_TAG_UPDATE_ALL 0x2 + +/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat) + */ +#define MFC_PUTLLC_STATUS 0x00000001 +#define MFC_PUTLLUC_STATUS 0x00000002 +#define MFC_GETLLAR_STATUS 0x00000004 + +#endif /* _CBEA_MFC_H_ */ diff --git a/Extras/software_cache/cache/include/defs.h b/Extras/software_cache/cache/include/defs.h new file mode 100644 index 000000000..d15d9361a --- /dev/null +++ b/Extras/software_cache/cache/include/defs.h @@ -0,0 +1,149 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* spe_cache_defs.h + * + * Copyright (C) 2005 IBM Corp. + * + * Internal definitions for software managed cache. + */ + +#ifndef __SPE_CACHE_DEFS_H__ +#define __SPE_CACHE_DEFS_H__ + +/** + ** Defn's for number of cache sets. + ** Default is 64 sets. 
+ */ +#if (SPE_CACHE_NSETS==1024) +#define SPE_CACHE_NSETS_SHIFT 10 +#elif (SPE_CACHE_NSETS==512) +#define SPE_CACHE_NSETS_SHIFT 9 +#elif (SPE_CACHE_NSETS==256) +#define SPE_CACHE_NSETS_SHIFT 8 +#elif (SPE_CACHE_NSETS==128) +#define SPE_CACHE_NSETS_SHIFT 7 +#elif (SPE_CACHE_NSETS==64) +#define SPE_CACHE_NSETS_SHIFT 6 +#elif (SPE_CACHE_NSETS==32) +#define SPE_CACHE_NSETS_SHIFT 5 +#elif (SPE_CACHE_NSETS==16) +#define SPE_CACHE_NSETS_SHIFT 4 +#elif (SPE_CACHE_NSETS==8) +#define SPE_CACHE_NSETS_SHIFT 3 +#elif (SPE_CACHE_NSETS==4) +#define SPE_CACHE_NSETS_SHIFT 2 +#elif (SPE_CACHE_NSETS==2) +#define SPE_CACHE_NSETS_SHIFT 1 +#else +#undef SPE_CACHE_NSETS +#define SPE_CACHE_NSETS 64 +#define SPE_CACHE_NSETS_SHIFT 6 +#endif + +/** + ** Defn's for cacheline size (bytes). + ** Default is 128 bytes. + */ +#if (SPE_CACHELINE_SIZE==512) +#define SPE_CACHELINE_SHIFT 9 +#elif (SPE_CACHELINE_SIZE==256) +#define SPE_CACHELINE_SHIFT 8 +#elif (SPE_CACHELINE_SIZE==128) +#define SPE_CACHELINE_SHIFT 7 +#elif (SPE_CACHELINE_SIZE==64) +#define SPE_CACHELINE_SHIFT 6 +#elif (SPE_CACHELINE_SIZE==32) +#define SPE_CACHELINE_SHIFT 5 +#else +#undef SPE_CACHELINE_SIZE +#define SPE_CACHELINE_SIZE 128 +#define SPE_CACHELINE_SHIFT 7 +#endif + +/** + ** Defn's derived from above settings. + */ +#define SPE_CACHE_NSETS_MASK (SPE_CACHE_NSETS - 1) +#define SPE_CACHELINE_MASK (SPE_CACHELINE_SIZE - 1) + +/** + ** Defn's for managing cacheline state. + */ +#define SPE_CACHELINE_DIRTY 0x1 +#define SPE_CACHELINE_LOCKED 0x2 +#define SPE_CACHELINE_STATE_MASK (SPE_CACHELINE_DIRTY | SPE_CACHELINE_LOCKED) + +#ifdef _XLC +/** + * FIXME: For now disable manual branch hints + * on XLC due to performance degradation. + */ +#ifndef likely +#define likely(_c) (_c) +#define unlikely(_c) (_c) +#endif + +#else /* !_XLC */ + +#ifndef likely +#define likely(_c) __builtin_expect((_c), 1) +#define unlikely(_c) __builtin_expect((_c), 0) +#endif +#endif + + +/** + ** Debug controls.
Set -DNDEBUG to + ** disable both panic and assert. + */ +#include +#define _spe_cache_panic_(c) assert(c) +#ifdef SPE_CACHE_DBG +#define _spe_cache_assert_(c) assert(c) +#else +#define _spe_cache_assert_(c) /* No-op. */ +#endif + +#define _spe_cacheline_byte_offset_(ea) \ + ((ea) & SPE_CACHELINE_MASK) + +#define _spe_cacheline_byte_offset_x4(ea) \ + spu_and ((ea), SPE_CACHELINE_MASK) + +#endif + +static __inline vector unsigned int _load_vec_uint4(unsigned int ui1, unsigned int ui2, unsigned int ui3, unsigned int ui4) +{ + vector unsigned int result; + vector unsigned int iv1, iv2, iv3, iv4; + + vector unsigned char shuffle = VEC_LITERAL(vector unsigned char, + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + iv1 = spu_promote(ui1, 0); + iv2 = spu_promote(ui2, 0); + iv3 = spu_promote(ui3, 0); + iv4 = spu_promote(ui4, 0); + + result = spu_or(spu_shuffle(iv1, iv2, shuffle), spu_shuffle(iv3, iv4, spu_rlqwbyte(shuffle, 8))); + return (result); +} + +static __inline vector unsigned int _pack_vec_uint4(vector unsigned int ui1, vector unsigned int ui2, vector unsigned int ui3, vector unsigned int ui4) +{ + vector unsigned int result; + vector unsigned char shuffle = VEC_LITERAL(vector unsigned char, + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + + result = spu_or(spu_shuffle(ui1, ui2, shuffle), spu_shuffle(ui3, ui4, spu_rlqwbyte(shuffle, 8))); + return (result); +} diff --git a/Extras/software_cache/cache/include/dma.h b/Extras/software_cache/cache/include/dma.h new file mode 100644 index 000000000..fdcc8313c --- /dev/null +++ b/Extras/software_cache/cache/include/dma.h @@ -0,0 +1,40 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. 
*/ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* dma.h + * + * Copyright (C) 2005 IBM Corp. + * + * Internal DMA utilities for software + * managed cache. + */ + +#ifndef __SPE_CACHE_DMA_H__ +#define __SPE_CACHE_DMA_H__ + +#define SPE_CACHE_TAGID_SHIFT (SPE_CACHELINE_SHIFT + SPE_CACHE_NWAY_SHIFT) + +#define _SPE_CACHELINE_TAGID(_ptr) (16) +#define _SPE_CACHELINE_TAGMASK(_ptr) (1 << 16) + +#define SPE_CACHELINE_TAGID(_line) \ + _SPE_CACHELINE_TAGID(&spe_cache_mem[_line]) +#define SPE_CACHELINE_TAGMASK(_line) \ + _SPE_CACHELINE_TAGMASK(&spe_cache_mem[_line]) + +#ifndef SPE_CACHE_SET_TAGID +#define SPE_CACHE_SET_TAGID(set) ((set) & 0x1f) +#endif +#define SPE_CACHE_SET_TAGMASK(set) (1 << SPE_CACHE_SET_TAGID(set)) + +#define SPE_CACHE_PUT MFC_PUTF_CMD +#define SPE_CACHE_GET MFC_GET_CMD + +#endif diff --git a/Extras/software_cache/cache/include/ilog2.h b/Extras/software_cache/cache/include/ilog2.h new file mode 100644 index 000000000..86a5ca865 --- /dev/null +++ b/Extras/software_cache/cache/include/ilog2.h @@ -0,0 +1,35 @@ +/* @(#)12 1.5 src/lib/math/ilog2.h, sw.lib, sdk_pub 10/11/05 15:35:56 */ +/* -------------------------------------------------------------- */ +/* (C) Copyright 2001,2005, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment Incorporated, */ +/* Toshiba Corporation. */ +/* */ +/* All Rights Reserved. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _ILOG2_H_ +#define _ILOG2_H_ 1 + +/* + * FUNCTION + * signed int _ilog2(signed int x) + * + * DESCRIPTION + * _ilog2 computes ceiling of log (base 2) of the input value x. + * The input value, x, must be a non-zero positive value. 
+ */ + +static __inline signed int _ilog2(signed int x) +{ +#ifdef __SPU__ + return (32 - spu_extract(spu_cntlz(spu_promote(x - 1, 0)), 0)); +#else + signed int result; + + for (result=0, x--; x > 0; result++, x>>=1); + return (result); +#endif +} + +#endif /* _ILOG2_H_ */ diff --git a/Extras/software_cache/cache/include/memset.h b/Extras/software_cache/cache/include/memset.h new file mode 100644 index 000000000..b070d3e35 --- /dev/null +++ b/Extras/software_cache/cache/include/memset.h @@ -0,0 +1,68 @@ +/* @(#)85 1.4 src/lib/c/memset.h, sw.lib, sdk_pub 10/13/05 10:17:09 */ +/* -------------------------------------------------------------- */ +/* (C) Copyright 2001,2005, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment Incorporated, */ +/* Toshiba Corporation. */ +/* */ +/* All Rights Reserved. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#include +#include + +/* Fills the first n bytes of the memory area pointed to by s + * with the constant byte c. Returns a pointer to the memory area s. + */ +static __inline void * _memset(void *s, int c, size_t n) +{ + int skip, cnt, i; + vec_uchar16 *vs; + vec_uchar16 vc, mask; + + vs = (vec_uchar16 *)(s); + vc = spu_splats((unsigned char)c); + cnt = (int)(n); + + /* Handle any leading partial quadwords as well a + * very short settings (ie, such that the n characters + * all reside in a single quadword. 
+ */ + skip = (int)(s) & 15; + if (skip) { + mask = spu_rlmaskqwbyte((vec_uchar16)(-1), 0-skip); + cnt -= 16 - skip; + if (cnt < 0) { + mask = spu_and(mask, spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(-cnt))); + } + *vs = spu_sel(*vs, vc, mask); + vs++; + } + + /* Handle 8 quadwords at a time + */ + for (i=127; i 0) { + mask = spu_slqwbyte((vec_uchar16)(-1), (unsigned int)(16-cnt)); + *vs = spu_sel(*vs, vc, mask); + } + + return (s); +} diff --git a/Extras/software_cache/cache/include/nway-lookup.h b/Extras/software_cache/cache/include/nway-lookup.h new file mode 100644 index 000000000..33e552c6d --- /dev/null +++ b/Extras/software_cache/cache/include/nway-lookup.h @@ -0,0 +1,194 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* nway-lookup.h + * + * Copyright (C) 2005 IBM Corp. + * + * Internal lookup operations for software + * managed cache. + * + * See nway-opt.h for "optimized" nway + * lookup operations. + */ + +#ifndef __SPE_CACHE_NWAY_LOOKUP_H_ +#define __SPE_CACHE_NWAY_LOOKUP_H_ + + +/** + * _decl_set_entries_ + * Load up set entries (by 4) from an n-way + * set associative cache. Mask off the dirty + * bit, as needed. + */ +#define _decl_set_entries_(set, name, index) \ + vec_uint4 name = *((vec_uint4 *) &spe_cache_dir[set][index]) + + +#define _spe_cache_4_way_lookup_(set, ea) \ +({ \ + _decl_set_entries_(set, e0123, 0); \ + spu_gather(spu_cmpeq(e0123, ea)); \ +}) + +/** + * _spe_cache_set_lookup_ + * Compare 'ea' against all entries of + * a set, and return a result that is + * consistent with spu_gather(). 
+ */ +#define _spe_cache_set_lookup_(set, ea) \ + _spe_cache_4_way_lookup_(set, ea) + + +/** + * _spe_cache_nway_lookup_x4_ + * Declare local variables and lookup four addresses + * in the n-way set associative cache. Upon return, + * 'idx_x4' contains the matching elements in the sets, + * or -1 if not found. + */ +#define _spe_cache_nway_lookup_x4(ea_x4, set_x4, idx_x4) \ +({ \ + vector unsigned int ea_aligned_x4 = spu_and ((ea_x4), ~SPE_CACHELINE_MASK); \ + vector unsigned char splat0 = VEC_LITERAL(vector unsigned char, \ + 0x00, 0x01, 0x02, 0x03, \ + 0x00, 0x01, 0x02, 0x03, \ + 0x00, 0x01, 0x02, 0x03, \ + 0x00, 0x01, 0x02, 0x03); \ + vector unsigned char splat1 = VEC_LITERAL(vector unsigned char, \ + 0x04, 0x05, 0x06, 0x07, \ + 0x04, 0x05, 0x06, 0x07, \ + 0x04, 0x05, 0x06, 0x07, \ + 0x04, 0x05, 0x06, 0x07); \ + vector unsigned char splat2 = VEC_LITERAL(vector unsigned char, \ + 0x08, 0x09, 0x0a, 0x0b, \ + 0x08, 0x09, 0x0a, 0x0b, \ + 0x08, 0x09, 0x0a, 0x0b, \ + 0x08, 0x09, 0x0a, 0x0b); \ + vector unsigned char splat3 = VEC_LITERAL(vector unsigned char, \ + 0x0c, 0x0d, 0x0e, 0x0f, \ + 0x0c, 0x0d, 0x0e, 0x0f, \ + 0x0c, 0x0d, 0x0e, 0x0f, \ + 0x0c, 0x0d, 0x0e, 0x0f); \ + vec_uint4 ea_aligned0 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat0); \ + vec_uint4 ea_aligned1 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat1); \ + vec_uint4 ea_aligned2 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat2); \ + vec_uint4 ea_aligned3 = spu_shuffle(ea_aligned_x4, ea_aligned_x4, splat3); \ + vec_uint4 found0, found1, found2, found3; \ + vec_uint4 found_x4; \ + (set_x4) = _spe_cache_set_num_x4(ea_x4); \ + found0 = _spe_cache_set_lookup_(spu_extract (set_x4, 0), ea_aligned0); \ + found1 = _spe_cache_set_lookup_(spu_extract (set_x4, 1), ea_aligned1); \ + found2 = _spe_cache_set_lookup_(spu_extract (set_x4, 2), ea_aligned2); \ + found3 = _spe_cache_set_lookup_(spu_extract (set_x4, 3), ea_aligned3); \ + found_x4 = _pack_vec_uint4 (found0, found1, found2, found3); \ + (idx_x4) = 
(vector signed int)_spe_cache_idx_num_x4(found_x4); \ +}) + +#define _spe_cache_nway_lookup_(ea, set, idx) \ +({ \ + unsigned int ea_aligned = (ea) & ~SPE_CACHELINE_MASK; \ + vec_uint4 ea_aligned4 = spu_splats(ea_aligned); \ + vec_uint4 found; \ + (set) = _spe_cache_set_num_(ea); \ + found = _spe_cache_set_lookup_(set, ea_aligned4); \ + (idx) = _spe_cache_idx_num_(found); \ +}) + +/** + * _spe_cache_lookup_ + * Lookup and return the LSA of an EA + * that is known to be in the cache. + */ +#define _spe_cache_lookup_(ea, is_write) \ +({ \ + int set, idx, line, byte; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + \ + line = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + (void *) &spe_cache_mem[line + byte]; \ +}) + +/** + * _spe_cache_wait_ + * Wait for transfer of a cache line + * to complete. + */ +#define _spe_cache_wait_(_lsa) \ +({ \ + spu_writech(22, _SPE_CACHELINE_TAGMASK(_lsa)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ +}) + +/** + * _spe_cache_lookup_wait_ + * Lookup and return the LSA of an EA + * that is known to be in the cache, + * and guarantee that its transfer is + * complete. + */ +#define _spe_cache_lookup_wait_(ea, is_write) \ +({ \ + int set, idx, line, byte; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + \ + line = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ + (void *) &spe_cache_mem[line + byte]; \ +}) + +/** + * _spe_cache_lookup_xfer_ + * Lookup and return the LSA of an EA, where + * the line may either be in the cache or not. + * If not, initiate transfer but do not wait + * for completion. 
+ */ +#define _spe_cache_lookup_xfer_(ea, is_write, rb) \ +({ \ + int set, idx, line, byte; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + \ + if (unlikely(idx < 0)) { \ + idx = _spe_cache_miss_(ea, set, -1); \ + } \ + line = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + (void *) &spe_cache_mem[line + byte]; \ +}) + +/** + * _spe_cache_lookup_xfer_wait_ + * Lookup and return the LSA of an EA, where + * the line may either be in the cache or not. + * If not, initiate transfer and guarantee + * completion. + */ +#define _spe_cache_lookup_xfer_wait_(ea, is_write, rb) \ +({ \ + int set, idx, line, byte; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + \ + if (unlikely(idx < 0)) { \ + idx = _spe_cache_miss_(ea, set, -1); \ + spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ + } \ + line = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + (void *) &spe_cache_mem[line + byte]; \ +}) + +#endif diff --git a/Extras/software_cache/cache/include/nway-miss.h b/Extras/software_cache/cache/include/nway-miss.h new file mode 100644 index 000000000..4c73ae8bf --- /dev/null +++ b/Extras/software_cache/cache/include/nway-miss.h @@ -0,0 +1,51 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* nway-miss.h + * + * Copyright (C) 2005 IBM Corp. + * + * Internal handler for cache misses. 
+ */ + +#ifndef __SPE_CACHE_NWAY_MISS_H__ +#define __SPE_CACHE_NWAY_MISS_H__ + +static int _spe_cache_miss_(unsigned int ea, int set, int avail) +{ + unsigned int ea_aligned = ea & ~SPE_CACHELINE_MASK; + vec_uint4 slot; + vec_uint4 exists = _spe_cache_set_lookup_(set, ea_aligned); + int idx, line; + + /* Double check to make sure that the entry has not + * already been allocated in this set. This condition + * might occur if multiple lookups are being perfomed + * simultaneously. + */ + if (unlikely(spu_extract(exists, 0) != 0)) { + return _spe_cache_idx_num_(exists); + } + + /* Now check to see if there are empty slots + * that are available in the set. + */ + slot = _spe_cache_replace_(set, avail); + idx = _spe_cache_idx_num_(slot); + line = _spe_cacheline_num_(set, idx); + + spu_mfcdma32(&spe_cache_mem[line], ea_aligned, SPE_CACHELINE_SIZE, + SPE_CACHE_SET_TAGID(set), SPE_CACHE_GET); + + spe_cache_dir[set][SPE_CACHE_NWAY_MASK - idx] = ea_aligned; + + return idx; +} +#endif diff --git a/Extras/software_cache/cache/include/nway-opt.h b/Extras/software_cache/cache/include/nway-opt.h new file mode 100644 index 000000000..6a96773c6 --- /dev/null +++ b/Extras/software_cache/cache/include/nway-opt.h @@ -0,0 +1,153 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* nway-opt.h + * + * Copyright (C) 2006 IBM Corp. + * + * "Optimized" lookup operations for n-way set associative + * software managed cache. + */ +#include + +#ifndef __SPE_CACHE_NWAY_OPT_H_ +#define __SPE_CACHE_NWAY_OPT_H_ + +/* __spe_cache_rd + * Look up and return data from the cache. If the data + * is not currently in cache then transfer it from main + * storage. 
+ * + * This code uses a conditional branch to the cache miss + * handler in the event that the requested data is not + * in the cache. A branch hint is used to avoid paying + * the branch stall penalty. + */ +#define __spe_cache_rd(type, ea) \ +({ \ + int set, idx, lnum, byte; \ + type ret; \ + _spe_cache_nway_lookup_(ea, set, idx); \ + if (unlikely(idx < 0)) { \ + idx = _spe_cache_miss_(ea, set, -1); \ + spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ + } \ + lnum = _spe_cacheline_num_(set, idx); \ + byte = _spe_cacheline_byte_offset_(ea); \ + ret = *((type *) (&spe_cache_mem[lnum + byte])); \ + ret; \ +}) + +/** + * __spe_cache_rd_x4 + * Fetch four data elements from the cache. + * + * This code uses one conditional branch in + * the event that any of the four elements + * are missing. + * + * On a miss, light weight locking is used to + * avoid casting out entries that were found. + * Further, we wait just once for the transfers, + * allowing for parallel [rather than serial] + * transfers. 
+ */ + +#define __spe_cache_rd_x4(type, ea_x4) \ +({ \ + vector unsigned int missing; \ + unsigned int ms; \ + vector unsigned int cindex; \ + unsigned int d0, d1, d2, d3; \ + vector unsigned int s_x4; \ + vector signed int i_x4; \ + vector unsigned int ibyte, iline; \ + vector unsigned int ret; \ + unsigned int idx0, idx1, idx2, idx3; \ + \ + _spe_cache_nway_lookup_x4(ea_x4, s_x4, i_x4); \ + missing = spu_rlmask ((vector unsigned int)i_x4, -8); \ + ms = spu_extract (spu_gather (missing), 0); \ + \ + ibyte = _spe_cacheline_byte_offset_x4(ea_x4); \ + \ + iline = _spe_cacheline_num_x4(s_x4, \ + (vector unsigned int)i_x4); \ + \ + cindex = spu_add (iline, ibyte); \ + \ + idx0 = spu_extract (cindex, 0); \ + idx1 = spu_extract (cindex, 1); \ + idx2 = spu_extract (cindex, 2); \ + idx3 = spu_extract (cindex, 3); \ + \ + d0 = *((type *) (&spe_cache_mem[idx0])); \ + d1 = *((type *) (&spe_cache_mem[idx1])); \ + d2 = *((type *) (&spe_cache_mem[idx2])); \ + d3 = *((type *) (&spe_cache_mem[idx3])); \ + \ + ret = _load_vec_uint4 (d0, d1, d2, d3); \ + \ + if (unlikely(ms)) { \ + int b0 = spu_extract (ibyte, 0); \ + int b1 = spu_extract (ibyte, 1); \ + int b2 = spu_extract (ibyte, 2); \ + int b3 = spu_extract (ibyte, 3); \ + int lnum0; \ + int lnum1; \ + int lnum2; \ + int lnum3; \ + int s0 = spu_extract (s_x4, 0); \ + int s1 = spu_extract (s_x4, 1); \ + int s2 = spu_extract (s_x4, 2); \ + int s3 = spu_extract (s_x4, 3); \ + int i0 = spu_extract (i_x4, 0); \ + int i1 = spu_extract (i_x4, 1); \ + int i2 = spu_extract (i_x4, 2); \ + int i3 = spu_extract (i_x4, 3); \ + unsigned int ea0 = spu_extract(ea_x4, 0); \ + unsigned int ea1 = spu_extract(ea_x4, 1); \ + unsigned int ea2 = spu_extract(ea_x4, 2); \ + unsigned int ea3 = spu_extract(ea_x4, 3); \ + int avail = -1; \ + \ + avail &= ~(((i0 < 0) ? 0 : (1 << i0)) | \ + ((i1 < 0) ? 0 : (1 << i1)) | \ + ((i2 < 0) ? 0 : (1 << i2)) | \ + ((i3 < 0) ?
0 : (1 << i3))); \ + \ + i0 = _spe_cache_miss_(ea0, s0, avail); \ + avail &= ~(1 << i0); \ + i1 = _spe_cache_miss_(ea1, s1, avail); \ + avail &= ~(1 << i1); \ + i2 = _spe_cache_miss_(ea2, s2, avail); \ + avail &= ~(1 << i2); \ + i3 = _spe_cache_miss_(ea3, s3, avail); \ + \ + lnum0 = _spe_cacheline_num_(s0, i0); \ + lnum1 = _spe_cacheline_num_(s1, i1); \ + lnum2 = _spe_cacheline_num_(s2, i2); \ + lnum3 = _spe_cacheline_num_(s3, i3); \ + /* The four lines may live in different sets: wait on every set's tag group. */ \ + spu_writech(22, SPE_CACHE_SET_TAGMASK(s0) | SPE_CACHE_SET_TAGMASK(s1) | SPE_CACHE_SET_TAGMASK(s2) | SPE_CACHE_SET_TAGMASK(s3)); \ + spu_mfcstat(MFC_TAG_UPDATE_ALL); \ + \ + d0 = *((type *) (&spe_cache_mem[lnum0 + b0])); \ + d1 = *((type *) (&spe_cache_mem[lnum1 + b1])); \ + d2 = *((type *) (&spe_cache_mem[lnum2 + b2])); \ + d3 = *((type *) (&spe_cache_mem[lnum3 + b3])); \ + \ + ret = _load_vec_uint4 (d0, d1, d2, d3); \ + } \ + ret; \ +}) + +#endif /* _SPE_CACHE_NWAY_OPT_H_ */ diff --git a/Extras/software_cache/cache/include/nway-replace.h b/Extras/software_cache/cache/include/nway-replace.h new file mode 100644 index 000000000..72fce1876 --- /dev/null +++ b/Extras/software_cache/cache/include/nway-replace.h @@ -0,0 +1,38 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* nway-replace.h + * + * Copyright (C) 2005 IBM Corp. + * + * Implement replacement for software + * managed cache.
+ */ + +#ifndef __SPE_CACHE_NWAY_REPLACE_H_ +#define __SPE_CACHE_NWAY_REPLACE_H_ + +static vec_uint4 spe_cache_replace_cntr[SPE_CACHE_NSETS+1]; + +static inline vec_uint4 _spe_cache_replace_(int set, int avail) +{ + unsigned int mask = ((1 << SPE_CACHE_NWAY) - 1) & avail; + unsigned int curr, currbit, next; + + curr = spu_extract(spe_cache_replace_cntr[set], 0) & SPE_CACHE_NWAY_MASK; + currbit = (1 << curr); + next = (curr + 1) & SPE_CACHE_NWAY_MASK; + spe_cache_replace_cntr[set] = (vec_uint4) spu_promote(next, 0); + mask = (mask & currbit) ? currbit : mask; + + return (vec_uint4) spu_promote(mask, 0); +} + +#endif diff --git a/Extras/software_cache/cache/include/nway.h b/Extras/software_cache/cache/include/nway.h new file mode 100644 index 000000000..494cd6bea --- /dev/null +++ b/Extras/software_cache/cache/include/nway.h @@ -0,0 +1,105 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* nway.h + * + * Copyright (C) 2005 IBM Corp. + * + * Support for n-way set associative software + * managed cache. The 4-way associative cache + * is the only interface exposed currently. + */ + +#ifndef __SPE_CACHE_NWAY_H_ +#define __SPE_CACHE_NWAY_H_ + +/** + ** Defn's for n-way set associativity. + ** Default is 4-way. 
+ */ +#define SPE_CACHE_NWAY 4 +#define SPE_CACHE_NWAY_SHIFT 2 + +#define SPE_CACHE_NWAY_MASK (SPE_CACHE_NWAY - 1) +#define SPE_CACHE_NENTRIES (SPE_CACHE_NWAY * SPE_CACHE_NSETS) +#define SPE_CACHE_MEM_SIZE (SPE_CACHE_NENTRIES * SPE_CACHELINE_SIZE) + +#define _spe_cache_set_num_(ea) \ +({ \ + unsigned int ead, eadm, ret; \ + ead = ((ea) >> SPE_CACHELINE_SHIFT); \ + eadm = ((ea) >> (SPE_CACHELINE_SHIFT+2)); \ + ret = (ead ^ eadm) & SPE_CACHE_NSETS_MASK; \ + ret; \ +}) +/* Vector form of _spe_cache_set_num_; both must hash an EA to the same set. */ +#define _spe_cache_set_num_x4(ea_x4) \ +({ \ + vector unsigned int tmp0; \ + vector unsigned int tmp1; \ + tmp0 = spu_rlmask (ea_x4, -SPE_CACHELINE_SHIFT); \ + tmp1 = spu_rlmask (ea_x4, -(SPE_CACHELINE_SHIFT+2)); \ + spu_and (spu_xor (tmp0, tmp1), SPE_CACHE_NSETS_MASK); \ +}) + +#define _spe_cache_idx_num_x4(found) \ + spu_sub((unsigned int) 31, spu_cntlz(found)) + +#define _spe_cache_idx_num_(found) \ + spu_extract(spu_sub((unsigned int) 31, spu_cntlz(found)), 0) + +#define _spe_cacheline_num_(set, idx) \ + ((((set) << SPE_CACHE_NWAY_SHIFT) + (idx)) << SPE_CACHELINE_SHIFT) + +#define _spe_cacheline_num_x4(set, idx) \ + spu_sl (spu_add (spu_sl (set, SPE_CACHE_NWAY_SHIFT), idx), SPE_CACHELINE_SHIFT) + +#define _spe_cacheline_is_dirty_(set, idx) \ + (spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_DIRTY) + +#define _spe_cacheline_is_locked_(set, idx) \ + (spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] & SPE_CACHELINE_LOCKED) + +#define _spe_lock_cacheline_(set, idx) \ + spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] |= SPE_CACHELINE_LOCKED + +#define _spe_unlock_cacheline_(set, idx) \ + spe_cache_dir[set][SPE_CACHE_NWAY_MASK-(idx)] &= ~SPE_CACHELINE_LOCKED + + +/** + * spe_cache_dir + * This is the n-way set associative cache + * directory. Entries are either zero (unused) + * or non-zero (used). + * + * State for one additional (dummy) set is + * allocated to improve efficiency of cache + * line locking.
+ * volatile seems not to be necessary here, the SCE toolchain guarantees a barrier after dma transfer + */ +static unsigned int spe_cache_dir[SPE_CACHE_NSETS+1][SPE_CACHE_NWAY] + __attribute__ ((aligned(16))); + +/** + * spe_cache_mem + * A contiguous set of cachelines in LS memory, + * one line for each entry in the cache. + * volatile seems not to be necessary here, the SCE toolchain guarantees a barrier after dma transfer + */ +static char spe_cache_mem[SPE_CACHE_MEM_SIZE] + __attribute__ ((aligned(128))); + +#include "nway-lookup.h" +#include "nway-replace.h" +#include "nway-miss.h" +#include "nway-opt.h" + +#endif diff --git a/Extras/software_cache/cache/include/spe_cache.h b/Extras/software_cache/cache/include/spe_cache.h new file mode 100644 index 000000000..f632bf11b --- /dev/null +++ b/Extras/software_cache/cache/include/spe_cache.h @@ -0,0 +1,32 @@ +/* --------------------------------------------------------------- */ +/* PLEASE DO NOT MODIFY THIS SECTION */ +/* This prolog section is automatically generated. */ +/* */ +/* (C) Copyright 2001,2006, */ +/* International Business Machines Corporation, */ +/* */ +/* All Rights Reserved. */ +/* --------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +/* spe_cache.h + * + * Copyright (C) 2005 IBM Corp. + * + * Top level include file implementing + * software managed cache. 
+ */ + +#ifndef __SPE_CACHE_H__ +#define __SPE_CACHE_H__ 1 + +#include "vec_literal.h" +#include "ilog2.h" +#include "memset.h" +//#include + +#include "defs.h" +#include "dma.h" +#include "nway.h" +#include "api.h" + +#endif diff --git a/Extras/software_cache/cache/include/vec_literal.h b/Extras/software_cache/cache/include/vec_literal.h new file mode 100644 index 000000000..a7734708f --- /dev/null +++ b/Extras/software_cache/cache/include/vec_literal.h @@ -0,0 +1,74 @@ +/* @(#)86 1.3 src/include/vec_literal.h, sw.includes, sdk_pub 10/11/05 16:00:27 */ +/* -------------------------------------------------------------- */ +/* (C) Copyright 2001,2005, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment Incorporated, */ +/* Toshiba Corporation. */ +/* */ +/* All Rights Reserved. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _VEC_LITERAL_H_ +#define _VEC_LITERAL_H_ + +/* This header files provides an abstraction for the various implementations + * of vector literal construction. The two formats are: + * + * 1) Altivec styled using parenthesis + * 2) C grammer friendly styled using curly braces + * + * The macro, VEC_LITERAL has been developed to provide some portability + * in these two styles. To achieve true portability, user must specify all + * elements of the vector being initialized. A single element can be provided + * but only the first element guarenteed across both construction styles. + * + * The VEC_SPLAT_* macros have been provided for portability of vector literal + * construction when all the elements of the vector contain the same value. + */ + +#ifdef __SPU__ +#include +#endif + + +#ifdef __ALTIVEC_LITERAL_STYLE__ +/* Use altivec style. + */ +#define VEC_LITERAL(_type, ...) 
((_type)(__VA_ARGS__)) + +#define VEC_SPLAT_U8(_val) ((vector unsigned char)(_val)) +#define VEC_SPLAT_S8(_val) ((vector signed char)(_val)) + +#define VEC_SPLAT_U16(_val) ((vector unsigned short)(_val)) +#define VEC_SPLAT_S16(_val) ((vector signed short)(_val)) + +#define VEC_SPLAT_U32(_val) ((vector unsigned int)(_val)) +#define VEC_SPLAT_S32(_val) ((vector signed int)(_val)) +#define VEC_SPLAT_F32(_val) ((vector float)(_val)) + +#define VEC_SPLAT_U64(_val) ((vector unsigned long long)(_val)) +#define VEC_SPLAT_S64(_val) ((vector signed long long)(_val)) +#define VEC_SPLAT_F64(_val) ((vector double)(_val)) + +#else +/* Use curly brace style. + */ +#define VEC_LITERAL(_type, ...) ((_type){__VA_ARGS__}) + +#define VEC_SPLAT_U8(_val) ((vector unsigned char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val}) +#define VEC_SPLAT_S8(_val) ((vector signed char){_val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val, _val}) + +#define VEC_SPLAT_U16(_val) ((vector unsigned short){_val, _val, _val, _val, _val, _val, _val, _val}) +#define VEC_SPLAT_S16(_val) ((vector signed short){_val, _val, _val, _val, _val, _val, _val, _val}) + +#define VEC_SPLAT_U32(_val) ((vector unsigned int){_val, _val, _val, _val}) +#define VEC_SPLAT_S32(_val) ((vector signed int){_val, _val, _val, _val}) +#define VEC_SPLAT_F32(_val) ((vector float){_val, _val, _val, _val}) + +#define VEC_SPLAT_U64(_val) ((vector unsigned long long){_val, _val}) +#define VEC_SPLAT_S64(_val) ((vector signed long long){_val, _val}) +#define VEC_SPLAT_F64(_val) ((vector double){_val, _val}) + +#endif + +#endif /* _VEC_LITERAL_H_ */ diff --git a/Extras/software_cache/hello.spu.c b/Extras/software_cache/hello.spu.c new file mode 100644 index 000000000..3433c9f58 --- /dev/null +++ b/Extras/software_cache/hello.spu.c @@ -0,0 +1,83 @@ +/* + * SCE CONFIDENTIAL + * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 + * Copyright (C) 2005 Sony 
Computer Entertainment Inc. + * All Rights Reserved. + */ +#include +#include + +#include +#include +#include +#include + +#define SPE_CACHE_NWAY 4 +#define SPE_CACHE_NSETS 32 +#define SPE_CACHELINE_SIZE 512 +#define SPE_CACHE_SET_TAGID(set) 16 + +#define USE_SOFTWARE_CACHE 1 +#ifdef USE_SOFTWARE_CACHE + +#include "cache/include/spe_cache.h" + +void * spe_readcache(unsigned int ea) +{ + + int set, idx, line, byte; + _spe_cache_nway_lookup_(ea, set, idx); + + if (unlikely(idx < 0)) { + idx = _spe_cache_miss_(ea, set, -1); + spu_writech(22, SPE_CACHE_SET_TAGMASK(set)); + spu_mfcstat(MFC_TAG_UPDATE_ALL); + } + line = _spe_cacheline_num_(set, idx); + byte = _spe_cacheline_byte_offset_(ea); + return (void *) &spe_cache_mem[line + byte]; +} +#endif //USE_SOFTWARE_CACHE + +int main(int spu_num,uint64_t mainmemPtr) +{ + int memPtr = (int) mainmemPtr; + + +#define MAX_BUF 256 + char spuBuffer[MAX_BUF]; + spuBuffer[0] = 0; + + char* result,*result2; //= spe_cache_rd(mainmemPtr); + +#ifdef USE_SOFTWARE_CACHE + + //this is a brute-force sample. + //you can use the software cache more efficient using __spe_cache_rd_x4 to read 4 elements at a time + + int i=0; + do + { + result = spe_readcache(mainmemPtr+i); + //spe_readcache is the expanded version of spe_cache_rd MACRO + + spuBuffer[i] = result[0]; + i++; + } while (result[0] && (i= MAX_BUF) + { + spu_printf("spe_readcache buffer overflow. is the buffer 0-terminated?\n"); + } + spu_printf("spe_cache_rd(%x) = %s\n", memPtr,spuBuffer); + + sys_spu_thread_exit(0); +} + diff --git a/Extras/software_cache/hello.spu.mk b/Extras/software_cache/hello.spu.mk new file mode 100644 index 000000000..0b1997241 --- /dev/null +++ b/Extras/software_cache/hello.spu.mk @@ -0,0 +1,19 @@ +# SCE CONFIDENTIAL +# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 +# Copyright (C) 2005 Sony Computer Entertainment Inc. +# All Rights Reserved. 
+# + +CELL_MK_DIR ?= $(CELL_SDK)/samples/mk + + +include $(CELL_MK_DIR)/sdk.makedef.mk + +SPU_INCDIRS += -Icache/include +SPU_SRCS = hello.spu.c +SPU_TARGET = hello.spu.elf +SPU_OPTIMIZE_LV=-O0 + +include $(CELL_MK_DIR)/sdk.target.mk + + diff --git a/Extras/software_cache/spu_printf_server.h b/Extras/software_cache/spu_printf_server.h new file mode 100644 index 000000000..e477eac50 --- /dev/null +++ b/Extras/software_cache/spu_printf_server.h @@ -0,0 +1,19 @@ +/* SCE CONFIDENTIAL */ +/* PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 */ +/* Copyright (C) 2005 Sony Computer Entertainment Inc. */ +/* All Rights Reserved. */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +void spu_printf_server_entry(uint64_t arg); +int spu_printf_server_initialize(void); +int spu_printf_server_finalize(void); +int spu_printf_server_register(sys_spu_thread_t spu); +int spu_printf_server_unregister(sys_spu_thread_t spu); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + diff --git a/Extras/software_cache/spu_printf_server.ppu.c b/Extras/software_cache/spu_printf_server.ppu.c new file mode 100644 index 000000000..48dab3b0f --- /dev/null +++ b/Extras/software_cache/spu_printf_server.ppu.c @@ -0,0 +1,204 @@ +/* + * SCE CONFIDENTIAL + * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007 + * Copyright (C) 2005 Sony Computer Entertainment Inc. + * All Rights Reserved. + * + * The SPU printf server is a PPU thread which collaborates with an SPU to + * output strings. + * + * On SPU-side, spu_printf() places the output string and arguments on a stack + * in the local storage, and passes its local-storage address with an SPU + * thread user event from SPU port 1 to PPU. On PPU-side, + * spu_thread_sprintf fetches the stack in the local stroage by DMA, and parse + * it to string-format. + * + * The SPU printf server takes charge of the tasks on PPU-side. The sequence + * of its task is as follows. + * 1. Receive events by sys_event_queue_receive() + * 2. 
Pass the received spu_printf stack address to spu_thread_printf().
+ * 3. Send its return value back to the SPU thread with
+ *    sys_spu_thread_write_spu_mb(), and go back to step 1.
+ *
+ * Initialization of the SPU printf server and registration of SPU threads to
+ * the SPU printf server is required. These can be done by
+ * spu_printf_server_initialize() and spu_printf_server_register().
+ * What they actually do is to create a PPU thread and event queue, and
+ * connect the SPU thread to the event queue.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "spu_printf_server.h"
+
+#define STACK_SIZE 4096
+#define PRIO 200
+
+/* State shared by all functions of the server (one server per process). */
+static sys_ppu_thread_t thread;                 /* server PPU thread */
+static sys_event_queue_t equeue;                /* queue the server waits on */
+static sys_event_queue_attribute_t eattr;
+static sys_event_port_t terminating_port;       /* used by finalize to stop the server */
+
+#define TERMINATING_PORT_NAME 0xFEE1DEAD
+#define SPU_PORT_PRINTF 0x1
+
+/*
+ * Create the event queue, the terminating port and the server PPU thread.
+ *
+ * The terminating port is created and connected before the thread is
+ * started, so that a failure at any step can be rolled back with the same
+ * calls spu_printf_server_finalize() uses; the roll-back itself is best
+ * effort (its return codes are ignored).
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_initialize(void)
+{
+	int ret;
+
+	/* Create event */
+	sys_event_queue_attribute_initialize(eattr);
+	ret = sys_event_queue_create(&equeue, &eattr, SYS_EVENT_PORT_LOCAL, 127);
+	if (ret) {
+		printf("sys_event_queue_create failed %d\n", ret);
+		return -1;
+	}
+
+	/*
+	 * Create the terminating port. This port is used only in
+	 * spu_printf_server_finalize().
+	 */
+	ret = sys_event_port_create(&terminating_port,
+								SYS_EVENT_PORT_LOCAL,
+								TERMINATING_PORT_NAME);
+	if (ret) {
+		printf ("spu_printf_server_initialize: sys_event_port_create failed %d\n", ret);
+		goto fail_destroy_queue;
+	}
+
+	ret = sys_event_port_connect_local(terminating_port, equeue);
+	if (ret) {
+		printf ("spu_printf_server_initialize: sys_event_port_connect_local failed %d\n", ret);
+		goto fail_destroy_port;
+	}
+
+	/* Create PPU thread (last, so nothing is running if an earlier step failed) */
+	ret = sys_ppu_thread_create(&thread, spu_printf_server_entry, 0UL, PRIO,
+								STACK_SIZE,
+								SYS_PPU_THREAD_CREATE_JOINABLE,
+								(char*)"spu_printf_server");
+	if (ret) {
+		printf ("spu_printf_server_initialize: sys_ppu_thread_create failed %d\n", ret);
+		goto fail_disconnect_port;
+	}
+
+	return 0;
+
+fail_disconnect_port:
+	(void)sys_event_port_disconnect(terminating_port);
+fail_destroy_port:
+	(void)sys_event_port_destroy(terminating_port);
+fail_destroy_queue:
+	(void)sys_event_queue_destroy(equeue, 0);
+	return -1;
+}
+
+
+/*
+ * Stop the server thread and release the terminating port and event queue.
+ *
+ * Before calling this, every registered SPU thread must have finished
+ * sending printf events.
+ *
+ * Returns 0 on success, -1 if the server could not be notified or joined,
+ * or if the event queue could not be destroyed.
+ */
+int spu_printf_server_finalize(void)
+{
+	int ret;
+
+	/*
+	 * Send an event from the terminating port to notify the termination to
+	 * the SPU printf server
+	 */
+	ret = sys_event_port_send(terminating_port, 0, 0, 0);
+	if (ret) {
+		printf("sys_event_port_send failed %d\n", ret);
+		return -1;
+	}
+
+	/* Wait for the termination of the SPU printf server */
+	uint64_t exit_status;
+	ret = sys_ppu_thread_join(thread, &exit_status);
+	if (ret) {
+		printf("sys_ppu_thread_join failed %d\n", ret);
+		return -1;
+	}
+
+	/*
+	 * Disconnect and destroy the terminating port. Failures here are
+	 * reported but do not stop the remaining clean-up.
+	 */
+	ret = sys_event_port_disconnect(terminating_port);
+	if (ret) {
+		printf("sys_event_port_disconnect failed %d\n", ret);
+	}
+	ret = sys_event_port_destroy(terminating_port);
+	if (ret) {
+		printf("sys_event_port_destroy failed %d\n", ret);
+	}
+
+	/* Destroy the event queue */
+	ret = sys_event_queue_destroy(equeue, 0);
+	if (ret) {
+		printf("sys_event_queue_destroy failed %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Connect the user event port SPU_PORT_PRINTF of the SPU thread `spu` to
+ * the server's event queue. Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_register(sys_spu_thread_t spu)
+{
+	int ret;
+
+	ret = sys_spu_thread_connect_event(spu, equeue,
+									   SYS_SPU_THREAD_EVENT_USER, SPU_PORT_PRINTF);
+	if (ret) {
+		printf("sys_spu_thread_connect_event failed %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Undo spu_printf_server_register() for the SPU thread `spu`.
+ * Returns 0 on success, -1 on failure.
+ */
+int spu_printf_server_unregister(sys_spu_thread_t spu)
+{
+	int ret;
+
+	ret = sys_spu_thread_disconnect_event(spu,
+										  SYS_SPU_THREAD_EVENT_USER, SPU_PORT_PRINTF);
+	if (ret) {
+		printf("sys_spu_thread_disconnect_event failed %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Entry point of the server PPU thread.
+ *
+ * Loops on the event queue until either an event arrives from the
+ * terminating port or a system call fails, then exits the thread with
+ * status 0. For every other event, data1 is used as the SPU thread ID and
+ * data3 is handed to spu_thread_printf(); the result is written back to
+ * that SPU thread with sys_spu_thread_write_spu_mb().
+ */
+void spu_printf_server_entry(uint64_t arg)
+{
+	(void)arg; /* This thread does not use the argument */
+
+	int ret;
+	sys_event_t event;
+	sys_spu_thread_t spu;
+
+	for (;;) {
+		ret = sys_event_queue_receive(equeue, &event, SYS_NO_TIMEOUT);
+		if (ret) {
+			printf("sys_event_queue_receive failed %d\n", ret);
+			break;
+		}
+
+		/*
+		 * If an event is sent from the terminating port, the SPU printf
+		 * server exits.
+		 */
+		if (event.source == TERMINATING_PORT_NAME) {
+			printf("Finalize the SPU printf server.\n");
+			break;
+		}
+
+		spu = event.data1;
+
+		int sret = spu_thread_printf(spu, event.data3);
+		ret = sys_spu_thread_write_spu_mb(spu, sret);
+		if (ret) {
+			printf("sys_spu_thread_write_spu_mb failed %d\n", ret);
+			break;
+		}
+	}
+
+	sys_ppu_thread_exit(0);
+}
+
diff --git a/Extras/software_cache/spu_thr_printf.ppu.c b/Extras/software_cache/spu_thr_printf.ppu.c
new file mode 100644
index 000000000..38d2f4bb1
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf.ppu.c
@@ -0,0 +1,243 @@
+/*
+ * SCE CONFIDENTIAL
+ * PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+ * Copyright (C) 2005 Sony Computer Entertainment Inc.
+ * All Rights Reserved.
+ *
+ * File: spu_thr_printf.c
+ * Description:
+ * This sample shows how to output strings by SPU programs. 
spu_printf()
+ * called by
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "spu_printf_server.h" /* SPU printf server */
+
+#define MAX_PHYSICAL_SPU 4
+#define MAX_RAW_SPU 0
+#define NUM_SPU_THREADS 4 /* The number of SPU threads in the group */
+#define PRIORITY 100
+#ifdef SN_TARGET_PS3
+#define SPU_PROG (SYS_APP_HOME "/SPU_printf.spu.self")
+#else
+#define SPU_PROG (SYS_APP_HOME "/hello.spu.self")
+#endif
+
+#define IN_BUF_SIZE 256
+#define OUT_BUF_SIZE 256
+
+volatile uint8_t in_buf[IN_BUF_SIZE];
+volatile uint8_t out_buf[OUT_BUF_SIZE];
+uint32_t in_size = IN_BUF_SIZE;
+uint32_t out_size = OUT_BUF_SIZE;
+
+/*
+ * Sample driver: creates an SPU thread group of NUM_SPU_THREADS threads
+ * running SPU_PROG, registers each thread with the SPU printf server,
+ * starts the group, waits for it to end, reports how it ended, and then
+ * releases the group, the SPU image and the printf server.
+ *
+ * Each SPU thread receives its SPU number in arg1 and the address of
+ * in_buf (pre-filled with "hello world") in arg2.
+ *
+ * Any failure during set-up terminates the process through exit(ret);
+ * failures during tear-down are only reported. Returns 0 otherwise.
+ */
+int main(void)
+{
+	sys_spu_thread_group_t group; /* SPU thread group ID */
+	const char *group_name = "Group";
+	sys_spu_thread_group_attribute_t group_attr;/* SPU thread group attribute*/
+	sys_spu_thread_t threads[NUM_SPU_THREADS]; /* SPU thread IDs */
+	sys_spu_thread_attribute_t thread_attr; /* SPU thread attribute */
+	const char *thread_names[NUM_SPU_THREADS] =
+		{"SPU Thread 0",
+		 "SPU Thread 1",
+		 "SPU Thread 2",
+		 "SPU Thread 3"}; /* The names of SPU threads */
+	sys_spu_image_t spu_img;
+	int ret;
+
+	/*
+	 * Fill the buffer whose address is handed to the SPU threads.
+	 * in_buf is volatile uint8_t[], so it is cast explicitly to the char *
+	 * the formatter expects, and the write is bounded by the buffer size.
+	 */
+	snprintf((char *)in_buf, IN_BUF_SIZE, "hello world");
+
+	/*
+	 * Initialize SPUs
+	 */
+	printf("Initializing SPUs\n");
+	ret = sys_spu_initialize(MAX_PHYSICAL_SPU, MAX_RAW_SPU);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_initialize failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	/*
+	 * Create an SPU thread group
+	 */
+	printf("Creating an SPU thread group.\n");
+	group_attr.name = group_name;
+	group_attr.nsize = strlen(group_attr.name) + 1; /* Add 1 for '\0' */
+	group_attr.type = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
+	ret = sys_spu_thread_group_create(&group,
+									  NUM_SPU_THREADS,
+									  PRIORITY,
+									  &group_attr);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_thread_group_create failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	ret = sys_spu_image_open(&spu_img, SPU_PROG);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_image_open failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	/*
+	 * Initialize the SPU printf server
+	 *
+	 * What spu_printf_server_initialize() actually does is to create a
+	 * PPU thread and an event queue which handle the events sent by
+	 * spu_printf().
+	 *
+	 * The spu_printf_server_*() helpers return 0 on success and -1 on
+	 * failure, so their result is compared against 0.
+	 */
+	ret = spu_printf_server_initialize();
+	if (ret != 0) {
+		fprintf(stderr, "spu_printf_server_initialize failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	/*
+	 * In this loop, all SPU threads in the SPU thread group are initialized
+	 * with the loaded SPU image.
+	 */
+	for (int i = 0; i < NUM_SPU_THREADS; i++) {
+		sys_spu_thread_argument_t thread_args;
+		int spu_num = i;
+
+		printf("Initializing SPU thread %d\n", i);
+
+		/*
+		 * Only the name and option are filled in here; the program itself
+		 * comes from spu_img, opened above with sys_spu_image_open().
+		 */
+		thread_attr.name = thread_names[i];
+		thread_attr.nsize = strlen(thread_names[i]) + 1;
+		thread_attr.option = SYS_SPU_THREAD_OPTION_NONE;
+
+		/*
+		 * Pass the SPU number to the SPU thread as the first parameter,
+		 * and the address of in_buf as the second.
+		 */
+		thread_args.arg1 = SYS_SPU_THREAD_ARGUMENT_LET_32(spu_num);
+		thread_args.arg2 = SYS_SPU_THREAD_ARGUMENT_LET_64((uint64_t)in_buf);
+
+		/*
+		 * The third argument specifies the SPU number.
+		 * The SPU number of each SPU thread must be unique within the SPU
+		 * thread group.
+		 */
+		ret = sys_spu_thread_initialize(&threads[i],
+										group,
+										spu_num,
+										&spu_img,
+										&thread_attr,
+										&thread_args);
+		if (ret != CELL_OK) {
+			fprintf(stderr, "sys_spu_thread_initialize failed: %#.8x\n", ret);
+			exit(ret);
+		}
+
+		/*
+		 * Register the SPU thread to the SPU printf server.
+		 *
+		 * spu_printf_server_register() establishes the connection between
+		 * the SPU thread and the SPU printf server's event queue.
+		 */
+		ret = spu_printf_server_register(threads[i]);
+		if (ret != 0) {
+			fprintf(stderr, "spu_printf_server_register failed: %#.8x\n", ret);
+			exit(ret);
+		}
+	}
+
+	printf("All SPU threads have been successfully initialized.\n");
+
+	/*
+	 * Start the SPU thread group
+	 *
+	 * The SPU thread group will be in the READY state, and will become in
+	 * the RUNNING state when the kernel assigns and executes it onto SPUs.
+	 */
+	printf("Starting the SPU thread group.\n");
+	ret = sys_spu_thread_group_start(group);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_thread_group_start failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	/*
+	 * Wait for the termination of the SPU thread group.
+	 */
+	printf("Waiting for the SPU thread group to be terminated.\n");
+	int cause, status;
+	ret = sys_spu_thread_group_join(group, &cause, &status);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_thread_group_join failed: %#.8x\n", ret);
+		exit(ret);
+	}
+
+	/*
+	 * Show the exit cause and status.
+	 */
+	switch(cause) {
+	case SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT:
+		printf("The SPU thread group exited by sys_spu_thread_group_exit().\n");
+		printf("The group's exit status = %d\n", status);
+		break;
+	case SYS_SPU_THREAD_GROUP_JOIN_ALL_THREADS_EXIT:
+		printf("All SPU thread exited by sys_spu_thread_exit().\n");
+		for (int i = 0; i < NUM_SPU_THREADS; i++) {
+			int thr_exit_status;
+			ret = sys_spu_thread_get_exit_status(threads[i], &thr_exit_status);
+			if (ret != CELL_OK) {
+				fprintf(stderr, "sys_spu_thread_get_exit_status failed: %#.8x\n", ret);
+				/* thr_exit_status was not written; do not print garbage */
+				continue;
+			}
+			printf("SPU thread %d's exit status = %d\n", i, thr_exit_status);
+		}
+		break;
+	case SYS_SPU_THREAD_GROUP_JOIN_TERMINATED:
+		printf("The SPU thread group is terminated by sys_spu_thread_terminate().\n");
+		printf("The group's exit status = %d\n", status);
+		break;
+	default:
+		fprintf(stderr, "Unknown exit cause: %d\n", cause);
+		break;
+	}
+
+	/*
+	 * Destroy the SPU thread group and clean up resources.
+	 *
+	 * NOTE(review): the threads registered with
+	 * spu_printf_server_register() are never passed to
+	 * spu_printf_server_unregister(); confirm whether that is required
+	 * before the group is destroyed.
+	 */
+	ret = sys_spu_thread_group_destroy(group);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_thread_group_destroy failed: %#.8x\n", ret);
+	}
+
+	ret = sys_spu_image_close(&spu_img);
+	if (ret != CELL_OK) {
+		fprintf(stderr, "sys_spu_image_close failed: %#.8x\n", ret);
+	}
+
+	/*
+	 * Finalize the SPU printf server.
+	 *
+	 * This function lets the PPU thread exit.
+	 * The event queue will be destroyed.
+	 */
+	ret = spu_printf_server_finalize();
+	if (ret != 0) {
+		fprintf(stderr, "spu_printf_server_finalize failed: %#.8x\n", ret);
+	}
+
+	printf("Exiting.\n");
+	return 0;
+}
+
+
diff --git a/Extras/software_cache/spu_thr_printf.ppu.mk b/Extras/software_cache/spu_thr_printf.ppu.mk
new file mode 100644
index 000000000..9f7d0c477
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf.ppu.mk
@@ -0,0 +1,16 @@
+# SCE CONFIDENTIAL
+# PLAYSTATION(R)3 Programmer Tool Runtime Library 085.007
+# Copyright (C) 2005 Sony Computer Entertainment Inc.
+# All Rights Reserved.
+#
+
+CELL_MK_DIR ?= $(CELL_SDK)/samples/mk
+
+include $(CELL_MK_DIR)/sdk.makedef.mk
+
+PPU_SRCS = spu_thr_printf.ppu.c spu_printf_server.ppu.c
+PPU_TARGET = spu_thr_printf.ppu.elf
+
+include $(CELL_MK_DIR)/sdk.target.mk
+
+
diff --git a/Extras/software_cache/spu_thr_printf_2.sln b/Extras/software_cache/spu_thr_printf_2.sln
new file mode 100644
index 000000000..890915779
--- /dev/null
+++ b/Extras/software_cache/spu_thr_printf_2.sln
@@ -0,0 +1,29 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spu_thr_printf_2", "spu_thr_printf_2.vcproj", "{3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}"
+	ProjectSection(ProjectDependencies) = postProject
+		{47EE939D-CB3D-4600-B8B6-79FDF607E133} = {47EE939D-CB3D-4600-B8B6-79FDF607E133}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU_printf", "SPU_printf\SPU_printf.vcproj", "{47EE939D-CB3D-4600-B8B6-79FDF607E133}"
+EndProject
+Global
+	
GlobalSection(SolutionConfigurationPlatforms) = preSolution + PS3 Debug|Win32 = PS3 Debug|Win32 + PS3 Release|Win32 = PS3 Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32 + {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32 + {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32 + {3494AF8B-FDA7-4CEA-B775-4C5C45599D5F}.PS3 Release|Win32.Build.0 = PS3 Release|Win32 + {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.ActiveCfg = PS3 Debug|Win32 + {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Debug|Win32.Build.0 = PS3 Debug|Win32 + {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.ActiveCfg = PS3 Release|Win32 + {47EE939D-CB3D-4600-B8B6-79FDF607E133}.PS3 Release|Win32.Build.0 = PS3 Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Extras/software_cache/spu_thr_printf_2.vcproj b/Extras/software_cache/spu_thr_printf_2.vcproj new file mode 100644 index 000000000..a98cb1ca5 --- /dev/null +++ b/Extras/software_cache/spu_thr_printf_2.vcproj @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +