/*
 * S3 Savage VGA support: PCI ids, 2D engine and status register
 * definitions, and the fill, scroll and blank hooks that savageinit
 * installs on a VGAscr.
 *
 * NOTE(review): this copy looks damaged by text extraction. Everything
 * that stood between a less-than sign and the following greater-than sign
 * seems to be gone: three include directives have no header name, the
 * polling loop in savagewaitidle runs straight into the body of the fill
 * routine, and the bitmap descriptor setup in savageinit is incomplete.
 * Line breaks have also been collapsed. Restore from a clean copy before
 * compiling; the comments added here describe only what is still visible.
 */
#include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "io.h" #include "../port/error.h" #define Image IMAGE #include #include #include #include "screen.h" enum { PCIS3 = 0x5333, /* PCI VID */ SAVAGE3D = 0x8A20, /* PCI DID */ SAVAGE3DMV = 0x8A21, SAVAGE4 = 0x8A22, PROSAVAGEP = 0x8A25, PROSAVAGEK = 0x8A26, PROSAVAGE8 = 0x8D04, SAVAGEMXMV = 0x8C10, SAVAGEMX = 0x8C11, SAVAGEIXMV = 0x8C12, SAVAGEIX = 0x8C13, SUPERSAVAGEIXC16 = 0x8C2E, SAVAGE2000 = 0x9102, VIRGE = 0x5631, VIRGEGX2 = 0x8A10, VIRGEDXGX = 0x8A01, VIRGEVX = 0x883D, VIRGEMX = 0x8C01, VIRGEMXP = 0x8C03, AURORA64VPLUS = 0x8812, }; /* * Savage4 et al. acceleration. * * This is based only on the Savage4 documentation. * It is expected to work on other Savage cards as well, * but has not been tried. * * There are five ways to access the 2D graphics engine registers: * - Old MMIO non-packed format * - Old MMIO packed format * - New MMIO non-packed format * - New MMIO packed format * - Burst Command Interface (BCI) * * Of these, the manual hints that the first three are deprecated, * and it does not document any of those three well enough to use. * * I have tried for many hours with no success to understand the BCI * interface well enough to use it. It is not well documented, and the * XFree86 driver seems to completely contradict what little documentation * there is. * * This leaves the packed new MMIO. * The manual contradicts itself here, claiming that the registers * start at 0x2008100 as well as at 0x0008100 from the base of the * mmio segment. Since the segment is only 512k, we assume that * the latter is the correct offset. * * According to the manual, only 16-bit reads of the 2D registers * are supported: 32-bit reads will return garbage in the upper word. * 32-bit writes must be enabled explicitly. * * 32-bit reads of the status registers seem just fine. 
*/ /* 2D graphics engine registers for Savage4; others appear to be mostly the same */ enum { SubsystemStatus = 0x8504, /* Subsystem Status: read only */ /* read only: whether we get interrupts on various events */ VsyncInt = 1<<0, /* vertical sync */ GeBusyInt = 1<<1, /* 2D graphics engine busy */ BfifoFullInt = 1<<2, /* BIU FIFO full */ BfifoEmptyInt = 1<<3, /* BIU FIFO empty */ CfifoFullInt = 1<<4, /* command FIFO full */ CfifoEmptyInt = 1<<5, /* command FIFO empty */ BciInt = 1<<6, /* BCI */ LpbInt = 1<<7, /* LPB */ CbHiInt = 1<<16, /* COB upper threshold */ CbLoInt = 1<<17, /* COB lower threshold */ SubsystemCtl = 0x8504, /* Subsystem Control: write only */ /* clear interrupts for various events */ VsyncClr = 1<<0, GeBusyClr = 1<<1, BfifoFullClr = 1<<2, BfifoEmptyClr = 1<<3, CfifoFullClr = 1<<4, CfifoEmptyClr = 1<<5, BciClr = 1<<6, LpbClr = 1<<7, CbHiClr = 1<<16, CbLoClr = 1<<17, /* enable interrupts for various events */ VsyncEna = 1<<8, Busy2DEna = 1<<9, BfifoFullEna = 1<<10, BfifoEmptyEna = 1<<11, CfifoFullEna = 1<<12, CfifoEmptyEna = 1<<13, SubsysBciEna = 1<<14, CbHiEna = 1<<24, CbLoEna = 1<<25, /* 2D graphics engine software reset */ GeSoftReset = 1<<15, FifoStatus = 0x8508, /* FIFO status: read only */ CwbEmpty = 1<<0, /* command write buffer empty */ CrbEmpty = 1<<1, /* command read buffer empty */ CobEmpty = 1<<2, /* command overflow buffer empty */ CfifoEmpty = 1<<3, /* command FIFO empty */ CwbFull = 1<<8, /* command write buffer full */ CrbFull = 1<<9, /* command read buffer full */ CobFull = 1<<10, /* command overflow buffer full */ CfifoFull = 1<<11, /* command FIFO full */ AdvFunCtl = 0x850C, /* Advanced Function Control: read/write */ GeEna = 1<<0, /* enable 2D/3D engine */ /* * according to the manual, BigPixel should be * set when bpp >= 8 (bpp != 4), and then CR50_5-4 are * used to figure out bpp example. however, it does bad things * to the screen in 8bpp mode. 
*/ BigPixel = 1<<2, /* 8 or more bpp enhanced mode */ LaEna = 1<<3, /* linear addressing ena: or'ed with CR58_4 */ Mclk_2 = 0<<8, /* 2D engine clock divide: MCLK/2 */ Mclk_4 = 1<<8, /* " MCLK/4 */ Mclk = 2<<8, /* " MCLK */ /* Mclk = 3<<8, /* " MCLK */ Ic33mhz = 1<<16, /* Internal clock 33 MHz (instead of 66) */ WakeupReg = 0x8510, /* Wakeup: read/write */ WakeupBit = 1<<0, /* wake up: or'ed with 3C3_0 */ SourceY = 0x8100, /* UL corner of bitblt source */ SourceX = 0x8102, /* " */ RectY = 0x8100, /* UL corner of rectangle fill */ RectX = 0x8102, /* " */ DestY = 0x8108, /* UL corner of bitblt dest */ DestX = 0x810A, /* " */ Height = 0x8148, /* bitblt, image xfer rectangle height */ Width = 0x814A, /* bitblt, image xfer rectangle width */ StartY = 0x8100, /* Line draw: first point*/ StartX = 0x8102, /* " */ /* * For line draws, the following must be programmed: * axial step constant = 2*min(|dx|,|dy|) * diagonal step constant = 2*[min(|dx|,|dy|) - max(|dx|,|dy|)] * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy| - 1 * [sic] when start X < end X * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy| * [sic] when start X >= end X */ AxialStep = 0x8108, DiagonalStep = 0x810A, LineError = 0x8110, MinorLength = 0x8148, /* pixel count along minor axis */ MajorLength = 0x814A, /* pixel count along major axis */ DrawCmd = 0x8118, /* Drawing Command: write only */ CmdMagic = 0<<1, AcrossPlane = 1<<1, /* across the plane mode */ LastPixelOff = 1<<2, /* last pixel of line or vector draw not drawn */ Radial = 1<<3, /* enable radial direction (else axial) */ DoDraw = 1<<4, /* draw pixels (else only move current pos) */ DrawRight = 1<<5, /* axial drawing direction: left to right */ /* DrawLeft = 0<<5, */ MajorY = 1<<6, /* MajorX = 0<<6, */ DrawDown = 1<<7, /* DrawUp = 0<<7, */ Degree0 = 0<<5, /* drawing direction when Radial */ Degree45 = 1<<5, /* ... 
*/ Degree315 = 7<<5, UseCPUData = 1<<8, /* image write bus transfer width */ Bus8 = 0<<9, Bus16 = 1<<9, /* * in Bus32 mode, doubleword bits beyond the image rect width are * discarded. each line starts on a new doubleword. * Bus32AP is intended for across-the-plane mode and * rounds to byte boundaries instead. */ Bus32 = 2<<9, Bus32AP = 3<<9, CmdNop = 0<<13, /* nop */ CmdLine = 1<<13, /* draw line */ CmdFill = 2<<13, /* fill rectangle */ CmdBitblt = 6<<13, /* bitblt */ CmdPatblt = 7<<13, /* 8x8 pattern blt */ SrcGBD = 0<<16, SrcPBD = 1<<16, SrcSBD = 2<<16, DstGBD = 0<<18, DstPBD = 1<<18, DstSBD = 2<<18, /* color sources, controls */ BgColor = 0x8120, /* Background Color: read/write */ FgColor = 0x8124, /* Foreground Color: read/write */ BitplaneWmask = 0x8128, /* Bitplane Write Mask: read/write */ BitplaneRmask = 0x812C, /* Bitplane Read Mask: read/write */ CmpColor = 0x8130, /* Color Compare: read/write */ BgMix = 0x8134, FgMix = 0x8136, MixNew = 7, SrcBg = 0<<5, SrcFg = 1<<5, SrcCPU = 2<<5, SrcDisp = 3<<5, /* clipping rectangle */ TopScissors = 0x8138, /* Top Scissors: write only */ LeftScissors = 0x813A, /* Left Scissors: write only */ BottomScissors = 0x813C, /* Bottom Scissors: write only */ RightScissors = 0x813E, /* Right Scissors: write only */ /* * Registers with Magic were indirectly accessed in older modes. * It is not clear whether the Magic is necessary. * In the older modes, writes to these registers were pipelined, * so that you had to issue an engine command and wait for engine * idle before reading a write back. It is not clear if this is * still the case either. */ PixCtl = 0x8140, /* Pixel Control: write only */ PixMagic = 0xA<<12, PixMixFg = 0<<6, /* foreground mix register always */ PixMixCPU = 2<<6, /* CPU data determines mix register */ PixMixDisp = 3<<6, /* display data determines mix register */ MfMisc2Ctl = 0x8142, /* Multifunction Control Misc. 
2: write only */ MfMisc2Magic = 0xD<<12, DstShift = 0, /* 3 bits: destination base address in MB */ SrcShift = 4, /* 3 bits: source base address in MB */ WaitFifoEmpty = 2<<8, /* wait for write FIFO empty between draws */ MfMiscCtl = 0x8144, /* Multifunction Control Misc: write only */ MfMiscMagic = 0xE<<12, UseHighBits = 1<<4, /* select upper 16 bits for 32-bit reg access */ ClipInvert = 1<<5, /* only touch pixels outside clip rectangle */ SkipSame = 0<<6, /* ignore pixels with color CmpColor */ SkipDifferent = 1<<7, /* ignore pixels not color CmpColor */ CmpEna = 1<<8, /* enable color compare */ W32Ena = 1<<9, /* enable 32-bit register write */ ClipDis = 1<<11, /* disable clipping */ /* * The bitmap descriptor 1 registers contain the starting * address of the bitmap (in bytes). * The bitmap descriptor 2 registers contain stride (in pixels) * in the lower 16 bits, depth (in bits) in the next 8 bits, * and whether block write is disabled. */ GBD1 = 0x8168, /* Global Bitmap Descriptor 1: read/write */ GBD2 = 0x816C, /* Global Bitmap Descriptor 2: read/write */ /* GBD2-only bits */ BDS64 = 1<<0, /* bitmap descriptor size 64 bits */ GBDBciEna = 1<<3, /* BCI enable */ /* generic BD2 bits */ BlockWriteDis = 1<<28, StrideShift = 0, DepthShift = 16, PBD1 = 0x8170, /* Primary Bitmap Descriptor: read/write */ PBD2 = 0x8174, SBD1 = 0x8178, /* Secondary Bitmap Descriptor: read/write */ SBD2 = 0x817C, }; /* mastered data transfer registers */ /* configuration/status registers */ enum { XStatus0 = 0x48C00, /* Status Word 0: read only */ /* rev. A silicon differs from rev. 
B; use AltStatus0 */ CBEMaskA = 0x1FFFF, /* filled command buffer entries */ CBEShiftA = 0, BciIdleA = 1<<17, /* BCI idle */ Ge3IdleA = 1<<18, /* 3D engine idle */ Ge2IdleA = 1<<19, /* 2D engine idle */ McpIdleA = 1<<20, /* motion compensation processor idle */ MeIdleA = 1<<22, /* master engine idle */ PfPendA = 1<<23, /* page flip pending */ CBEMaskB = 0x1FFFFF, CBEShiftB = 0, BciIdleB = 1<<25, Ge3IdleB = 1<<26, Ge2IdleB = 1<<27, McpIdleB = 1<<28, MeIdleB = 1<<30, PfPendB = 1<<31, AltStatus0 = 0x48C60, /* Alternate Status Word 0: read only */ CBEMask = 0x1FFFF, CBEShift = 0, /* the Savage4 manual says bits 17..23 for these, like Status0 */ /* empirically, they are bits 21..26 */ BciIdle = 1<<21, Ge3Idle = 1<<22, Ge2Idle = 1<<23, McpIdle = 1<<24, MeIdle = 1<<25, PfPend = 1<<26, XStatus1 = 0x48C04, /* Status Word 1: read only */ /* contains event tag 1, event tag 0, both 16 bits */ XStatus2 = 0x48C08, /* Status Word 2: read only */ ScanMask = 0x3FF, /* current scan line */ ScanShift = 0, VRTMask = 0x7F100, /* vert retrace count; NOTE(review): this mask is not a contiguous field starting at VRTShift (0xFF<<11 would be 0x7F800) -- confirm against the manual */ VRTShift = 11, CbThresh = 0x48C10, /* Command Buffer Thresholds: read/write */ CobOff = 0x48C14, /* Command Overflow Buffer: read/write */ CobPtr = 0x48C18, /* Command Overflow Buffer Pointers: read/write */ CobEna = 1<<2, /* command overflow buffer enable */ CobBciEna = 1<<3, /* BCI function enable */ CbeMask = 0xFFFF8000, /* no. of entries in command buffer */ CbeShift = 15, AltStatus1 = 0x48C64, /* Alternate Status Word 1: read only */ /* contains current texture surface tag, vertex buffer tag */ }; struct { ulong idletimeout; ulong tostatw[16]; } savagestats; enum { Maxloop = 1<<20 }; /* savagewaitidle: pick the status word, mask and goal value for the chip named by scr->id (AltStatus0 for the Savage4 and ProSavage ids, XStatus0 with the rev. A bit layout for the MX/IX ids) and return at once for any other id. 
NOTE(review): the polling loop that should follow the switch is damaged in this copy. */ static void savagewaitidle(VGAscr *scr) { long x; ulong *statw, mask, goal; switch(scr->id){ case SAVAGE4: case PROSAVAGEP: case PROSAVAGEK: case PROSAVAGE8: /* wait for engine idle and FIFO empty */ statw = (ulong*)((uchar*)scr->mmio+AltStatus0); mask = CBEMask | Ge2Idle; goal = Ge2Idle; break; /* case SAVAGEMXMV: ? */ /* case SAVAGEMX: ? 
*/ /* case SAVAGEIX: ? */ case SUPERSAVAGEIXC16: case SAVAGEIXMV: case SAVAGEMXMV: /* wait for engine idle and FIFO empty */ statw = (ulong*)((uchar*)scr->mmio+XStatus0); mask = CBEMaskA | Ge2IdleA; goal = Ge2IdleA; break; default: /* * best we can do: can't print or we'll call ourselves. * savageinit is supposed to not let this happen. */ return; } /* NOTE(review): text is missing from here: the rest of this loop, the end of savagewaitidle, and the header of the fill routine that savageinit installs as scr->fill. What remains of that routine programs both colors and both mixes from sval, the rectangle origin and size minus one, issues CmdFill, waits for idle and returns 1. */ for(x=0; xmmio; *(ulong*)(mmio+FgColor) = sval; *(ulong*)(mmio+BgColor) = sval; *(ulong*)(mmio+BgMix) = SrcFg|MixNew; *(ulong*)(mmio+FgMix) = SrcFg|MixNew; *(ushort*)(mmio+RectY) = r.min.y; *(ushort*)(mmio+RectX) = r.min.x; *(ushort*)(mmio+Width) = Dx(r)-1; *(ushort*)(mmio+Height) = Dy(r)-1; *(ulong*)(mmio+DrawCmd) = CmdMagic | DoDraw | CmdFill | DrawRight | DrawDown; savagewaitidle(scr); return 1; } /* savagescroll: copy screen rectangle sr onto r with a bitblt command (source PBD, destination GBD). On each axis the engine runs in the increasing direction from the min corner when r starts at or before sr, otherwise in the decreasing direction from max-1; presumably this keeps overlapping copies correct. Width and Height are written as size minus one. Waits for the engine to go idle and always returns 1. */ static int savagescroll(VGAscr *scr, Rectangle r, Rectangle sr) { uchar *mmio; ulong cmd; Point dp, sp; cmd = CmdMagic | DoDraw | CmdBitblt | SrcPBD | DstGBD; if(r.min.x <= sr.min.x){ cmd |= DrawRight; dp.x = r.min.x; sp.x = sr.min.x; }else{ dp.x = r.max.x-1; sp.x = sr.max.x-1; } if(r.min.y <= sr.min.y){ cmd |= DrawDown; dp.y = r.min.y; sp.y = sr.min.y; }else{ dp.y = r.max.y-1; sp.y = sr.max.y-1; } mmio = (uchar*)scr->mmio; *(ushort*)(mmio+SourceX) = sp.x; *(ushort*)(mmio+SourceY) = sp.y; *(ushort*)(mmio+DestX) = dp.x; *(ushort*)(mmio+DestY) = dp.y; *(ushort*)(mmio+Width) = Dx(r)-1; *(ushort*)(mmio+Height) = Dy(r)-1; *(ulong*)(mmio+BgMix) = SrcDisp|MixNew; *(ulong*)(mmio+FgMix) = SrcDisp|MixNew; *(ulong*)(mmio+DrawCmd) = cmd; savagewaitidle(scr); return 1; } /* savageblank: blank the display when blank is nonzero, unblank it otherwise, using sequencer registers 0x08, 0x0D and 0x31. Register 0x0D keeps only its low two bits and gets 0x50 or'ed in when blanking; bit 0x10 of register 0x31 is cleared when blanking and set when unblanking. The VGAscr argument is unused. */ static void savageblank(VGAscr*, int blank) { uchar seqD; /* * Will handle DPMS to monitor */ vgaxo(Seqx, 8, vgaxi(Seqx,8)|0x06); seqD = vgaxi(Seqx, 0xD); seqD &= 0x03; if(blank) seqD |= 0x50; vgaxo(Seqx, 0xD, seqD); /* * Will handle LCD */ if(blank) vgaxo(Seqx, 0x31, vgaxi(Seqx, 0x31) & ~0x10); else vgaxo(Seqx, 0x31, vgaxi(Seqx, 0x31) | 0x10); } /* savageinit: set up 2D acceleration on scr. Prints a message and returns without installing anything when scr->id is not one of the listed chips or scr->mmio is nil. Otherwise it soft-resets the 2D engine, clears the BCI enable bits, enables 32-bit register writes with clipping off, opens all bit planes, turns on linear addressing and the engine, sets the bitmap descriptors, then installs savagefill, savagescroll and savageblank on scr and sets hwblank to 0. */ void savageinit(VGAscr *scr) { uchar *mmio; ulong bd; /* if you add chip IDs here be sure to update savagewaitidle */ 
switch(scr->id){ case SAVAGE4: case PROSAVAGEP: case PROSAVAGEK: case PROSAVAGE8: case SAVAGEIXMV: case SUPERSAVAGEIXC16: case SAVAGEMXMV: break; default: print("unknown savage %.4lux\n", scr->id); return; } mmio = (uchar*)scr->mmio; if(mmio == nil) { print("savageinit: no mmio\n"); return; } /* 2D graphics engine software reset */ *(ushort*)(mmio+SubsystemCtl) = GeSoftReset; delay(2); *(ushort*)(mmio+SubsystemCtl) = 0; savagewaitidle(scr); /* disable BCI as much as possible */ *(ushort*)(mmio+CobPtr) &= ~CobBciEna; *(ushort*)(mmio+GBD2) &= ~GBDBciEna; savagewaitidle(scr); /* enable 32-bit writes, disable clipping */ *(ushort*)(mmio+MfMiscCtl) = MfMiscMagic|W32Ena|ClipDis; savagewaitidle(scr); /* enable all read, write planes */ *(ulong*)(mmio+BitplaneRmask) = ~0; *(ulong*)(mmio+BitplaneWmask) = ~0; savagewaitidle(scr); /* turn on linear access, 2D engine */ *(ulong*)(mmio+AdvFunCtl) |= GeEna|LaEna; savagewaitidle(scr); /* set bitmap descriptors; NOTE(review): the statement below is damaged in this copy and the descriptor register writes are missing */ bd = (scr->gscreen->depth<gscreen->r)<>16; savagewaitidle(scr); scr->fill = savagefill; scr->scroll = savagescroll; scr->blank = savageblank; hwblank = 0; }