;
; Hugi size coding Compo 28 Entry (final submission)
; --------------------------------------------------------------
;
; by Tapani Utriainen (once known as nadir / RAGE ... back in the demo days)
;
; Final version. There could still be some 20 bytes to trim down.
;
; What it does: finds a UHCI USB host controller on the PCI bus, resets it,
; issues GET_DESCRIPTOR(DEVICE) to the device on root port 1 and prints the
; 18 descriptor bytes in hex (16 on the first line, 2 on the second).
;
; Some tricks used here to get the size down:
;  - PCI scan routine using self-modifying code, base port is read by same code
;  - Use of outsb/outsw for communicating with ports
;  - Clever memory layout: the TDs are packed at a 16-byte stride, so the
;    "spare" second half of one 32-byte TD slot holds the next TD.
;  - All memory data is placed in same region so it can be all set up by one
;    pass of stos/movs instructions.
;  - Use of hlt to wait for timer interrupts
;  - Storing UHCI port in bp allows one to "lea dx, [bp+4]" to set dx to
;    base port + 4
;
; Compile by tasm:
;     tasm /m9 tapani233.asm
; and link
;     tlink /x /t /3 tapani233.obj
;
; Run, for instance, in Bochs v2.4
;
; //Tapani
;
; Review changes (correctness over size; the binary grows by a few bytes):
;  - hex conversion now handles nibbles A..F (aaa/adc only worked for 0..9)
;  - the controller is really stopped (0 written to USBCMD) once the
;    transfer has completed
;  - the 16 KiB-aligned work area is rounded UP past the program image, so
;    it can no longer land on top of the PSP / code
;  - wIndex of the setup packet is zero instead of a leftover pointer
;
; Assumptions: DOS .COM environment (cs=ds=es=ss, word 0 on top of the stack
; so a final "ret" reaches the PSP's int 20h), interrupts enabled so that
; hlt wakes on the timer tick, direction flag clear.
;

.486                                    ; an elegant weapon, for a more civilized time

cseg segment use16
assume cs:cseg, ds:cseg, ss:cseg, es:cseg
org 100h

PCI_CONFIG_ADDR equ 0cf8h               ; PCI configuration address port
UHCI_CLASS      equ 0c030000h           ; class 0ch, subclass 03h, prog-if 00h

NOF_TDS         equ 5                   ; 1 SETUP TD + 4 IN TDs
SETUP_PACKET    equ (1010h + NOF_TDS*10h)   ; = 1060h, right behind the TDs

; (cs + ARENA_ROUND) and 0fc00h = first 16 KiB-aligned segment that starts
; at least 40h paragraphs (1 KiB) above cs, i.e. clear of PSP and program.
ARENA_ROUND     equ 043fh

; ------------------------------------------------------------------------
; | Scan PCI for an UHCI
; ------------------------------------------------------------------------
; The imm32 of "mov eax, 8001ff08h" in pci_scan is the loop state. With the
; 66h operand-size prefix the instruction is 66 B8 08 FF 01 80, so
;     pci_scan+2        = register offset (08h = class code / revision)
;     word pci_scan+3   = device/function (low byte) and bus (high byte)
; The word counts down from 01ffh; the scan gives up when it reaches 0.

start:
        call    pci_scan
        cmp     eax, UHCI_CLASS
        je      uhci_found
        dec     word ptr [ds:pci_scan + 3]  ; next function / device / bus
        jnz     start                   ; loop until PCI address space is probed

; Not found: fall through, do one redundant read, and the "ret" below
; terminates the program.

; pci_scan: read one PCI configuration dword.
;   In:    nothing (address comes from the patched immediate below)
;   Out:   eax = config dword with the two lowest bits cleared
;          dx  = 0cfch
;   NOTE(review): because only bits 0-1 are masked, the class compare above
;   matches only controllers whose revision ID is 0..3.
pci_scan:
        mov     eax, 8001ff08h          ; this constant contains loop counter
        mov     dx, PCI_CONFIG_ADDR
        out     dx, eax                 ; select bus/device/function/register
        mov     dl, 0fch                ; dx = 0cfch, PCI configuration data port
        in      eax, dx
        and     al, 0fch                ; filter away PCI revision / low bits of port
        ret                             ; return value in eax (or exit if no UHCI found)

; ------------------------------------------------------------------------
; | UHCI found
; ------------------------------------------------------------------------
uhci_found:
        mov     byte ptr [ds:pci_scan + 2], 20h  ; register 20h = I/O base (BAR4)
        call    pci_scan                ; uses the same PCI scanning code to get base port
        hlt                             ; wait for interrupt (timer); presumably so
                                        ; the later hlt waits get a full tick
        xchg    bp, ax                  ; base port in bp

        ; NOTE(review): dx is 0cfch here, so this writes 05h to port 0c04h,
        ; not to the PCI command register (that would need config register
        ; 04h selected through 0cf8h). Left as in the original; it appears
        ; to rely on the BIOS having enabled the device already.
        mov     dl, 04h
        mov     ax, 05h
        out     dx, al                  ; allow access to PCI

        mov     dx, bp                  ; USBCMD = base + 0
        out     dx, ax                  ; ax = 0005h : reset (GRESET) and start the controller
        hlt                             ; wait
        xor     eax, eax                ; also clears the high half for the shl below
        out     dx, ax                  ; USBCMD = 0: end of reset, controller stopped

; ------------------------------------------------------------------------
; | Set up memory
; ------------------------------------------------------------------------
;
; All offsets are relative to a 16 KiB-aligned work area ("base"), es:0.
;
;   +0000h  frame list, 400h dwords, all = base+1011h (terminate bit set);
;           entry 0 is later patched to base+1002h (queue head, Q bit set)
;   +1000h  queue head, horizontal link = base+1011h (terminate)
;   +1004h  queue head, vertical link   = base+1010h (first TD)
;   +1008h  2 filler dwords
;   +1010h  NOF_TDS TDs, 16 bytes each:
;             [ ptr to next TD (current + 10h) ]
;             [ control/status, from TD_Data   ]
;             [ token, from TD_Data            ]
;             [ buffer: Setup_Packet + 8 * n   ]
;           The last TD links to +1060h, i.e. into the setup packet bytes.
;   +1060h  Setup_Packet (8 bytes), buffer of TD 0
;   +1068h  Device Descriptor, buffers of TD 1..4 (8 bytes each)
;
; 16 KiB alignment (not just 4 KiB) matters: the code below patches only
; ax / al of the 32-bit pointer and must never carry into the high word.
;
; NOTE(review): every IN TD uses the same token, so all of them carry the
; same data-toggle value; confirm against real hardware if that matters.

; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
; Memory setup: Calculate memory addresses to use, and send to USB controller
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        mov     cx, 0404h               ; dwords to init before Frame TDs (frame list + queue + some)
                                        ; cl=4 is also used for shifting
        push    cs
        pop     ax
        add     ax, ARENA_ROUND         ; round up past the program image
                                        ; (was "add ax, cx": could yield cs+5
                                        ; paragraphs and overwrite PSP/code)
        and     ax, 0fc00h              ; segment multiple of 400h = 16 KiB aligned
        push    ax
        pop     es
        shl     eax, cl                 ; eax <-- linear address of base
        lea     dx, [bp+8]              ; frame list base address register
        out     dx, eax                 ; send frame list address to the UHCI

; - - - - - - - - - - - - - - - - - -
; Memory setup: Init the frame list
; - - - - - - - - - - - - - - - - - -
        add     ax, SETUP_PACKET        ; eax <-- ptr to Setup_Packet
        mov     bx, ax                  ; bx  <-- low word of that ptr; the high
                                        ;         word is shared with eax
        mov     al, 11h                 ; eax <-- base + 1010h | TERMINATE
        xor     di, di                  ; es:di <-- base
        rep     stosd                   ; frame list + 4 dwords; leaves cx = 0, di = 1010h
        dec     ax                      ; eax <-- base + 1010h (first TD)
        mov     [es:di-12], ax          ; queue vertical (+1004h), low word only

        ; eax   is 32-bit address of the TD block
        ; es:di is 16-bit ptr to the TD block
        ; ds:si is (will be) 16-bit ptr to TD_Data
        ; bx    is the low word of the current buffer pointer

; - - - - - - - - - - - - - - - - - - - - - - - -
; Memory setup: Loop to set up the TDs in memory
; - - - - - - - - - - - - - - - - - - - - - - - -
        mov     cl, NOF_TDS
@tdloop:
        add     al, 16                  ; add al suffices since eax (initially) points 4k aligned!
        stosd                           ; store ptr to next TD entry
        mov     si, offset TD_Data
        movsd                           ; control/status
        movsd                           ; token
        xchg    ax, bx                  ; eax <-- buffer pointer
        stosd
        add     al, 8                   ; next buffer, 8 bytes further on
        xchg    ax, bx                  ; eax <-- TD pointer again
        loop    @tdloop
        ; here: di = SETUP_PACKET, si = TD_Data + 8, cx = 0

; - - - - - - - - - - - - - - - - - - - - - - - - - - -
; Memory setup: Setup packet is directly after the TDs
; - - - - - - - - - - - - - - - - - - - - - - - - - - -
        movsd                           ; bmRequestType, bRequest, wValue
        xor     ax, ax                  ; wIndex must be 0 for a device descriptor
                                        ; (the original stored leftover ax = 1060h)
        stosw                           ; wIndex
        movsw                           ; wLength = 12h

; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
; Memory setup: patch TD data that is not set up correctly by the loop
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        mov     di, 1057h               ; top byte of control/status of the last TD
        movsb                           ; <-- 1dh
        mov     di, 1018h               ; low byte (PID) of the token of the first TD
        movsb                           ; <-- 2dh
        ; Done setting up memory

; ------------------------------------------------------------------------
; | Configure UHCI, wait for the interrupt
; ------------------------------------------------------------------------
; ds:si now walks the "data to send to ports" table, one outsw per word.
        lea     dx, [bp+4]
        outsw                           ; 000fh, enable interrupts
        mov     dx, bp
        outsw                           ; 0001h, set status (RUN)

        ; first reset the port and then wait
        lea     dx, [bp+10h]            ; port 1 status/control
        outsw                           ; 0200h
        hlt
        ; clear the reset and wait
        outsw                           ; 0005h
        hlt

        ; set the first frame in TD frame list to point to the queue
        xor     di, di
        movsb                           ; low byte 02h: base+1011h -> base+1002h
        mov     dx, bp
        outsw                           ; 0001h

        push    es
        pop     ds                      ; ds <-- work area, for the print routine
        inc     dx
        inc     dx                      ; dx = base + 2, status register
@wait_irq:
        in      ax, dx
        and     al, 1
        jz      short @wait_irq         ; wait for the interrupt

        ; al is 1 here, so the original "out dx, al" re-wrote RUN and left
        ; the controller running on memory the program is about to give up.
        xor     ax, ax
        mov     dx, bp
        out     dx, ax                  ; USBCMD = 0: stop the controller

; ------------------------------------------------------------------------
; | Print out result
; ------------------------------------------------------------------------
        mov     si, SETUP_PACKET + 8    ; received descriptor bytes
        mov     cl, 10h                 ; cx = 16 (ch is 0): first line
        call    prt_seq
        mov     cl, 02h                 ; cx = 2: second line, falls into prt_seq

; prt_seq: print CR/LF followed by cx bytes from ds:si as "hh hh ...".
;   In:    ds:si = data, cx = byte count, dx = buffer offset, es = ds
;   Out:   si advanced by cx, cx = 0, dx preserved
;   The '-' replaces the space after the 8th byte of a 16-byte line.
;   NOTE(review): for the 2-byte line the '-' store lands 17 bytes below the
;   buffer (ds:dx-17), outside the printed text.
prt_seq:
        mov     di, dx                  ; result string(s) will be stored in ds:dx
                                        ; it is possible to use base port as offset, since no
                                        ; more memory ops are used, and base port >> 100h
        mov     ax, 0d0ah
        stosw
@prt_sibyte:
        lodsb
        db      0d4h, 10h               ; aam 16: ah = high nibble, al = low nibble
        cmp     al, 10                  ; nibble -> ASCII '0'..'9' / 'A'..'F'
        sbb     al, 69h
        das
        xchg    al, ah                  ; al = high nibble, ah = low digit
        cmp     al, 10
        sbb     al, 69h
        das
        stosw                           ; remember, intel byte order
        mov     al, 20h
        stosb
        loop    @prt_sibyte
        dec     di                      ; undo space
        mov     ax, 0924h               ; 09h for int 21h (print string), 24h = '$' (terminate)
        stosb
        mov     byte ptr [di-25], '-'   ; put that damn hyphen there...
        int     21h
        ret                             ; return to call, or exit program on 2nd run

; ------------------------------------------------------------------------
; | DATA
; ------------------------------------------------------------------------
; Consumed strictly in order through ds:si; do not reorder.
TD_Data:
        dd      04800000h               ; TD control/status
        dd      00e00069h               ; TD token
;Setup_Packet:
        db      80h                     ; dev->host, type=standard, recipient=device
        db      06h                     ; get descriptor
        db      00h                     ; index = 0
        db      01h                     ; type = device
        dw      12h                     ; wLength (wIndex is written by the code)
        db      1dh                     ; manual patch for TD status entry
        db      2dh                     ; manual patch for the first TD's token (PID)
; data to send to ports
        dw      000fh
        dw      0001h
        dw      0200h
        dw      0005h
        db      02h                     ; offset queue->horiz | IS_QUEUE
        dw      0001h

cseg ends
end start