Browse Source

Merge branch 'upstream'

Jeff Garzik 19 years ago
parent
commit
ffe75ef650

+ 1072 - 0
Documentation/DocBook/libata.tmpl

@@ -415,6 +415,362 @@ and other resources, etc.
      </sect1>
   </chapter>
 
+  <chapter id="libataEH">
+        <title>Error handling</title>
+
+	<para>
+	This chapter describes how errors are handled under libata.
+	Readers are advised to read SCSI EH
+	(Documentation/scsi/scsi_eh.txt) and ATA exceptions doc first.
+	</para>
+
+	<sect1><title>Origins of commands</title>
+	<para>
+	In libata, a command is represented with struct ata_queued_cmd
+	or qc.  qc's are preallocated during port initialization and
+	repetitively used for command executions.  Currently only one
+	qc is allocated per port but yet-to-be-merged NCQ branch
+	allocates one for each tag and maps each qc to NCQ tag 1-to-1.
+	</para>
+	<para>
+	libata commands can originate from two sources - libata itself
+	and SCSI midlayer.  libata internal commands are used for
+	initialization and error handling.  All normal blk requests
+	and commands for SCSI emulation are passed as SCSI commands
+	through queuecommand callback of SCSI host template.
+	</para>
+	</sect1>
+
+	<sect1><title>How commands are issued</title>
+
+	<variablelist>
+
+	<varlistentry><term>Internal commands</term>
+	<listitem>
+	<para>
+	First, qc is allocated and initialized using
+	ata_qc_new_init().  Although ata_qc_new_init() doesn't
+	implement any wait or retry mechanism when qc is not
+	available, internal commands are currently issued only during
+	initialization and error recovery, so no other command is
+	active and allocation is guaranteed to succeed.
+	</para>
+	<para>
+	Once allocated qc's taskfile is initialized for the command to
+	be executed.  qc currently has two mechanisms to notify
+	completion.  One is via qc->complete_fn() callback and the
+	other is completion qc->waiting.  qc->complete_fn() callback
+	is the asynchronous path used by normal SCSI translated
+	commands and qc->waiting is the synchronous (issuer sleeps in
+	process context) path used by internal commands.
+	</para>
+	<para>
+	Once initialization is complete, host_set lock is acquired
+	and the qc is issued.
+	</para>
+	</listitem>
+	</varlistentry>
+
+	<varlistentry><term>SCSI commands</term>
+	<listitem>
+	<para>
+	All libata drivers use ata_scsi_queuecmd() as
+	hostt->queuecommand callback.  scmds can either be simulated
+	or translated.  No qc is involved in processing a simulated
+	scmd.  The result is computed right away and the scmd is
+	completed.
+	</para>
+	<para>
+	For a translated scmd, ata_qc_new_init() is invoked to
+	allocate a qc and the scmd is translated into the qc.  SCSI
+	midlayer's completion notification function pointer is stored
+	into qc->scsidone.
+	</para>
+	<para>
+	qc->complete_fn() callback is used for completion
+	notification.  ATA commands use ata_scsi_qc_complete() while
+	ATAPI commands use atapi_qc_complete().  Both functions end up
+	calling qc->scsidone to notify upper layer when the qc is
+	finished.  After translation is completed, the qc is issued
+	with ata_qc_issue().
+	</para>
+	<para>
+	Note that SCSI midlayer invokes hostt->queuecommand while
+	holding host_set lock, so all above occur while holding
+	host_set lock.
+	</para>
+	</listitem>
+	</varlistentry>
+
+	</variablelist>
+	</sect1>
+
+	<sect1><title>How commands are processed</title>
+	<para>
+	Depending on which protocol and which controller are used,
+	commands are processed differently.  For the purpose of
+	discussion, a controller which uses taskfile interface and all
+	standard callbacks is assumed.
+	</para>
+	<para>
+	Currently 6 ATA command protocols are used.  They can be
+	sorted into the following four categories according to how
+	they are processed.
+	</para>
+
+	<variablelist>
+	   <varlistentry><term>ATA NO DATA or DMA</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_NODATA and ATA_PROT_DMA fall into this category.
+	   These types of commands don't require any software
+	   intervention once issued.  Device will raise interrupt on
+	   completion.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATA PIO</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_PIO is in this category.  libata currently
+	   implements PIO with polling.  ATA_NIEN bit is set to turn
+	   off interrupt and pio_task on ata_wq performs polling and
+	   IO.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI NODATA or DMA</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_ATAPI_NODATA and ATA_PROT_ATAPI_DMA are in this
+	   category.  packet_task is used to poll BSY bit after
+	   issuing PACKET command.  Once BSY is turned off by the
+	   device, packet_task transfers CDB and hands off processing
+	   to interrupt handler.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI PIO</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_ATAPI is in this category.  ATA_NIEN bit is set
+	   and, as in ATAPI NODATA or DMA, packet_task submits cdb.
+	   However, after submitting cdb, further processing (data
+	   transfer) is handed off to pio_task.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+	</variablelist>
+        </sect1>
+
+	<sect1><title>How commands are completed</title>
+	<para>
+	Once issued, all qc's are either completed with
+	ata_qc_complete() or time out.  For commands which are handled
+	by interrupts, ata_host_intr() invokes ata_qc_complete(), and,
+	for PIO tasks, pio_task invokes ata_qc_complete().  In error
+	cases, packet_task may also complete commands.
+	</para>
+	<para>
+	ata_qc_complete() does the following.
+	</para>
+
+	<orderedlist>
+
+	<listitem>
+	<para>
+	DMA memory is unmapped.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	ATA_QCFLAG_ACTIVE is clared from qc->flags.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	qc->complete_fn() callback is invoked.  If the return value of
+	the callback is not zero.  Completion is short circuited and
+	ata_qc_complete() returns.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	__ata_qc_complete() is called, which does
+	   <orderedlist>
+
+	   <listitem>
+	   <para>
+	   qc->flags is cleared to zero.
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   ap->active_tag and qc->tag are poisoned.
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   qc->waiting is claread &amp; completed (in that order).
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   qc is deallocated by clearing appropriate bit in ap->qactive.
+	   </para>
+	   </listitem>
+
+	   </orderedlist>
+	</para>
+	</listitem>
+
+	</orderedlist>
+
+	<para>
+	So, it basically notifies upper layer and deallocates qc.  One
+	exception is short-circuit path in #3 which is used by
+	atapi_qc_complete().
+	</para>
+	<para>
+	For all non-ATAPI commands, whether it fails or not, almost
+	the same code path is taken and very little error handling
+	takes place.  A qc is completed with success status if it
+	succeeded, with failed status otherwise.
+	</para>
+	<para>
+	However, failed ATAPI commands require more handling as
+	REQUEST SENSE is needed to acquire sense data.  If an ATAPI
+	command fails, ata_qc_complete() is invoked with error status,
+	which in turn invokes atapi_qc_complete() via
+	qc->complete_fn() callback.
+	</para>
+	<para>
+	This makes atapi_qc_complete() set scmd->result to
+	SAM_STAT_CHECK_CONDITION, complete the scmd and return 1.  As
+	the sense data is empty but scmd->result is CHECK CONDITION,
+	SCSI midlayer will invoke EH for the scmd, and returning 1
+	makes ata_qc_complete() to return without deallocating the qc.
+	This leads us to ata_scsi_error() with partially completed qc.
+	</para>
+
+	</sect1>
+
+	<sect1><title>ata_scsi_error()</title>
+	<para>
+	ata_scsi_error() is the current hostt->eh_strategy_handler()
+	for libata.  As discussed above, this will be entered in two
+	cases - timeout and ATAPI error completion.  This function
+	calls low level libata driver's eng_timeout() callback, the
+	standard callback for which is ata_eng_timeout().  It checks
+	if a qc is active and calls ata_qc_timeout() on the qc if so.
+	Actual error handling occurs in ata_qc_timeout().
+	</para>
+	<para>
+	If EH is invoked for timeout, ata_qc_timeout() stops BMDMA and
+	completes the qc.  Note that as we're currently in EH, we
+	cannot call scsi_done.  As described in SCSI EH doc, a
+	recovered scmd should be either retried with
+	scsi_queue_insert() or finished with scsi_finish_command().
+	Here, we override qc->scsidone with scsi_finish_command() and
+	calls ata_qc_complete().
+	</para>
+	<para>
+	If EH is invoked due to a failed ATAPI qc, the qc here is
+	completed but not deallocated.  The purpose of this
+	half-completion is to use the qc as place holder to make EH
+	code reach this place.  This is a bit hackish, but it works.
+	</para>
+	<para>
+	Once control reaches here, the qc is deallocated by invoking
+	__ata_qc_complete() explicitly.  Then, internal qc for REQUEST
+	SENSE is issued.  Once sense data is acquired, scmd is
+	finished by directly invoking scsi_finish_command() on the
+	scmd.  Note that as we already have completed and deallocated
+	the qc which was associated with the scmd, we don't need
+	to/cannot call ata_qc_complete() again.
+	</para>
+
+	</sect1>
+
+	<sect1><title>Problems with the current EH</title>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	Error representation is too crude.  Currently any and all
+	error conditions are represented with ATA STATUS and ERROR
+	registers.  Errors which aren't ATA device errors are treated
+	as ATA device errors by setting ATA_ERR bit.  Better error
+	descriptor which can properly represent ATA and other
+	errors/exceptions is needed.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	When handling timeouts, no action is taken to make device
+	forget about the timed out command and ready for new commands.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	EH handling via ata_scsi_error() is not properly protected
+	from usual command processing.  On EH entrance, the device is
+	not in quiescent state.  Timed out commands may succeed or
+	fail any time.  pio_task and atapi_task may still be running.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Too weak error recovery.  Devices / controllers causing HSM
+	mismatch errors and other errors quite often require reset to
+	return to known state.  Also, advanced error handling is
+	necessary to support features like NCQ and hotplug.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	ATA errors are directly handled in the interrupt handler and
+	PIO errors in pio_task.  This is problematic for advanced
+	error handling for the following reasons.
+	</para>
+	<para>
+	First, advanced error handling often requires context and
+	internal qc execution.
+	</para>
+	<para>
+	Second, even a simple failure (say, CRC error) needs
+	information gathering and could trigger complex error handling
+	(say, resetting &amp; reconfiguring).  Having multiple code
+	paths to gather information, enter EH and trigger actions
+	makes life painful.
+	</para>
+	<para>
+	Third, scattered EH code makes implementing low level drivers
+	difficult.  Low level drivers override libata callbacks.  If
+	EH is scattered over several places, each affected callbacks
+	should perform its part of error handling.  This can be error
+	prone and painful.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+	</sect1>
+  </chapter>
+
   <chapter id="libataExt">
      <title>libata Library</title>
 !Edrivers/scsi/libata-core.c
@@ -431,6 +787,722 @@ and other resources, etc.
 !Idrivers/scsi/libata-scsi.c
   </chapter>
 
+  <chapter id="ataExceptions">
+     <title>ATA errors &amp; exceptions</title>
+
+  <para>
+  This chapter tries to identify what error/exception conditions exist
+  for ATA/ATAPI devices and describe how they should be handled in
+  implementation-neutral way.
+  </para>
+
+  <para>
+  The term 'error' is used to describe conditions where either an
+  explicit error condition is reported from device or a command has
+  timed out.
+  </para>
+
+  <para>
+  The term 'exception' is either used to describe exceptional
+  conditions which are not errors (say, power or hotplug events), or
+  to describe both errors and non-error exceptional conditions.  Where
+  explicit distinction between error and exception is necessary, the
+  term 'non-error exception' is used.
+  </para>
+
+  <sect1 id="excat">
+     <title>Exception categories</title>
+     <para>
+     Exceptions are described primarily with respect to legacy
+     taskfile + bus master IDE interface.  If a controller provides
+     other better mechanism for error reporting, mapping those into
+     categories described below shouldn't be difficult.
+     </para>
+
+     <para>
+     In the following sections, two recovery actions - reset and
+     reconfiguring transport - are mentioned.  These are described
+     further in <xref linkend="exrec"/>.
+     </para>
+
+     <sect2 id="excatHSMviolation">
+        <title>HSM violation</title>
+        <para>
+        This error is indicated when STATUS value doesn't match HSM
+        requirement during issuing or excution any ATA/ATAPI command.
+        </para>
+
+	<itemizedlist>
+	<title>Examples</title>
+
+        <listitem>
+	<para>
+	ATA_STATUS doesn't contain !BSY &amp;&amp; DRDY &amp;&amp; !DRQ while trying
+	to issue a command.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	!BSY &amp;&amp; !DRQ during PIO data transfer.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	DRQ on command completion.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	!BSY &amp;&amp; ERR after CDB tranfer starts but before the
+        last byte of CDB is transferred.  ATA/ATAPI standard states
+        that &quot;The device shall not terminate the PACKET command
+        with an error before the last byte of the command packet has
+        been written&quot; in the error outputs description of PACKET
+        command and the state diagram doesn't include such
+        transitions.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	In these cases, HSM is violated and not much information
+	regarding the error can be acquired from STATUS or ERROR
+	register.  IOW, this error can be anything - driver bug,
+	faulty device, controller and/or cable.
+	</para>
+
+	<para>
+	As HSM is violated, reset is necessary to restore known state.
+	Reconfiguring transport for lower speed might be helpful too
+	as transmission errors sometimes cause this kind of errors.
+	</para>
+     </sect2>
+     
+     <sect2 id="excatDevErr">
+        <title>ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION)</title>
+
+	<para>
+	These are errors detected and reported by ATA/ATAPI devices
+	indicating device problems.  For this type of errors, STATUS
+	and ERROR register values are valid and describe error
+	condition.  Note that some of ATA bus errors are detected by
+	ATA/ATAPI devices and reported using the same mechanism as
+	device errors.  Those cases are described later in this
+	section.
+	</para>
+
+	<para>
+	For ATA commands, this type of errors are indicated by !BSY
+	&amp;&amp; ERR during command execution and on completion.
+	</para>
+
+	<para>For ATAPI commands,</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR &amp;&amp; ABRT right after issuing PACKET
+	indicates that PACKET command is not supported and falls in
+	this category.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR(==CHK) &amp;&amp; !ABRT after the last
+	byte of CDB is transferred indicates CHECK CONDITION and
+	doesn't fall in this category.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR(==CHK) &amp;&amp; ABRT after the last byte
+        of CDB is transferred *probably* indicates CHECK CONDITION and
+        doesn't fall in this category.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	Of errors detected as above, the followings are not ATA/ATAPI
+	device errors but ATA bus errors and should be handled
+	according to <xref linkend="excatATAbusErr"/>.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry>
+	   <term>CRC error during data transfer</term>
+	   <listitem>
+	   <para>
+	   This is indicated by ICRC bit in the ERROR register and
+	   means that corruption occurred during data transfer.  Upto
+	   ATA/ATAPI-7, the standard specifies that this bit is only
+	   applicable to UDMA transfers but ATA/ATAPI-8 draft revision
+	   1f says that the bit may be applicable to multiword DMA and
+	   PIO.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry>
+	   <term>ABRT error during data transfer or on completion</term>
+	   <listitem>
+	   <para>
+	   Upto ATA/ATAPI-7, the standard specifies that ABRT could be
+	   set on ICRC errors and on cases where a device is not able
+	   to complete a command.  Combined with the fact that MWDMA
+	   and PIO transfer errors aren't allowed to use ICRC bit upto
+	   ATA/ATAPI-7, it seems to imply that ABRT bit alone could
+	   indicate tranfer errors.
+	   </para>
+	   <para>
+	   However, ATA/ATAPI-8 draft revision 1f removes the part
+	   that ICRC errors can turn on ABRT.  So, this is kind of
+	   gray area.  Some heuristics are needed here.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	ATA/ATAPI device errors can be further categorized as follows.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry>
+	   <term>Media errors</term>
+	   <listitem>
+	   <para>
+	   This is indicated by UNC bit in the ERROR register.  ATA
+	   devices reports UNC error only after certain number of
+	   retries cannot recover the data, so there's nothing much
+	   else to do other than notifying upper layer.
+	   </para>
+	   <para>
+	   READ and WRITE commands report CHS or LBA of the first
+	   failed sector but ATA/ATAPI standard specifies that the
+	   amount of transferred data on error completion is
+	   indeterminate, so we cannot assume that sectors preceding
+	   the failed sector have been transferred and thus cannot
+	   complete those sectors successfully as SCSI does.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry>
+	   <term>Media changed / media change requested error</term>
+	   <listitem>
+	   <para>
+	   &lt;&lt;TODO: fill here&gt;&gt;
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Address error</term>
+	   <listitem>
+	   <para>
+	   This is indicated by IDNF bit in the ERROR register.
+	   Report to upper layer.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Other errors</term>
+	   <listitem>
+	   <para>
+	   This can be invalid command or parameter indicated by ABRT
+	   ERROR bit or some other error condition.  Note that ABRT
+	   bit can indicate a lot of things including ICRC and Address
+	   errors.  Heuristics needed.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	Depending on commands, not all STATUS/ERROR bits are
+	applicable.  These non-applicable bits are marked with
+	&quot;na&quot; in the output descriptions but upto ATA/ATAPI-7
+	no definition of &quot;na&quot; can be found.  However,
+	ATA/ATAPI-8 draft revision 1f describes &quot;N/A&quot; as
+	follows.
+	</para>
+
+	<blockquote>
+	<variablelist>
+	   <varlistentry><term>3.2.3.3a N/A</term>
+	   <listitem>
+	   <para>
+	   A keyword the indicates a field has no defined value in
+	   this standard and should not be checked by the host or
+	   device. N/A fields should be cleared to zero.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+	</variablelist>
+	</blockquote>
+
+	<para>
+	So, it seems reasonable to assume that &quot;na&quot; bits are
+	cleared to zero by devices and thus need no explicit masking.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatATAPIcc">
+        <title>ATAPI device CHECK CONDITION</title>
+
+	<para>
+	ATAPI device CHECK CONDITION error is indicated by set CHK bit
+	(ERR bit) in the STATUS register after the last byte of CDB is
+	transferred for a PACKET command.  For this kind of errors,
+	sense data should be acquired to gather information regarding
+	the errors.  REQUEST SENSE packet command should be used to
+	acquire sense data.
+	</para>
+
+	<para>
+	Once sense data is acquired, this type of errors can be
+	handled similary to other SCSI errors.  Note that sense data
+	may indicate ATA bus error (e.g. Sense Key 04h HARDWARE ERROR
+	&amp;&amp; ASC/ASCQ 47h/00h SCSI PARITY ERROR).  In such
+	cases, the error should be considered as an ATA bus error and
+	handled according to <xref linkend="excatATAbusErr"/>.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatNCQerr">
+        <title>ATA device error (NCQ)</title>
+
+	<para>
+	NCQ command error is indicated by cleared BSY and set ERR bit
+	during NCQ command phase (one or more NCQ commands
+	outstanding).  Although STATUS and ERROR registers will
+	contain valid values describing the error, READ LOG EXT is
+	required to clear the error condition, determine which command
+	has failed and acquire more information.
+	</para>
+
+	<para>
+	READ LOG EXT Log Page 10h reports which tag has failed and
+	taskfile register values describing the error.  With this
+	information the failed command can be handled as a normal ATA
+	command error as in <xref linkend="excatDevErr"/> and all
+	other in-flight commands must be retried.  Note that this
+	retry should not be counted - it's likely that commands
+	retried this way would have completed normally if it were not
+	for the failed command.
+	</para>
+
+	<para>
+	Note that ATA bus errors can be reported as ATA device NCQ
+	errors.  This should be handled as described in <xref
+	linkend="excatATAbusErr"/>.
+	</para>
+
+	<para>
+	If READ LOG EXT Log Page 10h fails or reports NQ, we're
+	thoroughly screwed.  This condition should be treated
+	according to <xref linkend="excatHSMviolation"/>.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatATAbusErr">
+        <title>ATA bus error</title>
+
+	<para>
+	ATA bus error means that data corruption occurred during
+	transmission over ATA bus (SATA or PATA).  This type of errors
+	can be indicated by
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	ICRC or ABRT error as described in <xref linkend="excatDevErr"/>.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Controller-specific error completion with error information
+	indicating transmission error.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	On some controllers, command timeout.  In this case, there may
+	be a mechanism to determine that the timeout is due to
+	transmission error.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Unknown/random errors, timeouts and all sorts of weirdities.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	As described above, transmission errors can cause wide variety
+	of symptoms ranging from device ICRC error to random device
+	lockup, and, for many cases, there is no way to tell if an
+	error condition is due to transmission error or not;
+	therefore, it's necessary to employ some kind of heuristic
+	when dealing with errors and timeouts.  For example,
+	encountering repetitive ABRT errors for known supported
+	command is likely to indicate ATA bus error.
+	</para>
+
+	<para>
+	Once it's determined that ATA bus errors have possibly
+	occurred, lowering ATA bus transmission speed is one of
+	actions which may alleviate the problem.  See <xref
+	linkend="exrecReconf"/> for more information.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatPCIbusErr">
+        <title>PCI bus error</title>
+
+	<para>
+	Data corruption or other failures during transmission over PCI
+	(or other system bus).  For standard BMDMA, this is indicated
+	by Error bit in the BMDMA Status register.  This type of
+	errors must be logged as it indicates something is very wrong
+	with the system.  Resetting host controller is recommended.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatLateCompletion">
+        <title>Late completion</title>
+
+	<para>
+	This occurs when timeout occurs and the timeout handler finds
+	out that the timed out command has completed successfully or
+	with error.  This is usually caused by lost interrupts.  This
+	type of errors must be logged.  Resetting host controller is
+	recommended.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatUnknown">
+        <title>Unknown error (timeout)</title>
+
+	<para>
+	This is when timeout occurs and the command is still
+	processing or the host and device are in unknown state.  When
+	this occurs, HSM could be in any valid or invalid state.  To
+	bring the device to known state and make it forget about the
+	timed out command, resetting is necessary.  The timed out
+	command may be retried.
+	</para>
+
+	<para>
+	Timeouts can also be caused by transmission errors.  Refer to
+	<xref linkend="excatATAbusErr"/> for more details.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatHoplugPM">
+        <title>Hotplug and power management exceptions</title>
+
+	<para>
+	&lt;&lt;TODO: fill here&gt;&gt;
+	</para>
+
+     </sect2>
+
+  </sect1>
+
+  <sect1 id="exrec">
+     <title>EH recovery actions</title>
+
+     <para>
+     This section discusses several important recovery actions.
+     </para>
+
+     <sect2 id="exrecClr">
+        <title>Clearing error condition</title>
+
+	<para>
+	Many controllers require its error registers to be cleared by
+	error handler.  Different controllers may have different
+	requirements.
+	</para>
+
+	<para>
+	For SATA, it's strongly recommended to clear at least SError
+	register during error handling.
+	</para>
+     </sect2>
+
+     <sect2 id="exrecRst">
+        <title>Reset</title>
+
+	<para>
+	During EH, resetting is necessary in the following cases.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	HSM is in unknown or invalid state
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	HBA is in unknown or invalid state
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	EH needs to make HBA/device forget about in-flight commands
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	HBA/device behaves weirdly
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	Resetting during EH might be a good idea regardless of error
+	condition to improve EH robustness.  Whether to reset both or
+	either one of HBA and device depends on situation but the
+	following scheme is recommended.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	When it's known that HBA is in ready state but ATA/ATAPI
+	device in in unknown state, reset only device.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	If HBA is in unknown state, reset both HBA and device.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	HBA resetting is implementation specific.  For a controller
+	complying to taskfile/BMDMA PCI IDE, stopping active DMA
+	transaction may be sufficient iff BMDMA state is the only HBA
+	context.  But even mostly taskfile/BMDMA PCI IDE complying
+	controllers may have implementation specific requirements and
+	mechanism to reset themselves.  This must be addressed by
+	specific drivers.
+	</para>
+
+	<para>
+	OTOH, ATA/ATAPI standard describes in detail ways to reset
+	ATA/ATAPI devices.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry><term>PATA hardware reset</term>
+	   <listitem>
+	   <para>
+	   This is hardware initiated device reset signalled with
+	   asserted PATA RESET- signal.  There is no standard way to
+	   initiate hardware reset from software although some
+	   hardware provides registers that allow driver to directly
+	   tweak the RESET- signal.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Software reset</term>
+	   <listitem>
+	   <para>
+	   This is achieved by turning CONTROL SRST bit on for at
+	   least 5us.  Both PATA and SATA support it but, in case of
+	   SATA, this may require controller-specific support as the
+	   second Register FIS to clear SRST should be transmitted
+	   while BSY bit is still set.  Note that on PATA, this resets
+	   both master and slave devices on a channel.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>EXECUTE DEVICE DIAGNOSTIC command</term>
+	   <listitem>
+	   <para>
+	   Although ATA/ATAPI standard doesn't describe exactly, EDD
+	   implies some level of resetting, possibly similar level
+	   with software reset.  Host-side EDD protocol can be handled
+	   with normal command processing and most SATA controllers
+	   should be able to handle EDD's just like other commands.
+	   As in software reset, EDD affects both devices on a PATA
+	   bus.
+	   </para>
+	   <para>
+	   Although EDD does reset devices, this doesn't suit error
+	   handling as EDD cannot be issued while BSY is set and it's
+	   unclear how it will act when device is in unknown/weird
+	   state.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI DEVICE RESET command</term>
+	   <listitem>
+	   <para>
+	   This is very similar to software reset except that reset
+	   can be restricted to the selected device without affecting
+	   the other device sharing the cable.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>SATA phy reset</term>
+	   <listitem>
+	   <para>
+	   This is the preferred way of resetting a SATA device.  In
+	   effect, it's identical to PATA hardware reset.  Note that
+	   this can be done with the standard SCR Control register.
+	   As such, it's usually easier to implement than software
+	   reset.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	One more thing to consider when resetting devices is that
+	resetting clears certain configuration parameters and they
+	need to be set to their previous or newly adjusted values
+	after reset.
+	</para>
+
+	<para>
+	Parameters affected are.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	CHS set up with INITIALIZE DEVICE PARAMETERS (seldomly used)
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Parameters set with SET FEATURES including transfer mode setting
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Block count set with SET MULTIPLE MODE
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Other parameters (SET MAX, MEDIA LOCK...)
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	ATA/ATAPI standard specifies that some parameters must be
+	maintained across hardware or software reset, but doesn't
+	strictly specify all of them.  Always reconfiguring needed
+	parameters after reset is required for robustness.  Note that
+	this also applies when resuming from deep sleep (power-off).
+	</para>
+
+	<para>
+	Also, ATA/ATAPI standard requires that IDENTIFY DEVICE /
+	IDENTIFY PACKET DEVICE is issued after any configuration
+	parameter is updated or a hardware reset and the result used
+	for further operation.  OS driver is required to implement
+	revalidation mechanism to support this.
+	</para>
+
+     </sect2>
+
+     <sect2 id="exrecReconf">
+        <title>Reconfigure transport</title>
+
+	<para>
+	For both PATA and SATA, a lot of corners are cut for cheap
+	connectors, cables or controllers and it's quite common to see
+	high transmission error rate.  This can be mitigated by
+	lowering transmission speed.
+	</para>
+
+	<para>
+	The following is a possible scheme Jeff Garzik suggested.
+	</para>
+
+	<blockquote>
+	<para>
+	If more than $N (3?) transmission errors happen in 15 minutes,
+	</para>	
+	<itemizedlist>
+	<listitem>
+	<para>
+	if SATA, decrease SATA PHY speed.  if speed cannot be decreased,
+	</para>
+	</listitem>
+	<listitem>
+	<para>
+	decrease UDMA xfer speed.  if at UDMA0, switch to PIO4,
+	</para>
+	</listitem>
+	<listitem>
+	<para>
+	decrease PIO xfer speed.  if at PIO3, complain, but continue
+	</para>
+	</listitem>
+	</itemizedlist>
+	</blockquote>
+
+     </sect2>
+
+  </sect1>
+
+  </chapter>
+
   <chapter id="PiixInt">
      <title>ata_piix Internals</title>
 !Idrivers/scsi/ata_piix.c

+ 25 - 6
drivers/scsi/ahci.c

@@ -672,17 +672,36 @@ static irqreturn_t ahci_interrupt (int irq, void *dev_instance, struct pt_regs *
 
         for (i = 0; i < host_set->n_ports; i++) {
 		struct ata_port *ap;
-		u32 tmp;
 
-		VPRINTK("port %u\n", i);
+		if (!(irq_stat & (1 << i)))
+			continue;
+
 		ap = host_set->ports[i];
-		tmp = irq_stat & (1 << i);
-		if (tmp && ap) {
+		if (ap) {
 			struct ata_queued_cmd *qc;
 			qc = ata_qc_from_tag(ap, ap->active_tag);
-			if (ahci_host_intr(ap, qc))
-				irq_ack |= (1 << i);
+			if (!ahci_host_intr(ap, qc))
+				if (ata_ratelimit()) {
+					struct pci_dev *pdev =
+					  to_pci_dev(ap->host_set->dev);
+					printk(KERN_WARNING
+					  "ahci(%s): unhandled interrupt on port %u\n",
+					  pci_name(pdev), i);
+				}
+
+			VPRINTK("port %u\n", i);
+		} else {
+			VPRINTK("port %u (no irq)\n", i);
+			if (ata_ratelimit()) {
+				struct pci_dev *pdev =
+				  to_pci_dev(ap->host_set->dev);
+				printk(KERN_WARNING
+				  "ahci(%s): interrupt on disabled port %u\n",
+				  pci_name(pdev), i);
+			}
 		}
+
+		irq_ack |= (1 << i);
 	}
 
 	if (irq_ack) {

+ 259 - 174
drivers/scsi/libata-core.c

@@ -48,6 +48,7 @@
 #include <linux/completion.h>
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
+#include <linux/jiffies.h>
 #include <scsi/scsi.h>
 #include "scsi.h"
 #include "scsi_priv.h"
@@ -62,6 +63,7 @@
 static unsigned int ata_busy_sleep (struct ata_port *ap,
 				    unsigned long tmout_pat,
 			    	    unsigned long tmout);
+static void ata_dev_init_params(struct ata_port *ap, struct ata_device *dev);
 static void ata_set_mode(struct ata_port *ap);
 static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev);
 static unsigned int ata_get_mode_mask(struct ata_port *ap, int shift);
@@ -69,7 +71,6 @@ static int fgb(u32 bitmap);
 static int ata_choose_xfer_mode(struct ata_port *ap,
 				u8 *xfer_mode_out,
 				unsigned int *xfer_shift_out);
-static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat);
 static void __ata_qc_complete(struct ata_queued_cmd *qc);
 
 static unsigned int ata_unique_id = 1;
@@ -1131,7 +1132,7 @@ static inline void ata_dump_id(struct ata_device *dev)
 static void ata_dev_identify(struct ata_port *ap, unsigned int device)
 {
 	struct ata_device *dev = &ap->device[device];
-	unsigned int i;
+	unsigned int major_version;
 	u16 tmp;
 	unsigned long xfer_modes;
 	u8 status;
@@ -1229,9 +1230,9 @@ retry:
 	 * common ATA, ATAPI feature tests
 	 */
 
-	/* we require LBA and DMA support (bits 8 & 9 of word 49) */
-	if (!ata_id_has_dma(dev->id) || !ata_id_has_lba(dev->id)) {
-		printk(KERN_DEBUG "ata%u: no dma/lba\n", ap->id);
+	/* we require DMA support (bits 8 of word 49) */
+	if (!ata_id_has_dma(dev->id)) {
+		printk(KERN_DEBUG "ata%u: no dma\n", ap->id);
 		goto err_out_nosup;
 	}
 
@@ -1251,32 +1252,69 @@ retry:
 		if (!ata_id_is_ata(dev->id))	/* sanity check */
 			goto err_out_nosup;
 
+		/* get major version */
 		tmp = dev->id[ATA_ID_MAJOR_VER];
-		for (i = 14; i >= 1; i--)
-			if (tmp & (1 << i))
+		for (major_version = 14; major_version >= 1; major_version--)
+			if (tmp & (1 << major_version))
 				break;
 
-		/* we require at least ATA-3 */
-		if (i < 3) {
-			printk(KERN_DEBUG "ata%u: no ATA-3\n", ap->id);
-			goto err_out_nosup;
-		}
+		/*
+		 * The exact sequence expected by certain pre-ATA4 drives is:
+		 * SRST RESET
+		 * IDENTIFY
+		 * INITIALIZE DEVICE PARAMETERS
+		 * anything else..
+		 * Some drives were very specific about that exact sequence.
+		 */
+		if (major_version < 4 || (!ata_id_has_lba(dev->id)))
+			ata_dev_init_params(ap, dev);
+
+		if (ata_id_has_lba(dev->id)) {
+			dev->flags |= ATA_DFLAG_LBA;
+
+			if (ata_id_has_lba48(dev->id)) {
+				dev->flags |= ATA_DFLAG_LBA48;
+				dev->n_sectors = ata_id_u64(dev->id, 100);
+			} else {
+				dev->n_sectors = ata_id_u32(dev->id, 60);
+			}
+
+			/* print device info to dmesg */
+			printk(KERN_INFO "ata%u: dev %u ATA-%d, max %s, %Lu sectors:%s\n",
+			       ap->id, device,
+			       major_version,
+			       ata_mode_string(xfer_modes),
+			       (unsigned long long)dev->n_sectors,
+			       dev->flags & ATA_DFLAG_LBA48 ? " LBA48" : " LBA");
+		} else { 
+			/* CHS */
+
+			/* Default translation */
+			dev->cylinders	= dev->id[1];
+			dev->heads	= dev->id[3];
+			dev->sectors	= dev->id[6];
+			dev->n_sectors	= dev->cylinders * dev->heads * dev->sectors;
+
+			if (ata_id_current_chs_valid(dev->id)) {
+				/* Current CHS translation is valid. */
+				dev->cylinders = dev->id[54];
+				dev->heads     = dev->id[55];
+				dev->sectors   = dev->id[56];
+				
+				dev->n_sectors = ata_id_u32(dev->id, 57);
+			}
+
+			/* print device info to dmesg */
+			printk(KERN_INFO "ata%u: dev %u ATA-%d, max %s, %Lu sectors: CHS %d/%d/%d\n",
+			       ap->id, device,
+			       major_version,
+			       ata_mode_string(xfer_modes),
+			       (unsigned long long)dev->n_sectors,
+			       (int)dev->cylinders, (int)dev->heads, (int)dev->sectors);
 
-		if (ata_id_has_lba48(dev->id)) {
-			dev->flags |= ATA_DFLAG_LBA48;
-			dev->n_sectors = ata_id_u64(dev->id, 100);
-		} else {
-			dev->n_sectors = ata_id_u32(dev->id, 60);
 		}
 
 		ap->host->max_cmd_len = 16;
-
-		/* print device info to dmesg */
-		printk(KERN_INFO "ata%u: dev %u ATA, max %s, %Lu sectors:%s\n",
-		       ap->id, device,
-		       ata_mode_string(xfer_modes),
-		       (unsigned long long)dev->n_sectors,
-		       dev->flags & ATA_DFLAG_LBA48 ? " lba48" : "");
 	}
 
 	/* ATAPI-specific feature tests */
@@ -2143,6 +2181,54 @@ static void ata_dev_set_xfermode(struct ata_port *ap, struct ata_device *dev)
 	DPRINTK("EXIT\n");
 }
 
+/**
+ *	ata_dev_init_params - Issue INIT DEV PARAMS command
+ *	@ap: Port associated with device @dev
+ *	@dev: Device to which command will be sent
+ *
+ *	LOCKING:
+ */
+
+static void ata_dev_init_params(struct ata_port *ap, struct ata_device *dev)
+{
+	DECLARE_COMPLETION(wait);
+	struct ata_queued_cmd *qc;
+	int rc;
+	unsigned long flags;
+	u16 sectors = dev->id[6];
+	u16 heads   = dev->id[3];
+
+	/* Number of sectors per track 1-255. Number of heads 1-16 */
+	if (sectors < 1 || sectors > 255 || heads < 1 || heads > 16)
+		return;
+
+	/* set up init dev params taskfile */
+	DPRINTK("init dev params \n");
+
+	qc = ata_qc_new_init(ap, dev);
+	BUG_ON(qc == NULL);
+
+	qc->tf.command = ATA_CMD_INIT_DEV_PARAMS;
+	qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	qc->tf.protocol = ATA_PROT_NODATA;
+	qc->tf.nsect = sectors;
+	qc->tf.device |= (heads - 1) & 0x0f; /* max head = num. of heads - 1 */
+
+	qc->waiting = &wait;
+	qc->complete_fn = ata_qc_complete_noop;
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	rc = ata_qc_issue(qc);
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	if (rc)
+		ata_port_disable(ap);
+	else
+		wait_for_completion(&wait);
+
+	DPRINTK("EXIT\n");
+}
+
 /**
  *	ata_sg_clean - Unmap DMA memory associated with command
  *	@qc: Command containing DMA memory to be released
@@ -2425,20 +2511,20 @@ void ata_poll_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
 static unsigned long ata_pio_poll(struct ata_port *ap)
 {
 	u8 status;
-	unsigned int poll_state = PIO_ST_UNKNOWN;
-	unsigned int reg_state = PIO_ST_UNKNOWN;
-	const unsigned int tmout_state = PIO_ST_TMOUT;
-
-	switch (ap->pio_task_state) {
-	case PIO_ST:
-	case PIO_ST_POLL:
-		poll_state = PIO_ST_POLL;
-		reg_state = PIO_ST;
+	unsigned int poll_state = HSM_ST_UNKNOWN;
+	unsigned int reg_state = HSM_ST_UNKNOWN;
+	const unsigned int tmout_state = HSM_ST_TMOUT;
+
+	switch (ap->hsm_task_state) {
+	case HSM_ST:
+	case HSM_ST_POLL:
+		poll_state = HSM_ST_POLL;
+		reg_state = HSM_ST;
 		break;
-	case PIO_ST_LAST:
-	case PIO_ST_LAST_POLL:
-		poll_state = PIO_ST_LAST_POLL;
-		reg_state = PIO_ST_LAST;
+	case HSM_ST_LAST:
+	case HSM_ST_LAST_POLL:
+		poll_state = HSM_ST_LAST_POLL;
+		reg_state = HSM_ST_LAST;
 		break;
 	default:
 		BUG();
@@ -2448,14 +2534,14 @@ static unsigned long ata_pio_poll(struct ata_port *ap)
 	status = ata_chk_status(ap);
 	if (status & ATA_BUSY) {
 		if (time_after(jiffies, ap->pio_task_timeout)) {
-			ap->pio_task_state = tmout_state;
+			ap->hsm_task_state = tmout_state;
 			return 0;
 		}
-		ap->pio_task_state = poll_state;
+		ap->hsm_task_state = poll_state;
 		return ATA_SHORT_PAUSE;
 	}
 
-	ap->pio_task_state = reg_state;
+	ap->hsm_task_state = reg_state;
 	return 0;
 }
 
@@ -2480,14 +2566,14 @@ static int ata_pio_complete (struct ata_port *ap)
 	 * we enter, BSY will be cleared in a chk-status or two.  If not,
 	 * the drive is probably seeking or something.  Snooze for a couple
 	 * msecs, then chk-status again.  If still busy, fall back to
-	 * PIO_ST_POLL state.
+	 * HSM_ST_POLL state.
 	 */
 	drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10);
 	if (drv_stat & (ATA_BUSY | ATA_DRQ)) {
 		msleep(2);
 		drv_stat = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 10);
 		if (drv_stat & (ATA_BUSY | ATA_DRQ)) {
-			ap->pio_task_state = PIO_ST_LAST_POLL;
+			ap->hsm_task_state = HSM_ST_LAST_POLL;
 			ap->pio_task_timeout = jiffies + ATA_TMOUT_PIO;
 			return 0;
 		}
@@ -2495,14 +2581,14 @@ static int ata_pio_complete (struct ata_port *ap)
 
 	drv_stat = ata_wait_idle(ap);
 	if (!ata_ok(drv_stat)) {
-		ap->pio_task_state = PIO_ST_ERR;
+		ap->hsm_task_state = HSM_ST_ERR;
 		return 0;
 	}
 
 	qc = ata_qc_from_tag(ap, ap->active_tag);
 	assert(qc != NULL);
 
-	ap->pio_task_state = PIO_ST_IDLE;
+	ap->hsm_task_state = HSM_ST_IDLE;
 
 	ata_poll_qc_complete(qc, drv_stat);
 
@@ -2662,7 +2748,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	unsigned char *buf;
 
 	if (qc->cursect == (qc->nsect - 1))
-		ap->pio_task_state = PIO_ST_LAST;
+		ap->hsm_task_state = HSM_ST_LAST;
 
 	page = sg[qc->cursg].page;
 	offset = sg[qc->cursg].offset + qc->cursg_ofs * ATA_SECT_SIZE;
@@ -2712,7 +2798,7 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 	unsigned int offset, count;
 
 	if (qc->curbytes + bytes >= qc->nbytes)
-		ap->pio_task_state = PIO_ST_LAST;
+		ap->hsm_task_state = HSM_ST_LAST;
 
 next_sg:
 	if (unlikely(qc->cursg >= qc->n_elem)) {
@@ -2734,7 +2820,7 @@ next_sg:
 		for (i = 0; i < words; i++)
 			ata_data_xfer(ap, (unsigned char*)pad_buf, 2, do_write);
 
-		ap->pio_task_state = PIO_ST_LAST;
+		ap->hsm_task_state = HSM_ST_LAST;
 		return;
 	}
 
@@ -2815,7 +2901,7 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc)
 err_out:
 	printk(KERN_INFO "ata%u: dev %u: ATAPI check failed\n",
 	      ap->id, dev->devno);
-	ap->pio_task_state = PIO_ST_ERR;
+	ap->hsm_task_state = HSM_ST_ERR;
 }
 
 /**
@@ -2837,14 +2923,14 @@ static void ata_pio_block(struct ata_port *ap)
 	 * a chk-status or two.  If not, the drive is probably seeking
 	 * or something.  Snooze for a couple msecs, then
 	 * chk-status again.  If still busy, fall back to
-	 * PIO_ST_POLL state.
+	 * HSM_ST_POLL state.
 	 */
 	status = ata_busy_wait(ap, ATA_BUSY, 5);
 	if (status & ATA_BUSY) {
 		msleep(2);
 		status = ata_busy_wait(ap, ATA_BUSY, 10);
 		if (status & ATA_BUSY) {
-			ap->pio_task_state = PIO_ST_POLL;
+			ap->hsm_task_state = HSM_ST_POLL;
 			ap->pio_task_timeout = jiffies + ATA_TMOUT_PIO;
 			return;
 		}
@@ -2856,7 +2942,7 @@ static void ata_pio_block(struct ata_port *ap)
 	if (is_atapi_taskfile(&qc->tf)) {
 		/* no more data to transfer or unsupported ATAPI command */
 		if ((status & ATA_DRQ) == 0) {
-			ap->pio_task_state = PIO_ST_LAST;
+			ap->hsm_task_state = HSM_ST_LAST;
 			return;
 		}
 
@@ -2864,7 +2950,7 @@ static void ata_pio_block(struct ata_port *ap)
 	} else {
 		/* handle BSY=0, DRQ=0 as error */
 		if ((status & ATA_DRQ) == 0) {
-			ap->pio_task_state = PIO_ST_ERR;
+			ap->hsm_task_state = HSM_ST_ERR;
 			return;
 		}
 
@@ -2884,7 +2970,7 @@ static void ata_pio_error(struct ata_port *ap)
 	printk(KERN_WARNING "ata%u: PIO error, drv_stat 0x%x\n",
 	       ap->id, drv_stat);
 
-	ap->pio_task_state = PIO_ST_IDLE;
+	ap->hsm_task_state = HSM_ST_IDLE;
 
 	ata_poll_qc_complete(qc, drv_stat | ATA_ERR);
 }
@@ -2899,25 +2985,25 @@ fsm_start:
 	timeout = 0;
 	qc_completed = 0;
 
-	switch (ap->pio_task_state) {
-	case PIO_ST_IDLE:
+	switch (ap->hsm_task_state) {
+	case HSM_ST_IDLE:
 		return;
 
-	case PIO_ST:
+	case HSM_ST:
 		ata_pio_block(ap);
 		break;
 
-	case PIO_ST_LAST:
+	case HSM_ST_LAST:
 		qc_completed = ata_pio_complete(ap);
 		break;
 
-	case PIO_ST_POLL:
-	case PIO_ST_LAST_POLL:
+	case HSM_ST_POLL:
+	case HSM_ST_LAST_POLL:
 		timeout = ata_pio_poll(ap);
 		break;
 
-	case PIO_ST_TMOUT:
-	case PIO_ST_ERR:
+	case HSM_ST_TMOUT:
+	case HSM_ST_ERR:
 		ata_pio_error(ap);
 		return;
 	}
@@ -2928,52 +3014,6 @@ fsm_start:
 		goto fsm_start;
 }
 
-static void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
-				struct scsi_cmnd *cmd)
-{
-	DECLARE_COMPLETION(wait);
-	struct ata_queued_cmd *qc;
-	unsigned long flags;
-	int rc;
-
-	DPRINTK("ATAPI request sense\n");
-
-	qc = ata_qc_new_init(ap, dev);
-	BUG_ON(qc == NULL);
-
-	/* FIXME: is this needed? */
-	memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
-
-	ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
-	qc->dma_dir = DMA_FROM_DEVICE;
-
-	memset(&qc->cdb, 0, ap->cdb_len);
-	qc->cdb[0] = REQUEST_SENSE;
-	qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
-	qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
-	qc->tf.command = ATA_CMD_PACKET;
-
-	qc->tf.protocol = ATA_PROT_ATAPI;
-	qc->tf.lbam = (8 * 1024) & 0xff;
-	qc->tf.lbah = (8 * 1024) >> 8;
-	qc->nbytes = SCSI_SENSE_BUFFERSIZE;
-
-	qc->waiting = &wait;
-	qc->complete_fn = ata_qc_complete_noop;
-
-	spin_lock_irqsave(&ap->host_set->lock, flags);
-	rc = ata_qc_issue(qc);
-	spin_unlock_irqrestore(&ap->host_set->lock, flags);
-
-	if (rc)
-		ata_port_disable(ap);
-	else
-		wait_for_completion(&wait);
-
-	DPRINTK("EXIT\n");
-}
-
 /**
  *	ata_qc_timeout - Handle timeout of queued command
  *	@qc: Command that timed out
@@ -3091,14 +3131,14 @@ void ata_eng_timeout(struct ata_port *ap)
 	DPRINTK("ENTER\n");
 
 	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (!qc) {
+	if (qc)
+		ata_qc_timeout(qc);
+	else {
 		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
 		       ap->id);
 		goto out;
 	}
 
-	ata_qc_timeout(qc);
-
 out:
 	DPRINTK("EXIT\n");
 }
@@ -3156,14 +3196,18 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 
 		ata_tf_init(ap, &qc->tf, dev->devno);
 
-		if (dev->flags & ATA_DFLAG_LBA48)
-			qc->tf.flags |= ATA_TFLAG_LBA48;
+		if (dev->flags & ATA_DFLAG_LBA) {
+			qc->tf.flags |= ATA_TFLAG_LBA;
+
+			if (dev->flags & ATA_DFLAG_LBA48)
+				qc->tf.flags |= ATA_TFLAG_LBA48;
+		}
 	}
 
 	return qc;
 }
 
-static int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat)
+int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat)
 {
 	return 0;
 }
@@ -3360,7 +3404,7 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc)
 	case ATA_PROT_PIO: /* load tf registers, initiate polling pio */
 		ata_qc_set_polling(qc);
 		ata_tf_to_host_nolock(ap, &qc->tf);
-		ap->pio_task_state = PIO_ST;
+		ap->hsm_task_state = HSM_ST;
 		queue_work(ata_wq, &ap->pio_task);
 		break;
 
@@ -3586,7 +3630,7 @@ u8 ata_bmdma_status(struct ata_port *ap)
 		void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr;
 		host_stat = readb(mmio + ATA_DMA_STATUS);
 	} else
-	host_stat = inb(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
+		host_stat = inb(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
 	return host_stat;
 }
 
@@ -3806,7 +3850,7 @@ static void atapi_packet_task(void *_data)
 		ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1);
 
 		/* PIO commands are handled by polling */
-		ap->pio_task_state = PIO_ST;
+		ap->hsm_task_state = HSM_ST;
 		queue_work(ata_wq, &ap->pio_task);
 	}
 
@@ -4113,7 +4157,7 @@ int ata_device_add(struct ata_probe_ent *ent)
 	for (i = 0; i < count; i++) {
 		struct ata_port *ap = host_set->ports[i];
 
-		scsi_scan_host(ap->host);
+		ata_scsi_scan_host(ap);
 	}
 
 	dev_set_drvdata(dev, host_set);
@@ -4273,85 +4317,87 @@ void ata_pci_host_stop (struct ata_host_set *host_set)
  *	ata_pci_init_native_mode - Initialize native-mode driver
  *	@pdev:  pci device to be initialized
  *	@port:  array[2] of pointers to port info structures.
+ *	@ports: bitmap of ports present
  *
  *	Utility function which allocates and initializes an
  *	ata_probe_ent structure for a standard dual-port
  *	PIO-based IDE controller.  The returned ata_probe_ent
  *	structure can be passed to ata_device_add().  The returned
  *	ata_probe_ent structure should then be freed with kfree().
+ *
+ *	The caller need only pass the address of the primary port, the
+ *	secondary will be deduced automatically. If the device has non
+ *	standard secondary port mappings this function can be called twice,
+ *	once for each interface.
  */
 
 struct ata_probe_ent *
-ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port)
+ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int ports)
 {
 	struct ata_probe_ent *probe_ent =
 		ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]);
+	int p = 0;
+
 	if (!probe_ent)
 		return NULL;
 
-	probe_ent->n_ports = 2;
 	probe_ent->irq = pdev->irq;
 	probe_ent->irq_flags = SA_SHIRQ;
 
-	probe_ent->port[0].cmd_addr = pci_resource_start(pdev, 0);
-	probe_ent->port[0].altstatus_addr =
-	probe_ent->port[0].ctl_addr =
-		pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
-	probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
-
-	probe_ent->port[1].cmd_addr = pci_resource_start(pdev, 2);
-	probe_ent->port[1].altstatus_addr =
-	probe_ent->port[1].ctl_addr =
-		pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
-	probe_ent->port[1].bmdma_addr = pci_resource_start(pdev, 4) + 8;
+	if (ports & ATA_PORT_PRIMARY) {
+		probe_ent->port[p].cmd_addr = pci_resource_start(pdev, 0);
+		probe_ent->port[p].altstatus_addr =
+		probe_ent->port[p].ctl_addr =
+			pci_resource_start(pdev, 1) | ATA_PCI_CTL_OFS;
+		probe_ent->port[p].bmdma_addr = pci_resource_start(pdev, 4);
+		ata_std_ports(&probe_ent->port[p]);
+		p++;
+	}
 
-	ata_std_ports(&probe_ent->port[0]);
-	ata_std_ports(&probe_ent->port[1]);
+	if (ports & ATA_PORT_SECONDARY) {
+		probe_ent->port[p].cmd_addr = pci_resource_start(pdev, 2);
+		probe_ent->port[p].altstatus_addr =
+		probe_ent->port[p].ctl_addr =
+			pci_resource_start(pdev, 3) | ATA_PCI_CTL_OFS;
+		probe_ent->port[p].bmdma_addr = pci_resource_start(pdev, 4) + 8;
+		ata_std_ports(&probe_ent->port[p]);
+		p++;
+	}
 
+	probe_ent->n_ports = p;
 	return probe_ent;
 }
 
-static struct ata_probe_ent *
-ata_pci_init_legacy_mode(struct pci_dev *pdev, struct ata_port_info **port,
-    struct ata_probe_ent **ppe2)
+static struct ata_probe_ent *ata_pci_init_legacy_port(struct pci_dev *pdev, struct ata_port_info **port, int port_num)
 {
-	struct ata_probe_ent *probe_ent, *probe_ent2;
+	struct ata_probe_ent *probe_ent;
 
 	probe_ent = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[0]);
 	if (!probe_ent)
 		return NULL;
-	probe_ent2 = ata_probe_ent_alloc(pci_dev_to_dev(pdev), port[1]);
-	if (!probe_ent2) {
-		kfree(probe_ent);
-		return NULL;
-	}
 
-	probe_ent->n_ports = 1;
-	probe_ent->irq = 14;
-
-	probe_ent->hard_port_no = 0;
+	
 	probe_ent->legacy_mode = 1;
-
-	probe_ent2->n_ports = 1;
-	probe_ent2->irq = 15;
-
-	probe_ent2->hard_port_no = 1;
-	probe_ent2->legacy_mode = 1;
-
-	probe_ent->port[0].cmd_addr = 0x1f0;
-	probe_ent->port[0].altstatus_addr =
-	probe_ent->port[0].ctl_addr = 0x3f6;
-	probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4);
-
-	probe_ent2->port[0].cmd_addr = 0x170;
-	probe_ent2->port[0].altstatus_addr =
-	probe_ent2->port[0].ctl_addr = 0x376;
-	probe_ent2->port[0].bmdma_addr = pci_resource_start(pdev, 4)+8;
-
+	probe_ent->n_ports = 1;
+	probe_ent->hard_port_no = port_num;
+
+	switch(port_num)
+	{
+		case 0:
+			probe_ent->irq = 14;
+			probe_ent->port[0].cmd_addr = 0x1f0;
+			probe_ent->port[0].altstatus_addr =
+			probe_ent->port[0].ctl_addr = 0x3f6;
+			break;
+		case 1:
+			probe_ent->irq = 15;
+			probe_ent->port[0].cmd_addr = 0x170;
+			probe_ent->port[0].altstatus_addr =
+			probe_ent->port[0].ctl_addr = 0x376;
+			break;
+	}
+	probe_ent->port[0].bmdma_addr = pci_resource_start(pdev, 4) + 8 * port_num;
 	ata_std_ports(&probe_ent->port[0]);
-	ata_std_ports(&probe_ent2->port[0]);
-
-	*ppe2 = probe_ent2;
 	return probe_ent;
 }
 
@@ -4380,7 +4426,7 @@ ata_pci_init_legacy_mode(struct pci_dev *pdev, struct ata_port_info **port,
 int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 		      unsigned int n_ports)
 {
-	struct ata_probe_ent *probe_ent, *probe_ent2 = NULL;
+	struct ata_probe_ent *probe_ent = NULL, *probe_ent2 = NULL;
 	struct ata_port_info *port[2];
 	u8 tmp8, mask;
 	unsigned int legacy_mode = 0;
@@ -4397,7 +4443,7 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 
 	if ((port[0]->host_flags & ATA_FLAG_NO_LEGACY) == 0
 	    && (pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
-		/* TODO: support transitioning to native mode? */
+		/* TODO: What if one channel is in native mode ... */
 		pci_read_config_byte(pdev, PCI_CLASS_PROG, &tmp8);
 		mask = (1 << 2) | (1 << 0);
 		if ((tmp8 & mask) != mask)
@@ -4405,11 +4451,20 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 	}
 
 	/* FIXME... */
-	if ((!legacy_mode) && (n_ports > 1)) {
-		printk(KERN_ERR "ata: BUG: native mode, n_ports > 1\n");
-		return -EINVAL;
+	if ((!legacy_mode) && (n_ports > 2)) {
+		printk(KERN_ERR "ata: BUG: native mode, n_ports > 2\n");
+		n_ports = 2;
+		/* For now */
 	}
 
+	/* FIXME: Really for ATA it isn't safe because the device may be
+	   multi-purpose and we want to leave it alone if it was already
+	   enabled. Secondly for shared use as Arjan says we want refcounting
+	   
+	   Checking dev->is_enabled is insufficient as this is not set at
+	   boot for the primary video which is BIOS enabled
+         */
+         
 	rc = pci_enable_device(pdev);
 	if (rc)
 		return rc;
@@ -4420,6 +4475,7 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 		goto err_out;
 	}
 
+	/* FIXME: Should use platform specific mappers for legacy port ranges */
 	if (legacy_mode) {
 		if (!request_region(0x1f0, 8, "libata")) {
 			struct resource *conflict, res;
@@ -4464,10 +4520,17 @@ int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 		goto err_out_regions;
 
 	if (legacy_mode) {
-		probe_ent = ata_pci_init_legacy_mode(pdev, port, &probe_ent2);
-	} else
-		probe_ent = ata_pci_init_native_mode(pdev, port);
-	if (!probe_ent) {
+		if (legacy_mode & (1 << 0))
+			probe_ent = ata_pci_init_legacy_port(pdev, port, 0);
+		if (legacy_mode & (1 << 1))
+			probe_ent2 = ata_pci_init_legacy_port(pdev, port, 1);
+	} else {
+		if (n_ports == 2)
+			probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
+		else
+			probe_ent = ata_pci_init_native_mode(pdev, port, ATA_PORT_PRIMARY);
+	}
+	if (!probe_ent && !probe_ent2) {
 		rc = -ENOMEM;
 		goto err_out_regions;
 	}
@@ -4579,6 +4642,27 @@ static void __exit ata_exit(void)
 module_init(ata_init);
 module_exit(ata_exit);
 
+static unsigned long ratelimit_time;
+static spinlock_t ata_ratelimit_lock = SPIN_LOCK_UNLOCKED;
+
+int ata_ratelimit(void)
+{
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ata_ratelimit_lock, flags);
+
+	if (time_after(jiffies, ratelimit_time)) {
+		rc = 1;
+		ratelimit_time = jiffies + (HZ/5);
+	} else
+		rc = 0;
+
+	spin_unlock_irqrestore(&ata_ratelimit_lock, flags);
+
+	return rc;
+}
+
 /*
  * libata is essentially a library of internal helper functions for
  * low-level ATA host controller drivers.  As such, the API/ABI is
@@ -4620,6 +4704,7 @@ EXPORT_SYMBOL_GPL(sata_phy_reset);
 EXPORT_SYMBOL_GPL(__sata_phy_reset);
 EXPORT_SYMBOL_GPL(ata_bus_reset);
 EXPORT_SYMBOL_GPL(ata_port_disable);
+EXPORT_SYMBOL_GPL(ata_ratelimit);
 EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
 EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
 EXPORT_SYMBOL_GPL(ata_scsi_error);

+ 494 - 170
drivers/scsi/libata-scsi.c

@@ -52,6 +52,14 @@ static struct ata_device *
 ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev);
 
 
+static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
+				   void (*done)(struct scsi_cmnd *))
+{
+	ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	/* "Invalid field in cbd" */
+	done(cmd);
+}
+
 /**
  *	ata_std_bios_param - generic bios head/sector/cylinder calculator used by sd.
  *	@sdev: SCSI device for which BIOS geometry is to be determined
@@ -385,6 +393,7 @@ void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc,
 			u8 *ascq)
 {
 	int i;
+
 	/* Based on the 3ware driver translation table */
 	static unsigned char sense_table[][4] = {
 		/* BBD|ECC|ID|MAR */
@@ -596,10 +605,7 @@ void ata_gen_fixed_sense(struct ata_queued_cmd *qc)
 	sb[0] = 0x70;
 	sb[7] = 0x0a;
 
-#if 0 /* when C/H/S support is merged */
 	if (tf->flags & ATA_TFLAG_LBA && !(tf->flags & ATA_TFLAG_LBA48)) {
-#endif
-	if (!(tf->flags & ATA_TFLAG_LBA48)) {
 		/* A small (28b) LBA will fit in the 32b info field */
 		sb[0] |= 0x80;		/* set valid bit */
 		sb[3] = tf->device & 0x0f;
@@ -714,15 +720,26 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc,
 		;	/* ignore IMMED bit, violates sat-r05 */
 	}
 	if (scsicmd[4] & 0x2)
-		return 1;	/* LOEJ bit set not supported */
+		goto invalid_fld;       /* LOEJ bit set not supported */
 	if (((scsicmd[4] >> 4) & 0xf) != 0)
-		return 1;	/* power conditions not supported */
+		goto invalid_fld;       /* power conditions not supported */
 	if (scsicmd[4] & 0x1) {
 		tf->nsect = 1;	/* 1 sector, lba=0 */
-		tf->lbah = 0x0;
-		tf->lbam = 0x0;
-		tf->lbal = 0x0;
-		tf->device |= ATA_LBA;
+
+		if (qc->dev->flags & ATA_DFLAG_LBA) {
+			qc->tf.flags |= ATA_TFLAG_LBA;
+
+			tf->lbah = 0x0;
+			tf->lbam = 0x0;
+			tf->lbal = 0x0;
+			tf->device |= ATA_LBA;
+		} else {
+			/* CHS */
+			tf->lbal = 0x1; /* sect */
+			tf->lbam = 0x0; /* cyl low */
+			tf->lbah = 0x0; /* cyl high */
+		}
+
 		tf->command = ATA_CMD_VERIFY;	/* READ VERIFY */
 	} else {
 		tf->nsect = 0;	/* time period value (0 implies now) */
@@ -737,6 +754,11 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc,
 	 */
 
 	return 0;
+
+invalid_fld:
+	ata_scsi_set_sense(qc->scsicmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	/* "Invalid field in cbd" */
+	return 1;
 }
 
 
@@ -771,6 +793,99 @@ static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 	return 0;
 }
 
+/**
+ *	scsi_6_lba_len - Get LBA and transfer length
+ *	@scsicmd: SCSI command to translate
+ *
+ *	Calculate LBA and transfer length for 6-byte commands.
+ *
+ *	RETURNS:
+ *	@plba: the LBA
+ *	@plen: the transfer length
+ */
+
+static void scsi_6_lba_len(u8 *scsicmd, u64 *plba, u32 *plen)
+{
+	u64 lba = 0;
+	u32 len = 0;
+
+	VPRINTK("six-byte command\n");
+
+	lba |= ((u64)scsicmd[2]) << 8;
+	lba |= ((u64)scsicmd[3]);
+
+	len |= ((u32)scsicmd[4]);
+
+	*plba = lba;
+	*plen = len;
+}
+
+/**
+ *	scsi_10_lba_len - Get LBA and transfer length
+ *	@scsicmd: SCSI command to translate
+ *
+ *	Calculate LBA and transfer length for 10-byte commands.
+ *
+ *	RETURNS:
+ *	@plba: the LBA
+ *	@plen: the transfer length
+ */
+
+static void scsi_10_lba_len(u8 *scsicmd, u64 *plba, u32 *plen)
+{
+	u64 lba = 0;
+	u32 len = 0;
+
+	VPRINTK("ten-byte command\n");
+
+	lba |= ((u64)scsicmd[2]) << 24;
+	lba |= ((u64)scsicmd[3]) << 16;
+	lba |= ((u64)scsicmd[4]) << 8;
+	lba |= ((u64)scsicmd[5]);
+
+	len |= ((u32)scsicmd[7]) << 8;
+	len |= ((u32)scsicmd[8]);
+
+	*plba = lba;
+	*plen = len;
+}
+
+/**
+ *	scsi_16_lba_len - Get LBA and transfer length
+ *	@scsicmd: SCSI command to translate
+ *
+ *	Calculate LBA and transfer length for 16-byte commands.
+ *
+ *	RETURNS:
+ *	@plba: the LBA
+ *	@plen: the transfer length
+ */
+
+static void scsi_16_lba_len(u8 *scsicmd, u64 *plba, u32 *plen)
+{
+	u64 lba = 0;
+	u32 len = 0;
+
+	VPRINTK("sixteen-byte command\n");
+
+	lba |= ((u64)scsicmd[2]) << 56;
+	lba |= ((u64)scsicmd[3]) << 48;
+	lba |= ((u64)scsicmd[4]) << 40;
+	lba |= ((u64)scsicmd[5]) << 32;
+	lba |= ((u64)scsicmd[6]) << 24;
+	lba |= ((u64)scsicmd[7]) << 16;
+	lba |= ((u64)scsicmd[8]) << 8;
+	lba |= ((u64)scsicmd[9]);
+
+	len |= ((u32)scsicmd[10]) << 24;
+	len |= ((u32)scsicmd[11]) << 16;
+	len |= ((u32)scsicmd[12]) << 8;
+	len |= ((u32)scsicmd[13]);
+
+	*plba = lba;
+	*plen = len;
+}
+
 /**
  *	ata_scsi_verify_xlat - Translate SCSI VERIFY command into an ATA one
  *	@qc: Storage for translated ATA taskfile
@@ -788,79 +903,102 @@ static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 {
 	struct ata_taskfile *tf = &qc->tf;
+	struct ata_device *dev = qc->dev;
+	unsigned int lba   = tf->flags & ATA_TFLAG_LBA;
 	unsigned int lba48 = tf->flags & ATA_TFLAG_LBA48;
 	u64 dev_sectors = qc->dev->n_sectors;
-	u64 sect = 0;
-	u32 n_sect = 0;
+	u64 block;
+	u32 n_block;
 
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
 	tf->protocol = ATA_PROT_NODATA;
-	tf->device |= ATA_LBA;
-
-	if (scsicmd[0] == VERIFY) {
-		sect |= ((u64)scsicmd[2]) << 24;
-		sect |= ((u64)scsicmd[3]) << 16;
-		sect |= ((u64)scsicmd[4]) << 8;
-		sect |= ((u64)scsicmd[5]);
-
-		n_sect |= ((u32)scsicmd[7]) << 8;
-		n_sect |= ((u32)scsicmd[8]);
-	}
-
-	else if (scsicmd[0] == VERIFY_16) {
-		sect |= ((u64)scsicmd[2]) << 56;
-		sect |= ((u64)scsicmd[3]) << 48;
-		sect |= ((u64)scsicmd[4]) << 40;
-		sect |= ((u64)scsicmd[5]) << 32;
-		sect |= ((u64)scsicmd[6]) << 24;
-		sect |= ((u64)scsicmd[7]) << 16;
-		sect |= ((u64)scsicmd[8]) << 8;
-		sect |= ((u64)scsicmd[9]);
-
-		n_sect |= ((u32)scsicmd[10]) << 24;
-		n_sect |= ((u32)scsicmd[11]) << 16;
-		n_sect |= ((u32)scsicmd[12]) << 8;
-		n_sect |= ((u32)scsicmd[13]);
-	}
 
+	if (scsicmd[0] == VERIFY)
+		scsi_10_lba_len(scsicmd, &block, &n_block);
+	else if (scsicmd[0] == VERIFY_16)
+		scsi_16_lba_len(scsicmd, &block, &n_block);
 	else
-		return 1;
-
-	if (!n_sect)
-		return 1;
-	if (sect >= dev_sectors)
-		return 1;
-	if ((sect + n_sect) > dev_sectors)
-		return 1;
+		goto invalid_fld;
+
+	if (!n_block)
+		goto nothing_to_do;
+	if (block >= dev_sectors)
+		goto out_of_range;
+	if ((block + n_block) > dev_sectors)
+		goto out_of_range;
 	if (lba48) {
-		if (n_sect > (64 * 1024))
-			return 1;
+		if (n_block > (64 * 1024))
+			goto invalid_fld;
 	} else {
-		if (n_sect > 256)
-			return 1;
+		if (n_block > 256)
+			goto invalid_fld;
 	}
 
-	if (lba48) {
-		tf->command = ATA_CMD_VERIFY_EXT;
+	if (lba) {
+		if (lba48) {
+			tf->command = ATA_CMD_VERIFY_EXT;
 
-		tf->hob_nsect = (n_sect >> 8) & 0xff;
+			tf->hob_nsect = (n_block >> 8) & 0xff;
 
-		tf->hob_lbah = (sect >> 40) & 0xff;
-		tf->hob_lbam = (sect >> 32) & 0xff;
-		tf->hob_lbal = (sect >> 24) & 0xff;
+			tf->hob_lbah = (block >> 40) & 0xff;
+			tf->hob_lbam = (block >> 32) & 0xff;
+			tf->hob_lbal = (block >> 24) & 0xff;
+		} else {
+			tf->command = ATA_CMD_VERIFY;
+
+			tf->device |= (block >> 24) & 0xf;
+		}
+
+		tf->nsect = n_block & 0xff;
+
+		tf->lbah = (block >> 16) & 0xff;
+		tf->lbam = (block >> 8) & 0xff;
+		tf->lbal = block & 0xff;
+
+		tf->device |= ATA_LBA;
 	} else {
+		/* CHS */
+		u32 sect, head, cyl, track;
+
+		/* Convert LBA to CHS */
+		track = (u32)block / dev->sectors;
+		cyl   = track / dev->heads;
+		head  = track % dev->heads;
+		sect  = (u32)block % dev->sectors + 1;
+
+		DPRINTK("block %u track %u cyl %u head %u sect %u\n",
+			(u32)block, track, cyl, head, sect);
+		
+		/* Check whether the converted CHS can fit. 
+		   Cylinder: 0-65535 
+		   Head: 0-15
+		   Sector: 1-255*/
+		if ((cyl >> 16) || (head >> 4) || (sect >> 8) || (!sect)) 
+			goto out_of_range;
+		
 		tf->command = ATA_CMD_VERIFY;
-
-		tf->device |= (sect >> 24) & 0xf;
+		tf->nsect = n_block & 0xff; /* Sector count 0 means 256 sectors */
+		tf->lbal = sect;
+		tf->lbam = cyl;
+		tf->lbah = cyl >> 8;
+		tf->device |= head;
 	}
 
-	tf->nsect = n_sect & 0xff;
+	return 0;
+
+invalid_fld:
+	ata_scsi_set_sense(qc->scsicmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	/* "Invalid field in cbd" */
+	return 1;
 
-	tf->lbah = (sect >> 16) & 0xff;
-	tf->lbam = (sect >> 8) & 0xff;
-	tf->lbal = sect & 0xff;
+out_of_range:
+	ata_scsi_set_sense(qc->scsicmd, ILLEGAL_REQUEST, 0x21, 0x0);
+	/* "Logical Block Address out of range" */
+	return 1;
 
-	return 0;
+nothing_to_do:
+	qc->scsicmd->result = SAM_STAT_GOOD;
+	return 1;
 }
 
 /**
@@ -886,11 +1024,14 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 {
 	struct ata_taskfile *tf = &qc->tf;
+	struct ata_device *dev = qc->dev;
+	unsigned int lba   = tf->flags & ATA_TFLAG_LBA;
 	unsigned int lba48 = tf->flags & ATA_TFLAG_LBA48;
+	u64 block;
+	u32 n_block;
 
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
 	tf->protocol = qc->dev->xfer_protocol;
-	tf->device |= ATA_LBA;
 
 	if (scsicmd[0] == READ_10 || scsicmd[0] == READ_6 ||
 	    scsicmd[0] == READ_16) {
@@ -900,89 +1041,115 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, u8 *scsicmd)
 		tf->flags |= ATA_TFLAG_WRITE;
 	}
 
-	if (scsicmd[0] == READ_10 || scsicmd[0] == WRITE_10) {
-		if (lba48) {
-			tf->hob_nsect = scsicmd[7];
-			tf->hob_lbal = scsicmd[2];
-
-			qc->nsect = ((unsigned int)scsicmd[7] << 8) |
-					scsicmd[8];
-		} else {
-			/* if we don't support LBA48 addressing, the request
-			 * -may- be too large. */
-			if ((scsicmd[2] & 0xf0) || scsicmd[7])
-				return 1;
-
-			/* stores LBA27:24 in lower 4 bits of device reg */
-			tf->device |= scsicmd[2];
+	/* Calculate the SCSI LBA and transfer length. */
+	switch (scsicmd[0]) {
+	case READ_10:
+	case WRITE_10:
+		scsi_10_lba_len(scsicmd, &block, &n_block);
+		break;
+	case READ_6:
+	case WRITE_6:
+		scsi_6_lba_len(scsicmd, &block, &n_block);
 
-			qc->nsect = scsicmd[8];
-		}
+		/* for 6-byte r/w commands, transfer length 0
+		 * means 256 blocks of data, not 0 block.
+		 */
+		if (!n_block)
+			n_block = 256;
+		break;
+	case READ_16:
+	case WRITE_16:
+		scsi_16_lba_len(scsicmd, &block, &n_block);
+		break;
+	default:
+		DPRINTK("no-byte command\n");
+		goto invalid_fld;
+	}
 
-		tf->nsect = scsicmd[8];
-		tf->lbal = scsicmd[5];
-		tf->lbam = scsicmd[4];
-		tf->lbah = scsicmd[3];
+	/* Check and compose ATA command */
+	if (!n_block)
+		/* For 10-byte and 16-byte SCSI R/W commands, transfer
+		 * length 0 means transfer 0 block of data.
+		 * However, for ATA R/W commands, sector count 0 means
+		 * 256 or 65536 sectors, not 0 sectors as in SCSI.
+		 */
+		goto nothing_to_do;
 
-		VPRINTK("ten-byte command\n");
-		if (qc->nsect == 0) /* we don't support length==0 cmds */
-			return 1;
-		return 0;
-	}
+	if (lba) {
+		if (lba48) {
+			/* The request -may- be too large for LBA48. */
+			if ((block >> 48) || (n_block > 65536))
+				goto out_of_range;
 
-	if (scsicmd[0] == READ_6 || scsicmd[0] == WRITE_6) {
-		qc->nsect = tf->nsect = scsicmd[4];
-		if (!qc->nsect) {
-			qc->nsect = 256;
-			if (lba48)
-				tf->hob_nsect = 1;
-		}
+			tf->hob_nsect = (n_block >> 8) & 0xff;
 
-		tf->lbal = scsicmd[3];
-		tf->lbam = scsicmd[2];
-		tf->lbah = scsicmd[1] & 0x1f; /* mask out reserved bits */
+			tf->hob_lbah = (block >> 40) & 0xff;
+			tf->hob_lbam = (block >> 32) & 0xff;
+			tf->hob_lbal = (block >> 24) & 0xff;
+		} else { 
+			/* LBA28 */
 
-		VPRINTK("six-byte command\n");
-		return 0;
-	}
+			/* The request -may- be too large for LBA28. */
+			if ((block >> 28) || (n_block > 256))
+				goto out_of_range;
 
-	if (scsicmd[0] == READ_16 || scsicmd[0] == WRITE_16) {
-		/* rule out impossible LBAs and sector counts */
-		if (scsicmd[2] || scsicmd[3] || scsicmd[10] || scsicmd[11])
-			return 1;
+			tf->device |= (block >> 24) & 0xf;
+		}
 
-		if (lba48) {
-			tf->hob_nsect = scsicmd[12];
-			tf->hob_lbal = scsicmd[6];
-			tf->hob_lbam = scsicmd[5];
-			tf->hob_lbah = scsicmd[4];
+		qc->nsect = n_block;
+		tf->nsect = n_block & 0xff;
 
-			qc->nsect = ((unsigned int)scsicmd[12] << 8) |
-					scsicmd[13];
-		} else {
-			/* once again, filter out impossible non-zero values */
-			if (scsicmd[4] || scsicmd[5] || scsicmd[12] ||
-			    (scsicmd[6] & 0xf0))
-				return 1;
+		tf->lbah = (block >> 16) & 0xff;
+		tf->lbam = (block >> 8) & 0xff;
+		tf->lbal = block & 0xff;
 
-			/* stores LBA27:24 in lower 4 bits of device reg */
-			tf->device |= scsicmd[6];
+		tf->device |= ATA_LBA;
+	} else { 
+		/* CHS */
+		u32 sect, head, cyl, track;
+
+		/* The request -may- be too large for CHS addressing. */
+		if ((block >> 28) || (n_block > 256))
+			goto out_of_range;
+
+		/* Convert LBA to CHS */
+		track = (u32)block / dev->sectors;
+		cyl   = track / dev->heads;
+		head  = track % dev->heads;
+		sect  = (u32)block % dev->sectors + 1;
+
+		DPRINTK("block %u track %u cyl %u head %u sect %u\n",
+			(u32)block, track, cyl, head, sect);
+
+		/* Check whether the converted CHS can fit. 
+		   Cylinder: 0-65535 
+		   Head: 0-15
+		   Sector: 1-255*/
+		if ((cyl >> 16) || (head >> 4) || (sect >> 8) || (!sect))
+			goto out_of_range;
+
+		qc->nsect = n_block;
+		tf->nsect = n_block & 0xff; /* Sector count 0 means 256 sectors */
+		tf->lbal = sect;
+		tf->lbam = cyl;
+		tf->lbah = cyl >> 8;
+		tf->device |= head;
+	}
 
-			qc->nsect = scsicmd[13];
-		}
+	return 0;
 
-		tf->nsect = scsicmd[13];
-		tf->lbal = scsicmd[9];
-		tf->lbam = scsicmd[8];
-		tf->lbah = scsicmd[7];
+invalid_fld:
+	ata_scsi_set_sense(qc->scsicmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	/* "Invalid field in cbd" */
+	return 1;
 
-		VPRINTK("sixteen-byte command\n");
-		if (qc->nsect == 0) /* we don't support length==0 cmds */
-			return 1;
-		return 0;
-	}
+out_of_range:
+	ata_scsi_set_sense(qc->scsicmd, ILLEGAL_REQUEST, 0x21, 0x0);
+	/* "Logical Block Address out of range" */
+	return 1;
 
-	DPRINTK("no-byte command\n");
+nothing_to_do:
+	qc->scsicmd->result = SAM_STAT_GOOD;
 	return 1;
 }
 
@@ -1040,6 +1207,12 @@ static int ata_scsi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
  *	This function sets up an ata_queued_cmd structure for the
  *	SCSI command, and sends that ata_queued_cmd to the hardware.
  *
+ *	The xlat_func argument (actor) returns 0 if ready to execute
+ *	ATA command, else 1 to finish translation. If 1 is returned
+ *	then cmd->result (and possibly cmd->sense_buffer) are assumed
+ *	to be set reflecting an error condition or clean (early)
+ *	termination.
+ *
  *	LOCKING:
  *	spin_lock_irqsave(host_set lock)
  */
@@ -1056,7 +1229,7 @@ static void ata_scsi_translate(struct ata_port *ap, struct ata_device *dev,
 
 	qc = ata_scsi_qc_new(ap, dev, cmd, done);
 	if (!qc)
-		return;
+		goto err_mem;
 
 	/* data is present; dma-map it */
 	if (cmd->sc_data_direction == DMA_FROM_DEVICE ||
@@ -1064,7 +1237,7 @@ static void ata_scsi_translate(struct ata_port *ap, struct ata_device *dev,
 		if (unlikely(cmd->request_bufflen < 1)) {
 			printk(KERN_WARNING "ata%u(%u): WARNING: zero len r/w req\n",
 			       ap->id, dev->devno);
-			goto err_out;
+			goto err_did;
 		}
 
 		if (cmd->use_sg)
@@ -1079,18 +1252,28 @@ static void ata_scsi_translate(struct ata_port *ap, struct ata_device *dev,
 	qc->complete_fn = ata_scsi_qc_complete;
 
 	if (xlat_func(qc, scsicmd))
-		goto err_out;
+		goto early_finish;
+
 	/* select device, send command to hardware */
 	if (ata_qc_issue(qc))
-		goto err_out;
+		goto err_did;
 
 	VPRINTK("EXIT\n");
 	return;
 
-err_out:
+early_finish:
+        ata_qc_free(qc);
+	done(cmd);
+	DPRINTK("EXIT - early finish (good or error)\n");
+	return;
+
+err_did:
 	ata_qc_free(qc);
-	ata_bad_cdb(cmd, done);
-	DPRINTK("EXIT - badcmd\n");
+err_mem:
+	cmd->result = (DID_ERROR << 16);
+	done(cmd);
+	DPRINTK("EXIT - internal\n");
+	return;
 }
 
 /**
@@ -1157,7 +1340,8 @@ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf)
  *	Mapping the response buffer, calling the command's handler,
  *	and handling the handler's return value.  This return value
  *	indicates whether the handler wishes the SCSI command to be
- *	completed successfully, or not.
+ *	completed successfully (0), or not (in which case cmd->result
+ *	and sense buffer are assumed to be set).
  *
  *	LOCKING:
  *	spin_lock_irqsave(host_set lock)
@@ -1176,12 +1360,9 @@ void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
 	rc = actor(args, rbuf, buflen);
 	ata_scsi_rbuf_put(cmd, rbuf);
 
-	if (rc)
-		ata_bad_cdb(cmd, args->done);
-	else {
+	if (rc == 0)
 		cmd->result = SAM_STAT_GOOD;
-		args->done(cmd);
-	}
+	args->done(cmd);
 }
 
 /**
@@ -1487,8 +1668,16 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 	 * in the same manner)
 	 */
 	page_control = scsicmd[2] >> 6;
-	if ((page_control != 0) && (page_control != 3))
-		return 1;
+	switch (page_control) {
+	case 0: /* current */
+		break;  /* supported */
+	case 3: /* saved */
+		goto saving_not_supp;
+	case 1: /* changeable */
+	case 2: /* defaults */
+	default:
+		goto invalid_fld;
+	}
 
 	if (six_byte)
 		output_len = 4;
@@ -1519,7 +1708,7 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 		break;
 
 	default:		/* invalid page code */
-		return 1;
+		goto invalid_fld;
 	}
 
 	if (six_byte) {
@@ -1532,6 +1721,16 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 	}
 
 	return 0;
+
+invalid_fld:
+	ata_scsi_set_sense(args->cmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	/* "Invalid field in cbd" */
+	return 1;
+
+saving_not_supp:
+	ata_scsi_set_sense(args->cmd, ILLEGAL_REQUEST, 0x39, 0x0);
+	 /* "Saving parameters not supported" */
+	return 1;
 }
 
 /**
@@ -1554,10 +1753,20 @@ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf,
 
 	VPRINTK("ENTER\n");
 
-	if (ata_id_has_lba48(args->id))
-		n_sectors = ata_id_u64(args->id, 100);
-	else
-		n_sectors = ata_id_u32(args->id, 60);
+	if (ata_id_has_lba(args->id)) {
+		if (ata_id_has_lba48(args->id))
+			n_sectors = ata_id_u64(args->id, 100);
+		else
+			n_sectors = ata_id_u32(args->id, 60);
+	} else {
+		/* CHS default translation */
+		n_sectors = args->id[1] * args->id[3] * args->id[6];
+
+		if (ata_id_current_chs_valid(args->id))
+			/* CHS current translation */
+			n_sectors = ata_id_u32(args->id, 57);
+	}
+
 	n_sectors--;		/* ATA TotalUserSectors - 1 */
 
 	if (args->cmd->cmnd[0] == READ_CAPACITY) {
@@ -1620,6 +1829,34 @@ unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf,
 	return 0;
 }
 
+/**
+ *	ata_scsi_set_sense - Set SCSI sense data and status
+ *	@cmd: SCSI request to be handled
+ *	@sk: SCSI-defined sense key
+ *	@asc: SCSI-defined additional sense code
+ *	@ascq: SCSI-defined additional sense code qualifier
+ *
+ *	Helper function that builds a valid fixed format, current
+ *	response code and the given sense key (sk), additional sense
+ *	code (asc) and additional sense code qualifier (ascq) with
+ *	a SCSI command status of %SAM_STAT_CHECK_CONDITION and
+ *	DRIVER_SENSE set in the upper bits of scsi_cmnd::result .
+ *
+ *	LOCKING:
+ *	Not required
+ */
+
+void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
+{
+	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+
+	cmd->sense_buffer[0] = 0x70;	/* fixed format, current */
+	cmd->sense_buffer[2] = sk;
+	cmd->sense_buffer[7] = 18 - 8;	/* additional sense length */
+	cmd->sense_buffer[12] = asc;
+	cmd->sense_buffer[13] = ascq;
+}
+
 /**
  *	ata_scsi_badcmd - End a SCSI request with an error
  *	@cmd: SCSI request to be handled
@@ -1638,30 +1875,84 @@ unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf,
 void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq)
 {
 	DPRINTK("ENTER\n");
-	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
-
-	cmd->sense_buffer[0] = 0x70;
-	cmd->sense_buffer[2] = ILLEGAL_REQUEST;
-	cmd->sense_buffer[7] = 14 - 8;	/* addnl. sense len. FIXME: correct? */
-	cmd->sense_buffer[12] = asc;
-	cmd->sense_buffer[13] = ascq;
+	ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, asc, ascq);
 
 	done(cmd);
 }
 
+void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
+			 struct scsi_cmnd *cmd)
+{
+	DECLARE_COMPLETION(wait);
+	struct ata_queued_cmd *qc;
+	unsigned long flags;
+	int rc;
+
+	DPRINTK("ATAPI request sense\n");
+
+	qc = ata_qc_new_init(ap, dev);
+	BUG_ON(qc == NULL);
+
+	/* FIXME: is this needed? */
+	memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer));
+
+	ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer));
+	qc->dma_dir = DMA_FROM_DEVICE;
+
+	memset(&qc->cdb, 0, ap->cdb_len);
+	qc->cdb[0] = REQUEST_SENSE;
+	qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
+
+	qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	qc->tf.command = ATA_CMD_PACKET;
+
+	qc->tf.protocol = ATA_PROT_ATAPI;
+	qc->tf.lbam = (8 * 1024) & 0xff;
+	qc->tf.lbah = (8 * 1024) >> 8;
+	qc->nbytes = SCSI_SENSE_BUFFERSIZE;
+
+	qc->waiting = &wait;
+	qc->complete_fn = ata_qc_complete_noop;
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	rc = ata_qc_issue(qc);
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	if (rc)
+		ata_port_disable(ap);
+	else
+		wait_for_completion(&wait);
+
+	DPRINTK("EXIT\n");
+}
+
 static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
 {
 	struct scsi_cmnd *cmd = qc->scsicmd;
 
-	if (unlikely(drv_stat & (ATA_ERR | ATA_BUSY | ATA_DRQ))) {
+	VPRINTK("ENTER, drv_stat == 0x%x\n", drv_stat);
+
+	if (unlikely(drv_stat & (ATA_BUSY | ATA_DRQ)))
+		ata_to_sense_error(qc, drv_stat);
+
+	else if (unlikely(drv_stat & ATA_ERR)) {
 		DPRINTK("request check condition\n");
 
+		/* FIXME: command completion with check condition
+		 * but no sense causes the error handler to run,
+		 * which then issues REQUEST SENSE, fills in the sense 
+		 * buffer, and completes the command (for the second
+		 * time).  We need to issue REQUEST SENSE some other
+		 * way, to avoid completing the command twice.
+		 */
 		cmd->result = SAM_STAT_CHECK_CONDITION;
 
 		qc->scsidone(cmd);
 
 		return 1;
-	} else {
+	}
+
+	else {
 		u8 *scsicmd = cmd->cmnd;
 
 		if (scsicmd[0] == INQUIRY) {
@@ -1669,15 +1960,30 @@ static int atapi_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat)
 			unsigned int buflen;
 
 			buflen = ata_scsi_rbuf_get(cmd, &buf);
-			buf[2] = 0x5;
-			buf[3] = (buf[3] & 0xf0) | 2;
+
+	/* ATAPI devices typically report zero for their SCSI version,
+	 * and sometimes deviate from the spec WRT response data
+	 * format.  If SCSI version is reported as zero like normal,
+	 * then we make the following fixups:  1) Fake MMC-5 version,
+	 * to indicate to the Linux scsi midlayer this is a modern
+	 * device.  2) Ensure response data format / ATAPI information
+	 * are always correct.
+	 */
+	/* FIXME: do we ever override EVPD pages and the like, with
+	 * this code?
+	 */
+			if (buf[2] == 0) {
+				buf[2] = 0x5;
+				buf[3] = 0x32;
+			}
+
 			ata_scsi_rbuf_put(cmd, buf);
 		}
+
 		cmd->result = SAM_STAT_GOOD;
 	}
 
 	qc->scsidone(cmd);
-
 	return 0;
 }
 /**
@@ -2080,7 +2386,7 @@ void ata_scsi_simulate(u16 *id,
 
 		case INQUIRY:
 			if (scsicmd[1] & 2)	           /* is CmdDt set?  */
-				ata_bad_cdb(cmd, done);
+				ata_scsi_invalid_field(cmd, done);
 			else if ((scsicmd[1] & 1) == 0)    /* is EVPD clear? */
 				ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std);
 			else if (scsicmd[2] == 0x00)
@@ -2090,7 +2396,7 @@ void ata_scsi_simulate(u16 *id,
 			else if (scsicmd[2] == 0x83)
 				ata_scsi_rbuf_fill(&args, ata_scsiop_inq_83);
 			else
-				ata_bad_cdb(cmd, done);
+				ata_scsi_invalid_field(cmd, done);
 			break;
 
 		case MODE_SENSE:
@@ -2100,7 +2406,7 @@ void ata_scsi_simulate(u16 *id,
 
 		case MODE_SELECT:	/* unconditionally return */
 		case MODE_SELECT_10:	/* bad-field-in-cdb */
-			ata_bad_cdb(cmd, done);
+			ata_scsi_invalid_field(cmd, done);
 			break;
 
 		case READ_CAPACITY:
@@ -2111,7 +2417,7 @@ void ata_scsi_simulate(u16 *id,
 			if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16)
 				ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
 			else
-				ata_bad_cdb(cmd, done);
+				ata_scsi_invalid_field(cmd, done);
 			break;
 
 		case REPORT_LUNS:
@@ -2123,8 +2429,26 @@ void ata_scsi_simulate(u16 *id,
 
 		/* all other commands */
 		default:
-			ata_bad_scsiop(cmd, done);
+			ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x20, 0x0);
+			/* "Invalid command operation code" */
+			done(cmd);
 			break;
 	}
 }
 
+void ata_scsi_scan_host(struct ata_port *ap)
+{
+	struct ata_device *dev;
+	unsigned int i;
+
+	if (ap->flags & ATA_FLAG_PORT_DISABLED)
+		return;
+
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		dev = &ap->device[i];
+
+		if (ata_dev_present(dev))
+			scsi_scan_target(&ap->host->shost_gendev, 0, i, 0, 0);
+	}
+}
+

+ 6 - 10
drivers/scsi/libata.h

@@ -39,6 +39,7 @@ struct ata_scsi_args {
 
 /* libata-core.c */
 extern int atapi_enabled;
+extern int ata_qc_complete_noop(struct ata_queued_cmd *qc, u8 drv_stat);
 extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 				      struct ata_device *dev);
 extern void ata_qc_free(struct ata_queued_cmd *qc);
@@ -53,6 +54,9 @@ extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
 
 
 /* libata-scsi.c */
+extern void atapi_request_sense(struct ata_port *ap, struct ata_device *dev,
+			 struct scsi_cmnd *cmd);
+extern void ata_scsi_scan_host(struct ata_port *ap);
 extern int ata_scsi_error(struct Scsi_Host *host);
 extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf,
 			       unsigned int buflen);
@@ -77,18 +81,10 @@ extern unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf,
 extern void ata_scsi_badcmd(struct scsi_cmnd *cmd,
 			    void (*done)(struct scsi_cmnd *),
 			    u8 asc, u8 ascq);
+extern void ata_scsi_set_sense(struct scsi_cmnd *cmd,
+			       u8 sk, u8 asc, u8 ascq);
 extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
                         unsigned int (*actor) (struct ata_scsi_args *args,
                                            u8 *rbuf, unsigned int buflen));
 
-static inline void ata_bad_scsiop(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
-{
-	ata_scsi_badcmd(cmd, done, 0x20, 0x00);
-}
-
-static inline void ata_bad_cdb(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
-{
-	ata_scsi_badcmd(cmd, done, 0x24, 0x00);
-}
-
 #endif /* __LIBATA_H__ */

File diff suppressed because it is too large
+ 687 - 106
drivers/scsi/sata_mv.c


+ 8 - 8
drivers/scsi/sata_nv.c

@@ -29,6 +29,8 @@
  *  NV-specific details such as register offsets, SATA phy location,
  *  hotplug info, etc.
  *
+ *  0.09
+ *     - Fixed bug introduced by 0.08's MCP51 and MCP55 support.
  *
  *  0.08
  *     - Added support for MCP51 and MCP55.
@@ -132,9 +134,7 @@ enum nv_host_type
 	GENERIC,
 	NFORCE2,
 	NFORCE3,
-	CK804,
-	MCP51,
-	MCP55
+	CK804
 };
 
 static struct pci_device_id nv_pci_tbl[] = {
@@ -153,13 +153,13 @@ static struct pci_device_id nv_pci_tbl[] = {
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA,
-		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 },
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2,
-		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 },
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA,
-		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 },
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2,
-		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 },
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
 		PCI_ANY_ID, PCI_ANY_ID,
 		PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC },
@@ -405,7 +405,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	rc = -ENOMEM;
 
 	ppi = &nv_port_info;
-	probe_ent = ata_pci_init_native_mode(pdev, &ppi);
+	probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
 	if (!probe_ent)
 		goto err_out_regions;
 

+ 3 - 3
drivers/scsi/sata_promise.c

@@ -438,11 +438,11 @@ static inline unsigned int pdc_host_intr( struct ata_port *ap,
 		break;
 
         default:
-                ap->stats.idle_irq++;
-                break;
+		ap->stats.idle_irq++;
+		break;
         }
 
-        return handled;
+	return handled;
 }
 
 static void pdc_irq_clear(struct ata_port *ap)

+ 1 - 1
drivers/scsi/sata_sis.c

@@ -263,7 +263,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out_regions;
 
 	ppi = &sis_port_info;
-	probe_ent = ata_pci_init_native_mode(pdev, &ppi);
+	probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
 	if (!probe_ent) {
 		rc = -ENOMEM;
 		goto err_out_regions;

+ 1 - 1
drivers/scsi/sata_uli.c

@@ -202,7 +202,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out_regions;
 
 	ppi = &uli_port_info;
-	probe_ent = ata_pci_init_native_mode(pdev, &ppi);
+	probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
 	if (!probe_ent) {
 		rc = -ENOMEM;
 		goto err_out_regions;

+ 1 - 1
drivers/scsi/sata_via.c

@@ -212,7 +212,7 @@ static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev)
 	struct ata_probe_ent *probe_ent;
 	struct ata_port_info *ppi = &svia_port_info;
 
-	probe_ent = ata_pci_init_native_mode(pdev, &ppi);
+	probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
 	if (!probe_ent)
 		return NULL;
 

+ 17 - 3
include/linux/ata.h

@@ -132,6 +132,7 @@ enum {
 	ATA_CMD_PACKET		= 0xA0,
 	ATA_CMD_VERIFY		= 0x40,
 	ATA_CMD_VERIFY_EXT	= 0x42,
+	ATA_CMD_INIT_DEV_PARAMS	= 0x91,
 
 	/* SETFEATURES stuff */
 	SETFEATURES_XFER	= 0x03,
@@ -146,14 +147,14 @@ enum {
 	XFER_MW_DMA_2		= 0x22,
 	XFER_MW_DMA_1		= 0x21,
 	XFER_MW_DMA_0		= 0x20,
+	XFER_SW_DMA_2		= 0x12,
+	XFER_SW_DMA_1		= 0x11,
+	XFER_SW_DMA_0		= 0x10,
 	XFER_PIO_4		= 0x0C,
 	XFER_PIO_3		= 0x0B,
 	XFER_PIO_2		= 0x0A,
 	XFER_PIO_1		= 0x09,
 	XFER_PIO_0		= 0x08,
-	XFER_SW_DMA_2		= 0x12,
-	XFER_SW_DMA_1		= 0x11,
-	XFER_SW_DMA_0		= 0x10,
 	XFER_PIO_SLOW		= 0x00,
 
 	/* ATAPI stuff */
@@ -181,6 +182,7 @@ enum {
 	ATA_TFLAG_ISADDR	= (1 << 1), /* enable r/w to nsect/lba regs */
 	ATA_TFLAG_DEVICE	= (1 << 2), /* enable r/w to device reg */
 	ATA_TFLAG_WRITE		= (1 << 3), /* data dir: host->dev==1 (write) */
+	ATA_TFLAG_LBA		= (1 << 4), /* enable LBA */
 };
 
 enum ata_tf_protocols {
@@ -250,6 +252,18 @@ struct ata_taskfile {
 	  ((u64) (id)[(n) + 1] << 16) |	\
 	  ((u64) (id)[(n) + 0]) )
 
+static inline int ata_id_current_chs_valid(u16 *id)
+{
+	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command 
+	   has not been issued to the device then the values of 
+	   id[54] to id[56] are vendor specific. */
+	return (id[53] & 0x01) && /* Current translation valid */
+		id[54] &&  /* cylinders in current translation */
+		id[55] &&  /* heads in current translation */
+		id[55] <= 16 &&
+		id[56];    /* sectors in current translation */
+}
+
 static inline int atapi_cdb_len(u16 *dev_id)
 {
 	u16 tmp = dev_id[0] & 0x3;

+ 23 - 11
include/linux/libata.h

@@ -97,6 +97,7 @@ enum {
 	ATA_DFLAG_LBA48		= (1 << 0), /* device supports LBA48 */
 	ATA_DFLAG_PIO		= (1 << 1), /* device currently in PIO mode */
 	ATA_DFLAG_LOCK_SECTORS	= (1 << 2), /* don't adjust max_sectors */
+	ATA_DFLAG_LBA		= (1 << 3), /* device supports LBA */
 
 	ATA_DEV_UNKNOWN		= 0,	/* unknown device */
 	ATA_DEV_ATA		= 1,	/* ATA device */
@@ -154,17 +155,21 @@ enum {
 	ATA_SHIFT_UDMA		= 0,
 	ATA_SHIFT_MWDMA		= 8,
 	ATA_SHIFT_PIO		= 11,
+	
+	/* Masks for port functions */
+	ATA_PORT_PRIMARY	= (1 << 0),
+	ATA_PORT_SECONDARY	= (1 << 1),
 };
 
-enum pio_task_states {
-	PIO_ST_UNKNOWN,
-	PIO_ST_IDLE,
-	PIO_ST_POLL,
-	PIO_ST_TMOUT,
-	PIO_ST,
-	PIO_ST_LAST,
-	PIO_ST_LAST_POLL,
-	PIO_ST_ERR,
+enum hsm_task_states {
+	HSM_ST_UNKNOWN,
+	HSM_ST_IDLE,
+	HSM_ST_POLL,
+	HSM_ST_TMOUT,
+	HSM_ST,
+	HSM_ST_LAST,
+	HSM_ST_LAST_POLL,
+	HSM_ST_ERR,
 };
 
 /* forward declarations */
@@ -282,6 +287,11 @@ struct ata_device {
 	u8			xfer_protocol;	/* taskfile xfer protocol */
 	u8			read_cmd;	/* opcode to use on read */
 	u8			write_cmd;	/* opcode to use on write */
+
+	/* for CHS addressing */
+	u16			cylinders;	/* Number of cylinders */
+	u16			heads;		/* Number of heads */
+	u16			sectors;	/* Number of sectors per track */
 };
 
 struct ata_port {
@@ -319,7 +329,7 @@ struct ata_port {
 	struct work_struct	packet_task;
 
 	struct work_struct	pio_task;
-	unsigned int		pio_task_state;
+	unsigned int		hsm_task_state;
 	unsigned long		pio_task_timeout;
 
 	void			*private_data;
@@ -400,6 +410,8 @@ extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 extern int ata_scsi_error(struct Scsi_Host *host);
 extern int ata_scsi_release(struct Scsi_Host *host);
 extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
+extern int ata_ratelimit(void);
+
 /*
  * Default driver ops implementations
  */
@@ -452,7 +464,7 @@ struct pci_bits {
 
 extern void ata_pci_host_stop (struct ata_host_set *host_set);
 extern struct ata_probe_ent *
-ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port);
+ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port, int portmask);
 extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits);
 
 #endif /* CONFIG_PCI */

Some files were not shown because too many files changed in this diff